pbs 1.1.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,24 +0,0 @@
1
# Pre-defined batch servers (Torque 4.2.8).
#
# Every cluster is listed twice: once under its short name and once under its
# batch server host name (the alias key), which merges in the same settings.
# Each entry provides:
#   lib    - path of the torque shared library
#   server - host name of the batch server
#   qsub   - command line used to invoke qsub

# Oakley
oakley: &oakley
  lib: '/usr/local/torque-4.2.8/lib/libtorque.so'
  server: &oakley_server 'oak-batch.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque-4.2.8/lib:$LD_LIBRARY_PATH /usr/local/torque-4.2.8/bin/qsub'
*oakley_server:
  <<: *oakley

# Ruby
ruby: &ruby
  lib: '/usr/local/torque-4.2.8/lib/libtorque.so'
  server: &ruby_server 'ruby-batch.ten.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque-4.2.8/lib:$LD_LIBRARY_PATH /usr/local/torque-4.2.8/bin/qsub'
*ruby_server:
  <<: *ruby

# Oxymoron (same host as Oakley, explicit port)
oxymoron: &oxymoron
  lib: '/usr/local/torque-4.2.8/lib/libtorque.so'
  server: &oxymoron_server 'oak-batch.osc.edu:17001'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque-4.2.8/lib:$LD_LIBRARY_PATH /usr/local/torque-4.2.8/bin/qsub'
*oxymoron_server:
  <<: *oxymoron

# Quick
quick: &quick
  lib: '/usr/local/torque-4.2.8/lib/libtorque.so'
  server: &quick_server 'quick-batch.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque-4.2.8/lib:$LD_LIBRARY_PATH /usr/local/torque-4.2.8/bin/qsub'
*quick_server:
  <<: *quick
@@ -1,18 +0,0 @@
1
# Pre-defined batch servers (Torque 5.1.1-1_fba25d92).
#
# Every cluster is listed twice: once under its short name and once under its
# batch server host name (the alias key), which merges in the same settings.
# Each entry provides:
#   lib    - path of the torque shared library
#   server - host name of the batch server
#   qsub   - command line used to invoke qsub

# Oakley
oakley: &oakley
  lib: '/usr/local/torque/5.1.1-1_fba25d92/lib/libtorque.so'
  server: &oakley_server 'oak-batch.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque/5.1.1-1_fba25d92/lib:$LD_LIBRARY_PATH /usr/local/torque/5.1.1-1_fba25d92/bin/qsub'
*oakley_server:
  <<: *oakley

# Ruby
ruby: &ruby
  lib: '/usr/local/torque/5.1.1-1_fba25d92/lib/libtorque.so'
  server: &ruby_server 'ruby-batch.ten.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque/5.1.1-1_fba25d92/lib:$LD_LIBRARY_PATH /usr/local/torque/5.1.1-1_fba25d92/bin/qsub'
*ruby_server:
  <<: *ruby

# Quick
quick: &quick
  lib: '/usr/local/torque/5.1.1-1_fba25d92/lib/libtorque.so'
  server: &quick_server 'quick-batch.osc.edu'
  qsub: 'LD_LIBRARY_PATH=/usr/local/torque/5.1.1-1_fba25d92/lib:$LD_LIBRARY_PATH /usr/local/torque/5.1.1-1_fba25d92/bin/qsub'
*quick_server:
  <<: *quick
@@ -1,66 +0,0 @@
1
require 'pbs'
require 'yaml'

# Walk-through of the PBS library: inspect the batch server, list the current
# user's jobs, then submit a small job and hold, release and delete it.

# Set up connection to the pre-defined 'oakley' batch server
conn = PBS::Conn.batch 'oakley'

# Check info for the batch server
server_query = PBS::Query.new(conn: conn, type: :server)
puts "# Batch server information ---"
puts server_query.find.to_yaml
puts ""

# Check if I have any jobs currently in the batch; only the state and owner
# attributes are requested from the server
job_query = PBS::Query.new(conn: conn, type: :job)
filters = [PBS::ATTR[:state], PBS::ATTR[:owner]]
puts "# All jobs you currently have in the batch ---"
puts job_query.where.user(ENV['USER']).find(filters: filters).to_yaml
puts ""

# Set up new job
job = PBS::Job.new(conn: conn)

headers   = { PBS::ATTR[:N] => "SimpleJob" }
resources = { walltime: "00:10:00" }
envvars   = { WORLD: "world" }
script    = "echo \"Hello ${WORLD}!\""

# Submit new job through the qsub command line and print the assigned job id
puts "# Submitting new job ---"
puts job.submit(string: script, headers: headers, resources: resources, envvars: envvars, qsub: true).id
puts ""

# Show details of new job
puts "# Details of submitted job ---"
puts job.status.to_yaml
puts ""

# Hold job
puts "# Holding job now ---"
job.hold
puts job.status(filter: PBS::ATTR[:state]).to_yaml
puts ""

# Three ways of filtering on the job state: a block on a single attribute,
# `where.not` and `where.is`
puts "# All running jobs on hold ---"
puts job_query.where.user(ENV['USER']).where(PBS::ATTR[:state]) { |v| v == 'H' }.find(filters: filters).to_yaml
puts ""
puts "# All running jobs not on hold ---"
puts job_query.where.user(ENV['USER']).where.not(PBS::ATTR[:state] => 'H').find(filters: filters).to_yaml
puts ""
# This query selects state 'H', i.e. the same held jobs as the block form
# above, so it is labelled as such rather than as "not queued".
puts "# All jobs on hold (using where.is) ---"
puts job_query.where.user(ENV['USER']).where.is(PBS::ATTR[:state] => 'H').find(filters: filters).to_yaml
puts ""

# Release job
puts "# Releasing job now ---"
job.release
puts job.status(filter: PBS::ATTR[:state]).to_yaml
puts ""

# Delete submitted job
puts "# Deleting job now ---"
job.delete
puts "Complete."
puts ""
@@ -1,75 +0,0 @@
1
module PBS
  # Holds the settings needed to talk to a Torque batch server (library,
  # server host and qsub command) together with the id of the currently open
  # connection, if any.
  class Conn
    # @example Torque 4.2.8
    #   "/usr/local/torque-4.2.8/lib/libtorque.so"
    # @return [String] The torque library to use for connection.
    attr_reader :lib

    # @example Oakley
    #   "oak-batch.osc.edu"
    # @return [String, nil] The batch server to connect to.
    attr_reader :server

    # @example Torque 4.2.8
    #   "LD_LIBRARY_PATH=/usr/local/torque-4.2.8/lib:$LD_LIBRARY_PATH /usr/local/torque-4.2.8/bin/qsub"
    # @return [String] The qsub command to be called from the command line.
    attr_reader :qsub

    # @return [Integer, nil] The connection id number if connected.
    attr_reader :conn_id

    # Create a new connection object from a pre-defined batch server defined
    # in the batch config yaml. Values given in +opts+ take precedence over
    # the ones found in the config entry.
    # @example Create Oakley connection
    #   PBS::Conn.batch 'oakley'
    #
    # @param name [String] The name of the pre-defined batch server.
    # @param opts [Hash] The options to create a connection object with.
    # @option opts [String] :lib The torque library used to establish connection.
    # @option opts [String] :server The batch server to connect to.
    # @option opts [String] :qsub The qsub command to be called from the command line.
    # @return [Conn] The connection object (not yet connected).
    # @raise [Error] if pre-defined batch server doesn't exist.
    def self.batch(name, opts = {})
      context = PBS.batch_config[name] || raise(PBS::Error, "No pre-defined batch server (#{name})")
      settings = %i[lib server qsub].each_with_object({}) do |key, acc|
        # config entries are keyed by strings, options by symbols
        acc[key] = opts[key] || context.fetch(key.to_s, nil)
      end
      Conn.new(settings)
    end

    # @param opts [Hash] The options to create a connection object with.
    #   May be omitted entirely, in which case the defaults below apply.
    # @option opts [String] :lib ("torque") The torque library used to establish connection.
    # @option opts [String] :server (nil) The batch server to connect to.
    # @option opts [String] :qsub ("qsub") The qsub command to be called from the command line.
    def initialize(opts = {})
      @lib    = opts[:lib] || "torque"
      @server = opts[:server]
      @qsub   = opts[:qsub] || "qsub"
    end

    # Creates a torque connection, closing a previously opened one first.
    #
    # @return [Integer] The connection id.
    def connect
      Torque.init lib: lib                              # reset library used in Torque
      disconnect if connected?                          # clean up any old connection
      @conn_id = Torque.pbs_connect(server)
      Torque.raise_error(@conn_id.abs) if @conn_id < 0  # raise error if negative conn_id
      Torque.check_for_error                            # check for any other error that slipped by
      conn_id
    end

    # Disconnects from the connection and sets the connection id to nil.
    # Nothing is sent to the torque library when no connection id is set.
    #
    # @return [nil]
    def disconnect
      Torque.pbs_disconnect(@conn_id) unless @conn_id.nil?
      @conn_id = nil # reset connection id
    end

    # Returns true if the connection id is not nil and is greater than zero.
    #
    # @return [Boolean] Are we connected?
    def connected?
      !@conn_id.nil? && @conn_id > 0
    end
  end
end
@@ -1,189 +0,0 @@
1
- require "socket"
2
- require "tempfile"
3
- require "open3"
4
-
5
module PBS
  # A single batch job: submit it, query its status, hold, release or delete it.
  class Job
    # Host name of the submitting machine; prefixed to the output and error
    # paths when submitting through the torque library.
    HOSTNAME = Socket.gethostname

    # @return [String, nil] The job id; set by {#submit} or given to {#initialize}.
    attr_accessor :id

    # @return [Conn] The connection used to reach the batch server.
    attr_reader :conn

    # @param args [Hash] The options to create a job object with.
    # @option args [String] :id The id of an already existing job.
    # @option args [Conn] :conn The connection object; a default one is created if omitted.
    def initialize(args = {})
      @id   = args[:id]
      # an explicit empty hash is passed so this works whether or not
      # Conn#initialize declares a default for its options argument
      @conn = args[:conn] || Conn.new({})
    end

    # Put job on hold.
    #
    # @param args [Hash] The options when holding a job.
    # @option args [String] :hold_type ("u") The type of hold to be applied:
    #   "u" : Available to the owner of the job, the batch operator and the batch administrator.
    #   "o" : Available to the batch operator and the batch administrator.
    #   "s" : Available only to the batch administrator.
    # @return [Job] self
    def hold(args = {})
      _pbs_hold(args[:hold_type] || "u")
      self
    end

    # Release job from hold.
    #
    # @param args [Hash] The options when releasing a job.
    # @option args [String] :hold_type ("u") The type of hold to be released;
    #   same values as for {#hold}.
    # @return [Job] self
    def release(args = {})
      _pbs_release(args[:hold_type] || "u")
      self
    end

    # Delete job.
    #
    # @param args [Hash] Currently unused.
    def delete(args = {})
      _pbs_delete
    end

    # Get status of job by creating a Query object.
    #
    # @param args [Hash] Options passed on to Query#find (the job id is added).
    # @return [Hash, nil] The first entry of the query result.
    def status(args = {})
      Query.new(type: :job, conn: conn).find(args.merge(id: id)).first
    end

    # Can submit a script as a file or string.
    # @param args [Hash] The options when submitting a job.
    # @option args [String] :string The batch script as a string.
    # @option args [String] :file The batch script file if a string is not supplied.
    # @option args [String] :queue (nil) The queue to submit to.
    # @option args [Boolean] :qsub (true) Whether the <tt>qsub</tt> command is used from command line.
    # @option args [Hash] :headers ({}) PBS headers.
    # @option args [Hash] :resources ({}) PBS resources.
    # @option args [Hash] :envvars ({}) PBS environment variables.
    # @return [Job] self, with {#id} set to the id of the submitted job.
    # @raise [Error] if fail to submit batch job.
    def submit(args)
      # File.read closes the file again, unlike File.open(...).read
      string    = args.fetch(:string) { File.read(args[:file]) }
      queue     = args.fetch(:queue, nil)
      qsub      = args.fetch(:qsub, true)

      headers   = args.fetch(:headers, {})
      resources = args.fetch(:resources, {})
      envvars   = args.fetch(:envvars, {})

      # Create batch script in tmp file, submit, remove tmp file
      script = Tempfile.new('qsub.')
      begin
        script.write string
        script.close
        if qsub
          _qsub_submit(script.path, queue, headers, resources, envvars)
        else
          _pbs_submit(script.path, queue, headers, resources, envvars)
        end
      ensure
        script.unlink # deletes the temp file
      end

      self
    end

    private

    # Connect to batch server, put job on hold,
    # disconnect, and finally check for errors
    def _pbs_hold(hold_type)
      conn.connect unless conn.connected?
      Torque.pbs_holdjob(conn.conn_id, id, hold_type, nil)
      conn.disconnect
      Torque.check_for_error
    end

    # Connect to batch server, release job from hold,
    # disconnect, and finally check for errors
    def _pbs_release(hold_type)
      conn.connect unless conn.connected?
      Torque.pbs_rlsjob(conn.conn_id, id, hold_type, nil)
      conn.disconnect
      Torque.check_for_error
    end

    # Connect to batch server, delete job,
    # disconnect, and finally check for errors
    def _pbs_delete
      conn.connect unless conn.connected?
      Torque.pbs_deljob(conn.conn_id, id, nil)
      conn.disconnect
      Torque.check_for_error
    end

    # Connect to server, submit job with headers,
    # disconnect, and finally check for errors
    def _pbs_submit(script, queue, headers, resources, envvars)
      # Generate attribute hash for this job
      attribs = _default_headers.merge(headers)
      attribs[ATTR[:l]] = _default_resources.merge(resources)
      attribs[ATTR[:v]] = _default_envvars.merge(envvars).map { |k, v| "#{k}=#{v}" }.join(",")

      # Prefix output/error paths with the submitting host. New strings are
      # built so that strings owned by the caller's headers hash are not
      # modified in place.
      [ATTR[:o], ATTR[:e]].each do |key|
        attribs[key] = "#{HOSTNAME}:#{attribs[key]}"
      end

      # Submit job
      conn.connect unless conn.connected?
      attropl = Torque::Attropl.from_hash(attribs)
      self.id = Torque.pbs_submit(conn.conn_id, attropl, script, queue, nil)
      conn.disconnect
      Torque.check_for_error
    end

    # Submit using system call `qsub`
    # Note: Do not need to filter as OSC has personal torque filter
    #
    # NOTE(review): all values are interpolated into a shell command line and
    # are only wrapped in single quotes; callers must not pass untrusted input.
    def _qsub_submit(script, queue, headers, resources, envvars)
      cmd = [conn.qsub, *_qsub_params(queue, headers, resources, envvars), script].join(" ")

      # capture3 drains stdout and stderr while the command runs, so a chatty
      # command cannot block on a full pipe before its exit status is read
      out, err, exit_status = Open3.capture3(cmd)
      raise PBS::Error, err unless exit_status.success?

      self.id = out.chomp # newline char at end of job id
    end

    # Command line parameters for `qsub`, in the order:
    # destination, resources, environment variables, headers
    def _qsub_params(queue, headers, resources, envvars)
      params = []
      destination = _qsub_destination(queue)
      params << "-q #{destination}" if destination
      resources.each { |k, v| params << "-l '#{k}=#{v}'" }
      params << "-v '#{envvars.map { |k, v| "#{k}=#{v}" }.join(",")}'" unless envvars.empty?
      headers.each do |k, v|
        # headers known to ATTR under a one-letter name have their own qsub
        # flag, everything else is passed through -W
        param = ATTR.key(k)
        params << (param && param.length == 1 ? "-#{param} '#{v}'" : "-W '#{k}=#{v}'")
      end
      params
    end

    # Destination for `qsub -q` built from the parts that are present:
    # "queue@server", "@server" or "queue"; nil when neither is known
    def _qsub_destination(queue)
      server = conn.server
      return nil if queue.nil? && server.nil?
      server ? "#{queue}@#{server}" : queue.to_s
    end

    # Hash representing the job headers
    def _default_headers
      {
        ATTR[:N] => "Jobname",
        ATTR[:o] => "#{Dir.pwd}/",
        ATTR[:e] => "#{Dir.pwd}/",
        ATTR[:S] => "/bin/bash",
      }
    end

    # Hash representing the resources used
    def _default_resources
      {
        walltime: "01:00:00",
      }
    end

    # Hash representing the PBS working directory
    def _default_envvars
      {
        PBS_O_WORKDIR: "#{Dir.pwd}",
      }
    end
  end
end
@@ -1,103 +0,0 @@
1
module PBS
  # Queries the batch server for the status of jobs, queues, nodes or the
  # server itself, and filters the result through chainable `where` clauses.
  class Query
    # @return [Symbol] The kind of object queried (a key of STATTYPE).
    attr_reader :type

    # @return [Conn] The connection used to reach the batch server.
    attr_reader :conn

    # @return [Array<Proc, nil>] The filters applied by {#find}; a nil entry
    #   is a placeholder left by a bare {#where} awaiting is/not/user.
    attr_accessor :where_procs

    # Maps the query type to the torque status function to call
    STATTYPE = {job: :pbs_statjob, queue: :pbs_statque,
                node: :pbs_statnode, server: :pbs_statserver}

    # Needs a connection object and a query type
    # Query types: :job, :queue, :server, :node
    #
    # @param args [Hash] The options to create a query with.
    # @option args [Conn] :conn The connection object; a default one is created if omitted.
    # @option args [Symbol] :type (:job) The query type.
    def initialize(args = {})
      # an explicit empty hash is passed so this works whether or not
      # Conn#initialize declares a default for its options argument
      @conn = args[:conn] || Conn.new({})
      @type = args[:type] || :job
      @where_procs = []
    end

    # Boolean procs used to filter out query results. Returns a copy of this
    # query with the new filter appended; the receiver is left unchanged.
    # Examples:
    #   where {|h| h[PBS::ATTR[:N]] == "SimpleJob"}
    #   where(PBS::ATTR[:N]) {|v| v == "SimpleJob"}
    #   where
    # the last one is used with other methods
    # i.e., where.not(PBS::ATTR[:N] => "SimpleJob")
    #
    # @param arg [Object, nil] Attribute key whose value is passed to the block;
    #   when nil the block receives the whole attribute hash.
    # @return [Query] The new query.
    def where(arg = nil, &block)
      relation = clone
      relation.where_procs = where_procs.dup
      relation.where_procs << (arg ? proc { |h| block.call(h[arg]) } : block)
      relation
    end

    # Used to filter where key attrib is equal to value
    #   where.is(PBS::ATTR[:N] => "SimpleJob")
    #
    # @param hash [Hash] A single key/value pair (only the first pair is used).
    # @return [Query] self
    # @raise [Error] if not called directly after a bare `where`.
    def is(hash)
      key, value = hash.first
      _fill_pending_where { |h| h[key] == value }
    end

    # Used to filter where key attrib is NOT equal to value
    #   where.not(PBS::ATTR[:N] => "SimpleJob")
    #
    # @param hash [Hash] A single key/value pair (only the first pair is used).
    # @return [Query] self
    # @raise [Error] if not called directly after a bare `where`.
    def not(hash)
      key, value = hash.first
      _fill_pending_where { |h| h[key] != value }
    end

    # Used to filter specific user
    #   where.user("username")
    #
    # The owner attribute has to start with "<name>@". The name is compared
    # literally, so characters such as "." in a user name are not treated as
    # pattern wildcards.
    #
    # @param name [String] The user name.
    # @return [Query] self
    # @raise [Error] if not called directly after a bare `where`.
    def user(name)
      prefix = "#{name}@"
      _fill_pending_where { |h| h[ATTR[:owner]].to_s.start_with?(prefix) }
    end

    # Runs the query and applies the `where` filters to the result.
    #
    # @param args [Hash] The options for the query.
    # @option args [String] :id (nil) Restrict the query to this id.
    # @option args [Array] :filters Attributes to request from the server.
    # @option args [Object] :filter A single attribute; overrides :filters.
    # @return [Array<Hash>] The batch status hashes that pass all filters.
    def find(args = {})
      filters = args[:filter] ? [args[:filter]] : args[:filters]

      # Get array of batch status hashes, then filter it further
      _filter_where_values(_pbs_batchstat(args[:id], filters))
    end

    # Filter an array of hashes based on the defined where procs
    # Comparisons are done inside the :attribs hash only
    # A bare `where` that was never completed leaves a nil placeholder, which
    # is ignored here.
    def _filter_where_values(array)
      checks = where_procs.compact
      array.select do |hash|
        checks.all? { |check| check.call(hash[:attribs]) }
      end
    end

    # Connect, get status on batch server,
    # disconnect, parse output, and finally check for errors
    # Don't forget to free up memory the C-library creates
    def _pbs_batchstat(id, filters)
      # Generate attribute list from filter list
      attrib_list = PBS::Torque::Attrl.from_list(filters) if filters

      conn.connect unless conn.connected?
      # the server status call is the only one that takes no id argument
      stat_args = type == :server ? [attrib_list, nil] : [id, attrib_list, nil]
      batch_status = Torque.send(STATTYPE[type], conn.conn_id, *stat_args)
      conn.disconnect
      batch_list = batch_status.to_a
      Torque.pbs_statfree(batch_status)
      Torque.check_for_error
      batch_list
    end

    private

    # Replaces the nil placeholder left by a bare `where` with the given
    # condition; raises unless such a placeholder is the last entry
    def _fill_pending_where(&condition)
      raise PBS::Error, "`where' method not called before" if where_procs.empty? || where_procs[-1]
      where_procs[-1] = condition
      self
    end
  end
end