postjob 0.5.15 → 0.5.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/postjob.rb +4 -0
- data/lib/postjob/cli.rb +1 -1
- data/lib/postjob/cli/heartbeat.rb +6 -6
- data/lib/postjob/cli/hosts.rb +1 -1
- data/lib/postjob/cli/run.rb +60 -8
- data/lib/postjob/cli/sessions.rb +1 -0
- data/lib/postjob/host.rb +22 -13
- data/lib/postjob/migrations/012_hosts.sql +2 -2
- data/lib/postjob/migrations/016_sessions_functions.sql +36 -1
- data/lib/postjob/queue/notifications.rb +1 -1
- data/lib/postjob/registry.rb +12 -0
- data/lib/postjob/worker_session.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: b740f5e877d921e9581484fd0f4e43d7e5dc6152
|
|
4
|
+
data.tar.gz: 736ef026988aa18e7d9e1f23c11a0e1876b62815
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 539d5be9482e50c5b911cb65f5ad689e2dd4a25dbada7adc50c8b96f9d6d06a2527edc09df285aae11185589d6918db6f63352145ba77f7bee0706a49321a48a
|
|
7
|
+
data.tar.gz: eab7349a9dab72cdf7231d0d88cd2b7ec83085eb12be8063645eda74a6f16dd5d471e843f7eb042008311768a15b987224520d67741ec190d42550342457ee53
|
data/lib/postjob.rb
CHANGED
data/lib/postjob/cli.rb
CHANGED
|
@@ -10,12 +10,12 @@ module Postjob::CLI
|
|
|
10
10
|
SELECT
|
|
11
11
|
name,
|
|
12
12
|
postjob_id AS job_id,
|
|
13
|
-
host_id,
|
|
14
|
-
(attributes->>'uptime')::interval AS uptime,
|
|
15
|
-
to_char((attributes->>'cpu_load_1min')::float, '99D99') AS cpu_load,
|
|
16
|
-
attributes->>'net_in_1min' AS net_in,
|
|
17
|
-
attributes->>'net_out_1min' AS net_out,
|
|
18
|
-
attributes->>'net_errors_1min' AS net_errors,
|
|
13
|
+
events.host_id,
|
|
14
|
+
(events.attributes->>'uptime')::interval AS uptime,
|
|
15
|
+
to_char((events.attributes->>'cpu_load_1min')::float, '99D99') AS cpu_load,
|
|
16
|
+
events.attributes->>'net_in_1min' AS net_in,
|
|
17
|
+
events.attributes->>'net_out_1min' AS net_out,
|
|
18
|
+
events.attributes->>'net_errors_1min' AS net_errors,
|
|
19
19
|
now() at time zone 'utc' - events.created_at AS age
|
|
20
20
|
FROM postjob.events events
|
|
21
21
|
LEFT JOIN postjob.worker_sessions worker_sessions ON events.worker_session_id=worker_sessions.id
|
data/lib/postjob/cli/hosts.rb
CHANGED
|
@@ -72,7 +72,7 @@ module Postjob::CLI
|
|
|
72
72
|
def host_shutdown
|
|
73
73
|
connect_to_database!
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
Postjob::Host.shutdown!(host_id: ::Postjob.host_id)
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
# Set the host to running again
|
data/lib/postjob/cli/run.rb
CHANGED
|
@@ -1,11 +1,39 @@
|
|
|
1
|
-
# rubocop:disable Metrics/PerceivedComplexity
|
|
2
|
-
|
|
3
1
|
module Postjob::CLI
|
|
4
2
|
# Run a single job
|
|
5
3
|
def step(count: 1, queue: nil, host_id: nil)
|
|
6
4
|
run count: count, queue: queue, host_id: host_id, heartbeat: false
|
|
7
5
|
end
|
|
8
6
|
|
|
7
|
+
# Start the control connection
|
|
8
|
+
#
|
|
9
|
+
# The control connection runs the heartbeat, and checks for jobs on the
|
|
10
|
+
# control queue.
|
|
11
|
+
def run_control(host_id: nil)
|
|
12
|
+
# By running the "control" queue this code is quite efficient, it does
|
|
13
|
+
# not poll the database.
|
|
14
|
+
_run(queue: "control", host_id: host_id, heartbeat: true)
|
|
15
|
+
|
|
16
|
+
host_id ||= Postjob.host_id
|
|
17
|
+
|
|
18
|
+
# Poll until there are no more working sessions.
|
|
19
|
+
# [TODO] - improve by listening.
|
|
20
|
+
wait_for_no_running_session(host_id: host_id)
|
|
21
|
+
|
|
22
|
+
run_control_shutdown(host_id: host_id)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
# This is called after the control connection did shut down
|
|
28
|
+
#
|
|
29
|
+
# This method can be reimplemented in a plugin to allow shutting down
|
|
30
|
+
# worker machines.
|
|
31
|
+
def run_control_shutdown(host_id: nil)
|
|
32
|
+
Postjob.logger.success "postjob control:shutdown host_id: #{host_id}"
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
public
|
|
36
|
+
|
|
9
37
|
# Run postjobs.
|
|
10
38
|
#
|
|
11
39
|
# This method runs jobs as they become ready.
|
|
@@ -16,12 +44,23 @@ module Postjob::CLI
|
|
|
16
44
|
# - --queue=queue1,queue2,queue3 run only the specified queues.
|
|
17
45
|
# - --heartbeat=no don't start heartbeat process.
|
|
18
46
|
#
|
|
19
|
-
def run(count: nil, queue: nil, fast: false, host_id: nil
|
|
20
|
-
expect! Integer(host_id, 16) => 1..0xffffffff if host_id
|
|
47
|
+
def run(count: nil, queue: nil, fast: false, host_id: nil)
|
|
21
48
|
count = Integer(count) if count
|
|
49
|
+
processed = _run(count: count, queue: queue, fast: fast, host_id: host_id, heartbeat: false)
|
|
50
|
+
|
|
51
|
+
if !count || processed < count
|
|
52
|
+
# The runner has been shut down externally. Wait for interrupt.
|
|
53
|
+
Postjob.logger.success "External shut down initiated."
|
|
54
|
+
STDERR.puts "External shut down initiated. Press ^C to terminate process."
|
|
55
|
+
STDIN.read
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def _run(count: nil, queue:, fast: false, host_id:, heartbeat:)
|
|
62
|
+
expect! Integer(host_id, 16) => 1..0xffffffff if host_id
|
|
22
63
|
|
|
23
|
-
expect! heartbeat => [ "yes", "no" ] if heartbeat.is_a?(String)
|
|
24
|
-
heartbeat = %w(yes true).include?(heartbeat) if heartbeat.is_a?(String)
|
|
25
64
|
expect! heartbeat => [ true, false ]
|
|
26
65
|
|
|
27
66
|
Postjob.fast_mode = (fast ? true : false)
|
|
@@ -33,12 +72,25 @@ module Postjob::CLI
|
|
|
33
72
|
Postjob.logger.info "Using host_id: #{Postjob::Host.host_id}"
|
|
34
73
|
end
|
|
35
74
|
|
|
36
|
-
logger.success "Starting runner with pid #{$$}"
|
|
37
|
-
|
|
38
75
|
processed = Postjob.run(count: count, queues: queue&.split(","), heartbeat: heartbeat) do |job_id|
|
|
39
76
|
logger.info "Processed job w/id #{job_id}" if job_id
|
|
40
77
|
end
|
|
41
78
|
|
|
42
79
|
logger.info "Processed #{processed} jobs"
|
|
80
|
+
|
|
81
|
+
processed
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def wait_for_no_running_session(host_id:)
|
|
85
|
+
Postjob.logger.debug "Waiting for shutdown"
|
|
86
|
+
loop do
|
|
87
|
+
count = Simple::SQL.ask "SELECT COUNT (*) FROM postjob.worker_sessions WHERE id=$1", host_id
|
|
88
|
+
Postjob.logger.info "#{host_id}: #{count} running sessions"
|
|
89
|
+
|
|
90
|
+
break if count == 0
|
|
91
|
+
sleep 0.2
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
Postjob.logger.info "#{host_id}: no more running sessions"
|
|
43
95
|
end
|
|
44
96
|
end
|
data/lib/postjob/cli/sessions.rb
CHANGED
|
@@ -11,6 +11,7 @@ module Postjob::CLI
|
|
|
11
11
|
SELECT
|
|
12
12
|
worker_sessions.id,
|
|
13
13
|
(substring(worker_sessions.host_id::varchar for 9) || '...') AS host_id,
|
|
14
|
+
worker_sessions.status,
|
|
14
15
|
array_to_string(worker_sessions.queues, ', ') AS queues,
|
|
15
16
|
worker_sessions.client_socket,
|
|
16
17
|
worker_sessions.workflows,
|
data/lib/postjob/host.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require_relative "./record"
|
|
2
2
|
require "tempfile"
|
|
3
|
+
require "zlib"
|
|
3
4
|
|
|
4
5
|
class Postjob::Host < Postjob::Record
|
|
5
6
|
attr_reader :id
|
|
@@ -29,6 +30,10 @@ class Postjob::Host < Postjob::Record
|
|
|
29
30
|
@host_id ||= atomic_set_and_get(storage_path) { register_host(host_id: nil) }
|
|
30
31
|
end
|
|
31
32
|
|
|
33
|
+
def shutdown!(host_id:)
|
|
34
|
+
Simple::SQL.ask "UPDATE postjob.hosts SET status='shutdown' WHERE id=$1::uuid", host_id
|
|
35
|
+
end
|
|
36
|
+
|
|
32
37
|
private
|
|
33
38
|
|
|
34
39
|
# This method returns the path to a file which will hold the host_id. Two runners
|
|
@@ -40,28 +45,32 @@ class Postjob::Host < Postjob::Record
|
|
|
40
45
|
# would be even better - however, our systems do not have a user-writable /var).
|
|
41
46
|
def storage_path
|
|
42
47
|
@storage_path ||= begin
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
storage_path = File.join Dir.tmpdir, "postjob.#{env}.#{Process.uid}.#{here.hash.abs.to_s(36)}.host_id"
|
|
46
|
-
Simple::SQL.logger.info "Keeping host identifier in #{storage_path}"
|
|
47
|
-
storage_path
|
|
48
|
+
here_hash = Zlib.crc32(Dir.getwd).to_s(36)
|
|
49
|
+
File.join Dir.tmpdir, "postjob.#{Postjob.env}.#{Process.uid}.#{here_hash}.host_id"
|
|
48
50
|
end
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
def atomic_set_and_get(path)
|
|
52
|
-
value = nil
|
|
53
|
-
|
|
54
54
|
File.open(path, File::RDWR | File::CREAT, 0644) do |f|
|
|
55
55
|
f.flock(File::LOCK_EX)
|
|
56
56
|
|
|
57
57
|
value = f.read
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
|
|
59
|
+
if value == "" || value.nil?
|
|
60
|
+
value = yield
|
|
61
|
+
|
|
62
|
+
f.rewind
|
|
63
|
+
f.write(value)
|
|
64
|
+
f.flush
|
|
65
|
+
f.truncate(f.pos)
|
|
66
|
+
|
|
67
|
+
Postjob.logger.info "Registering new host with host_id #{value}, in #{path}"
|
|
68
|
+
else
|
|
69
|
+
Postjob.logger.info "Reusing host_id #{value}, from #{path}"
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
value
|
|
63
73
|
end
|
|
64
|
-
value
|
|
65
74
|
end
|
|
66
75
|
|
|
67
76
|
def register_host(host_id:)
|
|
@@ -62,8 +62,8 @@ AS $$
|
|
|
62
62
|
v_id := gen_random_uuid();
|
|
63
63
|
END IF;
|
|
64
64
|
|
|
65
|
-
INSERT INTO {SCHEMA_NAME}.hosts (id, attributes)
|
|
66
|
-
VALUES (v_id, p_attrs)
|
|
65
|
+
INSERT INTO {SCHEMA_NAME}.hosts (id, attributes, status)
|
|
66
|
+
VALUES (v_id, p_attrs, 'running')
|
|
67
67
|
ON CONFLICT(id) DO UPDATE SET attributes=p_attrs;
|
|
68
68
|
RETURN v_id;
|
|
69
69
|
END;
|
|
@@ -13,13 +13,48 @@ BEGIN
|
|
|
13
13
|
INSERT INTO {SCHEMA_NAME}.worker_sessions (host_id, client_socket, workflows, queues)
|
|
14
14
|
VALUES (p_host_id, v_client_socket, p_workflows, p_queues) RETURNING id INTO v_worker_session_id;
|
|
15
15
|
|
|
16
|
+
UPDATE {SCHEMA_NAME}.hosts
|
|
17
|
+
SET status = 'running'
|
|
18
|
+
WHERE id=p_host_id;
|
|
19
|
+
|
|
16
20
|
RETURN QUERY SELECT * FROM {SCHEMA_NAME}.worker_sessions WHERE id = v_worker_session_id;
|
|
17
21
|
END;
|
|
18
22
|
$$ LANGUAGE plpgsql;
|
|
19
23
|
|
|
20
24
|
CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.worker_session_stop(p_worker_session_id UUID)
|
|
21
25
|
RETURNS VOID AS $$
|
|
26
|
+
DECLARE
|
|
27
|
+
v_host_id uuid;
|
|
22
28
|
BEGIN
|
|
23
|
-
UPDATE {SCHEMA_NAME}.worker_sessions
|
|
29
|
+
UPDATE {SCHEMA_NAME}.worker_sessions
|
|
30
|
+
SET status='stopped' WHERE id=p_worker_session_id;
|
|
24
31
|
END;
|
|
25
32
|
$$ LANGUAGE plpgsql;
|
|
33
|
+
|
|
34
|
+
-- wakeup runners after changing hosts
|
|
35
|
+
|
|
36
|
+
-- when a host changes its status to shutdown, all of its runners should
|
|
37
|
+
-- shutdown quickly.
|
|
38
|
+
|
|
39
|
+
CREATE OR REPLACE FUNCTION {SCHEMA_NAME}._recalculate_host_status() RETURNS TRIGGER AS $$
|
|
40
|
+
BEGIN
|
|
41
|
+
IF NOT EXISTS (
|
|
42
|
+
SELECT 1 FROM {SCHEMA_NAME}.worker_sessions WHERE id=NEW.host_id AND status = 'running'
|
|
43
|
+
) THEN
|
|
44
|
+
UPDATE {SCHEMA_NAME}.hosts
|
|
45
|
+
SET status = 'stopped'
|
|
46
|
+
WHERE id=NEW.host_id;
|
|
47
|
+
END IF;
|
|
48
|
+
|
|
49
|
+
RETURN NEW;
|
|
50
|
+
END;
|
|
51
|
+
$$ LANGUAGE plpgsql;
|
|
52
|
+
|
|
53
|
+
BEGIN;
|
|
54
|
+
DROP TRIGGER IF EXISTS _recalculate_host_status ON {SCHEMA_NAME}.worker_sessions;
|
|
55
|
+
|
|
56
|
+
CREATE TRIGGER _recalculate_host_status AFTER UPDATE
|
|
57
|
+
ON {SCHEMA_NAME}.worker_sessions
|
|
58
|
+
FOR EACH ROW
|
|
59
|
+
EXECUTE PROCEDURE {SCHEMA_NAME}._recalculate_host_status();
|
|
60
|
+
COMMIT;
|
|
@@ -20,7 +20,7 @@ module Postjob::Queue::Notifications
|
|
|
20
20
|
return if wait_time && wait_time <= 0
|
|
21
21
|
|
|
22
22
|
if !wait_time && ::Postjob::Queue.should_shutdown?(worker_session_id)
|
|
23
|
-
Postjob.logger.
|
|
23
|
+
Postjob.logger.debug "Shutting down runner: host is set to 'shutdown'"
|
|
24
24
|
return :shutdown
|
|
25
25
|
end
|
|
26
26
|
|
data/lib/postjob/registry.rb
CHANGED
|
@@ -50,6 +50,18 @@ class Postjob::Registry
|
|
|
50
50
|
instance.register(workflow, options)
|
|
51
51
|
end
|
|
52
52
|
|
|
53
|
+
def self.load(glob_pattern)
|
|
54
|
+
Dir.glob(glob_pattern).sort.each do |path|
|
|
55
|
+
before = Postjob::Registry.workflow_names
|
|
56
|
+
Kernel.load path
|
|
57
|
+
after = Postjob::Registry.workflow_names
|
|
58
|
+
new_workflows = after - before
|
|
59
|
+
next if new_workflows.empty?
|
|
60
|
+
|
|
61
|
+
Postjob.logger.debug "#{path}: registered workflow(s) #{new_workflows.join(', ')}"
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
53
65
|
class WorkflowSpec
|
|
54
66
|
class Options
|
|
55
67
|
DEFAULTS = {
|
|
@@ -12,7 +12,7 @@ class Postjob::WorkerSession < Postjob::Record
|
|
|
12
12
|
host_id = ::Postjob.host_id
|
|
13
13
|
worker_session = ::Postjob::Queue.worker_session_start(workflows_with_versions, host_id: host_id, queues: queues)
|
|
14
14
|
|
|
15
|
-
Postjob.logger.info "Starting worker_session #{worker_session.inspect}"
|
|
15
|
+
Postjob.logger.info "Starting worker_session #{worker_session.inspect}, on pid #{$$}"
|
|
16
16
|
|
|
17
17
|
start_heartbeat_monitor(host_id) if heartbeat
|
|
18
18
|
worker_session
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: postjob
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.15
|
|
4
|
+
version: 0.5.16
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- radiospiel
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-08-
|
|
11
|
+
date: 2018-08-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|
|
@@ -327,7 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
327
327
|
version: '0'
|
|
328
328
|
requirements: []
|
|
329
329
|
rubyforge_project:
|
|
330
|
-
rubygems_version: 2.
|
|
330
|
+
rubygems_version: 2.5.1
|
|
331
331
|
signing_key:
|
|
332
332
|
specification_version: 4
|
|
333
333
|
summary: restartable, asynchronous, and distributed processes
|