postjob 0.5.11 → 0.5.12
- checksums.yaml +4 -4
- data/lib/postjob/cli/cron.rb +24 -0
- data/lib/postjob/cli/db.rb +1 -2
- data/lib/postjob/cli/events.rb +2 -2
- data/lib/postjob/cli/heartbeat.rb +2 -2
- data/lib/postjob/cli/helpers.rb +28 -0
- data/lib/postjob/cli/hosts.rb +32 -15
- data/lib/postjob/cli/job.rb +2 -0
- data/lib/postjob/cli/ps.rb +4 -26
- data/lib/postjob/cli/queues.rb +66 -0
- data/lib/postjob/cli/run.rb +19 -6
- data/lib/postjob/cli/sessions.rb +5 -4
- data/lib/postjob/host.rb +26 -5
- data/lib/postjob/migrations/001_helpers.sql +19 -0
- data/lib/postjob/migrations/007_job_results.sql +0 -26
- data/lib/postjob/migrations/012_hosts.sql +48 -5
- data/lib/postjob/migrations/013_worker_sessions.sql +12 -1
- data/lib/postjob/migrations/013a_checkout_runnable.sql +47 -5
- data/lib/postjob/migrations/016_sessions_functions.sql +5 -3
- data/lib/postjob/migrations/017_zombie_check.sql +64 -18
- data/lib/postjob/migrations/018_heartbeat.sql +36 -3
- data/lib/postjob/migrations/021_cron_jobs.sql +12 -11
- data/lib/postjob/migrations.rb +1 -1
- data/lib/postjob/queue/notifications.rb +15 -7
- data/lib/postjob/queue.rb +21 -8
- data/lib/postjob/runner.rb +1 -1
- data/lib/postjob/worker_session.rb +9 -5
- data/lib/postjob.rb +62 -26
- data/lib/tools/heartbeat.rb +2 -1
- data/spec/postjob/events/job_event_spec.rb +2 -2
- data/spec/postjob/worker_session_spec.rb +1 -1
- data/spec/postjob/zombie_spec.rb +54 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/support/test_helper.rb +3 -8
- metadata +12 -9
- data/spec/postjob/events/zombie_event_spec.rb +0 -61
data/lib/postjob/migrations/013a_checkout_runnable.sql
CHANGED
@@ -1,3 +1,35 @@
+CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.session_should_shutdown(p_worker_session_id UUID)
+RETURNS BOOLEAN
+AS $$
+DECLARE
+  session {SCHEMA_NAME}.worker_sessions;
+  host {SCHEMA_NAME}.hosts;
+BEGIN
+  SELECT * INTO session
+  FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;
+
+  SELECT * INTO host
+  FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id;
+
+  IF host.status != 'shutdown' THEN
+    RETURN FALSE;
+  END IF;
+
+  -- If there unfinished sticky jobs on this host we do not shutdown this session.
+
+  IF EXISTS (
+    SELECT 1 FROM postjob.postjobs WHERE
+      status NOT IN ('ok', 'failed', 'timeout')
+      AND sticky_host_id=host.id
+  ) THEN
+    RETURN FALSE;
+  END IF;
+
+  RETURN TRUE;
+END;
+$$ LANGUAGE plpgsql;
+
+
 DROP FUNCTION IF EXISTS {SCHEMA_NAME}.time_to_next_job(workflows_with_versions varchar[]); -- removed in 0.5.0
 DROP FUNCTION IF EXISTS {SCHEMA_NAME}.time_to_next_job(p_worker_session_id UUID); -- removed in 0.5.7
 CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.time_to_next_job(p_worker_session_id UUID, p_queue varchar[])
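For reference, a minimal sketch of calling the new function directly (assuming the postjob schema, as in the postjob.postjobs reference above; the session id is a placeholder):

    -- TRUE once the session's host is in 'shutdown' and no unfinished sticky jobs remain on it
    SELECT postjob.session_should_shutdown('00000000-0000-0000-0000-000000000000'::uuid);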
@@ -6,16 +38,20 @@ AS $$
 DECLARE
   p_processable_at timestamp;
   session {SCHEMA_NAME}.worker_sessions;
+  host {SCHEMA_NAME}.hosts;
   p_current_greedy_job {SCHEMA_NAME}.postjobs;
 BEGIN
   SELECT * INTO session
   FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;

+  SELECT * INTO host
+  FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id;
+
   SELECT * INTO p_current_greedy_job
   FROM {SCHEMA_NAME}.postjobs WHERE
-    status NOT IN ('ok', 'failed')
-    id=root_id
-    is_greedy
+    status NOT IN ('ok', 'failed', 'timeout')
+    AND id=root_id
+    AND is_greedy
   LIMIT 1;

   SELECT MIN(processable_at) INTO p_processable_at FROM (
@@ -34,6 +70,7 @@ BEGIN
       p_current_greedy_job.id IS NULL OR root_id=p_current_greedy_job.root_id -- if there is a greedy job on this host_id which is not finished yet,
                                                                               -- only jobs belonging to this root jobs are allowed.
     )
+    AND ((host.status != 'shutdown') OR root_id=p_current_greedy_job.root_id) -- during shutdown only get greedy jobs.
   ) sq;

   RETURN EXTRACT(EPOCH FROM p_processable_at - (now() at time zone 'utc'));
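time_to_next_job now takes the session id plus a queue list. A minimal sketch of a direct call, with placeholder values:

    -- seconds until the next runnable or timing-out job for this session on queue 'q1'
    SELECT postjob.time_to_next_job('00000000-0000-0000-0000-000000000000'::uuid, ARRAY['q1']::varchar[]);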
@@ -52,11 +89,14 @@ AS $$
 DECLARE
   job {SCHEMA_NAME}.postjobs;
   session {SCHEMA_NAME}.worker_sessions;
+  host {SCHEMA_NAME}.hosts;
   p_current_greedy_job {SCHEMA_NAME}.postjobs;
 BEGIN
   SELECT * INTO session
   FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;

+  --
+  -- Note on "FOR UPDATE":
   --
   -- We don't want multiple sessions to run this function in parallel. This can lead to a situation
   -- where multiple greedy root jobs could be selected for different workers with identical host ids
@@ -64,11 +104,12 @@ BEGIN
   -- hosts table for locking. This look will be released automatically with the current transaction,
   -- i.e. typically after the "SELECT * FROM checkout(..)" returns.
   --
-
+  SELECT * INTO host
+  FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id FOR UPDATE;

   SELECT * INTO p_current_greedy_job
   FROM {SCHEMA_NAME}.postjobs WHERE
-    status NOT IN ('ok', 'failed')
+    status NOT IN ('ok', 'failed', 'timeout')
     AND id=root_id
     AND sticky_host_id=session.host_id
     AND is_greedy
@@ -95,6 +136,7 @@ BEGIN
       p_current_greedy_job.id IS NULL OR s.root_id=p_current_greedy_job.root_id -- if there is a greedy job on this host_id which is not finished yet,
                                                                                 -- only jobs belonging to this root jobs are allowed.
     )
+    AND ((host.status != 'shutdown') OR root_id=p_current_greedy_job.root_id) -- during shutdown only get greedy jobs.
   )
   ORDER BY (LEAST(s.next_run_at, s.timing_out_at))
   FOR UPDATE SKIP LOCKED
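The checkout function gets the same shutdown awareness; its Ruby wrapper (see data/lib/postjob/queue.rb below) passes the session id, the fast-mode flag and the queue list. A minimal sketch with placeholder values:

    SELECT * FROM postjob.checkout('00000000-0000-0000-0000-000000000000'::uuid, false, ARRAY['q1']::varchar[]);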
data/lib/postjob/migrations/016_sessions_functions.sql
CHANGED
@@ -1,6 +1,8 @@
 -- worker_session_start: starts or reuses a worker_session ----------------------------------

-CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[])
+DROP FUNCTION IF EXISTS {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[]);
+
+CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[], p_queues VARCHAR[])
 RETURNS SETOF {SCHEMA_NAME}.worker_sessions AS $$
 DECLARE
   v_worker_session_id UUID;
@@ -8,8 +10,8 @@ DECLARE
 BEGIN
   SELECT client_addr || ':' || client_port INTO v_client_socket FROM pg_stat_activity WHERE pid = pg_backend_pid();

-  INSERT INTO {SCHEMA_NAME}.worker_sessions (host_id, client_socket, workflows)
-  VALUES (p_host_id, v_client_socket, p_workflows) RETURNING id INTO v_worker_session_id;
+  INSERT INTO {SCHEMA_NAME}.worker_sessions (host_id, client_socket, workflows, queues)
+  VALUES (p_host_id, v_client_socket, p_workflows, p_queues) RETURNING id INTO v_worker_session_id;

   RETURN QUERY SELECT * FROM {SCHEMA_NAME}.worker_sessions WHERE id = v_worker_session_id;
 END;
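worker_session_start now also records the queues a session will serve. A minimal sketch of the new three-argument call; host id, workflow list and queue names are placeholders:

    SELECT * FROM postjob.worker_session_start(
      '00000000-0000-0000-0000-000000000000'::uuid,
      ARRAY['MyWorkflow']::varchar[],
      ARRAY['q1']::varchar[]
    );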
data/lib/postjob/migrations/017_zombie_check.sql
CHANGED
@@ -1,5 +1,37 @@
 -- zombie checks --------------------------------------------------------------

+
+CREATE OR REPLACE FUNCTION {SCHEMA_NAME}._set_job_zombie(
+  job_id BIGINT,
+  p_fast_mode BOOLEAN) RETURNS VOID AS $$
+DECLARE
+  p_worker_session_id UUID;
+  session {SCHEMA_NAME}.worker_sessions;
+BEGIN
+  SELECT * INTO session
+  FROM {SCHEMA_NAME}.worker_sessions worker_sessions
+  INNER JOIN {SCHEMA_NAME}.postjobs postjobs ON postjobs.last_worker_session_id=worker_sessions.id
+  WHERE postjobs.id=job_id;
+
+  p_worker_session_id := {SCHEMA_NAME}._null_uuid();
+  PERFORM {SCHEMA_NAME}._reset_job_processing(p_worker_session_id, job_id);
+
+  -- write error info
+  UPDATE {SCHEMA_NAME}.postjobs
+  SET
+    error='Zombie',
+    error_message='host ' || session.host_id || ' disappeared',
+    error_backtrace=NULL,
+    failed_attempts=failed_attempts+1,
+    next_run_at=NULL
+  WHERE id=job_id;
+
+  -- prepare next run, if any
+  PERFORM {SCHEMA_NAME}._prepare_rerun(job_id, 'err', p_fast_mode);
+  PERFORM {SCHEMA_NAME}._wakeup_parent_job(p_worker_session_id, job_id);
+END;
+$$ LANGUAGE plpgsql;
+
 -- This method runs a zombie check. Its result is written into the database as
 -- a 'zombie' event. A zombie check should only happen once per minute.
 --
@@ -15,12 +47,11 @@ DECLARE
 BEGIN
   zombie_count := 0;
   FOR zombie_id, _one IN
-    -- select jobs that have a last_worker_session_id, which points to a
-    --
+    -- select jobs that have a last_worker_session_id, which points to a host
+    -- whose latest heartbeat is older than +zombie_threshold+.
     --
-    --
-    --
-    -- tests.
+    -- We ignore hosts (and, for that matter, jobs) that don't have any heartbeats,
+    -- since this scenario should only appear during tests.
     SELECT jobs.id, 1
     FROM {SCHEMA_NAME}.postjobs jobs
     LEFT JOIN {SCHEMA_NAME}.worker_sessions sessions ON jobs.last_worker_session_id=sessions.id
@@ -30,10 +61,9 @@ BEGIN
       FROM {SCHEMA_NAME}.events
       WHERE name = 'heartbeat'
      GROUP BY host_id
-    )
-    WHERE
-
-    AND heartbeat.created_at < ((now() at time zone 'utc') - zombie_threshold)
+    ) latest_heartbeat ON sessions.host_id = latest_heartbeat.host_id
+    WHERE jobs.status IN ('processing')
+    AND latest_heartbeat.created_at < ((now() at time zone 'utc') - zombie_threshold)
   LOOP
     PERFORM {SCHEMA_NAME}._set_job_zombie(zombie_id, p_fast_mode);
     zombie_count := zombie_count + 1;
@@ -49,15 +79,31 @@ DECLARE
   zombie_check_interval interval := '1 minute';
   p_zombie_count int;
 BEGIN
-
-  --
-  --
-  IF
-
-
-
-
-
+  -- In order to properly throttle the zombie check we keep the latest zombie
+  -- check result in the database. If this is younger than 1 minute we do not
+  -- run the zombie check.
+  IF EXISTS (
+    SELECT 1 FROM {SCHEMA_NAME}.events
+    WHERE name='zombie'
+    AND created_at > (now() at time zone 'utc') - zombie_check_interval
+  ) THEN
+    RETURN;
   END IF;
+
+  -- Historically we do not need zombie_check results with 0 zombies. We do
+  -- write them, initially, for the throttling to be effective, but later on
+  -- we can remove them again. We only look for events younger then a 1 hour
+  -- threshold, for optimization reasons.
+
+  DELETE FROM {SCHEMA_NAME}.events
+  WHERE name='zombie'
+  AND created_at > (now() at time zone 'utc') - interval '1 hour'
+  AND attributes @> '{"zombie_count": 0}';
+
+  -- run the zombie checker and keep the result.
+  p_zombie_count := {SCHEMA_NAME}._zombie_check(p_fast_mode);
+
+  INSERT INTO {SCHEMA_NAME}.events(name, host_id, attributes)
+  VALUES('zombie', {SCHEMA_NAME}._null_uuid(), jsonb_build_object('zombie_count', p_zombie_count));
 END;
 $$ LANGUAGE plpgsql;
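A zombie check can also be triggered by hand; the function throttles itself to one run per minute via the 'zombie' events shown above. Minimal sketch:

    SELECT postjob.zombie_check(false);  -- false: fast mode off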
data/lib/postjob/migrations/018_heartbeat.sql
CHANGED
@@ -12,17 +12,50 @@ DECLARE
   p_latest_zombie_event_id bigint;
   p_zombie_count int;
 BEGIN
-  --
-
+  -- RAISE NOTICE '*** heartbeat % %', p_host_id, p_metrics;
+
+  -- We perform the zombie_check before we insert the heartbeats event. This
+  -- should not make any difference functionality-wise, but helps us building
+  -- tests for this feature..
+  --
+  -- Note: The zombie_check function throttles itself.
   PERFORM {SCHEMA_NAME}.zombie_check(p_fast_mode);

   IF NOT EXISTS (
     SELECT 1 FROM {SCHEMA_NAME}.events
-    WHERE (name,host_id)=('heartbeat', p_host_id)
+    WHERE (name,host_id)=('heartbeat', p_host_id)
+    AND created_at > (now() at time zone 'utc') - interval '1 minute'
   )
   THEN
+    -- The code below is throttled to run only once per host and minute.
     INSERT INTO {SCHEMA_NAME}.events(name, host_id, attributes)
     VALUES ('heartbeat', p_host_id, p_metrics);
+
+    -- This host is set to 'running' if it currently was stopped. This would
+    -- only happen if a host was manually stopped and then restarted again.
+    -- Note: we do not touch the status of a host during 'shutdown', since
+    -- even in this mode the is expected to send in heartbeats.
+    UPDATE {SCHEMA_NAME}.hosts
+    SET status='running' WHERE status='stopped' AND id=p_host_id;
+
+    -- Find hosts that are not stopped that don't have a heartbeat in the
+    -- last 5 minutes, and set those to 'stopped'.
+    UPDATE {SCHEMA_NAME}.hosts
+    SET status='stopped'
+    WHERE
+      id IN (
+        SELECT hosts.id
+        FROM {SCHEMA_NAME}.hosts hosts
+        LEFT JOIN (
+          SELECT id, host_id
+          FROM postjob.events events
+          WHERE name='heartbeat'
+          AND created_at > now() at time zone 'utc' - interval '5 minutes'
+        ) heartbeats ON hosts.id=heartbeats.host_id
+        WHERE status IN ('running', 'shutdown')
+        AND heartbeats.id IS NULL
+      );
+
   END IF;
 END;
 $$ LANGUAGE plpgsql;
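The host bookkeeping added above can be inspected directly. A minimal sketch (assuming the postjob schema) that lists each host's status next to its latest heartbeat:

    SELECT hosts.id, hosts.status, MAX(events.created_at) AS last_heartbeat
    FROM postjob.hosts hosts
    LEFT JOIN postjob.events events
           ON events.host_id = hosts.id AND events.name = 'heartbeat'
    GROUP BY hosts.id, hosts.status;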
data/lib/postjob/migrations/021_cron_jobs.sql
CHANGED
@@ -8,7 +8,7 @@ $$;

 --- define triggers to automatically restart cron jobs ------------------------

--- This method is called whenever a job's status changes. It enqueues a fresh
+-- This method is called whenever a job's status changes. It enqueues a fresh

 --
 -- An event is created whenever a job's status changes.
@@ -33,24 +33,25 @@ BEGIN
   END IF;

   SELECT id INTO p_new_jonb_id FROM {SCHEMA_NAME}.enqueue(
-    NEW.last_worker_session_id, -- p_worker_session_id
-    NEW.queue, -- queue
-    NEW.workflow, -- workflow
-    NEW.workflow_method, -- workflow_method
-    NULL, -- workflow_version
-    NEW.args, -- args
-    NULL, -- parent_id
-    NEW.tags, -- tags
+    NEW.last_worker_session_id, -- p_worker_session_id
+    NEW.queue, -- queue
+    NEW.workflow, -- workflow
+    NEW.workflow_method, -- workflow_method
+    NULL, -- workflow_version
+    NEW.args, -- args
+    NULL, -- parent_id
+    NEW.tags, -- tags
     NEW.max_attempts, -- max_attempts
     (EXTRACT(EPOCH FROM NEW.timing_out_at) - EXTRACT(EPOCH FROM NEW.created_at)), -- timeout
     NEW.cron_interval,
-    NEW.is_sticky
+    NEW.is_sticky,
+    NEW.is_greedy
   );

   UPDATE {SCHEMA_NAME}.postjobs
   SET next_run_at = now() at time zone 'utc' + NEW.cron_interval * interval '1 second'
   WHERE id=p_new_jonb_id;
-
+
   RETURN NEW;
 END;
 $$ LANGUAGE plpgsql;
data/lib/postjob/migrations.rb
CHANGED
data/lib/postjob/queue/notifications.rb
CHANGED
@@ -1,4 +1,5 @@
-#
+# rubocop:disable Metrics/PerceivedComplexity
+
 # The Postjob::Queue manages enqueueing and fetching jobs from a job queue.
 module Postjob::Queue::Notifications
   extend self
@@ -8,16 +9,21 @@ module Postjob::Queue::Notifications
   SCHEMA_NAME = ::Postjob::Queue::SCHEMA_NAME
   MAX_WAIT_TIME = 120

-  def wait_for_new_job(worker_session_id,
+  def wait_for_new_job(worker_session_id, queues:)
     started_at = Time.now

     start_listening

     # Determine when the next job is up. If we don't have a next job within MAX_WAIT_TIME
     # we wake up regardless.
-    wait_time = time_to_next_job(worker_session_id,
+    wait_time = time_to_next_job(worker_session_id, queues: queues)
     return if wait_time && wait_time <= 0

+    if !wait_time && ::Postjob::Queue.should_shutdown?(worker_session_id)
+      Postjob.logger.warn "Shutting down runner: host is set to 'shutdown'"
+      return :shutdown
+    end
+
     wait_time = MAX_WAIT_TIME if !wait_time || wait_time > MAX_WAIT_TIME
     Postjob.logger.debug "postjob: waiting for notification for up to #{wait_time} seconds"
     Simple::SQL.wait_for_notify(wait_time)
@@ -30,6 +36,9 @@ module Postjob::Queue::Notifications
     end

     Postjob.logger.debug "postjob: awoke after #{format('%.03f secs', (Time.now - started_at))}"
+  rescue Interrupt
+    Postjob.logger.info "postjob: shutdown after receiving Interrupt"
+    :shutdown
   end

   private
@@ -43,11 +52,10 @@ module Postjob::Queue::Notifications

   # returns the maximum number of seconds to wait until the
   # next runnable or timeoutable job comes up.
-  def time_to_next_job(worker_session_id,
+  def time_to_next_job(worker_session_id, queues:)
     expect! worker_session_id => /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i
-    expect!
+    expect! queues => Array

-
-    Simple::SQL.ask "SELECT * FROM #{SCHEMA_NAME}.time_to_next_job($1::uuid, $2)", worker_session_id, queue
+    Simple::SQL.ask "SELECT * FROM #{SCHEMA_NAME}.time_to_next_job($1::uuid, $2)", worker_session_id, queues
   end
 end
data/lib/postjob/queue.rb
CHANGED
@@ -91,17 +91,27 @@ module Postjob::Queue
     Simple::SQL.ask sql, workflow, Encoder.encode(args)
   end

+  # Asks the database whether this session should be shut down.
+  def should_shutdown?(worker_session_id)
+    SQL.ask "SELECT #{SCHEMA_NAME}.session_should_shutdown($1::uuid)", worker_session_id
+  end
+
   def set_job_result(worker_session_id, job, value, version:)
+    expect! worker_session_id => UUID_REGEXP
+
     value = Encoder.encode([value]) unless value.nil?
     SQL.ask "SELECT #{SCHEMA_NAME}.set_job_result($1::uuid, $2, $3, $4)", worker_session_id, job.id, value, version
   end

   def set_job_pending(worker_session_id, job, version:)
+    expect! worker_session_id => UUID_REGEXP
+
     SQL.ask "SELECT #{SCHEMA_NAME}.set_job_pending($1::uuid, $2, $3)", worker_session_id, job.id, version
   end

   def set_job_error(worker_session_id, job, error, error_message, error_backtrace = nil, status:, version:)
     expect! status => [ :failed, :err, :timeout ]
+    expect! worker_session_id => UUID_REGEXP

     SQL.ask "SELECT #{SCHEMA_NAME}.set_job_error($1::uuid, $2, $3, $4, $5, $6, $7, $8)",
       worker_session_id, job.id, error, error_message, Encoder.encode(error_backtrace), status, version, Postjob.fast_mode
@@ -164,13 +174,12 @@ module Postjob::Queue

   UUID_REGEXP = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i

-  def checkout(worker_session_id,
+  def checkout(worker_session_id, queues:)
     expect! worker_session_id => UUID_REGEXP
-    expect!
+    expect! queues => [ nil, Array ]

-    queue = Array(queue) if queue
     SQL.ask "SELECT * FROM #{SCHEMA_NAME}.checkout($1::uuid, $2::boolean, $3)",
-      worker_session_id, Postjob.fast_mode,
+      worker_session_id, Postjob.fast_mode, queues, into: Job
   end

   def find_or_create_token(job)
@@ -184,18 +193,22 @@ module Postjob::Queue
   # -- registers a host -------------------------------------------------------

   # returns the host id
-  def host_register(attributes)
+  def host_register(attributes, host_id:)
     expect! attributes => [ nil, Hash ]
-
+    expect! host_id => [ nil, UUID_REGEXP ]
+
+    Simple::SQL.ask "SELECT postjob.host_register($1, $2::uuid)", JSON.generate(attributes), host_id
   end

   # starts a session
   WorkerSession = ::Postjob::WorkerSession

-  def
+  def worker_session_start(workflows_with_versions, host_id:, queues:)
     expect! host_id => UUID_REGEXP
+    expect! queues => Array
+    expect! queues.first => String

-    Simple::SQL.ask "SELECT * FROM postjob.worker_session_start($1::uuid, $2)", host_id, workflows_with_versions, into: ::Postjob::WorkerSession
+    Simple::SQL.ask "SELECT * FROM postjob.worker_session_start($1::uuid, $2, $3)", host_id, workflows_with_versions, queues, into: ::Postjob::WorkerSession
   end

   # sends in a heartbeat
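For reference, a minimal sketch of the SQL issued by the reworked host_register wrapper above; the attributes hash is a placeholder, and host_id may be passed as NULL (the wrapper accepts nil):

    SELECT postjob.host_register('{"hostname": "worker-1"}', NULL::uuid);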
data/lib/postjob/runner.rb
CHANGED
@@ -37,7 +37,7 @@ module Postjob::Runner
   # returns a subjob within the current job, for a +runner+
   # description and +args+.
   def async(workflow, *args, timeout: nil, max_attempts: nil, queue: nil)
-    worker_session_id = Postjob.
+    worker_session_id = Postjob.current_session_id

     queue = current_job.queue if queue.nil?

data/lib/postjob/worker_session.rb
CHANGED
@@ -8,13 +8,13 @@ require "tools/heartbeat"
 class Postjob::WorkerSession < Postjob::Record
   class << self
     # Starts a worker session.
-    def start!(workflows_with_versions)
+    def start!(workflows_with_versions, heartbeat: true, queues:)
       host_id = ::Postjob.host_id
-      worker_session = ::Postjob::Queue.
+      worker_session = ::Postjob::Queue.worker_session_start(workflows_with_versions, host_id: host_id, queues: queues)

       Postjob.logger.info "Starting worker_session #{worker_session.inspect}"

-      start_heartbeat_monitor(host_id)
+      start_heartbeat_monitor(host_id) if heartbeat
       worker_session
     end

@@ -48,13 +48,17 @@ class Postjob::WorkerSession < Postjob::Record
   attr_reader :workflows
   attr_reader :attributes
   attr_reader :created_at
+  attr_reader :queues

   def to_s
     "Session##{id}"
   end

   def inspect
-
-
+    # [TODO] - grepping workflows by /\d$/ to only count workflows with a version number is a bit hackish.
+    workflow_count = self.workflows.grep(/\d$/)
+    queues = self.queues.map(&:inspect).join(", ")
+
+    "<Session##{id} w/host_id: #{host_id}, queues: #{queues}, client_socket: #{client_socket}, #{workflow_count} workflows>"
   end
 end
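The new queues attribute is read straight from the worker_sessions record; a minimal sketch for inspecting it (assuming the postjob schema):

    SELECT id, host_id, client_socket, workflows, queues
    FROM postjob.worker_sessions;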