postjob 0.5.11 → 0.5.12

Files changed (36)
  1. checksums.yaml +4 -4
  2. data/lib/postjob/cli/cron.rb +24 -0
  3. data/lib/postjob/cli/db.rb +1 -2
  4. data/lib/postjob/cli/events.rb +2 -2
  5. data/lib/postjob/cli/heartbeat.rb +2 -2
  6. data/lib/postjob/cli/helpers.rb +28 -0
  7. data/lib/postjob/cli/hosts.rb +32 -15
  8. data/lib/postjob/cli/job.rb +2 -0
  9. data/lib/postjob/cli/ps.rb +4 -26
  10. data/lib/postjob/cli/queues.rb +66 -0
  11. data/lib/postjob/cli/run.rb +19 -6
  12. data/lib/postjob/cli/sessions.rb +5 -4
  13. data/lib/postjob/host.rb +26 -5
  14. data/lib/postjob/migrations/001_helpers.sql +19 -0
  15. data/lib/postjob/migrations/007_job_results.sql +0 -26
  16. data/lib/postjob/migrations/012_hosts.sql +48 -5
  17. data/lib/postjob/migrations/013_worker_sessions.sql +12 -1
  18. data/lib/postjob/migrations/013a_checkout_runnable.sql +47 -5
  19. data/lib/postjob/migrations/016_sessions_functions.sql +5 -3
  20. data/lib/postjob/migrations/017_zombie_check.sql +64 -18
  21. data/lib/postjob/migrations/018_heartbeat.sql +36 -3
  22. data/lib/postjob/migrations/021_cron_jobs.sql +12 -11
  23. data/lib/postjob/migrations.rb +1 -1
  24. data/lib/postjob/queue/notifications.rb +15 -7
  25. data/lib/postjob/queue.rb +21 -8
  26. data/lib/postjob/runner.rb +1 -1
  27. data/lib/postjob/worker_session.rb +9 -5
  28. data/lib/postjob.rb +62 -26
  29. data/lib/tools/heartbeat.rb +2 -1
  30. data/spec/postjob/events/job_event_spec.rb +2 -2
  31. data/spec/postjob/worker_session_spec.rb +1 -1
  32. data/spec/postjob/zombie_spec.rb +54 -0
  33. data/spec/spec_helper.rb +2 -0
  34. data/spec/support/test_helper.rb +3 -8
  35. metadata +12 -9
  36. data/spec/postjob/events/zombie_event_spec.rb +0 -61
@@ -1,3 +1,35 @@
+ CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.session_should_shutdown(p_worker_session_id UUID)
+ RETURNS BOOLEAN
+ AS $$
+ DECLARE
+ session {SCHEMA_NAME}.worker_sessions;
+ host {SCHEMA_NAME}.hosts;
+ BEGIN
+ SELECT * INTO session
+ FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;
+
+ SELECT * INTO host
+ FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id;
+
+ IF host.status != 'shutdown' THEN
+ RETURN FALSE;
+ END IF;
+
+ -- If there are unfinished sticky jobs on this host we do not shut down this session.
+
+ IF EXISTS (
+ SELECT 1 FROM postjob.postjobs WHERE
+ status NOT IN ('ok', 'failed', 'timeout')
+ AND sticky_host_id=host.id
+ ) THEN
+ RETURN FALSE;
+ END IF;
+
+ RETURN TRUE;
+ END;
+ $$ LANGUAGE plpgsql;
+
+
  DROP FUNCTION IF EXISTS {SCHEMA_NAME}.time_to_next_job(workflows_with_versions varchar[]); -- removed in 0.5.0
  DROP FUNCTION IF EXISTS {SCHEMA_NAME}.time_to_next_job(p_worker_session_id UUID); -- removed in 0.5.7
  CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.time_to_next_job(p_worker_session_id UUID, p_queue varchar[])
@@ -6,16 +38,20 @@ AS $$
  DECLARE
  p_processable_at timestamp;
  session {SCHEMA_NAME}.worker_sessions;
+ host {SCHEMA_NAME}.hosts;
  p_current_greedy_job {SCHEMA_NAME}.postjobs;
  BEGIN
  SELECT * INTO session
  FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;

+ SELECT * INTO host
+ FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id;
+
  SELECT * INTO p_current_greedy_job
  FROM {SCHEMA_NAME}.postjobs WHERE
- status NOT IN ('ok', 'failed') AND
- id=root_id AND
- is_greedy
+ status NOT IN ('ok', 'failed', 'timeout')
+ AND id=root_id
+ AND is_greedy
  LIMIT 1;

  SELECT MIN(processable_at) INTO p_processable_at FROM (
@@ -34,6 +70,7 @@ BEGIN
  p_current_greedy_job.id IS NULL OR root_id=p_current_greedy_job.root_id -- if there is a greedy job on this host_id which is not finished yet,
  -- only jobs belonging to this root jobs are allowed.
  )
+ AND ((host.status != 'shutdown') OR root_id=p_current_greedy_job.root_id) -- during shutdown only get greedy jobs.
  ) sq;

  RETURN EXTRACT(EPOCH FROM p_processable_at - (now() at time zone 'utc'));
@@ -52,11 +89,14 @@ AS $$
  DECLARE
  job {SCHEMA_NAME}.postjobs;
  session {SCHEMA_NAME}.worker_sessions;
+ host {SCHEMA_NAME}.hosts;
  p_current_greedy_job {SCHEMA_NAME}.postjobs;
  BEGIN
  SELECT * INTO session
  FROM {SCHEMA_NAME}.worker_sessions WHERE id=p_worker_session_id;

+ --
+ -- Note on "FOR UPDATE":
  --
  -- We don't want multiple sessions to run this function in parallel. This can lead to a situation
  -- where multiple greedy root jobs could be selected for different workers with identical host ids
@@ -64,11 +104,12 @@ BEGIN
  -- hosts table for locking. This lock will be released automatically with the current transaction,
  -- i.e. typically after the "SELECT * FROM checkout(..)" returns.
  --
- PERFORM * FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id FOR UPDATE;
+ SELECT * INTO host
+ FROM {SCHEMA_NAME}.hosts WHERE id=session.host_id FOR UPDATE;

  SELECT * INTO p_current_greedy_job
  FROM {SCHEMA_NAME}.postjobs WHERE
- status NOT IN ('ok', 'failed')
+ status NOT IN ('ok', 'failed', 'timeout')
  AND id=root_id
  AND sticky_host_id=session.host_id
  AND is_greedy
@@ -95,6 +136,7 @@ BEGIN
  p_current_greedy_job.id IS NULL OR s.root_id=p_current_greedy_job.root_id -- if there is a greedy job on this host_id which is not finished yet,
  -- only jobs belonging to this root jobs are allowed.
  )
+ AND ((host.status != 'shutdown') OR root_id=p_current_greedy_job.root_id) -- during shutdown only get greedy jobs.
  )
  ORDER BY (LEAST(s.next_run_at, s.timing_out_at))
  FOR UPDATE SKIP LOCKED
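The two guards added above mean that a host whose status is 'shutdown' stops handing out new work, except for jobs that belong to a greedy root job still running on that host. A rough sketch of how a worker loop might consume this, using the checkout and should_shutdown? wrappers that appear in queue.rb further down; the queue name and the process step are placeholders:

    # Illustrative only: poll for work until the host is drained.
    # worker_session_id comes from worker_session_start (see below).
    loop do
      job = Postjob::Queue.checkout(worker_session_id, queues: ["default"])
      if job
        process(job)   # hypothetical job execution step
      elsif Postjob::Queue.should_shutdown?(worker_session_id)
        break          # host is in 'shutdown' and no sticky/greedy work remains
      end
    end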
@@ -1,6 +1,8 @@
  -- worker_session_start: starts or reuses a worker_session ----------------------------------

- CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[])
+ DROP FUNCTION IF EXISTS {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[]);
+
+ CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.worker_session_start(p_host_id UUID, p_workflows VARCHAR[], p_queues VARCHAR[])
  RETURNS SETOF {SCHEMA_NAME}.worker_sessions AS $$
  DECLARE
  v_worker_session_id UUID;
@@ -8,8 +10,8 @@ DECLARE
  BEGIN
  SELECT client_addr || ':' || client_port INTO v_client_socket FROM pg_stat_activity WHERE pid = pg_backend_pid();

- INSERT INTO {SCHEMA_NAME}.worker_sessions (host_id, client_socket, workflows)
- VALUES (p_host_id, v_client_socket, p_workflows) RETURNING id INTO v_worker_session_id;
+ INSERT INTO {SCHEMA_NAME}.worker_sessions (host_id, client_socket, workflows, queues)
+ VALUES (p_host_id, v_client_socket, p_workflows, p_queues) RETURNING id INTO v_worker_session_id;

  RETURN QUERY SELECT * FROM {SCHEMA_NAME}.worker_sessions WHERE id = v_worker_session_id;
  END;
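worker_session_start now records which queues the session will poll. A minimal sketch of calling the new three-argument signature through Simple::SQL, mirroring the worker_session_start wrapper in queue.rb below; the host UUID, workflow list and queue names are made up:

    require "simple/sql"

    host_id = "00000000-0000-0000-0000-000000000001"   # placeholder UUID
    Simple::SQL.ask "SELECT * FROM postjob.worker_session_start($1::uuid, $2, $3)",
                    host_id, ["MyWorkflow 1.0"], ["default", "mailers"]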
@@ -1,5 +1,37 @@
  -- zombie checks --------------------------------------------------------------

+
+ CREATE OR REPLACE FUNCTION {SCHEMA_NAME}._set_job_zombie(
+ job_id BIGINT,
+ p_fast_mode BOOLEAN) RETURNS VOID AS $$
+ DECLARE
+ p_worker_session_id UUID;
+ session {SCHEMA_NAME}.worker_sessions;
+ BEGIN
+ SELECT * INTO session
+ FROM {SCHEMA_NAME}.worker_sessions worker_sessions
+ INNER JOIN {SCHEMA_NAME}.postjobs postjobs ON postjobs.last_worker_session_id=worker_sessions.id
+ WHERE postjobs.id=job_id;
+
+ p_worker_session_id := {SCHEMA_NAME}._null_uuid();
+ PERFORM {SCHEMA_NAME}._reset_job_processing(p_worker_session_id, job_id);
+
+ -- write error info
+ UPDATE {SCHEMA_NAME}.postjobs
+ SET
+ error='Zombie',
+ error_message='host ' || session.host_id || ' disappeared',
+ error_backtrace=NULL,
+ failed_attempts=failed_attempts+1,
+ next_run_at=NULL
+ WHERE id=job_id;
+
+ -- prepare next run, if any
+ PERFORM {SCHEMA_NAME}._prepare_rerun(job_id, 'err', p_fast_mode);
+ PERFORM {SCHEMA_NAME}._wakeup_parent_job(p_worker_session_id, job_id);
+ END;
+ $$ LANGUAGE plpgsql;
+
  -- This method runs a zombie check. Its result is written into the database as
  -- a 'zombie' event. A zombie check should only happen once per minute.
  --
@@ -15,12 +47,11 @@ DECLARE
  BEGIN
  zombie_count := 0;
  FOR zombie_id, _one IN
- -- select jobs that have a last_worker_session_id, which points to a
- -- host whose latest heartbeat is older than +zombie_threshold+.
+ -- select jobs that have a last_worker_session_id, which points to a host
+ -- whose latest heartbeat is older than +zombie_threshold+.
  --
- -- note that we ignore hosts (and, for that matter, jobs) that don't
- -- have any heartbeats, since this scenario should only appear during
- -- tests.
+ -- We ignore hosts (and, for that matter, jobs) that don't have any heartbeats,
+ -- since this scenario should only appear during tests.
  SELECT jobs.id, 1
  FROM {SCHEMA_NAME}.postjobs jobs
  LEFT JOIN {SCHEMA_NAME}.worker_sessions sessions ON jobs.last_worker_session_id=sessions.id
@@ -30,10 +61,9 @@ BEGIN
  FROM {SCHEMA_NAME}.events
  WHERE name = 'heartbeat'
  GROUP BY host_id
- ) heartbeat ON sessions.host_id = heartbeat.host_id
- WHERE
- jobs.status IN ('processing')
- AND heartbeat.created_at < ((now() at time zone 'utc') - zombie_threshold)
+ ) latest_heartbeat ON sessions.host_id = latest_heartbeat.host_id
+ WHERE jobs.status IN ('processing')
+ AND latest_heartbeat.created_at < ((now() at time zone 'utc') - zombie_threshold)
  LOOP
  PERFORM {SCHEMA_NAME}._set_job_zombie(zombie_id, p_fast_mode);
  zombie_count := zombie_count + 1;
@@ -49,15 +79,31 @@ DECLARE
  zombie_check_interval interval := '1 minute';
  p_zombie_count int;
  BEGIN
-
- -- once per minute run a zombie check. This is marked in the database as a zombie
- -- event, which has a zombie count value in its attributes.
- IF NOT EXISTS (SELECT 1 FROM {SCHEMA_NAME}.events WHERE name='zombie' AND created_at > (now() at time zone 'utc') - zombie_check_interval) THEN
- p_zombie_count := {SCHEMA_NAME}._zombie_check(p_fast_mode);
- IF p_zombie_count > 0 THEN
- INSERT INTO {SCHEMA_NAME}.events(name, host_id, attributes)
- VALUES('zombie', {SCHEMA_NAME}._null_uuid(), jsonb_build_object('zombie_count', p_zombie_count));
- END IF;
+ -- In order to properly throttle the zombie check we keep the latest zombie
+ -- check result in the database. If this is younger than 1 minute we do not
+ -- run the zombie check.
+ IF EXISTS (
+ SELECT 1 FROM {SCHEMA_NAME}.events
+ WHERE name='zombie'
+ AND created_at > (now() at time zone 'utc') - zombie_check_interval
+ ) THEN
+ RETURN;
  END IF;
+
+ -- Historically we do not need zombie_check results with 0 zombies. We do
+ -- write them, initially, for the throttling to be effective, but later on
+ -- we can remove them again. We only look for events younger than a 1 hour
+ -- threshold, for optimization reasons.
+
+ DELETE FROM {SCHEMA_NAME}.events
+ WHERE name='zombie'
+ AND created_at > (now() at time zone 'utc') - interval '1 hour'
+ AND attributes @> '{"zombie_count": 0}';
+
+ -- run the zombie checker and keep the result.
+ p_zombie_count := {SCHEMA_NAME}._zombie_check(p_fast_mode);
+
+ INSERT INTO {SCHEMA_NAME}.events(name, host_id, attributes)
+ VALUES('zombie', {SCHEMA_NAME}._null_uuid(), jsonb_build_object('zombie_count', p_zombie_count));
  END;
  $$ LANGUAGE plpgsql;
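The DELETE above relies on jsonb containment: attributes @> '{"zombie_count": 0}' matches only zombie events that recorded a count of zero, so the purely throttling entries are pruned after an hour while events that actually found zombies are kept. A quick way to inspect what remains, sketched with Simple::SQL against the postjob schema (the query is illustrative and not part of the gem):

    require "simple/sql"

    # Most recent zombie events and their counts.
    Simple::SQL.all <<~SQL
      SELECT created_at, attributes->>'zombie_count' AS zombie_count
      FROM postjob.events
      WHERE name = 'zombie'
      ORDER BY created_at DESC
      LIMIT 10
    SQL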
@@ -12,17 +12,50 @@ DECLARE
  p_latest_zombie_event_id bigint;
  p_zombie_count int;
  BEGIN
- -- We perform the zombie_check first. This should not make any difference,
- -- functionality-wise, but helps us test this.
+ -- RAISE NOTICE '*** heartbeat % %', p_host_id, p_metrics;
+
+ -- We perform the zombie_check before we insert the heartbeat event. This
+ -- should not make any difference functionality-wise, but helps us build
+ -- tests for this feature.
+ --
+ -- Note: The zombie_check function throttles itself.
  PERFORM {SCHEMA_NAME}.zombie_check(p_fast_mode);

  IF NOT EXISTS (
  SELECT 1 FROM {SCHEMA_NAME}.events
- WHERE (name,host_id)=('heartbeat', p_host_id) AND created_at > (now() at time zone 'utc') - interval '1 minute'
+ WHERE (name,host_id)=('heartbeat', p_host_id)
+ AND created_at > (now() at time zone 'utc') - interval '1 minute'
  )
  THEN
+ -- The code below is throttled to run only once per host and minute.
  INSERT INTO {SCHEMA_NAME}.events(name, host_id, attributes)
  VALUES ('heartbeat', p_host_id, p_metrics);
+
+ -- This host is set to 'running' if it is currently stopped. This would
+ -- only happen if a host was manually stopped and then restarted again.
+ -- Note: we do not touch the status of a host during 'shutdown', since
+ -- even in this mode the host is expected to send in heartbeats.
+ UPDATE {SCHEMA_NAME}.hosts
+ SET status='running' WHERE status='stopped' AND id=p_host_id;
+
+ -- Find hosts that are not stopped and don't have a heartbeat in the
+ -- last 5 minutes, and set those to 'stopped'.
+ UPDATE {SCHEMA_NAME}.hosts
+ SET status='stopped'
+ WHERE
+ id IN (
+ SELECT hosts.id
+ FROM {SCHEMA_NAME}.hosts hosts
+ LEFT JOIN (
+ SELECT id, host_id
+ FROM postjob.events events
+ WHERE name='heartbeat'
+ AND created_at > now() at time zone 'utc' - interval '5 minutes'
+ ) heartbeats ON hosts.id=heartbeats.host_id
+ WHERE status IN ('running', 'shutdown')
+ AND heartbeats.id IS NULL
+ );
+
  END IF;
  END;
  $$ LANGUAGE plpgsql;
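With this change a host moves between three statuses: 'running' while heartbeats arrive, 'stopped' after five minutes of silence, and 'shutdown' while an operator drains it (heartbeats keep coming in that state, so it is deliberately not reset to 'running'). A quick status overview can be pulled with Simple::SQL; this query is illustrative and not part of the gem:

    require "simple/sql"

    # Count hosts per status to see which are running, stopped, or draining.
    Simple::SQL.all "SELECT status, COUNT(*) FROM postjob.hosts GROUP BY status"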
@@ -8,7 +8,7 @@ $$;

  --- define triggers to automatically restart cron jobs ------------------------

- -- This method is called whenever a job's status changes. It enqueues a fresh
+ -- This method is called whenever a job's status changes. It enqueues a fresh

  --
  -- An event is created whenever a job's status changes.
@@ -33,24 +33,25 @@ BEGIN
  END IF;

  SELECT id INTO p_new_jonb_id FROM {SCHEMA_NAME}.enqueue(
- NEW.last_worker_session_id, -- p_worker_session_id
- NEW.queue, -- queue
- NEW.workflow, -- workflow
- NEW.workflow_method, -- workflow_method
- NULL, -- workflow_version
- NEW.args, -- args
- NULL, -- parent_id
- NEW.tags, -- tags
+ NEW.last_worker_session_id, -- p_worker_session_id
+ NEW.queue, -- queue
+ NEW.workflow, -- workflow
+ NEW.workflow_method, -- workflow_method
+ NULL, -- workflow_version
+ NEW.args, -- args
+ NULL, -- parent_id
+ NEW.tags, -- tags
  NEW.max_attempts, -- max_attempts
  (EXTRACT(EPOCH FROM NEW.timing_out_at) - EXTRACT(EPOCH FROM NEW.created_at)), -- timeout
  NEW.cron_interval,
- NEW.is_sticky
+ NEW.is_sticky,
+ NEW.is_greedy
  );

  UPDATE {SCHEMA_NAME}.postjobs
  SET next_run_at = now() at time zone 'utc' + NEW.cron_interval * interval '1 second'
  WHERE id=p_new_jonb_id;
-
+
  RETURN NEW;
  END;
  $$ LANGUAGE plpgsql;
@@ -47,7 +47,7 @@ module Postjob

  def run_migration_sql(file)
  sql = File.read(file)
- sql.gsub!(/\{([^\}]+)\}/) { |_| const_get(Regexp.last_match(1)) }
+ sql.gsub!(/\{([_A-Za-z0-9+]+)\}/) { |_| const_get(Regexp.last_match(1)) }
  SQL.exec sql
  end
  end
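The placeholder pattern is narrowed to identifier-like names such as {SCHEMA_NAME}; the old catch-all would also have matched the JSON literal '{"zombie_count": 0}' that now appears in the zombie_check changes above, and const_get would then raise. A small Ruby sketch of the difference, using made-up sample text:

    sample = %(FROM {SCHEMA_NAME}.events WHERE attributes @> '{"zombie_count": 0}')

    # Old pattern matches both braced spans, including the JSON literal:
    sample.scan(/\{([^\}]+)\}/)          # => [["SCHEMA_NAME"], ["\"zombie_count\": 0"]]

    # New pattern only matches identifier-like placeholders:
    sample.scan(/\{([_A-Za-z0-9+]+)\}/)  # => [["SCHEMA_NAME"]]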
@@ -1,4 +1,5 @@
- #
+ # rubocop:disable Metrics/PerceivedComplexity
+
  # The Postjob::Queue manages enqueueing and fetching jobs from a job queue.
  module Postjob::Queue::Notifications
  extend self
@@ -8,16 +9,21 @@ module Postjob::Queue::Notifications
  SCHEMA_NAME = ::Postjob::Queue::SCHEMA_NAME
  MAX_WAIT_TIME = 120

- def wait_for_new_job(worker_session_id, queue:)
+ def wait_for_new_job(worker_session_id, queues:)
  started_at = Time.now

  start_listening

  # Determine when the next job is up. If we don't have a next job within MAX_WAIT_TIME
  # we wake up regardless.
- wait_time = time_to_next_job(worker_session_id, queue: queue)
+ wait_time = time_to_next_job(worker_session_id, queues: queues)
  return if wait_time && wait_time <= 0

+ if !wait_time && ::Postjob::Queue.should_shutdown?(worker_session_id)
+ Postjob.logger.warn "Shutting down runner: host is set to 'shutdown'"
+ return :shutdown
+ end
+
  wait_time = MAX_WAIT_TIME if !wait_time || wait_time > MAX_WAIT_TIME
  Postjob.logger.debug "postjob: waiting for notification for up to #{wait_time} seconds"
  Simple::SQL.wait_for_notify(wait_time)
@@ -30,6 +36,9 @@ module Postjob::Queue::Notifications
  end

  Postjob.logger.debug "postjob: awoke after #{format('%.03f secs', (Time.now - started_at))}"
+ rescue Interrupt
+ Postjob.logger.info "postjob: shutdown after receiving Interrupt"
+ :shutdown
  end

  private
@@ -43,11 +52,10 @@ module Postjob::Queue::Notifications

  # returns the maximum number of seconds to wait until the
  # next runnable or timeoutable job comes up.
- def time_to_next_job(worker_session_id, queue:)
+ def time_to_next_job(worker_session_id, queues:)
  expect! worker_session_id => /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i
- expect! queue => [ String, Array, nil ]
+ expect! queues => Array

- queue = Array(queue)
- Simple::SQL.ask "SELECT * FROM #{SCHEMA_NAME}.time_to_next_job($1::uuid, $2)", worker_session_id, queue
+ Simple::SQL.ask "SELECT * FROM #{SCHEMA_NAME}.time_to_next_job($1::uuid, $2)", worker_session_id, queues
  end
  end
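wait_for_new_job can now return :shutdown, either because the session's host was put into 'shutdown' or because the process received an Interrupt. A sketch of how a polling loop might react; run_next_job is a hypothetical placeholder for the actual checkout-and-run step in postjob.rb:

    # worker_session_id comes from worker_session_start (see queue.rb below).
    loop do
      signal = Postjob::Queue::Notifications.wait_for_new_job(worker_session_id, queues: ["default"])
      break if signal == :shutdown   # stop polling and let the session wind down

      run_next_job                   # hypothetical: check out and process the next job
    end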
data/lib/postjob/queue.rb CHANGED
@@ -91,17 +91,27 @@ module Postjob::Queue
  Simple::SQL.ask sql, workflow, Encoder.encode(args)
  end

+ # Asks the database whether this session should be shut down.
+ def should_shutdown?(worker_session_id)
+ SQL.ask "SELECT #{SCHEMA_NAME}.session_should_shutdown($1::uuid)", worker_session_id
+ end
+
  def set_job_result(worker_session_id, job, value, version:)
+ expect! worker_session_id => UUID_REGEXP
+
  value = Encoder.encode([value]) unless value.nil?
  SQL.ask "SELECT #{SCHEMA_NAME}.set_job_result($1::uuid, $2, $3, $4)", worker_session_id, job.id, value, version
  end

  def set_job_pending(worker_session_id, job, version:)
+ expect! worker_session_id => UUID_REGEXP
+
  SQL.ask "SELECT #{SCHEMA_NAME}.set_job_pending($1::uuid, $2, $3)", worker_session_id, job.id, version
  end

  def set_job_error(worker_session_id, job, error, error_message, error_backtrace = nil, status:, version:)
  expect! status => [ :failed, :err, :timeout ]
+ expect! worker_session_id => UUID_REGEXP

  SQL.ask "SELECT #{SCHEMA_NAME}.set_job_error($1::uuid, $2, $3, $4, $5, $6, $7, $8)",
  worker_session_id, job.id, error, error_message, Encoder.encode(error_backtrace), status, version, Postjob.fast_mode
@@ -164,13 +174,12 @@ module Postjob::Queue

  UUID_REGEXP = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i

- def checkout(worker_session_id, queue:)
+ def checkout(worker_session_id, queues:)
  expect! worker_session_id => UUID_REGEXP
- expect! queue => [ nil, Array, String ]
+ expect! queues => [ nil, Array ]

- queue = Array(queue) if queue
  SQL.ask "SELECT * FROM #{SCHEMA_NAME}.checkout($1::uuid, $2::boolean, $3)",
- worker_session_id, Postjob.fast_mode, queue, into: Job
+ worker_session_id, Postjob.fast_mode, queues, into: Job
  end

  def find_or_create_token(job)
@@ -184,18 +193,22 @@ module Postjob::Queue
  # -- registers a host -------------------------------------------------------

  # returns the host id
- def host_register(attributes)
+ def host_register(attributes, host_id:)
  expect! attributes => [ nil, Hash ]
- Simple::SQL.ask "SELECT postjob.host_register($1)", JSON.generate(attributes)
+ expect! host_id => [ nil, UUID_REGEXP ]
+
+ Simple::SQL.ask "SELECT postjob.host_register($1, $2::uuid)", JSON.generate(attributes), host_id
  end

  # starts a session
  WorkerSession = ::Postjob::WorkerSession

- def start_worker_session(workflows_with_versions, host_id:)
+ def worker_session_start(workflows_with_versions, host_id:, queues:)
  expect! host_id => UUID_REGEXP
+ expect! queues => Array
+ expect! queues.first => String

- Simple::SQL.ask "SELECT * FROM postjob.worker_session_start($1::uuid, $2)", host_id, workflows_with_versions, into: ::Postjob::WorkerSession
+ Simple::SQL.ask "SELECT * FROM postjob.worker_session_start($1::uuid, $2, $3)", host_id, workflows_with_versions, queues, into: ::Postjob::WorkerSession
  end

  # sends in a heartbeat
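host_register can now be handed an explicit host UUID, which reads like it lets a restarting worker re-attach to its existing hosts row instead of registering a fresh one; passing nil presumably keeps the old behaviour. The attribute hash and UUID in this sketch are made up:

    host_id = Postjob::Queue.host_register(
      { "hostname" => "worker-1.example.com" },
      host_id: "00000000-0000-0000-0000-000000000001"
    )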
@@ -37,7 +37,7 @@ module Postjob::Runner
  # returns a subjob within the current job, for a +runner+
  # description and +args+.
  def async(workflow, *args, timeout: nil, max_attempts: nil, queue: nil)
- worker_session_id = Postjob.current_worker_session.id
+ worker_session_id = Postjob.current_session_id

  queue = current_job.queue if queue.nil?

@@ -8,13 +8,13 @@ require "tools/heartbeat"
  class Postjob::WorkerSession < Postjob::Record
  class << self
  # Starts a worker session.
- def start!(workflows_with_versions)
+ def start!(workflows_with_versions, heartbeat: true, queues:)
  host_id = ::Postjob.host_id
- worker_session = ::Postjob::Queue.start_worker_session(workflows_with_versions, host_id: host_id)
+ worker_session = ::Postjob::Queue.worker_session_start(workflows_with_versions, host_id: host_id, queues: queues)

  Postjob.logger.info "Starting worker_session #{worker_session.inspect}"

- start_heartbeat_monitor(host_id)
+ start_heartbeat_monitor(host_id) if heartbeat
  worker_session
  end

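start! now requires the queues a session will serve and allows callers to skip the heartbeat monitor, which the specs presumably use to avoid background threads in tests. A hedged usage sketch; workflow and queue names are illustrative:

    # Production-style start: heartbeat monitor on, two queues served.
    session = Postjob::WorkerSession.start!(["MyWorkflow 1.0"], queues: ["default", "mailers"])

    # Test-style start: skip the heartbeat thread.
    test_session = Postjob::WorkerSession.start!(["MyWorkflow 1.0"], queues: ["default"], heartbeat: false)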
@@ -48,13 +48,17 @@ class Postjob::WorkerSession < Postjob::Record
  attr_reader :workflows
  attr_reader :attributes
  attr_reader :created_at
+ attr_reader :queues

  def to_s
  "Session##{id}"
  end

  def inspect
- versionized_workflows = workflows.grep(/\d$/)
- "<Session##{id} w/host_id: #{host_id}, client_socket: #{client_socket}, #{versionized_workflows.count} workflows>"
+ # [TODO] - grepping workflows by /\d$/ to only count workflows with a version number is a bit hackish.
+ workflow_count = self.workflows.grep(/\d$/).count
+ queues = self.queues.map(&:inspect).join(", ")
+
+ "<Session##{id} w/host_id: #{host_id}, queues: #{queues}, client_socket: #{client_socket}, #{workflow_count} workflows>"
  end
  end