postjob 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/lib/postjob.rb +22 -13
  3. data/lib/postjob/cli/events.rb +60 -0
  4. data/lib/postjob/cli/heartbeat.rb +55 -0
  5. data/lib/postjob/cli/hosts.rb +67 -0
  6. data/lib/postjob/cli/ps.rb +1 -13
  7. data/lib/postjob/cli/sessions.rb +83 -0
  8. data/lib/postjob/job.rb +4 -15
  9. data/lib/postjob/migrations/003_postjobs.sql +10 -8
  10. data/lib/postjob/migrations/003b_processing_columns.sql +8 -8
  11. data/lib/postjob/migrations/005_helpers.sql +3 -1
  12. data/lib/postjob/migrations/006_enqueue.sql +3 -0
  13. data/lib/postjob/migrations/006a_processing.sql +6 -26
  14. data/lib/postjob/migrations/007_job_results.sql +32 -13
  15. data/lib/postjob/migrations/008_checkout_runnable.sql +15 -21
  16. data/lib/postjob/migrations/008a_childjobs.sql +13 -0
  17. data/lib/postjob/migrations/010_settings.sql +18 -3
  18. data/lib/postjob/migrations/011_null_uuid.sql +7 -0
  19. data/lib/postjob/migrations/012_hosts.sql +42 -0
  20. data/lib/postjob/migrations/013_worker_sessions.sql +44 -0
  21. data/lib/postjob/migrations/014_postjob_session_id.sql +17 -0
  22. data/lib/postjob/migrations/015_events.sql +76 -0
  23. data/lib/postjob/migrations/016_sessions_functions.sql +16 -0
  24. data/lib/postjob/migrations/017_zombie_check.sql +58 -0
  25. data/lib/postjob/migrations/018_heartbeat.sql +28 -0
  26. data/lib/postjob/migrations/019_heartbeat_indices.sql +5 -0
  27. data/lib/postjob/queue.rb +41 -27
  28. data/lib/postjob/queue/notifications.rb +5 -4
  29. data/lib/postjob/queue/search.rb +2 -0
  30. data/lib/postjob/queue/settings.rb +11 -1
  31. data/lib/postjob/record.rb +17 -0
  32. data/lib/postjob/runner.rb +9 -2
  33. data/lib/postjob/worker_session.rb +76 -0
  34. data/lib/postjob/workflow.rb +0 -4
  35. data/lib/tools/atomic_store.rb +17 -0
  36. data/lib/tools/heartbeat.rb +151 -0
  37. data/lib/tools/history.rb +25 -0
  38. data/spec/postjob/events/heartbeat_event_spec.rb +85 -0
  39. data/spec/postjob/events/job_event_spec.rb +80 -0
  40. data/spec/postjob/job_control/max_attempts_spec.rb +0 -2
  41. data/spec/postjob/queue/search_spec.rb +0 -14
  42. data/spec/postjob/worker_session_spec.rb +41 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/test_helper.rb +11 -1
  45. metadata +43 -3
  46. data/spec/postjob/job_control/workflow_status_spec.rb +0 -52
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78a70bee1c76f0285da58ecc0e1f38f4d420d9f1
4
- data.tar.gz: 4fe406b37ebe91e317c00a6c8cb84fceb6b5ce55
3
+ metadata.gz: 00a785d7f9bd7a640601fb385902e157a76bf02a
4
+ data.tar.gz: 7368f6f79e2f0392979656ff05bd12acc20751e9
5
5
  SHA512:
6
- metadata.gz: 3e55f152396f288ee5f51ccbc08ab23cb54e82f8afac7b8b69a248d38ff2ada757c075292008810ed749ba585f01591d87cb2780ab49289b46948a83b7360296
7
- data.tar.gz: b7ccda0358d5377e8d4bcb089e736fd3f06efe5149f82a6ee792870dcfc05f5966f739313fd53834f501fa9901afdd76473b75be22fd70a780b751cc621a0241
6
+ metadata.gz: a09cd7a9014e1dddf95f493b064ae8e818ff65b98fd0db3ba15bf8de50e499b5d8c45ec919db52f994692c14b0550aa184dd74d9d4b76ba033c4109de59562f5
7
+ data.tar.gz: 748047574250f33ea51cad3a823ecb2c7d66afa7263c4388d66157a0e58948433676935816794dde089b0c8f1cbcf10c24de6310723d12f61e578a6a08c4986e
@@ -12,6 +12,7 @@ end
12
12
  require_relative "postjob/workflow"
13
13
  require_relative "postjob/registry"
14
14
  require_relative "postjob/job"
15
+ require_relative "postjob/worker_session"
15
16
  require_relative "postjob/error"
16
17
  require_relative "postjob/queue"
17
18
  require_relative "postjob/runner"
@@ -52,12 +53,12 @@ module Postjob
52
53
  end
53
54
 
54
55
  tags = stringify_hash(tags) if tags
55
- job = Queue.enqueue_job workflow, *args, queue: queue,
56
- parent_id: parent_id,
57
- max_attempts: max_attempts,
58
- timeout: timeout,
59
- tags: tags,
60
- version: version
56
+ job = Queue.enqueue_job current_worker_session.id, workflow, *args, queue: queue,
57
+ parent_id: parent_id,
58
+ max_attempts: max_attempts,
59
+ timeout: timeout,
60
+ tags: tags,
61
+ version: version
61
62
  logger.info "Generated process #{job}"
62
63
  job.id
63
64
  end
@@ -120,7 +121,7 @@ module Postjob
120
121
  break if shutdown == :shutdown
121
122
 
122
123
  next if processed_job_id
123
- Queue::Notifications.wait_for_new_job
124
+ Queue::Notifications.wait_for_new_job(current_worker_session.id)
124
125
  end
125
126
 
126
127
  processed_jobs_count
@@ -139,10 +140,16 @@ module Postjob
139
140
  #
140
141
  # or nil, when no job could be checked out.
141
142
  def step
142
- job = Queue.checkout(Registry.workflows_with_versions)
143
+ job = Postjob::Queue.checkout(current_worker_session.id)
143
144
  [ job.id, process_job(job) ] if job
144
145
  end
145
146
 
147
+ # This method connects to the queue. This means it registers as a new worker_session,
148
+ # if there was no worker_session yet.
149
+ def current_worker_session
150
+ @worker_session ||= WorkerSession.start!(Registry.workflows_with_versions)
151
+ end
152
+
146
153
  private
147
154
 
148
155
  # This method is called from tests. Otherwise it is supposed to be private.
@@ -158,11 +165,13 @@ module Postjob
158
165
  raise "Integrity check failed: job's workflow version changed (from #{job.workflow_version} to #{version})"
159
166
  end
160
167
 
168
+ worker_session_id = current_worker_session.id
169
+
161
170
  case status
162
- when :failed then Queue.set_job_error job, *value, status: :failed, version: version
163
- when :err then Queue.set_job_error job, *value, status: :err, version: version
164
- when :pending then Queue.set_job_pending job, version: version
165
- when :ok then Queue.set_job_result job, value, version: version
171
+ when :failed then Queue.set_job_error worker_session_id, job, *value, status: :failed, version: version
172
+ when :err then Queue.set_job_error worker_session_id, job, *value, status: :err, version: version
173
+ when :pending then Queue.set_job_pending worker_session_id, job, version: version
174
+ when :ok then Queue.set_job_result worker_session_id, job, value, version: version
166
175
  else raise ArgumentError, "Invalid status #{status.inspect}"
167
176
  end
168
177
 
@@ -175,7 +184,7 @@ module Postjob
175
184
  job = Queue.find_job_by_token(token)
176
185
  raise "No job with token #{token}" unless job
177
186
 
178
- Queue.set_job_result job, result, version: nil
187
+ Queue.set_job_result current_worker_session.id, job, result, version: nil
179
188
  end
180
189
 
181
190
  def register_workflow(workflow, options = {})
@@ -0,0 +1,60 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def events_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ events.id,
13
+ events.name,
14
+ events.postjob_id AS job_id,
15
+ postjobs.workflow
16
+ || (CASE WHEN postjobs.workflow_version != '' THEN '@' ELSE '' END)
17
+ || postjobs.workflow_version
18
+ || (CASE WHEN postjobs.workflow_method != 'run' THEN '.' || postjobs.workflow_method ELSE '' END)
19
+ || postjobs.args AS job,
20
+ worker_session_id,
21
+ events.created_at
22
+ FROM postjob.events events
23
+ LEFT JOIN postjob.postjobs postjobs ON events.postjob_id=postjobs.id
24
+ WHERE events.name != 'heartbeat'
25
+ SQL
26
+
27
+ scope = Simple::SQL::Scope.new(sql)
28
+ scope
29
+ .order_by("events.id DESC")
30
+ .paginate(per: limit, page: 1)
31
+ end
32
+
33
+ public
34
+
35
+ # Show the latest job events
36
+ #
37
+ # Example:
38
+ #
39
+ # postjob events
40
+ def events(limit: "100")
41
+ expect! limit => /\A\d+\z/
42
+ limit = Integer(limit)
43
+
44
+ connect_to_database!
45
+
46
+ query = events_query(limit: limit)
47
+
48
+ print_results query: query
49
+ end
50
+
51
+ # Show up-to-date events information once per second
52
+ def events_top(limit: "100")
53
+ loop do
54
+ system "clear"
55
+ events(limit: limit)
56
+ sleep 1
57
+ end
58
+ rescue Interrupt
59
+ end
60
+ end
@@ -0,0 +1,55 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+
3
+ module Postjob::CLI
4
+ private
5
+
6
+ def heartbeat_query(limit:)
7
+ limit = Integer(limit)
8
+
9
+ sql = <<-SQL
10
+ SELECT
11
+ name,
12
+ postjob_id AS job_id,
13
+ host_id,
14
+ (attributes->>'uptime')::interval AS uptime,
15
+ to_char((attributes->>'cpu_load_1min')::float, '99D99') AS cpu_load,
16
+ attributes->>'net_in_1min' AS net_in,
17
+ attributes->>'net_out_1min' AS net_out,
18
+ attributes->>'net_errors_1min' AS net_errors,
19
+ now() at time zone 'utc' - events.created_at AS age
20
+ FROM postjob.events events
21
+ LEFT JOIN postjob.worker_sessions worker_sessions ON events.worker_session_id=worker_sessions.id
22
+ WHERE events.name = 'heartbeat'
23
+ SQL
24
+
25
+ scope = Simple::SQL::Scope.new(sql)
26
+ scope
27
+ .order_by("events.id DESC")
28
+ .paginate(per: limit, page: 1)
29
+ end
30
+
31
+ public
32
+
33
+ # Show the latest heartbeat events
34
+ def heartbeat(limit: "100")
35
+ expect! limit => /\A\d+\z/
36
+ limit = Integer(limit)
37
+
38
+ connect_to_database!
39
+
40
+ query = heartbeat_query(limit: limit)
41
+
42
+ Postjob.logger.info "CPU load and friends are for the last minute"
43
+ print_results query: query
44
+ end
45
+
46
+ # Show up-to-date heartbeat information once per second
47
+ def heartbeat_top(limit: "100")
48
+ loop do
49
+ system "clear"
50
+ heartbeat(limit: limit)
51
+ sleep 1
52
+ end
53
+ rescue Interrupt
54
+ end
55
+ end
@@ -0,0 +1,67 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def hosts_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ hosts.id,
13
+ hosts.attributes,
14
+ hosts.created_at,
15
+ heartbeat.attributes AS heartbeat,
16
+ heartbeat.created_at AS heartbeat_created_at
17
+ FROM postjob.hosts hosts
18
+ LEFT JOIN (
19
+ SELECT
20
+ worker_sessions.host_id,
21
+ MAX(events.id) AS event_id
22
+ FROM postjob.worker_sessions
23
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
24
+ WHERE events.name = 'heartbeat'
25
+ GROUP BY worker_sessions.host_id
26
+ ) q ON q.host_id=hosts.id
27
+ LEFT JOIN events heartbeat ON heartbeat.id=event_id
28
+ SQL
29
+
30
+ scope = Simple::SQL::Scope.new(sql)
31
+ scope
32
+ .order_by("hosts.created_at DESC NULLS LAST")
33
+ .paginate(per: limit, page: 1)
34
+ end
35
+
36
+ public
37
+
38
+ # Show hosts status
39
+ #
40
+ # This command lists all hosts currently in the system.
41
+ #
42
+ # Example:
43
+ #
44
+ # postjob hosts
45
+ def hosts(limit: "100")
46
+ expect! limit => /\A\d+\z/
47
+ limit = Integer(limit)
48
+
49
+ connect_to_database!
50
+
51
+ query = hosts_query(limit: limit)
52
+
53
+ print_results query: query
54
+ end
55
+
56
+ # Show up-to-date hosts information once per second
57
+ #
58
+ #
59
+ def hosts_top(limit: "100")
60
+ loop do
61
+ system "clear"
62
+ hosts(limit: limit)
63
+ sleep 1
64
+ end
65
+ rescue Interrupt
66
+ end
67
+ end
@@ -28,18 +28,6 @@ module Postjob::CLI
28
28
  next_run_at - (now() at time zone 'utc') AS next_run_in,
29
29
  to_char(EXTRACT(EPOCH FROM (now() at time zone 'utc') - postjobs.created_at), '999999999.99') AS age,
30
30
 
31
- CASE
32
- WHEN processing_started_at IS NOT NULL THEN
33
- format(
34
- '%s/%s',
35
- to_char(EXTRACT(EPOCH FROM (now() at time zone 'utc') - processing_started_at), '999999999.99'),
36
- processing_max_duration
37
- )
38
- WHEN status IN ('failed', 'err', 'ok') THEN
39
- format('%s', to_char(EXTRACT(EPOCH FROM (updated_at - created_at)), '999999999.99'))
40
- END AS processing,
41
-
42
- COALESCE(processing_client, '') || COALESCE('/' || processing_client_identifier, '') AS worker,
43
31
  tags
44
32
  FROM postjob.postjobs AS postjobs
45
33
  SQL
@@ -172,7 +160,7 @@ module Postjob::CLI
172
160
  tp records
173
161
 
174
162
  if records.total_count > records.length
175
- logger.warn "Output limited up to limit #{records.length}. Use the --limit command line option for a different limit."
163
+ logger.warn "Output limited up to limit #{records.length}. Use the --limit=<NN> command line option for a different limit."
176
164
  end
177
165
 
178
166
  if records.empty? && on_empty
@@ -0,0 +1,83 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def sessions_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ worker_sessions.id,
13
+ worker_sessions.host_id,
14
+ worker_sessions.client_socket,
15
+ worker_sessions.workflows,
16
+ worker_sessions.created_at,
17
+ job_event.name AS event_name,
18
+ job_event.created_at AS event_created_at,
19
+ heartbeat.attributes AS heartbeat,
20
+ heartbeat.created_at AS heartbeat_created_at
21
+ FROM postjob.worker_sessions AS worker_sessions
22
+ LEFT JOIN (
23
+ SELECT
24
+ worker_sessions.id,
25
+ MAX(events.id) AS event_id
26
+ FROM postjob.worker_sessions
27
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
28
+ WHERE events.name != 'heartbeat'
29
+ GROUP BY worker_sessions.id
30
+ ) last_job_event ON last_job_event.id=worker_sessions.id
31
+ LEFT JOIN postjob.events job_event ON job_event.id=last_job_event.event_id
32
+ LEFT JOIN (
33
+ SELECT
34
+ worker_sessions.id,
35
+ MAX(events.id) AS event_id
36
+ FROM postjob.worker_sessions
37
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
38
+ WHERE events.name = 'heartbeat'
39
+ GROUP BY worker_sessions.id
40
+ ) last_heartbeat ON last_heartbeat.id=worker_sessions.id
41
+ LEFT JOIN postjob.events heartbeat ON heartbeat.id=last_heartbeat.event_id
42
+ SQL
43
+
44
+ scope = Simple::SQL::Scope.new(sql)
45
+
46
+ scope
47
+ .paginate(per: limit, page: 1)
48
+ .order_by("heartbeat_created_at DESC NULLS LAST")
49
+ end
50
+
51
+ public
52
+
53
+ # Show sessions status
54
+ #
55
+ # This command lists all worker sessions currently in the system.
56
+ #
57
+ # Example:
58
+ #
59
+ # postjob sessions
60
+ def sessions(limit: "100")
61
+ expect! limit => /\A\d+\z/
62
+ limit = Integer(limit)
63
+
64
+ connect_to_database!
65
+
66
+ # check for timed out and zombie processes
67
+ # ::Postjob::Queue.checkout(nil)
68
+
69
+ query = sessions_query(limit: limit)
70
+
71
+ print_results query: query
72
+ end
73
+
74
+ # Show up-to-date session information once per second
75
+ def sessions_top(limit: "100")
76
+ loop do
77
+ system "clear"
78
+ sessions(limit: limit)
79
+ sleep 1
80
+ end
81
+ rescue Interrupt
82
+ end
83
+ end
@@ -1,25 +1,14 @@
1
- # rubocop:disable Style/EvalWithLocation
2
- # rubocop:disable Security/Eval
1
+ require_relative "./record"
3
2
 
4
3
  #
5
- # A job class in-memory representation.
4
+ # A job
6
5
  #
7
- class Postjob::Job < Hash
8
- def initialize(hsh)
9
- replace hsh.dup
10
- end
11
-
6
+ class Postjob::Job < Postjob::Record
12
7
  def self.find(job_id)
13
8
  scope = Postjob::Queue.search(id: job_id)
14
9
  Simple::SQL.ask(scope, into: Postjob::Job)
15
10
  end
16
11
 
17
- def self.attribute(sym)
18
- eval <<~RUBY
19
- define_method(:#{sym}) { self[:#{sym}] }
20
- RUBY
21
- end
22
-
23
12
  attribute :id
24
13
  attribute :parent_id
25
14
  attribute :full_id
@@ -40,9 +29,9 @@ class Postjob::Job < Hash
40
29
  attribute :error_message
41
30
  attribute :error_backtrace
42
31
  attribute :recipients
43
- attribute :workflow_status
44
32
  attribute :timed_out
45
33
  attribute :tags
34
+ attribute :last_worker_session_id
46
35
 
47
36
  STATUSES = %w(ok ready processing sleep err failed timeout)
48
37
 
@@ -38,6 +38,8 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
38
38
  -- Number of failed attempts so far.
39
39
  failed_attempts INTEGER NOT NULL DEFAULT 0,
40
40
 
41
+ -- last_worker_session_id UUID NOT NULL REFERENCES {SCHEMA_NAME}.worker_sessions ON DELETE CASCADE,
42
+
41
43
  -- process result ---------------------------------------------------------------------------------------
42
44
 
43
45
  results JSONB, -- The process result, if any. Only valid when status == 'ok'
@@ -46,8 +48,8 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
46
48
  error_backtrace JSONB, -- additional error information, for debugging purposes
47
49
 
48
50
  -- custom fields ----------------------------------------------------------------------------------------
49
- workflow_status VARCHAR,
50
- tags JSONB,
51
+ -- workflow_status VARCHAR,
52
+ tags JSONB
51
53
 
52
54
  -- processing_client information ------------------------------------------------------------------------
53
55
  -- This information is passed along from workers during processing. They are only valid
@@ -55,12 +57,12 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
55
57
  --
56
58
  -- Initially these columns didn't exist, and have been created via another migration
57
59
  -- (003b_processing_columns.sql). They are listed here for documentation purposes.
58
- processing_client varchar, -- host:port of client (taken from pg_stat_activity)
59
- processing_client_identifier varchar, -- free text info, set via set_client_identifier()
60
- processing_started_at timestamp, -- when did processing start?
61
- processing_max_duration float -- maximum expected duration of processing. Afterwards the
62
- -- processing is considered failed for unknown reasons, and
63
- -- potentially restarted.
60
+ -- processing_client varchar, -- host:port of client (taken from pg_stat_activity)
61
+ -- processing_client_identifier varchar, -- free text info, set via set_client_identifier()
62
+ -- processing_started_at timestamp -- when did processing start?
63
+ -- processing_max_duration float -- maximum expected duration of processing. Afterwards the
64
+ -- processing is considered failed for unknown reasons, and
65
+ -- potentially restarted.
64
66
  );
65
67
 
66
68
  -- [TODO] check indices