postjob 0.4.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/lib/postjob.rb +22 -13
  3. data/lib/postjob/cli/events.rb +60 -0
  4. data/lib/postjob/cli/heartbeat.rb +55 -0
  5. data/lib/postjob/cli/hosts.rb +67 -0
  6. data/lib/postjob/cli/ps.rb +1 -13
  7. data/lib/postjob/cli/sessions.rb +83 -0
  8. data/lib/postjob/job.rb +4 -15
  9. data/lib/postjob/migrations/003_postjobs.sql +10 -8
  10. data/lib/postjob/migrations/003b_processing_columns.sql +8 -8
  11. data/lib/postjob/migrations/005_helpers.sql +3 -1
  12. data/lib/postjob/migrations/006_enqueue.sql +3 -0
  13. data/lib/postjob/migrations/006a_processing.sql +6 -26
  14. data/lib/postjob/migrations/007_job_results.sql +32 -13
  15. data/lib/postjob/migrations/008_checkout_runnable.sql +15 -21
  16. data/lib/postjob/migrations/008a_childjobs.sql +13 -0
  17. data/lib/postjob/migrations/010_settings.sql +18 -3
  18. data/lib/postjob/migrations/011_null_uuid.sql +7 -0
  19. data/lib/postjob/migrations/012_hosts.sql +42 -0
  20. data/lib/postjob/migrations/013_worker_sessions.sql +44 -0
  21. data/lib/postjob/migrations/014_postjob_session_id.sql +17 -0
  22. data/lib/postjob/migrations/015_events.sql +76 -0
  23. data/lib/postjob/migrations/016_sessions_functions.sql +16 -0
  24. data/lib/postjob/migrations/017_zombie_check.sql +58 -0
  25. data/lib/postjob/migrations/018_heartbeat.sql +28 -0
  26. data/lib/postjob/migrations/019_heartbeat_indices.sql +5 -0
  27. data/lib/postjob/queue.rb +41 -27
  28. data/lib/postjob/queue/notifications.rb +5 -4
  29. data/lib/postjob/queue/search.rb +2 -0
  30. data/lib/postjob/queue/settings.rb +11 -1
  31. data/lib/postjob/record.rb +17 -0
  32. data/lib/postjob/runner.rb +9 -2
  33. data/lib/postjob/worker_session.rb +76 -0
  34. data/lib/postjob/workflow.rb +0 -4
  35. data/lib/tools/atomic_store.rb +17 -0
  36. data/lib/tools/heartbeat.rb +151 -0
  37. data/lib/tools/history.rb +25 -0
  38. data/spec/postjob/events/heartbeat_event_spec.rb +85 -0
  39. data/spec/postjob/events/job_event_spec.rb +80 -0
  40. data/spec/postjob/job_control/max_attempts_spec.rb +0 -2
  41. data/spec/postjob/queue/search_spec.rb +0 -14
  42. data/spec/postjob/worker_session_spec.rb +41 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/test_helper.rb +11 -1
  45. metadata +43 -3
  46. data/spec/postjob/job_control/workflow_status_spec.rb +0 -52
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78a70bee1c76f0285da58ecc0e1f38f4d420d9f1
4
- data.tar.gz: 4fe406b37ebe91e317c00a6c8cb84fceb6b5ce55
3
+ metadata.gz: 00a785d7f9bd7a640601fb385902e157a76bf02a
4
+ data.tar.gz: 7368f6f79e2f0392979656ff05bd12acc20751e9
5
5
  SHA512:
6
- metadata.gz: 3e55f152396f288ee5f51ccbc08ab23cb54e82f8afac7b8b69a248d38ff2ada757c075292008810ed749ba585f01591d87cb2780ab49289b46948a83b7360296
7
- data.tar.gz: b7ccda0358d5377e8d4bcb089e736fd3f06efe5149f82a6ee792870dcfc05f5966f739313fd53834f501fa9901afdd76473b75be22fd70a780b751cc621a0241
6
+ metadata.gz: a09cd7a9014e1dddf95f493b064ae8e818ff65b98fd0db3ba15bf8de50e499b5d8c45ec919db52f994692c14b0550aa184dd74d9d4b76ba033c4109de59562f5
7
+ data.tar.gz: 748047574250f33ea51cad3a823ecb2c7d66afa7263c4388d66157a0e58948433676935816794dde089b0c8f1cbcf10c24de6310723d12f61e578a6a08c4986e
@@ -12,6 +12,7 @@ end
12
12
  require_relative "postjob/workflow"
13
13
  require_relative "postjob/registry"
14
14
  require_relative "postjob/job"
15
+ require_relative "postjob/worker_session"
15
16
  require_relative "postjob/error"
16
17
  require_relative "postjob/queue"
17
18
  require_relative "postjob/runner"
@@ -52,12 +53,12 @@ module Postjob
52
53
  end
53
54
 
54
55
  tags = stringify_hash(tags) if tags
55
- job = Queue.enqueue_job workflow, *args, queue: queue,
56
- parent_id: parent_id,
57
- max_attempts: max_attempts,
58
- timeout: timeout,
59
- tags: tags,
60
- version: version
56
+ job = Queue.enqueue_job current_worker_session.id, workflow, *args, queue: queue,
57
+ parent_id: parent_id,
58
+ max_attempts: max_attempts,
59
+ timeout: timeout,
60
+ tags: tags,
61
+ version: version
61
62
  logger.info "Generated process #{job}"
62
63
  job.id
63
64
  end
@@ -120,7 +121,7 @@ module Postjob
120
121
  break if shutdown == :shutdown
121
122
 
122
123
  next if processed_job_id
123
- Queue::Notifications.wait_for_new_job
124
+ Queue::Notifications.wait_for_new_job(current_worker_session.id)
124
125
  end
125
126
 
126
127
  processed_jobs_count
@@ -139,10 +140,16 @@ module Postjob
139
140
  #
140
141
  # or nil, when no job could be checked out.
141
142
  def step
142
- job = Queue.checkout(Registry.workflows_with_versions)
143
+ job = Postjob::Queue.checkout(current_worker_session.id)
143
144
  [ job.id, process_job(job) ] if job
144
145
  end
145
146
 
147
+ # This method connects to the queue. This means it registers as a new worker_session,
148
+ # if there was no worker_session yet.
149
+ def current_worker_session
150
+ @worker_session ||= WorkerSession.start!(Registry.workflows_with_versions)
151
+ end
152
+
146
153
  private
147
154
 
148
155
  # This method is called from tests. Otherwise it is supposed to be private.
@@ -158,11 +165,13 @@ module Postjob
158
165
  raise "Integrity check failed: job's workflow version changed (from #{job.workflow_version} to #{version})"
159
166
  end
160
167
 
168
+ worker_session_id = current_worker_session.id
169
+
161
170
  case status
162
- when :failed then Queue.set_job_error job, *value, status: :failed, version: version
163
- when :err then Queue.set_job_error job, *value, status: :err, version: version
164
- when :pending then Queue.set_job_pending job, version: version
165
- when :ok then Queue.set_job_result job, value, version: version
171
+ when :failed then Queue.set_job_error worker_session_id, job, *value, status: :failed, version: version
172
+ when :err then Queue.set_job_error worker_session_id, job, *value, status: :err, version: version
173
+ when :pending then Queue.set_job_pending worker_session_id, job, version: version
174
+ when :ok then Queue.set_job_result worker_session_id, job, value, version: version
166
175
  else raise ArgumentError, "Invalid status #{status.inspect}"
167
176
  end
168
177
 
@@ -175,7 +184,7 @@ module Postjob
175
184
  job = Queue.find_job_by_token(token)
176
185
  raise "No job with token #{token}" unless job
177
186
 
178
- Queue.set_job_result job, result, version: nil
187
+ Queue.set_job_result current_worker_session.id, job, result, version: nil
179
188
  end
180
189
 
181
190
  def register_workflow(workflow, options = {})
@@ -0,0 +1,60 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def events_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ events.id,
13
+ events.name,
14
+ events.postjob_id AS job_id,
15
+ postjobs.workflow
16
+ || (CASE WHEN postjobs.workflow_version != '' THEN '@' ELSE '' END)
17
+ || postjobs.workflow_version
18
+ || (CASE WHEN postjobs.workflow_method != 'run' THEN '.' || postjobs.workflow_method ELSE '' END)
19
+ || postjobs.args AS job,
20
+ worker_session_id,
21
+ events.created_at
22
+ FROM postjob.events events
23
+ LEFT JOIN postjob.postjobs postjobs ON events.postjob_id=postjobs.id
24
+ WHERE events.name != 'heartbeat'
25
+ SQL
26
+
27
+ scope = Simple::SQL::Scope.new(sql)
28
+ scope
29
+ .order_by("events.id DESC")
30
+ .paginate(per: limit, page: 1)
31
+ end
32
+
33
+ public
34
+
35
+ # Show the latest job events
36
+ #
37
+ # Example:
38
+ #
39
+ # postjob events
40
+ def events(limit: "100")
41
+ expect! limit => /\A\d+\z/
42
+ limit = Integer(limit)
43
+
44
+ connect_to_database!
45
+
46
+ query = events_query(limit: limit)
47
+
48
+ print_results query: query
49
+ end
50
+
51
+ # Show up-to-date events information once per second
52
+ def events_top(limit: "100")
53
+ loop do
54
+ system "clear"
55
+ events(limit: limit)
56
+ sleep 1
57
+ end
58
+ rescue Interrupt
59
+ end
60
+ end
@@ -0,0 +1,55 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+
3
+ module Postjob::CLI
4
+ private
5
+
6
+ def heartbeat_query(limit:)
7
+ limit = Integer(limit)
8
+
9
+ sql = <<-SQL
10
+ SELECT
11
+ name,
12
+ postjob_id AS job_id,
13
+ host_id,
14
+ (attributes->>'uptime')::interval AS uptime,
15
+ to_char((attributes->>'cpu_load_1min')::float, '99D99') AS cpu_load,
16
+ attributes->>'net_in_1min' AS net_in,
17
+ attributes->>'net_out_1min' AS net_out,
18
+ attributes->>'net_errors_1min' AS net_errors,
19
+ now() at time zone 'utc' - events.created_at AS age
20
+ FROM postjob.events events
21
+ LEFT JOIN postjob.worker_sessions worker_sessions ON events.worker_session_id=worker_sessions.id
22
+ WHERE events.name = 'heartbeat'
23
+ SQL
24
+
25
+ scope = Simple::SQL::Scope.new(sql)
26
+ scope
27
+ .order_by("events.id DESC")
28
+ .paginate(per: limit, page: 1)
29
+ end
30
+
31
+ public
32
+
33
+ # Show the latest heartbeat events
34
+ def heartbeat(limit: "100")
35
+ expect! limit => /\A\d+\z/
36
+ limit = Integer(limit)
37
+
38
+ connect_to_database!
39
+
40
+ query = heartbeat_query(limit: limit)
41
+
42
+ Postjob.logger.info "CPU load and friends are for the last minute"
43
+ print_results query: query
44
+ end
45
+
46
+ # Show up-to-date heartbeat information once per second
47
+ def heartbeat_top(limit: "100")
48
+ loop do
49
+ system "clear"
50
+ heartbeat(limit: limit)
51
+ sleep 1
52
+ end
53
+ rescue Interrupt
54
+ end
55
+ end
@@ -0,0 +1,67 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def hosts_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ hosts.id,
13
+ hosts.attributes,
14
+ hosts.created_at,
15
+ heartbeat.attributes AS heartbeat,
16
+ heartbeat.created_at AS heartbeat_created_at
17
+ FROM postjob.hosts hosts
18
+ LEFT JOIN (
19
+ SELECT
20
+ worker_sessions.host_id,
21
+ MAX(events.id) AS event_id
22
+ FROM postjob.worker_sessions
23
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
24
+ WHERE events.name = 'heartbeat'
25
+ GROUP BY worker_sessions.host_id
26
+ ) q ON q.host_id=hosts.id
27
+ LEFT JOIN events heartbeat ON heartbeat.id=event_id
28
+ SQL
29
+
30
+ scope = Simple::SQL::Scope.new(sql)
31
+ scope
32
+ .order_by("hosts.created_at DESC NULLS LAST")
33
+ .paginate(per: limit, page: 1)
34
+ end
35
+
36
+ public
37
+
38
+ # Show hosts status
39
+ #
40
+ # This command lists all hosts currently in the system.
41
+ #
42
+ # Example:
43
+ #
44
+ # postjob hosts
45
+ def hosts(limit: "100")
46
+ expect! limit => /\A\d+\z/
47
+ limit = Integer(limit)
48
+
49
+ connect_to_database!
50
+
51
+ query = hosts_query(limit: limit)
52
+
53
+ print_results query: query
54
+ end
55
+
56
+ # Show up-to-date hosts information once per second
57
+ #
58
+ #
59
+ def hosts_top(limit: "100")
60
+ loop do
61
+ system "clear"
62
+ hosts(limit: limit)
63
+ sleep 1
64
+ end
65
+ rescue Interrupt
66
+ end
67
+ end
@@ -28,18 +28,6 @@ module Postjob::CLI
28
28
  next_run_at - (now() at time zone 'utc') AS next_run_in,
29
29
  to_char(EXTRACT(EPOCH FROM (now() at time zone 'utc') - postjobs.created_at), '999999999.99') AS age,
30
30
 
31
- CASE
32
- WHEN processing_started_at IS NOT NULL THEN
33
- format(
34
- '%s/%s',
35
- to_char(EXTRACT(EPOCH FROM (now() at time zone 'utc') - processing_started_at), '999999999.99'),
36
- processing_max_duration
37
- )
38
- WHEN status IN ('failed', 'err', 'ok') THEN
39
- format('%s', to_char(EXTRACT(EPOCH FROM (updated_at - created_at)), '999999999.99'))
40
- END AS processing,
41
-
42
- COALESCE(processing_client, '') || COALESCE('/' || processing_client_identifier, '') AS worker,
43
31
  tags
44
32
  FROM postjob.postjobs AS postjobs
45
33
  SQL
@@ -172,7 +160,7 @@ module Postjob::CLI
172
160
  tp records
173
161
 
174
162
  if records.total_count > records.length
175
- logger.warn "Output limited up to limit #{records.length}. Use the --limit command line option for a different limit."
163
+ logger.warn "Output limited up to limit #{records.length}. Use the --limit=<NN> command line option for a different limit."
176
164
  end
177
165
 
178
166
  if records.empty? && on_empty
@@ -0,0 +1,83 @@
1
+ # rubocop:disable Lint/HandleExceptions
2
+ # rubocop:disable Metrics/MethodLength
3
+
4
+ module Postjob::CLI
5
+ private
6
+
7
+ def sessions_query(limit:)
8
+ limit = Integer(limit)
9
+
10
+ sql = <<-SQL
11
+ SELECT
12
+ worker_sessions.id,
13
+ worker_sessions.host_id,
14
+ worker_sessions.client_socket,
15
+ worker_sessions.workflows,
16
+ worker_sessions.created_at,
17
+ job_event.name AS event_name,
18
+ job_event.created_at AS event_created_at,
19
+ heartbeat.attributes AS heartbeat,
20
+ heartbeat.created_at AS heartbeat_created_at
21
+ FROM postjob.worker_sessions AS worker_sessions
22
+ LEFT JOIN (
23
+ SELECT
24
+ worker_sessions.id,
25
+ MAX(events.id) AS event_id
26
+ FROM postjob.worker_sessions
27
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
28
+ WHERE events.name != 'heartbeat'
29
+ GROUP BY worker_sessions.id
30
+ ) last_job_event ON last_job_event.id=worker_sessions.id
31
+ LEFT JOIN postjob.events job_event ON job_event.id=last_job_event.event_id
32
+ LEFT JOIN (
33
+ SELECT
34
+ worker_sessions.id,
35
+ MAX(events.id) AS event_id
36
+ FROM postjob.worker_sessions
37
+ LEFT JOIN postjob.events events ON events.worker_session_id=worker_sessions.id
38
+ WHERE events.name = 'heartbeat'
39
+ GROUP BY worker_sessions.id
40
+ ) last_heartbeat ON last_heartbeat.id=worker_sessions.id
41
+ LEFT JOIN postjob.events heartbeat ON heartbeat.id=last_heartbeat.event_id
42
+ SQL
43
+
44
+ scope = Simple::SQL::Scope.new(sql)
45
+
46
+ scope
47
+ .paginate(per: limit, page: 1)
48
+ .order_by("heartbeat_created_at DESC NULLS LAST")
49
+ end
50
+
51
+ public
52
+
53
+ # Show sessions status
54
+ #
55
+ # This command lists all worker sessions currently in the system.
56
+ #
57
+ # Example:
58
+ #
59
+ # postjob sessions
60
+ def sessions(limit: "100")
61
+ expect! limit => /\A\d+\z/
62
+ limit = Integer(limit)
63
+
64
+ connect_to_database!
65
+
66
+ # check for timed out and zombie processes
67
+ # ::Postjob::Queue.checkout(nil)
68
+
69
+ query = sessions_query(limit: limit)
70
+
71
+ print_results query: query
72
+ end
73
+
74
+ # Show up-to-date session information once per second
75
+ def sessions_top(limit: "100")
76
+ loop do
77
+ system "clear"
78
+ sessions(limit: limit)
79
+ sleep 1
80
+ end
81
+ rescue Interrupt
82
+ end
83
+ end
@@ -1,25 +1,14 @@
1
- # rubocop:disable Style/EvalWithLocation
2
- # rubocop:disable Security/Eval
1
+ require_relative "./record"
3
2
 
4
3
  #
5
- # A job class in-memory representation.
4
+ # A job
6
5
  #
7
- class Postjob::Job < Hash
8
- def initialize(hsh)
9
- replace hsh.dup
10
- end
11
-
6
+ class Postjob::Job < Postjob::Record
12
7
  def self.find(job_id)
13
8
  scope = Postjob::Queue.search(id: job_id)
14
9
  Simple::SQL.ask(scope, into: Postjob::Job)
15
10
  end
16
11
 
17
- def self.attribute(sym)
18
- eval <<~RUBY
19
- define_method(:#{sym}) { self[:#{sym}] }
20
- RUBY
21
- end
22
-
23
12
  attribute :id
24
13
  attribute :parent_id
25
14
  attribute :full_id
@@ -40,9 +29,9 @@ class Postjob::Job < Hash
40
29
  attribute :error_message
41
30
  attribute :error_backtrace
42
31
  attribute :recipients
43
- attribute :workflow_status
44
32
  attribute :timed_out
45
33
  attribute :tags
34
+ attribute :last_worker_session_id
46
35
 
47
36
  STATUSES = %w(ok ready processing sleep err failed timeout)
48
37
 
@@ -38,6 +38,8 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
38
38
  -- Number of failed attempts so far.
39
39
  failed_attempts INTEGER NOT NULL DEFAULT 0,
40
40
 
41
+ -- last_worker_session_id UUID NOT NULL REFERENCES {SCHEMA_NAME}.worker_sessions ON DELETE CASCADE,
42
+
41
43
  -- process result ---------------------------------------------------------------------------------------
42
44
 
43
45
  results JSONB, -- The process result, if any. Only valid when status == 'ok'
@@ -46,8 +48,8 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
46
48
  error_backtrace JSONB, -- additional error information, for debugging purposes
47
49
 
48
50
  -- custom fields ----------------------------------------------------------------------------------------
49
- workflow_status VARCHAR,
50
- tags JSONB,
51
+ -- workflow_status VARCHAR,
52
+ tags JSONB
51
53
 
52
54
  -- processing_client information ------------------------------------------------------------------------
53
55
  -- This information is passed along from workers during processing. They are only valid
@@ -55,12 +57,12 @@ CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.postjobs (
55
57
  --
56
58
  -- Initially these columns didn't exist, and have been created via another migration
57
59
  -- (003b_processing_columns.sql). They are listed here for documentation purposes.
58
- processing_client varchar, -- host:port of client (taken from pg_stat_activity)
59
- processing_client_identifier varchar, -- free text info, set via set_client_identifier()
60
- processing_started_at timestamp, -- when did processing start?
61
- processing_max_duration float -- maximum expected duration of processing. Afterwards the
62
- -- processing is considered failed for unknown reasons, and
63
- -- potentially restarted.
60
+ -- processing_client varchar, -- host:port of client (taken from pg_stat_activity)
61
+ -- processing_client_identifier varchar, -- free text info, set via set_client_identifier()
62
+ -- processing_started_at timestamp -- when did processing start?
63
+ -- processing_max_duration float -- maximum expected duration of processing. Afterwards the
64
+ -- processing is considered failed for unknown reasons, and
65
+ -- potentially restarted.
64
66
  );
65
67
 
66
68
  -- [TODO] check indices