postjob 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +23 -0
  3. data/bin/postjob +11 -0
  4. data/lib/postjob/cli/db.rb +39 -0
  5. data/lib/postjob/cli/job.rb +67 -0
  6. data/lib/postjob/cli/ps.rb +110 -0
  7. data/lib/postjob/cli/run.rb +19 -0
  8. data/lib/postjob/cli.rb +31 -0
  9. data/lib/postjob/error.rb +16 -0
  10. data/lib/postjob/job.rb +66 -0
  11. data/lib/postjob/migrations.rb +97 -0
  12. data/lib/postjob/queue/encoder.rb +40 -0
  13. data/lib/postjob/queue/notifications.rb +72 -0
  14. data/lib/postjob/queue/search.rb +82 -0
  15. data/lib/postjob/queue.rb +331 -0
  16. data/lib/postjob/registry.rb +52 -0
  17. data/lib/postjob/runner.rb +153 -0
  18. data/lib/postjob/workflow.rb +60 -0
  19. data/lib/postjob.rb +170 -0
  20. data/spec/postjob/enqueue_spec.rb +86 -0
  21. data/spec/postjob/full_workflow_spec.rb +86 -0
  22. data/spec/postjob/job_control/manual_spec.rb +45 -0
  23. data/spec/postjob/job_control/max_attempts_spec.rb +70 -0
  24. data/spec/postjob/job_control/timeout_spec.rb +31 -0
  25. data/spec/postjob/job_control/workflow_status_spec.rb +52 -0
  26. data/spec/postjob/process_job_spec.rb +25 -0
  27. data/spec/postjob/queue/encoder_spec.rb +46 -0
  28. data/spec/postjob/queue/search_spec.rb +141 -0
  29. data/spec/postjob/run_spec.rb +69 -0
  30. data/spec/postjob/step_spec.rb +26 -0
  31. data/spec/postjob/sub_workflow_spec.rb +27 -0
  32. data/spec/spec_helper.rb +35 -0
  33. data/spec/support/configure_active_record.rb +18 -0
  34. data/spec/support/configure_database.rb +19 -0
  35. data/spec/support/configure_simple_sql.rb +17 -0
  36. data/spec/support/connect_active_record.rb +6 -0
  37. data/spec/support/test_helper.rb +53 -0
  38. metadata +269 -0
@@ -0,0 +1,331 @@
1
+ # rubocop:disable Layout/IndentationWidth
2
+ # rubocop:disable Style/UnneededInterpolation
3
+ # rubocop:disable Metrics/ModuleLength
4
+ # rubocop:disable Metrics/LineLength
5
+ # rubocop:disable Lint/EndAlignment
6
+ # rubocop:disable Layout/SpaceInsideArrayLiteralBrackets
7
+
8
# The Postjob::Queue manages enqueueing and fetching jobs from a job queue.
#
# This first definition only declares the shared constants; it is closed
# again before the queue/* files are required, presumably so that those
# files can already refer to the constants.
module Postjob::Queue
  extend self

  # Name of the database schema used for the postjob tables.
  SCHEMA_NAME = "postjob".freeze

  # Schema-qualified name of the jobs table.
  TABLE_NAME = "#{SCHEMA_NAME}.postjobs".freeze

  # Shortcut for the Simple::SQL interface used for all database access.
  SQL = ::Simple::SQL
end
17
+
18
+ require_relative "queue/encoder"
19
+ require_relative "queue/notifications"
20
+ require_relative "queue/search"
21
+
22
+ module Postjob::Queue
23
+ Job = ::Postjob::Job
24
+
25
# Values that enqueue_job falls back to for options that are missing or nil.
DEFAULT_OPTIONS = { version: "", queue: "q", max_attempts: 5 }.freeze
30
+
31
# Enqueues a new job with the given arguments.
#
# Parameters:
#
# - workflow - the name of the workflow, optionally followed by a method
#   name (e.g. "FooBar", "FooBar.method_name"). The method defaults to "run".
# - args - the job's arguments, must be encodable via Encoder.encode
# - options - a Hash, passed as the last argument, with these optional keys:
#   - queue - the name of the queue (default: "q")
#   - version - the version of the workflow, e.g. "0.2" (default: "")
#   - parent_id - the id of the parent job, if any
#   - tags - a Hash[String => String]
#   - timeout - number of seconds from now after which the job times out
#   - max_attempts - the maximum number of attempts (default: 5)
#
# The hash passed in as +options+ is not modified.
#
# Returns the result of the enclosing SQL.transaction block, which is the
# newly inserted row loaded into a Job.
#
# NOTE(review): the version pattern only accepts single-digit components
# ("0.2", but not "0.10"); confirm whether that is intended.
def enqueue_job(workflow, *args, options)
  expect!(workflow => String)
  expect!(options => {
    queue: [String, nil],
    version: [/\A\d(\.\d)+\z/, nil],
    parent_id: [Integer, nil],
    tags: [Hash, nil],
    timeout: [Numeric, nil],
    max_attempts: [Integer, nil]
  })

  workflow, workflow_method = parse_workflow(workflow)

  # Fill in defaults for missing or nil entries. merge (instead of update)
  # builds a new hash, so the caller's hash stays untouched and may even
  # be frozen.
  options = options.merge(DEFAULT_OPTIONS) { |_key, given, default| given || default }

  queue, version, parent_id, tags, timeout, max_attempts =
    options.values_at(:queue, :version, :parent_id, :tags, :timeout, :max_attempts)

  SQL.transaction do
    encoded_args = Encoder.encode(args)
    encoded_tags = tags ? Encoder.encode(tags) : nil

    id = SQL.ask <<~SQL, queue, workflow, workflow_method, version, encoded_args, parent_id, encoded_tags, max_attempts
      INSERT INTO #{TABLE_NAME}(queue, workflow, workflow_method, workflow_version, args, parent_id, tags, max_attempts)
      VALUES($1, $2, $3, $4, $5, $6, $7, $8)
      RETURNING id
    SQL

    if timeout
      SQL.ask <<~SQL, id, timeout
        UPDATE #{TABLE_NAME}
        SET timing_out_at = (now() at time zone 'utc') + $2 * interval '1 second'
        WHERE id=$1
      SQL
    end

    # A child job inherits the root_id of its parent, and its full_id is the
    # parent's full_id with the own id appended. A job without a parent is
    # its own root.
    root_id, parent_full_id =
      if parent_id
        SQL.ask "SELECT root_id, full_id FROM #{TABLE_NAME} WHERE id=$1", parent_id
      end

    root_id ||= id
    full_id = parent_full_id ? "#{parent_full_id}.#{id}" : "#{id}"

    SQL.ask "UPDATE #{TABLE_NAME} SET full_id=$2, root_id=$3 WHERE id=$1",
            id, full_id, root_id

    Notifications.notify_listeners

    SQL.record "SELECT * FROM #{TABLE_NAME} WHERE id=$1", id, into: Job
  end
end
93
+
94
# Marks +job+ as finished successfully (status 'ok') and stores its result.
#
# A non-nil +value+ is wrapped into a one-element array and passed through
# Encoder.encode; a nil value is stored as NULL. When +version+ is set it
# is recorded on the job first (see update_job). Finally the parent job,
# if there is one, is woken up.
def set_job_result(job, value, version:)
  update_job(job, version: version)

  encoded_result = value.nil? ? nil : Encoder.encode([value])

  SQL.ask <<~SQL, job.id, encoded_result
    UPDATE #{TABLE_NAME}
    SET results=$2, status='ok', next_run_at=NULL
    WHERE id=$1
  SQL

  wakeup(job.parent_id)
end
106
+
107
# Puts +job+ to sleep: sets its status to 'sleep' and clears next_run_at.
# When +version+ is set it is recorded on the job first (see update_job).
def set_job_pending(job, version:)
  update_job(job, version: version)

  sql = <<~SQL
    UPDATE #{TABLE_NAME}
    SET status='sleep', next_run_at=NULL
    WHERE id=$1
  SQL

  SQL.ask sql, job.id
end
115
+
116
+ private
117
+
118
# Returns (max_attempts - failed_attempts) as currently stored in the
# database row of +job+.
def remaining_attempts(job)
  sql = <<~SQL
    SELECT max_attempts - failed_attempts
    FROM #{TABLE_NAME}
    WHERE id=$1
  SQL

  SQL.ask sql, job.id
end
125
+
126
# Determines what happens to a job after an unsuccessful run.
#
# Returns a pair [ new_status, next_run_at_sql ], where next_run_at_sql is
# a SQL expression (not a value):
#
# - a recoverable error (+status+ is :err) with another attempt left yields
#   "ready" and the next_run_at_fragment expression;
# - a :timeout yields "timeout" and "NULL";
# - everything else yields "failed" and "NULL".
#
# Whether another attempt is left is derived from
# (max_attempts - failed_attempts). That number must be larger than 1,
# because this check runs *after* the current run finished, but before the
# failed attempt was written to the database. The database is only
# consulted for the :err status.
def next_status_and_next_run_of_failed_job(job, status)
  return [ "ready", next_run_at_fragment ] if status == :err && remaining_attempts(job) > 1
  return [ "timeout", "NULL" ] if status == :timeout

  [ "failed", "NULL" ]
end
144
+
145
+ public
146
+
147
# Records a failed run of +job+.
#
# Stores +error+, +error_message+ and the (optional) +error_backtrace+,
# increments failed_attempts, and sets the status and next_run_at as
# determined by next_status_and_next_run_of_failed_job for +status+.
# Backtrace entries are made relative to the working directory and encoded
# via Encoder.encode. When +version+ is set it is recorded on the job first
# (see update_job). Finally the parent job, if there is one, is woken up.
def set_job_error(job, error, error_message, error_backtrace = nil, status:, version:)
  update_job(job, version: version)

  new_status, next_run_at_sql = next_status_and_next_run_of_failed_job(job, status)

  encoded_backtrace =
    if error_backtrace.nil?
      nil
    else
      Encoder.encode(error_backtrace.map { |line| make_relative_path(line) })
    end

  # next_run_at_sql is a SQL expression built by this module, which is why
  # it is interpolated instead of being passed as a bind parameter.
  SQL.ask <<~SQL, job.id, new_status, error, error_message, encoded_backtrace
    UPDATE #{TABLE_NAME}
    SET
    status=$2, error=$3, error_message=$4, error_backtrace=$5,
    failed_attempts=failed_attempts+1, next_run_at=#{next_run_at_sql}
    WHERE id=$1
  SQL

  wakeup(job.parent_id)
end
167
+
168
+ private
169
+
170
# Strips the working directory (plus trailing slash) from the start of
# +path+. Paths outside of the working directory are returned unchanged.
# The working directory is looked up once and then memoized in @here.
def make_relative_path(path)
  @here ||= "#{Dir.getwd}/"
  return path unless path.start_with?(@here)

  path[@here.length..-1]
end
174
+
175
# Records the workflow version that processed +job+ and bumps updated_at.
# Does nothing (and returns nil) when +version+ is nil or false.
def update_job(job, version:)
  if version
    SQL.ask <<~SQL, job.id, version
      UPDATE #{TABLE_NAME}
      SET workflow_version=$2, updated_at=(now() at time zone 'utc')
      WHERE id=$1
    SQL
  end
end
184
+
185
#
# Returns the SQL expression that calculates when a failed job may run
# next: now (UTC) plus basetime * 1.5 ^ failed_attempts seconds. The
# expression is evaluated by the database, which is where failed_attempts
# comes from. The basetime is 10, or 0.01 when Postjob.fast_mode is set.
#
def next_run_at_fragment
  basetime = if Postjob.fast_mode then 0.01 else 10 end

  "(now() at time zone 'utc') + #{basetime} * pow(1.5, failed_attempts) * interval '1 second'"
end
194
+
195
# Wakes up the job with the given +id+: if it is currently in status
# 'sleep' it becomes 'ready' and is scheduled to run now. Listeners are
# notified afterwards. Does nothing when +id+ is nil.
def wakeup(id)
  if id
    SQL.ask <<~SQL, id
      UPDATE #{TABLE_NAME}
      SET status='ready', next_run_at=(now() at time zone 'utc'), updated_at=(now() at time zone 'utc')
      WHERE id=$1 AND status='sleep'
    SQL

    Notifications.notify_listeners
  end
end
206
+
207
+ public
208
+
209
# Returns all direct child jobs of +parent+ as Job records, ordered by id.
def childjobs(parent)
  expect!(parent => Job)

  sql = <<~SQL
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1
    ORDER BY id
  SQL

  SQL.records sql, parent.id, into: Job
end
218
+
219
# Returns the direct child job of +parent+ with the earliest next_run_at
# whose status is neither 'ok' nor 'failed', or nothing if there is none.
#
# NOTE(review): children in status 'timeout' also count as unresolved
# here - confirm that this is intended.
def next_unresolved_childjob(parent)
  expect!(parent => Job)

  sql = <<~SQL
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1 AND status NOT IN ('ok', 'failed')
    ORDER BY next_run_at
    LIMIT 1
  SQL

  SQL.record sql, parent.id, into: Job
end
229
+
230
# Returns the child job of +parent+ for the given workflow and arguments.
#
# An existing child job with the same workflow, workflow method and
# (encoded) arguments is reused. Otherwise a new job is enqueued in the
# parent's queue, with the given +timeout+ and +max_attempts+.
def find_or_create_childjob(parent, workflow, args, timeout:, max_attempts:)
  expect!(parent => Job, workflow => String, args => Array)

  workflow, workflow_method = parse_workflow(workflow)
  encoded_args = Encoder.encode(args)

  existing = SQL.record <<~SQL, parent.id, workflow, workflow_method, encoded_args, into: Job
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1
    AND workflow=$2
    AND workflow_method=$3
    AND args=$4
  SQL

  existing || enqueue_job("#{workflow}.#{workflow_method}", *args, queue: parent.queue, parent_id: parent.id, timeout: timeout, max_attempts: max_attempts)
end
247
+
248
# Stores +status+ in the workflow_status column of +job+.
#
# [TODO] Try to reduce the number of writes.
#
# As it stands a status may be written several times during a single run
# of a job. All of these writes happen within the same transaction and
# are therefore invisible to the outside.
def set_workflow_status(job, status)
  sql = <<~SQL
    UPDATE #{TABLE_NAME}
    SET workflow_status=$2
    WHERE id=$1
  SQL

  SQL.ask sql, job.id, status
end
260
+
261
+ private
262
+
263
# Splits a workflow description into its workflow name and method name.
#
# The description is split at the first "."; without a method part the
# method name defaults to "run":
#
#   parse_workflow("Foo")      # => ["Foo", "run"]
#   parse_workflow("Foo.bar")  # => ["Foo", "bar"]
#
# The workflow name must not be empty, and the method name must consist of
# lowercase letters, digits and underscores only, and must not start with
# a digit. Violations are reported via expect!.
def parse_workflow(workflow)
  workflow, workflow_method = workflow.split(".", 2)
  workflow_method ||= "run"

  expect!(workflow => /./)
  # \A and \z anchor the entire string. ^ and $ only anchor a single line,
  # which would accept names like "run\nanything".
  expect!(workflow_method => /\A[_a-z][_a-z0-9]*\z/)

  [workflow, workflow_method]
end
272
+
273
# Returns the SQL condition that selects runnable jobs: jobs in status
# 'ready' which are due, and whose (workflow, workflow_version) pair is
# known to Postjob::Registry. When the registry provides no pairs the
# condition is "FALSE".
def runnable_sql_fragment
  known_pairs = Postjob::Registry.sql_escaped_workflows_and_versions

  if known_pairs == ""
    "FALSE"
  else
    <<~SQL
      next_run_at <= (now() at time zone 'utc')
      AND status = 'ready'
      AND ((workflow, workflow_version) IN (#{known_pairs}))
    SQL
  end
end
283
+
284
# Returns the SQL condition that selects jobs which are timing out: jobs
# in status 'ready' or 'sleep' whose timing_out_at has been reached.
def timing_out_sql_fragment
  condition = <<~SQL
    timing_out_at <= (now() at time zone 'utc')
    AND status IN ('ready', 'sleep')
  SQL

  condition
end
290
+
291
+ public
292
+
293
# Checks out the next job that is either runnable or timing out.
#
# Inside a transaction this selects and row-locks (FOR UPDATE SKIP LOCKED)
# the job with the earliest next_run_at or timing_out_at. The selected row
# carries an extra +timed_out+ column, telling whether its timing_out_at
# has been reached. If a job was found it is yielded to the block, still
# within the transaction. The transaction block evaluates to the job, or
# to nothing if no job was found.
def checkout_runnable
  sql = <<~SQL
    SELECT
    *,
    timing_out_at <= (now() at time zone 'utc') AS timed_out
    FROM #{TABLE_NAME}
    WHERE
    (#{runnable_sql_fragment})
    OR
    (#{timing_out_sql_fragment})
    ORDER BY (LEAST(next_run_at, timing_out_at))
    FOR UPDATE SKIP LOCKED
    LIMIT 1
  SQL

  SQL.transaction do
    SQL.record(sql, into: Job).tap do |job|
      yield job if job
    end
  end
end
314
+
315
# Returns the token stored for +job+ in postjob.tokens. If there is none
# yet, a new random UUID is stored for the job and returned.
#
# NOTE(review): lookup and insert are two separate statements in this
# method - confirm that callers run this inside a transaction or that
# concurrent calls for the same job cannot happen.
def find_or_create_token(job)
  existing = SQL.ask("SELECT token FROM postjob.tokens WHERE postjob_id=$1", job.id)
  return existing if existing

  SecureRandom.uuid.tap do |fresh|
    SQL.ask("INSERT INTO postjob.tokens(postjob_id, token) VALUES($1, $2)", job.id, fresh)
  end
end
323
+
324
# Returns the job that +token+ was stored for (see find_or_create_token),
# loaded into a Job.
def find_job_by_token(token)
  sql = <<~SQL
    SELECT postjob.postjobs.* FROM postjob.postjobs
    INNER JOIN postjob.tokens ON postjob.tokens.postjob_id=postjob.postjobs.id
    WHERE postjob.tokens.token=$1
  SQL

  SQL.record sql, token, into: Job
end
331
+ end
@@ -0,0 +1,52 @@
1
+ # The registry holds a list of all available workflows
2
+ module Postjob::Registry
3
+ extend self
4
+
5
# Returns every registered workflow once, no matter under how many
# (name, version) keys it has been registered.
def workflows
  registered = instance.values
  registered.uniq
end
8
+
9
# Returns all registered (name, version) pairs as a SQL fragment of the
# form "('Foo', ''), ('Foo', '0.1')". The fragment is empty when nothing
# is registered. The result is memoized.
#
# [TODO] - it would be nicer if Simple::SQL would properly build a Postgres
# version of the workflow names and versions.
#
def sql_escaped_workflows_and_versions
  @sql_escaped_workflows_and_versions ||=
    instance.keys
            .map { |name, version| "(#{sql_escape(name)}, #{sql_escape(version)})" }
            .join(", ")
end
21
+
22
+ private
23
+
24
# Returns +s+ as a single-quoted SQL string literal, escaped via
# PG::Connection.escape_string.
def sql_escape(s)
  "'#{PG::Connection.escape_string(s)}'"
end
27
+
28
+ public
29
+
30
# Used for tests: forgets all registered workflows, and the SQL fragment
# memoized for them.
def reset! # :nodoc:
  @sql_escaped_workflows_and_versions = nil
  @instance = nil
end
34
+
35
# Registers +workflow+ under its name, for two versions: its own
# workflow_version, and the empty version "". A later registration under
# the same key replaces the earlier one.
#
# The second argument is accepted, but ignored.
#
# Returns the workflow.
def register(workflow, _options = {})
  instance[[workflow.name, ""]] = workflow
  instance[[workflow.name, workflow.workflow_version]] = workflow

  # The memoized SQL fragment is built from the registered keys. It must be
  # rebuilt after a registration, otherwise a workflow registered after the
  # fragment was first requested would never be included.
  @sql_escaped_workflows_and_versions = nil

  workflow
end
39
+
40
# Returns the workflow registered for +name+ and +version+ (both Strings).
# Raises a KeyError if there is no such registration.
def lookup!(name:, version:)
  expect!(name => String)
  expect!(version => String)

  key = [name, version]
  instance.fetch(key)
end
46
+
47
+ private
48
+
49
# The lazily created Hash holding all registrations, keyed by
# [name, version].
def instance
  return @instance if @instance

  @instance = {}
end
52
+ end
@@ -0,0 +1,153 @@
1
+ # rubocop:disable Style/RedundantSelf
2
+ module Postjob::Runner
3
+ extend self
4
+
5
+ extend Forwardable
6
+ delegate [:logger] => Postjob
7
+
8
+ Job = Postjob::Job
9
+
10
# Returns the job that is currently running in this thread, if any.
#
# The value is kept in Thread.current[:current_job]. It is set by
# +process_job+ (via +with_current_job+).
def current_job
  thread = Thread.current
  thread[:current_job]
end
17
+
18
+ private
19
+
20
# Runs the block with +job+ set as the current job of this thread, and
# returns the block's result. The current job is reset to nil afterwards,
# even if the block raises.
#
# Nesting is not supported: a current job must not be set already. This is
# checked via expect!.
def with_current_job(job)
  # This check is done outside of the begin/ensure below on purpose: when
  # a nested call is rejected, the current job of the outer call must stay
  # in place.
  expect!(current_job => nil)

  begin
    Thread.current[:current_job] = job
    yield
  ensure
    Thread.current[:current_job] = nil
  end
end
27
+
28
+ public
29
+
30
# Returns a subjob within the current job, for a +workflow+ description
# and +args+. The subjob is found or created via
# Postjob::Queue.find_or_create_childjob.
#
# +workflow+ is translated into a workflow name like this:
#
# - the symbol :manual becomes "__manual__" - there should never be a
#   workflow with that name;
# - any other Symbol becomes "CurrentWorkflow.#{workflow}", denoting the
#   +workflow+ method of the current job's workflow;
# - a Module is replaced by its name;
# - everything else is passed on unchanged.
#
# +timeout+ and +max_attempts+ are passed on as they are; both default to
# nil.
def async(workflow, *args, timeout: nil, max_attempts: nil)
  workflow_name =
    case workflow
    when :manual then "__manual__"
    when Symbol  then "#{current_job.workflow}.#{workflow}"
    when Module  then workflow.name
    else workflow
    end

  ::Postjob::Queue.find_or_create_childjob(current_job, workflow_name, args,
                                           timeout: timeout,
                                           max_attempts: max_attempts)
end
47
+
48
# Tries to resolve a job. Whenever the result is not available yet this
# throws :pending.
#
# +job+ may be one of:
#
# - :all - resolves all child jobs of the current job and returns their
#   results. Throws :pending if any child job is still unresolved.
# - a Job - returns the result of job.resolve, or throws :pending if that
#   result is :pending. No further arguments or options are allowed.
# - anything else - a workflow description; the child job is found or
#   created via +async+ (receiving +args+, +timeout+ and +max_attempts+)
#   and then awaited.
def await(job, *args, timeout: nil, max_attempts: nil)
  case job
  when :all
    unless Postjob::Queue.next_unresolved_childjob(current_job)
      return Postjob::Queue.childjobs(current_job).map(&:resolve)
    end

    Postjob.logger.warn "await :all: Found an unresolved childjob"
    throw :pending, :pending
  when Job
    expect!(args == [])
    expect!(timeout => nil, max_attempts => nil)

    result = job.resolve
    throw :pending, :pending if result == :pending
    result
  else
    await async(job, *args, timeout: timeout, max_attempts: max_attempts)
  end
end
70
+
71
# Statuses known to the runner. Frozen, so the list cannot be changed by
# accident.
STATUSES = %i[sleep ok err failed].freeze
72
+
73
#
# Runs a specific job.
#
# The workflow is looked up in the Postjob::Registry by the job's workflow
# name and version, and is invoked with the job set as the current job.
# The outcome is logged.
#
# Returns a tuple [ version, status, value ], where version is the
# workflow_version of the workflow that was run, and status and value are
# as returned by +invoke_workflow+:
#
# - <tt>[ version, :ok, value ]</tt>: job completed successfully
# - <tt>[ version, :pending, nil ]</tt>: job has to wait on a child job
# - <tt>[ version, :err, <err> ]</tt>: job errored with a recoverable error
# - <tt>[ version, :failed, <err> ]</tt>: job failed with a non-recoverable error
#
# <err> is a tuple [ error-class-name, error-message, stacktrace ].
#
def process_job(job)
  expect!(job => Job)

  workflow = Postjob::Registry.lookup!(name: job.workflow, version: job.workflow_version)

  with_current_job(job) do
    outcome = invoke_workflow(workflow, job)
    log_result!(job, *outcome)
    [ workflow.workflow_version, *outcome ]
  end
end
96
+
97
+ private
98
+
99
# Runs a job by calling the job's workflow method on +workflow+, with the
# job's arguments. A job without arguments (args is nil) is run with no
# arguments.
#
# Returns a [ status, value ] tuple:
#
# - [ :ok, value ] - the workflow method returned value
# - [ :pending, nil ] - :pending was thrown (or returned)
# - [ :err, <err> ] - a RuntimeError was raised
# - [ :failed, <err> ] - any other StandardError was raised; these are
#   also logged as errors
#
# <err> is built by +return_exception+.
def invoke_workflow(workflow, job)
  value = catch(:pending) do
    expect!(job.args => [Array, nil])

    workflow_method = job.workflow_method
    # nil passes the check above, and means "no arguments".
    args = job.args || []

    insp_args = args.map(&:inspect).join(", ")
    logger.info "Running Postjob##{job.id}: #{job.workflow}.#{workflow_method}(#{insp_args})"

    workflow.public_send(workflow_method, *args)
  end

  case value
  when :pending then [ :pending, nil ]
  else [ :ok, value ]
  end
rescue RuntimeError => e
  return_exception :err, e
rescue StandardError => e
  Postjob.logger.error "#{e}, from\n\t#{Array(e.backtrace).first(10).join("\n\t")}"
  return_exception :failed, e
end
123
+
124
# Builds the [ state, <err> ] tuple for an exception, where <err> is
# [ error-class-name, error-message, backtrace ]. The backtrace is limited
# to its first 10 entries; it is nil for an exception without a backtrace
# (i.e. one that has never been raised).
def return_exception(state, exception)
  error_backtrace = exception.backtrace&.first(10)
  [ state, [ exception.class.name, exception.to_s, error_backtrace ] ]
end
128
+
129
# Logs the outcome of a job run.
#
# - :err is logged as a warning for child jobs, and as an error for jobs
#   without a parent;
# - :failed is always logged as an error;
# - :ok is logged at info level for child jobs, and as a warning for jobs
#   without a parent. The message contains the result, and the time that
#   passed since the job was created.
#
# Other statuses are not logged.
def log_result!(job, status, value)
  case status
  when :err, :failed
    severity = status == :err && job.parent_id ? :warn : :error
    logger.send severity, error_message(job, status, value)
  when :ok
    elapsed = format("%.03f secs", Time.now.utc - job.created_at)
    severity = job.parent_id ? :info : :warn
    logger.send severity, "#{job} successful w/result #{value.inspect}: #{elapsed}"
  end
end
144
+
145
# Builds the log message for an unsuccessful job run. +value+ is the
# [ error-class-name, error-message, backtrace ] tuple of the error. The
# message contains the job, the status, the error's class name and
# message, and the time that passed since the job was created. The
# backtrace is not part of the message.
def error_message(job, status, value)
  elapsed = format("%.03f secs", Time.now.utc - job.created_at)
  error_class, err_message, _backtrace = value

  "#{job} #{status} #{error_class} #{err_message.inspect}: #{elapsed}"
end
153
+ end
@@ -0,0 +1,60 @@
1
+
2
+ # including the Workflow module marks a module as a Postjob::Job, and mixes
3
+ # in a couple of methods into the module (see Workflow::JobMethods)
4
+ module Postjob::Workflow
5
# Hook that runs when Postjob::Workflow is included. The including object
# must be a Module (and not a Class); it is extended with JobMethods.
def self.included(job)
  expect!(job.class.name => "Module")

  # Ideally this hook would also verify that the job has a public run
  # method. That, however, cannot work before the run method is defined.
  # The check would be possible with a job like
  #
  #   module Foo
  #     def self.run(arg); ..; end
  #     include Postjob::Workflow
  #   end
  #
  # but not when the include comes first:
  #
  #   module Foo
  #     include Postjob::Workflow
  #     def self.run(arg); ..; end
  #   end
  #
  # Therefore no such test is made here.
  job.extend(JobMethods)
end
27
+
28
# The methods that a workflow module is extended with when it includes
# Postjob::Workflow.
module JobMethods
  # Returns a child job of the current job. See Postjob::Runner.async.
  def async(runner, *args, timeout: nil, max_attempts: nil)
    ::Postjob::Runner.async(runner, *args, timeout: timeout, max_attempts: max_attempts)
  end

  # Resolves a job, or throws :pending. See Postjob::Runner.await.
  def await(job, *args, timeout: nil, max_attempts: nil)
    ::Postjob::Runner.await(job, *args, timeout: timeout, max_attempts: max_attempts)
  end

  # Returns the token for +job+, which must be a child job of the job that
  # is currently running. See Postjob::Queue.find_or_create_token.
  def workflow_token(job)
    running = ::Postjob::Runner.current_job
    expect!(job.parent_id => running.id)

    ::Postjob::Queue.find_or_create_token(job)
  end

  # Sets the workflow status of the job that is currently running.
  def set_workflow_status(status)
    ::Postjob::Queue.set_workflow_status(::Postjob::Runner.current_job, status)
  end

  # The version of this workflow; "0.0" unless set via
  # set_workflow_version.
  def workflow_version
    return "0.0" unless @workflow_version

    @workflow_version
  end

  # The name of this workflow, including its version, e.g. "Foo@0.0".
  def workflow_name
    [ name, workflow_version ].join("@")
  end

  private

  # Sets the version of this workflow.
  def set_workflow_version(workflow_version)
    @workflow_version = workflow_version
  end
end
60
+ end