postjob 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +23 -0
  3. data/bin/postjob +11 -0
  4. data/lib/postjob/cli/db.rb +39 -0
  5. data/lib/postjob/cli/job.rb +67 -0
  6. data/lib/postjob/cli/ps.rb +110 -0
  7. data/lib/postjob/cli/run.rb +19 -0
  8. data/lib/postjob/cli.rb +31 -0
  9. data/lib/postjob/error.rb +16 -0
  10. data/lib/postjob/job.rb +66 -0
  11. data/lib/postjob/migrations.rb +97 -0
  12. data/lib/postjob/queue/encoder.rb +40 -0
  13. data/lib/postjob/queue/notifications.rb +72 -0
  14. data/lib/postjob/queue/search.rb +82 -0
  15. data/lib/postjob/queue.rb +331 -0
  16. data/lib/postjob/registry.rb +52 -0
  17. data/lib/postjob/runner.rb +153 -0
  18. data/lib/postjob/workflow.rb +60 -0
  19. data/lib/postjob.rb +170 -0
  20. data/spec/postjob/enqueue_spec.rb +86 -0
  21. data/spec/postjob/full_workflow_spec.rb +86 -0
  22. data/spec/postjob/job_control/manual_spec.rb +45 -0
  23. data/spec/postjob/job_control/max_attempts_spec.rb +70 -0
  24. data/spec/postjob/job_control/timeout_spec.rb +31 -0
  25. data/spec/postjob/job_control/workflow_status_spec.rb +52 -0
  26. data/spec/postjob/process_job_spec.rb +25 -0
  27. data/spec/postjob/queue/encoder_spec.rb +46 -0
  28. data/spec/postjob/queue/search_spec.rb +141 -0
  29. data/spec/postjob/run_spec.rb +69 -0
  30. data/spec/postjob/step_spec.rb +26 -0
  31. data/spec/postjob/sub_workflow_spec.rb +27 -0
  32. data/spec/spec_helper.rb +35 -0
  33. data/spec/support/configure_active_record.rb +18 -0
  34. data/spec/support/configure_database.rb +19 -0
  35. data/spec/support/configure_simple_sql.rb +17 -0
  36. data/spec/support/connect_active_record.rb +6 -0
  37. data/spec/support/test_helper.rb +53 -0
  38. metadata +269 -0
@@ -0,0 +1,331 @@
1
+ # rubocop:disable Layout/IndentationWidth
2
+ # rubocop:disable Style/UnneededInterpolation
3
+ # rubocop:disable Metrics/ModuleLength
4
+ # rubocop:disable Metrics/LineLength
5
+ # rubocop:disable Lint/EndAlignment
6
+ # rubocop:disable Layout/SpaceInsideArrayLiteralBrackets
7
+
8
# The Postjob::Queue manages enqueueing and fetching jobs from a job queue.
#
# This first opening of the module only sets up its constants.
module Postjob::Queue
  extend self

  # Name of the database schema. Frozen, so the shared constant cannot be
  # modified in place by accident.
  SCHEMA_NAME = "postjob".freeze

  # Schema-qualified name of the jobs table.
  TABLE_NAME = "#{SCHEMA_NAME}.postjobs".freeze

  # Shortcut for the SQL helper module.
  SQL = ::Simple::SQL
end
17
+
18
+ require_relative "queue/encoder"
19
+ require_relative "queue/notifications"
20
+ require_relative "queue/search"
21
+
22
+ module Postjob::Queue
23
+ Job = ::Postjob::Job
24
+
25
# Values used by enqueue_job for options that are missing or nil.
DEFAULT_OPTIONS = { version: "", queue: "q", max_attempts: 5 }.freeze
30
+
31
# Enqueues a new job with the given arguments and returns it.
#
# Parameters:
#
# - workflow - the name of the workflow, optionally followed by a method
#   name (e.g. "FooBar", "FooBar.method_name"). See parse_workflow.
# - args - the arguments of the job; stored after Encoder.encode
# - options - a Hash with the following (optional) entries. Missing or nil
#   entries fall back to DEFAULT_OPTIONS where a default exists:
#   - queue - the name of the queue
#   - version - the version of the workflow, e.g. "0.2"
#   - parent_id - the id of the parent job, if any
#   - tags - a Hash; stored after Encoder.encode
#   - timeout - a number of seconds; sets timing_out_at relative to now
#   - max_attempts - an Integer
#
# The passed-in options hash is never modified.
#
# Returns the freshly inserted row, loaded into a Job.
def enqueue_job(workflow, *args, options)
  expect! workflow => String
  expect! options => {
    queue: [String, nil],
    version: [/\A\d(\.\d)+\z/, nil],
    parent_id: [Integer, nil],
    tags: [Hash, nil],
    timeout: [Numeric, nil],
    max_attempts: [Integer, nil]
  }

  workflow, workflow_method = parse_workflow(workflow)

  # Apply the defaults on a copy: updating in place would leak the defaults
  # into the caller's hash, and would raise if that hash is frozen.
  options = options.merge(DEFAULT_OPTIONS) { |_key, given, default| given || default }

  SQL.transaction do
    queue, version, parent_id, tags, timeout, max_attempts =
      options.values_at :queue, :version, :parent_id, :tags, :timeout, :max_attempts

    args = Encoder.encode(args) if args
    tags = Encoder.encode(tags) if tags

    id = SQL.ask <<~SQL, queue, workflow, workflow_method, version, args, parent_id, tags, max_attempts
      INSERT INTO #{TABLE_NAME}(queue, workflow, workflow_method, workflow_version, args, parent_id, tags, max_attempts)
      VALUES($1, $2, $3, $4, $5, $6, $7, $8)
      RETURNING id
    SQL

    if timeout
      SQL.ask <<~SQL, id, timeout
        UPDATE #{TABLE_NAME}
        SET timing_out_at = (now() at time zone 'utc') + $2 * interval '1 second'
        WHERE id=$1
      SQL
    end

    # A child job inherits the root of its parent, and its full_id is the
    # parent's full_id extended by its own id. A job without a parent is its
    # own root.
    root_id, parent_full_id = if parent_id
      SQL.ask "SELECT root_id, full_id FROM #{TABLE_NAME} WHERE id=$1", parent_id
    end

    root_id ||= id
    full_id = parent_full_id ? "#{parent_full_id}.#{id}" : "#{id}"

    SQL.ask "UPDATE #{TABLE_NAME} SET full_id=$2, root_id=$3 WHERE id=$1",
            id, full_id, root_id

    Notifications.notify_listeners

    SQL.record "SELECT * FROM #{TABLE_NAME} WHERE id=$1", id, into: Job
  end
end
93
+
94
# Marks the job as successfully finished ('ok') and stores its result.
#
# The result is stored as an encoded one-element array, or as NULL if value
# is nil. Afterwards the parent job (if any) is woken up.
#
# - version - if set, the job's workflow_version is updated first (see update_job).
def set_job_result(job, value, version:)
  update_job(job, version: version)

  encoded_results = value.nil? ? nil : Encoder.encode([value])
  statement = <<~SQL
    UPDATE #{TABLE_NAME}
    SET results=$2, status='ok', next_run_at=NULL
    WHERE id=$1
  SQL
  SQL.ask statement, job.id, encoded_results

  wakeup(job.parent_id)
end
106
+
107
# Puts the job to sleep: its status becomes 'sleep' and next_run_at is cleared.
#
# - version - if set, the job's workflow_version is updated first (see update_job).
def set_job_pending(job, version:)
  update_job(job, version: version)

  statement = <<~SQL
    UPDATE #{TABLE_NAME}
    SET status='sleep', next_run_at=NULL
    WHERE id=$1
  SQL
  SQL.ask statement, job.id
end
115
+
116
+ private
117
+
118
# Asks the database for (max_attempts - failed_attempts) of the given job.
def remaining_attempts(job)
  query = <<~SQL
    SELECT max_attempts - failed_attempts
    FROM #{TABLE_NAME}
    WHERE id=$1
  SQL
  SQL.ask query, job.id
end
125
+
126
# Determines what happens to a job after an unsuccessful run.
#
# Returns a pair [ new_status, next_run_at ], where next_run_at is a SQL
# fragment:
#
# - a :timeout always ends in [ "timeout", "NULL" ];
# - a recoverable error (:err) with another attempt left is rescheduled:
#   [ "ready", next_run_at_fragment ];
# - everything else ends in [ "failed", "NULL" ].
#
# The remaining attempts are only looked up for :err. We require more than
# one remaining attempt, since this check runs *after* the current run was
# done, but before its failure was written to the database.
def next_status_and_next_run_of_failed_job(job, status)
  return [ "timeout", "NULL" ] if status == :timeout
  return [ "failed", "NULL" ] unless status == :err && remaining_attempts(job) > 1

  [ "ready", next_run_at_fragment ]
end
144
+
145
+ public
146
+
147
# Records an unsuccessful run of a job.
#
# Stores error, error_message and the (encoded) backtrace, increments
# failed_attempts, and sets the new status and next_run_at as determined by
# next_status_and_next_run_of_failed_job. Afterwards the parent job (if any)
# is woken up.
#
# - error_backtrace - an optional list of backtrace lines; each entry is
#   passed through make_relative_path before encoding.
# - status - the status reported for this run
# - version - if set, the job's workflow_version is updated first (see update_job).
def set_job_error(job, error, error_message, error_backtrace = nil, status:, version:)
  update_job(job, version: version)

  new_status, next_run_at = next_status_and_next_run_of_failed_job(job, status)

  encoded_backtrace =
    if error_backtrace.nil?
      nil
    else
      Encoder.encode(error_backtrace.map { |line| make_relative_path(line) })
    end

  # next_run_at is a SQL fragment and is therefore interpolated, not bound.
  statement = <<~SQL
    UPDATE #{TABLE_NAME}
    SET
      status=$2, error=$3, error_message=$4, error_backtrace=$5,
      failed_attempts=failed_attempts+1, next_run_at=#{next_run_at}
    WHERE id=$1
  SQL
  SQL.ask statement, job.id, new_status, error, error_message, encoded_backtrace

  wakeup(job.parent_id)
end
167
+
168
+ private
169
+
170
# Strips the current working directory from the start of a path. Paths
# outside of the working directory are returned unchanged.
#
# The working directory is determined once and then memoized.
def make_relative_path(path)
  @here ||= "#{Dir.getwd}/"
  return path unless path.start_with?(@here)

  path[@here.length..-1]
end
174
+
175
# Stores the workflow version with the job, and refreshes its updated_at
# timestamp. Does nothing (and returns nil) if no version is given.
def update_job(job, version:)
  if version
    statement = <<~SQL
      UPDATE #{TABLE_NAME}
      SET workflow_version=$2, updated_at=(now() at time zone 'utc')
      WHERE id=$1
    SQL
    SQL.ask statement, job.id, version
  end
end
184
+
185
#
# Returns the SQL expression which calculates when a failed job may run
# next: the current UTC time plus a delay that grows by a factor of 1.5
# with each failed attempt.
#
# The base delay is 10 seconds, or 0.01 seconds if Postjob.fast_mode is set.
#
def next_run_at_fragment
  basetime = if Postjob.fast_mode then 0.01 else 10 end

  "(now() at time zone 'utc') + #{basetime} * pow(1.5, failed_attempts) * interval '1 second'"
end
194
+
195
# Wakes up the job with the given id: if it is currently sleeping it becomes
# 'ready' to run right away. Listeners are notified in any case.
#
# Does nothing if id is nil (or false).
def wakeup(id)
  return unless id

  statement = <<~SQL
    UPDATE #{TABLE_NAME}
    SET status='ready', next_run_at=(now() at time zone 'utc'), updated_at=(now() at time zone 'utc')
    WHERE id=$1 AND status='sleep'
  SQL
  SQL.ask statement, id

  Notifications.notify_listeners
end
206
+
207
+ public
208
+
209
# Returns all direct child jobs of the parent job, ordered by id.
def childjobs(parent)
  expect! parent => Job

  query = <<~SQL
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1
    ORDER BY id
  SQL
  SQL.records query, parent.id, into: Job
end
218
+
219
# Returns a child job of the parent which is neither 'ok' nor 'failed', or
# nil if there is none. If there are several, the one with the earliest
# next_run_at is returned.
#
# NOTE(review): jobs in status 'timeout' are not excluded here and are
# therefore reported as unresolved; confirm that this is intended.
def next_unresolved_childjob(parent)
  expect! parent => Job

  query = <<~SQL
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1 AND status NOT IN ('ok', 'failed')
    ORDER BY next_run_at
    LIMIT 1
  SQL
  SQL.record query, parent.id, into: Job
end
229
+
230
# Returns the child job of parent for the given workflow and arguments.
#
# If a child job with the same workflow, workflow method and (encoded)
# arguments already exists it is returned; timeout and max_attempts are
# ignored in that case. Otherwise a new child job is enqueued into the
# parent's queue.
def find_or_create_childjob(parent, workflow, args, timeout:, max_attempts:)
  expect! parent => Job, workflow => String, args => Array

  workflow, workflow_method = parse_workflow(workflow)

  lookup = <<~SQL
    SELECT * FROM #{TABLE_NAME}
    WHERE parent_id=$1
    AND workflow=$2
    AND workflow_method=$3
    AND args=$4
  SQL
  existing = SQL.record lookup, parent.id, workflow, workflow_method, Encoder.encode(args), into: Job

  existing || enqueue_job("#{workflow}.#{workflow_method}", *args, queue: parent.queue, parent_id: parent.id, timeout: timeout, max_attempts: max_attempts)
end
247
+
248
# Stores the workflow_status of a job.
#
# [TODO] Try to reduce the number of writes: this may be called multiple
# times within a single run of a job (all within the same transaction and
# therefore invisible to the outside), and each call writes to the database.
def set_workflow_status(job, status)
  statement = <<~SQL
    UPDATE #{TABLE_NAME}
    SET workflow_status=$2
    WHERE id=$1
  SQL
  SQL.ask statement, job.id, status
end
260
+
261
+ private
262
+
263
# Splits a workflow description into [ workflow, workflow_method ].
#
# The description is split at the first "."; without a method part the
# method defaults to "run":
#
#   parse_workflow("Foo")     # => [ "Foo", "run" ]
#   parse_workflow("Foo.bar") # => [ "Foo", "bar" ]
#
# Fails an expectation if the workflow part is empty or if the method part
# is not a lower case identifier.
def parse_workflow(workflow)
  workflow, workflow_method = workflow.split(".", 2)
  workflow_method ||= "run"

  expect! workflow => /./
  # \A and \z anchor the entire string. With ^ and $ a multi-line value like
  # "run\nanything" would pass, since these only anchor a single line.
  expect! workflow_method => /\A[_a-z][_a-z0-9]*\z/

  [workflow, workflow_method]
end
272
+
273
# Returns the SQL condition matching jobs that can be run right now: jobs
# that are 'ready', due, and belong to one of the registered workflows and
# versions. Without any registered workflow the condition is "FALSE".
def runnable_sql_fragment
  workflows_and_versions = Postjob::Registry.sql_escaped_workflows_and_versions

  if workflows_and_versions == ""
    "FALSE"
  else
    <<~SQL
      next_run_at <= (now() at time zone 'utc')
      AND status = 'ready'
      AND ((workflow, workflow_version) IN (#{workflows_and_versions}))
    SQL
  end
end
283
+
284
# Returns the SQL condition matching jobs which have timed out: their
# timing_out_at has passed while they are still 'ready' or 'sleep'.
def timing_out_sql_fragment
  fragment = <<~SQL
    timing_out_at <= (now() at time zone 'utc')
    AND status IN ('ready', 'sleep')
  SQL
  fragment
end
290
+
291
+ public
292
+
293
# Checks out the next job that is either runnable or has timed out.
#
# The job row is selected FOR UPDATE SKIP LOCKED inside a transaction, and
# is yielded to the block within that transaction. The job carries an
# additional timed_out column.
#
# Returns the job, or nil if there is nothing to do (the block is not
# called in that case).
def checkout_runnable
  query = <<~SQL
    SELECT
      *,
      timing_out_at <= (now() at time zone 'utc') AS timed_out
    FROM #{TABLE_NAME}
    WHERE
      (#{runnable_sql_fragment})
      OR
      (#{timing_out_sql_fragment})
    ORDER BY (LEAST(next_run_at, timing_out_at))
    FOR UPDATE SKIP LOCKED
    LIMIT 1
  SQL

  SQL.transaction do
    SQL.record(query, into: Job).tap { |job| yield job if job }
  end
end
314
+
315
# Returns the token of a job. If the job has no token yet, a random UUID is
# generated and stored.
#
# NOTE(review): lookup and insert are two separate statements; confirm that
# callers never run this concurrently for the same job.
def find_or_create_token(job)
  SQL.ask("SELECT token FROM postjob.tokens WHERE postjob_id=$1", job.id) || begin
    fresh_token = SecureRandom.uuid
    SQL.ask "INSERT INTO postjob.tokens(postjob_id, token) VALUES($1, $2)", job.id, fresh_token
    fresh_token
  end
end
323
+
324
# Returns the job that the given token belongs to, loaded into a Job.
def find_job_by_token(token)
  query = <<~SQL
    SELECT postjob.postjobs.* FROM postjob.postjobs
    INNER JOIN postjob.tokens ON postjob.tokens.postjob_id=postjob.postjobs.id
    WHERE postjob.tokens.token=$1
  SQL
  SQL.record query, token, into: Job
end
331
+ end
@@ -0,0 +1,52 @@
1
+ # The registry holds a list of all available workflows
2
+ module Postjob::Registry
3
+ extend self
4
+
5
# Returns all registered workflows, each of them only once.
def workflows
  registered = instance.values
  registered.uniq
end
8
+
9
# Returns all registered (name, version) combinations as a SQL list, e.g.
# "('Foo', ''), ('Foo', '0.2')", or "" if nothing is registered. The result
# is memoized.
#
# [TODO] - it would be nicer if Simple::SQL would properly build a Postgres
# version of the workflow names and versions.
#
def sql_escaped_workflows_and_versions
  @sql_escaped_workflows_and_versions ||=
    instance.keys
            .map { |(name, workflow_version)| "(#{sql_escape(name)}, #{sql_escape(workflow_version)})" }
            .join(", ")
end
21
+
22
+ private
23
+
24
# Returns s as a single-quoted SQL string literal, escaped via
# PG::Connection.escape_string.
def sql_escape(s)
  "'#{PG::Connection.escape_string(s)}'"
end
27
+
28
+ public
29
+
30
# Used for tests: forgets all registered workflows, including the memoized
# SQL list of workflow names and versions.
def reset! # :nodoc:
  @sql_escaped_workflows_and_versions = nil
  @instance = nil
end
34
+
35
# Registers a workflow.
#
# The workflow is registered under its name and its workflow_version, and
# also under its name and the empty version "".
#
# - _options - currently unused.
#
# Returns the workflow.
def register(workflow, _options = {})
  instance[[workflow.name, ""]] = workflow
  instance[[workflow.name, workflow.workflow_version]] = workflow

  # The SQL list of workflows and versions is memoized. It must be rebuilt
  # after each registration; a workflow registered after the list was first
  # built would otherwise never show up in it.
  @sql_escaped_workflows_and_versions = nil

  workflow
end
39
+
40
# Returns the workflow registered under the given name and version. Both
# must be Strings. Raises a KeyError if there is no such workflow.
def lookup!(name:, version:)
  expect! name => String
  expect! version => String

  key = [name, version]
  instance.fetch(key)
end
46
+
47
+ private
48
+
49
# The Hash holding all registrations; keys are [ name, version ] pairs.
# Created on first use.
def instance
  @instance || (@instance = {})
end
52
+ end
@@ -0,0 +1,153 @@
1
+ # rubocop:disable Style/RedundantSelf
2
+ module Postjob::Runner
3
+ extend self
4
+
5
+ extend Forwardable
6
+ delegate [:logger] => Postjob
7
+
8
+ Job = Postjob::Job
9
+
10
# Returns the job that is currently running in this thread, or nil.
#
# The value is kept in Thread.current[:current_job]; it is set for the
# duration of a block by +with_current_job+.
def current_job
  running_in = Thread.current
  running_in[:current_job]
end
17
+
18
+ private
19
+
20
# Runs the block with +job+ as the current job (see +current_job+), and
# returns the result of the block. The current job is reset to nil when the
# block is left, also if it raises.
#
# There must be no current job when this method is called. If there is one
# the expectation fails, and the current job is left untouched.
def with_current_job(job)
  # This guard must stay outside of the begin/ensure block: a failing guard
  # would otherwise run the ensure clause, which would clear the marker of
  # the job that is still running.
  expect! current_job => nil

  begin
    Thread.current[:current_job] = job
    yield
  ensure
    Thread.current[:current_job] = nil
  end
end
27
+
28
+ public
29
+
30
# Returns a subjob within the current job, for a +workflow+ description
# and +args+. The subjob is looked up or created via
# Postjob::Queue.find_or_create_childjob.
#
# The +workflow+ can be
#
# - the symbol :manual, which is changed into "__manual__" - there should
#   never be a workflow with that name;
# - any other Symbol, which is changed into "CurrentWorkflow.#{workflow}",
#   denoting the +workflow+ method of the current job's workflow;
# - a Module, which is changed into its name;
# - anything else (e.g. a String), which is passed on unchanged.
#
# +timeout+ and +max_attempts+ are passed on to the queue. Both are
# optional, like in +await+.
def async(workflow, *args, timeout: nil, max_attempts: nil)
  workflow_name =
    case workflow
    when :manual then "__manual__"
    when Symbol then "#{current_job.workflow}.#{workflow}"
    when Module then workflow.name
    else workflow
    end

  ::Postjob::Queue.find_or_create_childjob(current_job, workflow_name, args,
                                           timeout: timeout,
                                           max_attempts: max_attempts)
end
47
+
48
# Tries to resolve a job. Throws :pending if that is not yet possible.
#
# - <tt>await :all</tt> returns the resolved values of all child jobs of
#   the current job, but throws :pending as long as there is an unresolved
#   child job.
# - <tt>await job</tt> returns the resolved value of the job, and throws
#   :pending if the job resolves to :pending. No further arguments or
#   options are allowed.
# - <tt>await workflow, *args</tt> creates (or finds) the child job via
#   +async+, and then awaits that job.
def await(job, *args, timeout: nil, max_attempts: nil)
  case job
  when :all
    if Postjob::Queue.next_unresolved_childjob(current_job)
      Postjob.logger.warn "await :all: Found an unresolved childjob"
      throw :pending, :pending
    end

    Postjob::Queue.childjobs(current_job).map(&:resolve)
  when Job
    expect! args == []
    expect! timeout => nil, max_attempts => nil

    resolved = job.resolve
    resolved == :pending ? throw(:pending, :pending) : resolved
  else
    child = async(job, *args, timeout: timeout, max_attempts: max_attempts)
    await(child)
  end
end
70
+
71
# Frozen, so that the shared list cannot be modified in place by accident.
STATUSES = %i[sleep ok err failed].freeze
72
+
73
#
# Runs a specific job.
#
# The workflow is looked up in the registry by the job's workflow name and
# version; the job is the current job while the workflow runs.
#
# Returns a tuple [ <version>, status, value ], where <version> is the
# workflow_version of the workflow that ran the job, and status and value
# are as returned by +invoke_workflow+:
#
# - <tt>[ <version>, :ok, value ]</tt>: job completed successfully
# - <tt>[ <version>, :pending, nil ]</tt>: job has to wait, e.g. on a child job
# - <tt>[ <version>, :err, <err> ]</tt>: job errored with a recoverable error
# - <tt>[ <version>, :failed, <err> ]</tt>: job failed with a non-recoverable error
#
# <err> is a tuple [ error-class-name, error-message, stacktrace ].
#
def process_job(job)
  expect! job => Job

  workflow = Postjob::Registry.lookup!(name: job.workflow, version: job.workflow_version)

  with_current_job(job) do
    status, value = invoke_workflow(workflow, job)
    log_result!(job, status, value)

    [workflow.workflow_version, status, value]
  end
end
96
+
97
+ private
98
+
99
# Runs a job by calling the job's workflow method on the workflow, with the
# job's arguments. Returns a [ status, value ] tuple:
#
# - <tt>[ :ok, value ]</tt>: the workflow method returned value
# - <tt>[ :pending, nil ]</tt>: :pending was thrown (or returned)
# - <tt>[ :err, <err> ]</tt>: a RuntimeError was raised
# - <tt>[ :failed, <err> ]</tt>: any other StandardError was raised
#
# <err> is built by +return_exception+.
def invoke_workflow(workflow, job)
  value = catch(:pending) do
    expect! job.args => [Array, nil]

    workflow_method = job.workflow_method
    # A job may have no arguments at all (nil): treat this as an empty
    # argument list instead of failing on nil below.
    args = job.args || []

    insp_args = args.map(&:inspect).join(", ")
    logger.info "Running Postjob##{job.id}: #{job.workflow}.#{workflow_method}(#{insp_args})"

    workflow.public_send workflow_method, *args
  end

  case value
  when :pending then [ :pending, nil ]
  else [ :ok, value ]
  end
rescue RuntimeError => e
  return_exception :err, e
rescue StandardError => e
  Postjob.logger.error "#{e}, from\n\t#{e.backtrace[0, 10].join("\n\t")}"
  return_exception :failed, e
end
123
+
124
# Builds the [ state, <err> ] tuple for an exception, where <err> is
# [ error-class-name, error-message, backtrace ]. The backtrace is limited
# to its first 10 entries.
def return_exception(state, exception)
  details = [exception.class.name, exception.to_s, exception.backtrace.first(10)]
  [ state, details ]
end
128
+
129
# Logs the outcome of a job run.
#
# - :err is logged as an error for jobs without a parent, and as a warning
#   for child jobs;
# - :failed is always logged as an error;
# - :ok is logged as a warning for jobs without a parent, and as info for
#   child jobs. The reported time is measured from the job's created_at.
#
# Any other status is not logged.
def log_result!(job, status, value)
  case status
  when :err
    logger.send(job.parent_id ? :warn : :error, error_message(job, status, value))
  when :failed
    logger.error error_message(job, status, value)
  when :ok
    elapsed = format("%.03f secs", Time.now.utc - job.created_at)
    logger.send(job.parent_id ? :info : :warn, "#{job} successful w/result #{value.inspect}: #{elapsed}")
  end
end
144
+
145
# Builds the log message for an unsuccessful job run.
#
# value is the <err> tuple [ error-class-name, error-message, backtrace ];
# the backtrace is not part of the message. The reported time is measured
# from the job's created_at.
def error_message(job, status, value)
  elapsed = format("%.03f secs", Time.now.utc - job.created_at)
  error_class, err_message = value

  "#{job} #{status} #{error_class} #{err_message.inspect}: #{elapsed}"
end
153
+ end
@@ -0,0 +1,60 @@
1
+
2
+ # including the Workflow module marks a module as a Postjob::Job, and mixes
3
+ # in a couple of methods into the module (see Workflow::JobMethods)
4
+ module Postjob::Workflow
5
# Called when the Workflow module is included: extends the including module
# with the JobMethods. Only modules (and not, for example, classes) may
# include the Workflow module.
def self.included(job)
  expect! job.class.name => "Module"

  # It would be nice to verify here that the job has a public run method.
  # This is not possible in general, though, since the run method might only
  # be defined after the include:
  #
  #   module Foo
  #     include Postjob::Workflow
  #     def self.run(arg); ..; end
  #   end
  #
  # The check would only work the other way around:
  #
  #   module Foo
  #     def self.run(arg); ..; end
  #     include Postjob::Workflow
  #   end
  #
  # We therefore do not make this test here.
  job.extend(JobMethods)
end
27
+
28
# The methods available on each workflow module.
module JobMethods
  # Forwards to Postjob::Runner.async.
  def async(runner, *args, timeout: nil, max_attempts: nil)
    forwarded = { timeout: timeout, max_attempts: max_attempts }
    ::Postjob::Runner.async(runner, *args, **forwarded)
  end

  # Forwards to Postjob::Runner.await.
  def await(job, *args, timeout: nil, max_attempts: nil)
    forwarded = { timeout: timeout, max_attempts: max_attempts }
    ::Postjob::Runner.await(job, *args, **forwarded)
  end

  # Returns the token for a job, which must be a child of the current job.
  def workflow_token(job)
    expect! job.parent_id => ::Postjob::Runner.current_job.id

    ::Postjob::Queue.find_or_create_token(job)
  end

  # Sets the workflow status of the current job.
  def set_workflow_status(status)
    ::Postjob::Queue.set_workflow_status(::Postjob::Runner.current_job, status)
  end

  # The version of this workflow; "0.0" unless set via set_workflow_version.
  def workflow_version
    @workflow_version ? @workflow_version : "0.0"
  end

  # The name of the module and the workflow version, joined by "@".
  def workflow_name
    [name, workflow_version].join("@")
  end

  private

  # Sets the version of this workflow.
  def set_workflow_version(workflow_version)
    @workflow_version = workflow_version
  end
end
60
+ end