durable_flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DurableFlow
4
+ class Workflow < ActiveJob::Base
5
+ include ActiveJob::Continuable
6
+
7
+ before_enqueue :record_enqueued_workflow
8
+
9
+ alias_method :continuable_step, :step
10
+
11
+ attr_reader :workflow_run
12
+
13
+ def checkpoint!
14
+ refresh_execution_lock!
15
+ interrupt!(reason: :stopping) if queue_adapter.respond_to?(:stopping?) && queue_adapter.stopping?
16
+ end
17
+
18
+ def step(name = nil, start: nil, isolated: false, &block)
19
+ return StepProxy.new(self) if name.nil?
20
+
21
+ durable_step(name, start: start, isolated: isolated, &block)
22
+ end
23
+
24
    # Durable sleep: persists the wake time on the step record so the deadline
    # survives process restarts, then pauses/interrupts the job until it passes.
    #
    # name        - step identifier (Symbol or String).
    # duration    - relative delay used when no wake time is stored yet.
    # until_time: - absolute wake time; takes precedence over duration.
    #
    # Raises ArgumentError when neither a duration nor until: time is available.
    def sleep_step(name, duration = nil, until_time: nil)
      durable_step(name) do
        step_record = current_workflow_step
        metadata = step_record.metadata_hash
        # Reuse a previously persisted wake_at so re-executions keep the
        # original deadline instead of restarting the clock.
        wake_at = parse_time(metadata["wake_at"]) || time_from(duration, explicit_time: until_time)

        raise ArgumentError, "Provide a duration or until: time for sleep step #{name.inspect}" unless wake_at

        if Time.current < wake_at
          # iso8601(9) keeps nanosecond precision across the serialize/parse round trip.
          metadata["wake_at"] = wake_at.utc.iso8601(9)
          step_record.update!(status: "sleeping", metadata: metadata)
          pause_or_interrupt!(reason: :sleeping, status: "sleeping", resume_options: { wait_until: wake_at })
        end

        # Sleep steps have no result value.
        nil
      end
    end
41
+
42
    # Durable wait for an external event. Each execution checks, in order:
    # matched event -> return its payload; timeout elapsed -> fail the step;
    # otherwise persist "waiting" state and pause/interrupt the job
    # (re-scheduled at the timeout when one exists).
    #
    # Raises WaitTimeoutError when timeout_at has passed without a match.
    def wait_for_event_step(name, event_name:, timeout:, match:)
      durable_step(name) do
        step_record = current_workflow_step
        wait = find_or_initialize_wait(step_record, event_name: event_name, timeout: timeout, match: match)

        if (event = matched_event_for(wait))
          wait.update!(status: "matched", workflow_event: event)
          event.payload_value
        elsif wait.timeout_at && Time.current >= wait.timeout_at
          wait.update!(status: "timed_out")
          step_record.update!(status: "failed", metadata: step_record.metadata_hash.merge("timeout_at" => wait.timeout_at.utc.iso8601(9)))
          raise WaitTimeoutError.new(event_name: event_name.to_s, step_name: name.to_s)
        else
          # .compact drops timeout_at from metadata when no timeout was given.
          step_record.update!(
            status: "waiting",
            metadata: step_record.metadata_hash.merge(
              "event_name" => event_name.to_s,
              "timeout_at" => wait.timeout_at&.utc&.iso8601(9),
            ).compact,
          )

          if wait.timeout_at
            # With a timeout the job re-enqueues itself to fire at the deadline.
            pause_or_interrupt!(reason: :waiting, status: "waiting", resume_options: { wait_until: wait.timeout_at })
          else
            # Without a timeout the run pauses until something re-enqueues it.
            pause_or_interrupt!(reason: :waiting, status: "waiting")
          end
        end
      end
    end
71
+
72
+ def wait_for_workflow(name, workflow_or_run_id, timeout: nil)
73
+ StepProxy.new(self).wait_for_workflow(name, workflow_or_run_id, timeout: timeout)
74
+ end
75
+
76
+ def log
77
+ @workflow_logger ||= WorkflowLogger.new(self)
78
+ end
79
+
80
+ private
81
+ attr_reader :current_workflow_step
82
+
83
    # Core execution wrapper around ActiveJob::Continuable's continuation.
    # Ensures a run record exists, takes the execution lock, runs the body,
    # and translates the possible outcomes into run-record state:
    #   Pause      -> persist interrupted state, stop quietly
    #   Interrupt  -> re-enqueue via resume_job
    #   Continuation::Error -> mark failed and re-raise
    #   StandardError       -> resume if we advanced past a step, else fail
    # The lock is always released in the ensure block.
    def continue(&block)
      ensure_workflow_run!
      # Terminal runs (completed/failed) must never re-execute.
      return if workflow_run.terminal?
      return unless acquire_execution_lock!

      begin
        # Re-check after acquiring the lock: another worker may have finished
        # the run between the first check and lock acquisition.
        return if workflow_run.terminal?

        if continuation.started?
          self.resumptions += 1
          instrument :resume, **continuation.instrumentation
        end

        mark_workflow_running!
        block.call
        complete_workflow!
      rescue Pause => pause
        persist_interrupted_workflow!(pause.status)
      rescue ActiveJob::Continuation::Interrupt => interrupt
        resume_job(interrupt)
      rescue ActiveJob::Continuation::Error => error
        fail_workflow!(error)
        raise
      rescue StandardError => error
        if resume_errors_after_advancing? && continuation.advanced?
          # NOTE(review): this passes a keyword (`exception:`) while the branch
          # above passes the interrupt positionally; the local override
          # `resume_job(exception)` takes one positional argument, so this call
          # delivers a Hash — confirm intended against ActiveJob::Continuable.
          resume_job(exception: error)
        else
          fail_workflow!(error)
          raise
        end
      ensure
        release_execution_lock!
      end
    end
117
+
118
+ def acquire_execution_lock!
119
+ @execution_lock_owner ||= "#{job_id}:#{SecureRandom.uuid}"
120
+ workflow_run.acquire_execution_lock!(owner: @execution_lock_owner, ttl: DurableFlow.execution_lock_ttl)
121
+ end
122
+
123
+ def release_execution_lock!
124
+ return unless @execution_lock_owner
125
+
126
+ workflow_run.release_execution_lock!(owner: @execution_lock_owner)
127
+ end
128
+
129
+ def refresh_execution_lock!
130
+ return unless @execution_lock_owner
131
+
132
+ workflow_run.refresh_execution_lock!(owner: @execution_lock_owner, ttl: DurableFlow.execution_lock_ttl)
133
+ end
134
+
135
    # Runs one durable step on top of ActiveJob::Continuable's continuable_step.
    # Results are persisted per step, so a replayed execution returns the stored
    # value instead of re-running the block.
    #
    # When no block is given, the step body is the instance method named `name`.
    def durable_step(name, start: nil, isolated: false, &block)
      block = block_from_method(name) unless block_given?
      normalized_name = normalize_step_name(name)
      # `loaded` tracks whether continuable_step actually yielded; on resume it
      # may skip already-completed steps entirely, in which case the value must
      # be re-read from the database below.
      loaded = false
      value = nil

      continuable_step(normalized_name, start: start, isolated: isolated) do |continuation_step|
        step_record = start_step_record!(normalized_name)

        if step_record.succeeded?
          # A prior execution already completed this step; reuse its result.
          value = step_record.result_value
          loaded = true
        else
          begin
            # Expose the active record to nested helpers (sleep_step, log, …).
            @current_workflow_step = step_record
            # Zero-arity blocks don't receive the continuation step object.
            value = block.arity == 0 ? block.call : block.call(continuation_step)
            step_record.complete!(value)
            loaded = true
          ensure
            @current_workflow_step = nil
          end
        end
      end

      if loaded
        value
      else
        # continuable_step skipped the block (step finished in a past
        # execution): load the persisted result or fail loudly.
        load_completed_step_result!(normalized_name)
      end
    end
165
+
166
+ def block_from_method(name)
167
+ step_method = method(name)
168
+
169
+ raise ArgumentError, "Step method '#{name}' must accept 0 or 1 arguments" if step_method.arity > 1
170
+
171
+ if step_method.parameters.any? { |type, _| type == :key || type == :keyreq }
172
+ raise ArgumentError, "Step method '#{name}' must not accept keyword arguments"
173
+ end
174
+
175
+ step_method.arity == 0 ? -> { step_method.call } : step_method
176
+ end
177
+
178
+ def normalize_step_name(name)
179
+ case name
180
+ when Symbol
181
+ name
182
+ when String
183
+ name.to_sym
184
+ else
185
+ raise ActiveJob::Continuation::InvalidStepError, "Step '#{name}' must be a Symbol or String, found '#{name.class}'"
186
+ end
187
+ end
188
+
189
    # Finds or creates the WorkflowStep row for `name` and, unless it already
    # succeeded, marks it running and bumps its attempt counter.
    # create_or_find_by! makes the insert race-safe across workers.
    def start_step_record!(name)
      ensure_workflow_run!
      # Keep the execution lock alive — step setup can follow a long gap.
      refresh_execution_lock!

      WorkflowStep.create_or_find_by!(workflow_run: workflow_run, name: name.to_s) do |step_record|
        step_record.status = "pending"
        step_record.metadata = {}
      end.tap do |step_record|
        # Completed steps are returned untouched so callers can reuse results.
        next if step_record.succeeded?

        step_record.update!(
          status: "running",
          # Preserve the first start time across retries.
          started_at: step_record.started_at || Time.current,
          attempts: step_record.attempts + 1,
        )
      end
    end
206
+
207
    # Loads the persisted result of a step that continuable_step skipped.
    # Raises MissingStepResultError when the continuation thinks the step is
    # done but no succeeded record exists (state divergence).
    def load_completed_step_result!(name)
      step_record = workflow_run.workflow_steps.find_by(name: name.to_s)
      return step_record.result_value if step_record&.succeeded?

      raise MissingStepResultError, "Missing result for completed step #{name.inspect}"
    end
213
+
214
    # Finds the existing WorkflowWait for this step or creates one, then
    # backfills fields that may have changed or been left blank on a prior
    # execution (event name, match criteria, timeout).
    #
    # NOTE(review): the `find_by || create!` pair is not race-safe — a
    # concurrent insert between the two calls can raise; elsewhere this file
    # uses create_or_find_by! for the same pattern. Confirm a unique index
    # exists before switching.
    def find_or_initialize_wait(step_record, event_name:, timeout:, match:)
      (WorkflowWait.find_by(workflow_run: workflow_run, workflow_step: step_record) ||
        WorkflowWait.create!(
          workflow_run: workflow_run,
          workflow_step: step_record,
          event_name: event_name.to_s,
          status: "pending",
          match: Serializer.dump(match || {}),
          timeout_at: (time_from(timeout) if timeout),
        )).tap do |wait|
        updates = {}
        updates[:event_name] = event_name.to_s if wait.event_name != event_name.to_s
        updates[:match] = Serializer.dump(match || {}) if wait.match.blank?
        # Only set the timeout once; replays must not push the deadline back.
        updates[:timeout_at] = time_from(timeout) if timeout && wait.timeout_at.blank?
        wait.update!(updates) if updates.any?
      end
    end
231
+
232
    # Returns the event satisfying this wait, or nil.
    # A previously linked event wins; otherwise scans events with the wait's
    # name created at or after the wait, oldest first. Matching runs in Ruby
    # (matches_event?), so all candidate rows are loaded — acceptable while
    # event volume per name stays small.
    def matched_event_for(wait)
      return wait.workflow_event if wait.workflow_event

      WorkflowEvent.named(wait.event_name).where("created_at >= ?", wait.created_at).order(:created_at).detect do |event|
        wait.matches_event?(event)
      end
    end
239
+
240
    # Persists the interrupted state, emits the :interrupt instrumentation,
    # then raises: Interrupt (self-rescheduling, carries resume_options) when
    # the caller supplied resume options, otherwise Pause (passive stop).
    #
    # NOTE(review): `Interrupt` here presumably resolves to
    # DurableFlow::Interrupt, not Ruby's built-in ::Interrupt — verify the
    # constant is defined in this namespace.
    def pause_or_interrupt!(reason:, status:, resume_options: nil)
      persist_interrupted_workflow!(status)
      instrument :interrupt, reason: reason, **continuation.instrumentation

      if resume_options
        raise Interrupt.new(reason: reason, status: status, resume_options: resume_options)
      else
        raise Pause.new(reason: reason, status: status)
      end
    end
250
+
251
    # Re-enqueues the job after an interruption. For our own Interrupt,
    # enforces the max_resumptions budget and retries with the interrupt's
    # resume options; anything else is persisted as "retrying" and delegated
    # to the superclass implementation.
    def resume_job(exception)
      if exception.is_a?(Interrupt)
        # NOTE(review): the return value of executions_for is discarded —
        # confirm this call is needed for its side effects (ActiveJob retry
        # bookkeeping) or remove it.
        executions_for(exception)
        persist_interrupted_workflow!(exception.status)

        if max_resumptions.nil? || resumptions < max_resumptions
          retry_job(**exception.resume_options)
        else
          error = ActiveJob::Continuation::ResumeLimitError.new("Job was resumed a maximum of #{max_resumptions} times")
          fail_workflow!(error)
          raise error
        end
      else
        # NOTE(review): `continue` invokes this as resume_job(exception: error),
        # which arrives here as a Hash positional argument in Ruby 3 — it will
        # always take this branch; confirm that is intended.
        persist_interrupted_workflow!("retrying")
        super
      end
    end
268
+
269
+ def record_enqueued_workflow
270
+ ensure_workflow_run!
271
+ workflow_run.update!(
272
+ status: workflow_run.status.presence || "enqueued",
273
+ serialized_job: serialize,
274
+ queue_name: queue_name,
275
+ priority: priority,
276
+ )
277
+ end
278
+
279
    # Memoizes (and lazily creates) the WorkflowRun keyed by this job's id.
    # create_or_find_by! tolerates a concurrent insert of the same run_id;
    # the block only runs on a fresh insert.
    def ensure_workflow_run!
      @workflow_run ||= WorkflowRun.create_or_find_by!(run_id: job_id) do |run|
        run.job_id = job_id
        run.workflow_class = self.class.name
        run.status = "enqueued"
        run.arguments = Serializer.dump(arguments)
        run.queue_name = queue_name
        run.priority = priority
      end
    end
289
+
290
+ def mark_workflow_running!
291
+ workflow_run.update!(
292
+ status: "running",
293
+ started_at: workflow_run.started_at || Time.current,
294
+ serialized_job: serialize,
295
+ queue_name: queue_name,
296
+ priority: priority,
297
+ )
298
+ end
299
+
300
+ def persist_interrupted_workflow!(status)
301
+ workflow_run.update!(
302
+ status: status,
303
+ interrupted_at: Time.current,
304
+ serialized_job: serialize,
305
+ queue_name: queue_name,
306
+ priority: priority,
307
+ )
308
+ end
309
+
310
+ def complete_workflow!
311
+ workflow_run.update!(
312
+ status: "completed",
313
+ completed_at: Time.current,
314
+ serialized_job: serialize,
315
+ last_error: nil,
316
+ )
317
+
318
+ DurableFlow.notify(DurableFlow::WORKFLOW_COMPLETED_EVENT, {
319
+ run_id: workflow_run.run_id,
320
+ job_id: job_id,
321
+ workflow_class: self.class.name,
322
+ })
323
+ end
324
+
325
+ def fail_workflow!(error)
326
+ workflow_run.update!(
327
+ status: "failed",
328
+ failed_at: Time.current,
329
+ serialized_job: serialize,
330
+ last_error: {
331
+ "class" => error.class.name,
332
+ "message" => error.message,
333
+ "backtrace" => Array(error.backtrace).first(10),
334
+ },
335
+ )
336
+
337
+ DurableFlow.notify(DurableFlow::WORKFLOW_FAILED_EVENT, {
338
+ run_id: workflow_run.run_id,
339
+ job_id: job_id,
340
+ workflow_class: self.class.name,
341
+ error_class: error.class.name,
342
+ error_message: error.message,
343
+ })
344
+ end
345
+
346
+ def time_from(value, explicit_time: nil)
347
+ return explicit_time.to_time if explicit_time.respond_to?(:to_time)
348
+ return nil if value.nil?
349
+ return value.to_time if value.respond_to?(:to_time)
350
+
351
+ Time.current + value
352
+ end
353
+
354
+ def parse_time(value)
355
+ return if value.blank?
356
+ return value if value.is_a?(Time)
357
+
358
+ Time.iso8601(value.to_s)
359
+ end
360
+ end
361
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DurableFlow
4
+ class WorkflowLogger
5
+ LEVELS = %w[debug info warn error].freeze
6
+
7
+ def initialize(workflow)
8
+ @workflow = workflow
9
+ end
10
+
11
+ LEVELS.each do |level|
12
+ define_method(level) do |message, data = nil, **kwargs|
13
+ write(level, message, data, **kwargs)
14
+ end
15
+ end
16
+
17
+ def write(level, message, data = nil, **kwargs)
18
+ level = level.to_s
19
+ raise ArgumentError, "Unknown log level #{level.inspect}" unless LEVELS.include?(level)
20
+
21
+ workflow.send(:ensure_workflow_run!)
22
+
23
+ WorkflowLog.create!(
24
+ workflow_run: workflow.workflow_run,
25
+ workflow_step: workflow.send(:current_workflow_step),
26
+ level: level,
27
+ message: message.to_s,
28
+ data: Serializer.dump(normalize_data(data, kwargs)),
29
+ )
30
+ end
31
+
32
+ private
33
+ attr_reader :workflow
34
+
35
+ def normalize_data(data, kwargs)
36
+ base = data.nil? ? {} : data
37
+ raise ArgumentError, "Workflow log data must be a Hash" unless base.is_a?(Hash)
38
+
39
+ base.merge(kwargs)
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DurableFlow
4
  # Read-only presenter that assembles a chronological timeline of a run's
  # steps, waits, events, and logs. All collections are loaded once and
  # memoized; instances are meant to be short-lived (no cache invalidation).
  class WorkflowTimeline
    # Tie-break order when multiple item types share a timestamp.
    ITEM_TYPE_ORDER = {
      step: 0,
      wait: 1,
      event: 2,
      log: 3,
    }.freeze

    # One entry in the flat timeline. `step`/`step_id` link the item to its
    # owning step; both are nil for run-level items.
    Item = Struct.new(:type, :record, :timestamp, :step, :step_id, keyword_init: true) do
      # Stable DOM/cache-friendly identifier, unique across item types.
      def id
        "#{type}-#{record.id}"
      end

      # Human-readable label, sourced from the type-appropriate attribute.
      def name
        case type
        when :step
          record.name
        when :wait
          record.event_name
        when :event
          record.name
        when :log
          record.message
        end
      end

      # Some record types (e.g. events/logs) may not expose a status.
      def status
        record.status if record.respond_to?(:status)
      end

      # True when the item belongs to the run itself rather than a step.
      def run_level?
        step_id.nil?
      end
    end

    # Per-step aggregate: the step plus its logs, waits, and matched events.
    StepEntry = Struct.new(:step, :logs, :waits, :events, keyword_init: true) do
      def id
        step.id
      end

      def name
        step.name
      end

      def status
        step.status
      end

      def attempts
        step.attempts
      end
    end

    attr_reader :workflow_run

    def initialize(workflow_run)
      @workflow_run = workflow_run
    end

    # One StepEntry per step, in step order.
    def step_entries
      @step_entries ||= steps.map do |step|
        waits = waits_for(step)

        StepEntry.new(
          step: step,
          logs: logs_for(step),
          waits: waits,
          events: waits.filter_map(&:workflow_event),
        )
      end
    end

    # Looks up a StepEntry by step record or raw id.
    def step_entry_for(step_or_id)
      step_entries.find { |entry| entry.id == record_id(step_or_id) }
    end

    # Steps in creation order; id breaks same-timestamp ties.
    def steps
      @steps ||= workflow_run.workflow_steps.order(:created_at, :id).to_a
    end

    # Waits with their matched events eager-loaded to avoid N+1 queries.
    def waits
      @waits ||= workflow_run.workflow_waits.includes(:workflow_event).order(:created_at, :id).to_a
    end

    # Logs with owning steps eager-loaded; ordering delegated to the model.
    def logs
      @logs ||= workflow_run.workflow_logs.includes(:workflow_step).ordered.to_a
    end

    # Distinct events that matched any wait of this run.
    def events
      @events ||= waits.filter_map(&:workflow_event).uniq
    end

    # Logs not attached to any step (run-level logs).
    def run_logs
      @run_logs ||= logs.reject(&:workflow_step_id)
    end

    def logs_for(step_or_id)
      logs_by_step_id.fetch(record_id(step_or_id), [])
    end

    def waits_for(step_or_id)
      waits_by_step_id.fetch(record_id(step_or_id), [])
    end

    # The flat, chronologically sorted timeline across all item types.
    def items
      @items ||= [
        steps.map { |step| item_for_step(step) },
        waits.map { |wait| item_for_wait(wait) },
        events.map { |event| item_for_event(event) },
        logs.map { |log| item_for_log(log) },
      ].flatten.sort_by { |item| item_sort_key(item) }
    end

    private

    def logs_by_step_id
      @logs_by_step_id ||= logs.select(&:workflow_step_id).group_by(&:workflow_step_id)
    end

    def waits_by_step_id
      @waits_by_step_id ||= waits.group_by(&:workflow_step_id)
    end

    # Maps an event id back to the wait it satisfied, for step attribution.
    def wait_by_event_id
      @wait_by_event_id ||= waits.select(&:workflow_event_id).index_by(&:workflow_event_id)
    end

    def item_for_step(step)
      Item.new(
        type: :step,
        record: step,
        # Prefer the actual start time; fall back to row creation.
        timestamp: step.started_at || step.created_at,
        step: step,
        step_id: step.id,
      )
    end

    def item_for_wait(wait)
      Item.new(
        type: :wait,
        record: wait,
        timestamp: wait.created_at,
        step: step_by_id[wait.workflow_step_id],
        step_id: wait.workflow_step_id,
      )
    end

    def item_for_event(event)
      # Events attribute to a step only via the wait they matched.
      wait = wait_by_event_id[event.id]

      Item.new(
        type: :event,
        record: event,
        timestamp: event.occurred_at || event.created_at,
        step: step_by_id[wait&.workflow_step_id],
        step_id: wait&.workflow_step_id,
      )
    end

    def item_for_log(log)
      Item.new(
        type: :log,
        record: log,
        timestamp: log.created_at,
        step: step_by_id[log.workflow_step_id],
        step_id: log.workflow_step_id,
      )
    end

    def step_by_id
      @step_by_id ||= steps.index_by(&:id)
    end

    # Sort by timestamp, then type order, then record id; nils sort first
    # via the epoch/zero fallbacks.
    def item_sort_key(item)
      [
        item.timestamp || Time.at(0),
        ITEM_TYPE_ORDER.fetch(item.type),
        item.record.id || 0,
      ]
    end

    # Accepts either a record (responding to #id) or a bare id.
    def record_id(record_or_id)
      record_or_id.respond_to?(:id) ? record_or_id.id : record_or_id
    end
  end
188
+ end