chrono_forge 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +94 -0
- data/lib/chrono_forge/executor/lock_strategy.rb +8 -5
- data/lib/chrono_forge/executor/methods/durably_execute.rb +1 -1
- data/lib/chrono_forge/executor/methods/workflow_states.rb +128 -0
- data/lib/chrono_forge/executor/methods.rb +1 -0
- data/lib/chrono_forge/executor.rb +60 -88
- data/lib/chrono_forge/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbb73c055f7439bc5e787d68a6d46bbb687143a85a3d57dcade8910aaae93916
|
4
|
+
data.tar.gz: 7b40ca8cc17398e695434c3e1ab61c7cfa12dfeb0ac941dd257daa18b633dbed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a6db6125a515d250b19ba3bd59db16056973a0d23df3acf76b5341a7990f962694dec5b9732302a0a850844c62a97847640c289bdebcee0f684f691e214049f
|
7
|
+
data.tar.gz: 852d241932f2cfc28e48bb6713f79d0c5086dfe8f92b92828f6ef03943250c548cef331de43d208d756c209ea99633f3ab99229391dd8c207fed47b6e4bf31f4
|
data/README.md
CHANGED
@@ -288,6 +288,100 @@ ChronoForge is ideal for:
|
|
288
288
|
- **Multi-step workflows** - Onboarding flows, approval processes, multi-stage jobs
|
289
289
|
- **State machines with time-based transitions** - Document approval, subscription lifecycle
|
290
290
|
|
291
|
+
## 🧠 Advanced State Management
|
292
|
+
|
293
|
+
ChronoForge workflows follow a sophisticated state machine model to ensure durability and fault tolerance. Understanding these states and transitions is essential for troubleshooting and recovery.
|
294
|
+
|
295
|
+
### Workflow State Diagram
|
296
|
+
|
297
|
+
```mermaid
|
298
|
+
stateDiagram-v2
|
299
|
+
[*] --> created: Workflow Created
|
300
|
+
created --> idle: Initial State
|
301
|
+
idle --> running: Job Started
|
302
|
+
running --> idle: Waiting
|
303
|
+
running --> completed: All Steps Completed
|
304
|
+
running --> failed: Max Retries Exhausted
|
305
|
+
running --> stalled: Unrecoverable Error
|
306
|
+
idle --> running: Resumed
|
307
|
+
stalled --> [*]: Requires Manual Intervention
|
308
|
+
failed --> [*]: Requires Manual Intervention
|
309
|
+
completed --> [*]: Workflow Succeeded
|
310
|
+
```
|
311
|
+
|
312
|
+
### State Descriptions
|
313
|
+
|
314
|
+
#### Created
|
315
|
+
- **Description**: Initial state when a workflow record is first created
|
316
|
+
- **Behavior**: Transitions immediately to idle state
|
317
|
+
- **Duration**: Momentary
|
318
|
+
|
319
|
+
#### Idle
|
320
|
+
- **Description**: The workflow is waiting to be processed or between processing steps
|
321
|
+
- **Behavior**: Not locked, available to be picked up by job processor
|
322
|
+
- **Duration**: Can be minutes to days, depending on wait conditions
|
323
|
+
|
324
|
+
#### Running
|
325
|
+
- **Description**: The workflow is actively being processed
|
326
|
+
- **Identifiers**: Has locked_at and locked_by values set
|
327
|
+
- **Behavior**: Protected against concurrent execution
|
328
|
+
- **Duration**: Should be brief unless performing long operations
|
329
|
+
|
330
|
+
#### Completed
|
331
|
+
- **Description**: The workflow has successfully executed all steps
|
332
|
+
- **Identifiers**: Has completed_at timestamp, state = "completed"
|
333
|
+
- **Behavior**: Final state, no further processing
|
334
|
+
- **Typical Exit Points**: All processing completed successfully
|
335
|
+
|
336
|
+
#### Failed
|
337
|
+
- **Description**: The workflow has failed after exhausting retry attempts
|
338
|
+
- **Identifiers**: Has failure-related data in error_logs, state = "failed"
|
339
|
+
- **Behavior**: No automatic recovery, requires manual intervention
|
340
|
+
- **Typical Exit Points**: Max retries exhausted, explicit failure, non-retryable error
|
341
|
+
|
342
|
+
#### Stalled
|
343
|
+
- **Description**: The workflow encountered an unrecoverable error but wasn't explicitly failed
|
344
|
+
- **Identifiers**: Not completed, not running, has errors in error_logs
|
345
|
+
- **Behavior**: Requires manual investigation and intervention
|
346
|
+
- **Typical Exit Points**: ExecutionFailedError, unexpected exceptions, system failures
|
347
|
+
|
348
|
+
### Handling Different Workflow States
|
349
|
+
|
350
|
+
#### Recovering Stalled/Failed Workflows
|
351
|
+
|
352
|
+
```ruby
|
353
|
+
workflow = ChronoForge::Workflow.find_by(key: "order-123")
|
354
|
+
|
355
|
+
if workflow.stalled? || workflow.failed?
|
356
|
+
job_class = workflow.job_class.constantize
|
357
|
+
|
358
|
+
# Retry immediately
|
359
|
+
job_class.retry_now(workflow.key)
|
360
|
+
|
361
|
+
# Or retry asynchronously
|
362
|
+
job_class.retry_later(workflow.key)
|
363
|
+
end
|
364
|
+
```
|
365
|
+
|
366
|
+
#### Monitoring Running Workflows
|
367
|
+
|
368
|
+
Long-running workflows might indicate issues:
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
# Find workflows running for too long
|
372
|
+
long_running = ChronoForge::Workflow.where(state: :running)
|
373
|
+
.where('locked_at < ?', 30.minutes.ago)
|
374
|
+
|
375
|
+
long_running.each do |workflow|
|
376
|
+
# Log potential issues for investigation
|
377
|
+
Rails.logger.warn "Workflow #{workflow.key} has been running for >30 minutes"
|
378
|
+
|
379
|
+
# Optionally force unlock if you suspect deadlock
|
380
|
+
# CAUTION: Only do this if you're certain the job is stuck
|
381
|
+
# workflow.update!(locked_at: nil, locked_by: nil, state: :idle)
|
382
|
+
end
|
383
|
+
```
|
384
|
+
|
291
385
|
## 🚀 Development
|
292
386
|
|
293
387
|
After checking out the repo, run:
|
data/lib/chrono_forge/executor/lock_strategy.rb
CHANGED
@@ -22,20 +22,23 @@ module ChronoForge
|
|
22
22
|
state: :running
|
23
23
|
)
|
24
24
|
|
25
|
+
Rails.logger.debug { "ChronoForge:#{self.class} job(#{job_id}) acquired lock for workflow(#{workflow.key})" }
|
26
|
+
|
25
27
|
workflow
|
26
28
|
end
|
27
29
|
end
|
28
30
|
|
29
|
-
def self.release_lock(job_id, workflow)
|
31
|
+
def self.release_lock(job_id, workflow, force: false)
|
30
32
|
workflow = workflow.reload
|
31
|
-
if workflow.locked_by != job_id
|
33
|
+
if !force && workflow.locked_by != job_id
|
32
34
|
raise LongRunningConcurrentExecutionError,
|
33
|
-
"
|
34
|
-
"
|
35
|
+
"ChronoForge:#{self.class} job(#{job_id}) executed longer than specified max_duration, " \
|
36
|
+
"allowed another instance job(#{workflow.locked_by}) to acquire the lock."
|
35
37
|
end
|
36
38
|
|
37
39
|
columns = {locked_at: nil, locked_by: nil}
|
38
|
-
columns[:state] = :idle if workflow.running?
|
40
|
+
columns[:state] = :idle if force || workflow.running?
|
41
|
+
|
39
42
|
|
40
43
|
workflow.update_columns(columns)
|
41
44
|
end
|
data/lib/chrono_forge/executor/methods/workflow_states.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
module ChronoForge
|
2
|
+
module Executor
|
3
|
+
module Methods
|
4
|
+
module WorkflowStates
|
5
|
+
private
|
6
|
+
|
7
|
+
def complete_workflow!
|
8
|
+
# Create an execution log for workflow completion
|
9
|
+
execution_log = ExecutionLog.create_or_find_by!(
|
10
|
+
workflow: workflow,
|
11
|
+
step_name: "$workflow_completion$"
|
12
|
+
) do |log|
|
13
|
+
log.started_at = Time.current
|
14
|
+
end
|
15
|
+
|
16
|
+
begin
|
17
|
+
execution_log.update!(
|
18
|
+
attempts: execution_log.attempts + 1,
|
19
|
+
last_executed_at: Time.current
|
20
|
+
)
|
21
|
+
|
22
|
+
workflow.completed_at = Time.current
|
23
|
+
workflow.completed!
|
24
|
+
|
25
|
+
# Mark execution log as completed
|
26
|
+
execution_log.update!(
|
27
|
+
state: :completed,
|
28
|
+
completed_at: Time.current
|
29
|
+
)
|
30
|
+
|
31
|
+
# Return the execution log for tracking
|
32
|
+
execution_log
|
33
|
+
rescue => e
|
34
|
+
# Log any errors
|
35
|
+
execution_log.update!(
|
36
|
+
state: :failed,
|
37
|
+
error_message: e.message,
|
38
|
+
error_class: e.class.name
|
39
|
+
)
|
40
|
+
raise
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def fail_workflow!(error_log)
|
45
|
+
# Create an execution log for workflow failure
|
46
|
+
execution_log = ExecutionLog.create_or_find_by!(
|
47
|
+
workflow: workflow,
|
48
|
+
step_name: "$workflow_failure$#{error_log.id}"
|
49
|
+
) do |log|
|
50
|
+
log.started_at = Time.current
|
51
|
+
log.metadata = {
|
52
|
+
error_log_id: error_log.id
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
begin
|
57
|
+
execution_log.update!(
|
58
|
+
attempts: execution_log.attempts + 1,
|
59
|
+
last_executed_at: Time.current
|
60
|
+
)
|
61
|
+
|
62
|
+
workflow.failed!
|
63
|
+
|
64
|
+
# Mark execution log as completed
|
65
|
+
execution_log.update!(
|
66
|
+
state: :completed,
|
67
|
+
completed_at: Time.current
|
68
|
+
)
|
69
|
+
|
70
|
+
# Return the execution log for tracking
|
71
|
+
execution_log
|
72
|
+
rescue => e
|
73
|
+
# Log any errors
|
74
|
+
execution_log.update!(
|
75
|
+
state: :failed,
|
76
|
+
error_message: e.message,
|
77
|
+
error_class: e.class.name
|
78
|
+
)
|
79
|
+
raise
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def retry_workflow!
|
84
|
+
# Check if the workflow is stalled or failed
|
85
|
+
unless workflow.stalled? || workflow.failed?
|
86
|
+
raise WorkflowNotRetryableError, "Cannot retry workflow(#{workflow.key}) in #{workflow.state} state. Only stalled or failed workflows can be retried."
|
87
|
+
end
|
88
|
+
|
89
|
+
# Create an execution log for workflow retry
|
90
|
+
execution_log = ExecutionLog.create!(
|
91
|
+
workflow: workflow,
|
92
|
+
step_name: "$workflow_retry$#{Time.current.to_i}",
|
93
|
+
started_at: Time.current,
|
94
|
+
attempts: 1,
|
95
|
+
last_executed_at: Time.current,
|
96
|
+
metadata: {
|
97
|
+
previous_state: workflow.state,
|
98
|
+
requested_at: Time.current,
|
99
|
+
job_id: job_id
|
100
|
+
}
|
101
|
+
)
|
102
|
+
|
103
|
+
begin
|
104
|
+
# Release any existing locks
|
105
|
+
self.class::LockStrategy.release_lock(job_id, workflow, force: true)
|
106
|
+
|
107
|
+
# Mark execution log as completed
|
108
|
+
execution_log.update!(
|
109
|
+
state: :completed,
|
110
|
+
completed_at: Time.current
|
111
|
+
)
|
112
|
+
|
113
|
+
# Return the execution log for tracking
|
114
|
+
execution_log
|
115
|
+
rescue => e
|
116
|
+
# Log any errors
|
117
|
+
execution_log.update!(
|
118
|
+
state: :failed,
|
119
|
+
error_message: e.message,
|
120
|
+
error_class: e.class.name
|
121
|
+
)
|
122
|
+
raise
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
data/lib/chrono_forge/executor.rb
CHANGED
@@ -8,24 +8,68 @@ module ChronoForge
|
|
8
8
|
|
9
9
|
class HaltExecutionFlow < ExecutionFlowControl; end
|
10
10
|
|
11
|
+
class NotExecutableError < Error; end
|
12
|
+
|
13
|
+
class WorkflowNotRetryableError < NotExecutableError; end
|
14
|
+
|
11
15
|
include Methods
|
12
16
|
|
13
|
-
|
17
|
+
# Add class methods
|
18
|
+
def self.prepended(base)
|
19
|
+
class << base
|
20
|
+
# Enforce expected signature for perform_now with key as first arg and keywords after
|
21
|
+
def perform_now(key, **kwargs)
|
22
|
+
if !key.is_a?(String)
|
23
|
+
raise ArgumentError, "Workflow key must be a string as the first argument"
|
24
|
+
end
|
25
|
+
super(key, **kwargs)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Enforce expected signature for perform_later with key as first arg and keywords after
|
29
|
+
def perform_later(key, **kwargs)
|
30
|
+
if !key.is_a?(String)
|
31
|
+
raise ArgumentError, "Workflow key must be a string as the first argument"
|
32
|
+
end
|
33
|
+
super(key, **kwargs)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Add retry_now class method that calls perform_now with retry_workflow: true
|
37
|
+
def retry_now(key, **kwargs)
|
38
|
+
perform_now(key, retry_workflow: true, **kwargs)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Add retry_later class method that calls perform_later with retry_workflow: true
|
42
|
+
def retry_later(key, **kwargs)
|
43
|
+
perform_later(key, retry_workflow: true, **kwargs)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def perform(key, attempt: 0, retry_workflow: false, options: {}, **kwargs)
|
14
49
|
# Prevent excessive retries
|
15
50
|
if attempt >= self.class::RetryStrategy.max_attempts
|
16
|
-
Rails.logger.error { "
|
51
|
+
Rails.logger.error { "ChronoForge:#{self.class} max attempts reached for job workflow(#{key})" }
|
17
52
|
return
|
18
53
|
end
|
19
54
|
|
20
55
|
# Find or create job with comprehensive tracking
|
21
56
|
setup_workflow(key, options, kwargs)
|
22
57
|
|
58
|
+
# Handle retry parameter - unlock and continue execution
|
59
|
+
retry_workflow! if retry_workflow
|
60
|
+
|
61
|
+
# Track if we acquired the lock
|
62
|
+
lock_acquired = false
|
63
|
+
|
23
64
|
begin
|
24
|
-
#
|
25
|
-
|
65
|
+
# Raise error if workflow cannot be executed
|
66
|
+
unless workflow.executable?
|
67
|
+
raise NotExecutableError, "#{self.class}(#{key}) is not in an executable state"
|
68
|
+
end
|
26
69
|
|
27
70
|
# Acquire lock with advanced concurrency protection
|
28
|
-
self.class::LockStrategy.acquire_lock(job_id, workflow, max_duration: max_duration)
|
71
|
+
@workflow = self.class::LockStrategy.acquire_lock(job_id, workflow, max_duration: max_duration)
|
72
|
+
lock_acquired = true
|
29
73
|
|
30
74
|
# Execute core job logic
|
31
75
|
super(**workflow.kwargs.symbolize_keys)
|
@@ -33,20 +77,22 @@ module ChronoForge
|
|
33
77
|
# Mark as complete
|
34
78
|
complete_workflow!
|
35
79
|
rescue ExecutionFailedError => e
|
36
|
-
Rails.logger.error { "
|
80
|
+
Rails.logger.error { "ChronoForge:#{self.class} execution step failed for workflow(#{key})" }
|
37
81
|
self.class::ExecutionTracker.track_error(workflow, e)
|
38
82
|
workflow.stalled!
|
39
83
|
nil
|
40
84
|
rescue HaltExecutionFlow
|
41
85
|
# Halt execution
|
42
|
-
Rails.logger.debug { "
|
86
|
+
Rails.logger.debug { "ChronoForge:#{self.class} execution halted for workflow(#{key})" }
|
43
87
|
nil
|
44
88
|
rescue ConcurrentExecutionError
|
45
89
|
# Graceful handling of concurrent execution
|
46
|
-
Rails.logger.warn { "
|
90
|
+
Rails.logger.warn { "ChronoForge:#{self.class} concurrent execution detected for job #{key}" }
|
47
91
|
nil
|
92
|
+
rescue NotExecutableError
|
93
|
+
raise
|
48
94
|
rescue => e
|
49
|
-
Rails.logger.error { "
|
95
|
+
Rails.logger.error { "ChronoForge:#{self.class} an error occurred during execution of workflow(#{key})" }
|
50
96
|
error_log = self.class::ExecutionTracker.track_error(workflow, e)
|
51
97
|
|
52
98
|
# Retry if applicable
|
@@ -56,90 +102,16 @@ module ChronoForge
|
|
56
102
|
fail_workflow! error_log
|
57
103
|
end
|
58
104
|
ensure
|
59
|
-
|
60
|
-
|
61
|
-
|
105
|
+
# Only release lock if we acquired it
|
106
|
+
if lock_acquired
|
107
|
+
context.save!
|
108
|
+
self.class::LockStrategy.release_lock(job_id, workflow)
|
109
|
+
end
|
62
110
|
end
|
63
111
|
end
|
64
112
|
|
65
113
|
private
|
66
114
|
|
67
|
-
def complete_workflow!
|
68
|
-
# Create an execution log for workflow completion
|
69
|
-
execution_log = ExecutionLog.create_or_find_by!(
|
70
|
-
workflow: workflow,
|
71
|
-
step_name: "$workflow_completion$"
|
72
|
-
) do |log|
|
73
|
-
log.started_at = Time.current
|
74
|
-
end
|
75
|
-
|
76
|
-
begin
|
77
|
-
execution_log.update!(
|
78
|
-
attempts: execution_log.attempts + 1,
|
79
|
-
last_executed_at: Time.current
|
80
|
-
)
|
81
|
-
|
82
|
-
workflow.completed_at = Time.current
|
83
|
-
workflow.completed!
|
84
|
-
|
85
|
-
# Mark execution log as completed
|
86
|
-
execution_log.update!(
|
87
|
-
state: :completed,
|
88
|
-
completed_at: Time.current
|
89
|
-
)
|
90
|
-
|
91
|
-
# Return the execution log for tracking
|
92
|
-
execution_log
|
93
|
-
rescue => e
|
94
|
-
# Log any completion errors
|
95
|
-
execution_log.update!(
|
96
|
-
state: :failed,
|
97
|
-
error_message: e.message,
|
98
|
-
error_class: e.class.name
|
99
|
-
)
|
100
|
-
raise
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def fail_workflow!(error_log)
|
105
|
-
# Create an execution log for workflow failure
|
106
|
-
execution_log = ExecutionLog.create_or_find_by!(
|
107
|
-
workflow: workflow,
|
108
|
-
step_name: "$workflow_failure$"
|
109
|
-
) do |log|
|
110
|
-
log.started_at = Time.current
|
111
|
-
log.metadata = {
|
112
|
-
error_log_id: error_log.id
|
113
|
-
}
|
114
|
-
end
|
115
|
-
|
116
|
-
begin
|
117
|
-
execution_log.update!(
|
118
|
-
attempts: execution_log.attempts + 1,
|
119
|
-
last_executed_at: Time.current
|
120
|
-
)
|
121
|
-
|
122
|
-
workflow.failed!
|
123
|
-
|
124
|
-
# Mark execution log as completed
|
125
|
-
execution_log.update!(
|
126
|
-
state: :completed,
|
127
|
-
completed_at: Time.current
|
128
|
-
)
|
129
|
-
|
130
|
-
# Return the execution log for tracking
|
131
|
-
execution_log
|
132
|
-
rescue => e
|
133
|
-
# Log any completion errors
|
134
|
-
execution_log.update!(
|
135
|
-
state: :failed,
|
136
|
-
error_message: e.message,
|
137
|
-
error_class: e.class.name
|
138
|
-
)
|
139
|
-
raise
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
115
|
def setup_workflow(key, options, kwargs)
|
144
116
|
@workflow = find_workflow(key, options, kwargs)
|
145
117
|
@context = Context.new(@workflow)
|
data/lib/chrono_forge/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chrono_forge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stefan Froelich
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-04-
|
11
|
+
date: 2025-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -196,6 +196,7 @@ files:
|
|
196
196
|
- lib/chrono_forge/executor/methods/durably_execute.rb
|
197
197
|
- lib/chrono_forge/executor/methods/wait.rb
|
198
198
|
- lib/chrono_forge/executor/methods/wait_until.rb
|
199
|
+
- lib/chrono_forge/executor/methods/workflow_states.rb
|
199
200
|
- lib/chrono_forge/executor/retry_strategy.rb
|
200
201
|
- lib/chrono_forge/version.rb
|
201
202
|
- lib/chrono_forge/workflow.rb
|