good_pipeline 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/README.md +1 -1
- data/demo/test/good_pipeline/test_bulk_enqueue.rb +193 -0
- data/demo/test/good_pipeline/test_queue_configuration.rb +157 -0
- data/demo/test/integration/test_bulk_enqueue_end_to_end.rb +109 -0
- data/demo/test/integration/test_end_to_end.rb +0 -15
- data/demo/test/integration/test_halt_execution.rb +0 -13
- data/demo/test/integration/test_queue_configuration.rb +82 -0
- data/demo/test/test_helper.rb +15 -0
- data/docs/architecture.md +8 -0
- data/docs/branching.md +2 -0
- data/docs/callbacks.md +2 -0
- data/docs/defining-pipelines.md +4 -0
- data/docs/getting-started.md +12 -0
- data/docs/index.md +1 -1
- data/docs/introduction.md +19 -1
- data/docs/pipeline-chaining.md +2 -0
- data/lib/good_pipeline/chain_coordinator.rb +1 -5
- data/lib/good_pipeline/coordinator.rb +96 -15
- data/lib/good_pipeline/pipeline.rb +24 -1
- data/lib/good_pipeline/runner.rb +3 -3
- data/lib/good_pipeline/version.rb +1 -1
- data/lib/good_pipeline.rb +15 -0
- metadata +7 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 38ec7eb9fb3cec2b9109b9695b0084aff2fa111440cd8fb76dbe53be59bd8e06
|
|
4
|
+
data.tar.gz: 71094436abc0d2c393b3d188510608bf714867201f2fc13eac4cc218d6b8a420
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c3520cff350de86f2d2820d7a4406f2b81407079c1d00ee8ecfc4dff02e23f54dcedb1ae61d3a32594a4680b111b4b127fe2808ecf08c4705c4e0732e04643b9
|
|
7
|
+
data.tar.gz: 2a91e6a2b050392b6e5e8c1af31bd93a8e05fe5d23f1f940d0bbe97df59c381b7286bf10a8f3590789b342d079d66b9e1c1433e00a03eaa42d31c277e8e6218f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.4.0] - 2026-04-02
|
|
4
|
+
|
|
5
|
+
### Performance
|
|
6
|
+
|
|
7
|
+
- **Bulk root step enqueuing** — pipelines with multiple root steps now enqueue all of them via `GoodJob::Batch.enqueue_all` in a fixed number of queries instead of ~9 queries per step. Both `Runner#enqueue_root_steps` and `ChainCoordinator#start_pipeline` use the new `Coordinator.bulk_enqueue_steps` method.
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **Configurable queue names for internal jobs** — new `coordination_queue_name` and `callback_queue_name` settings control which queues `StepFinishedJob`, `PipelineReconciliationJob`, and `PipelineCallbackJob` run on. Configurable globally (`GoodPipeline.coordination_queue_name = "x"`) and per-pipeline via the class DSL. Defaults to `"good_pipeline_coordination"` and `"good_pipeline_callbacks"`.
|
|
12
|
+
- **`Coordinator.bulk_enqueue_steps`** — public method that loads pending steps, partitions branch steps for individual handling, and bulk-enqueues the rest via `Batch.enqueue_all`. Invalid job classes are failed individually without blocking valid steps.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
|
|
16
|
+
- **Minimum GoodJob version** — bumped from `>= 3.10` to `>= 4.14` (required for `Batch.enqueue_all`).
|
|
17
|
+
- **`run_pipeline_to_completion` test helper** — extracted from 3 integration test files into `test_helper.rb`.
|
|
18
|
+
|
|
3
19
|
## [0.3.1] - 2026-03-26
|
|
4
20
|
|
|
5
21
|
### Added
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
DAG-based job pipeline orchestration for Rails, built on [GoodJob](https://github.com/bensheldon/good_job).
|
|
4
4
|
|
|
5
|
-
Define multi-step workflows as directed acyclic graphs
|
|
5
|
+
Define multi-step workflows as directed acyclic graphs — not linear chains. Steps run in parallel when they can and wait for dependencies when they must. GoodPipeline handles dependency resolution, parallel execution, failure strategies, conditional branching, pipeline chaining, and lifecycle callbacks. It also ships with a web dashboard.
|
|
6
6
|
|
|
7
7
|
## Requirements
|
|
8
8
|
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class TestBulkEnqueue < ActiveSupport::TestCase
|
|
6
|
+
# --- basic enqueuing ---
|
|
7
|
+
|
|
8
|
+
def test_enqueues_multiple_steps
|
|
9
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
10
|
+
pipeline.update_columns(status: "running")
|
|
11
|
+
step_a = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
12
|
+
step_b = build_step(pipeline, key: "step_b", job_class: "TranscodeJob")
|
|
13
|
+
|
|
14
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step_a.id, step_b.id])
|
|
15
|
+
|
|
16
|
+
step_a.reload
|
|
17
|
+
step_b.reload
|
|
18
|
+
|
|
19
|
+
assert_equal "enqueued", step_a.coordination_status
|
|
20
|
+
assert_equal "enqueued", step_b.coordination_status
|
|
21
|
+
refute_nil step_a.good_job_batch_id
|
|
22
|
+
refute_nil step_b.good_job_batch_id
|
|
23
|
+
refute_nil step_a.good_job_id
|
|
24
|
+
refute_nil step_b.good_job_id
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def test_each_step_gets_its_own_batch
|
|
28
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
29
|
+
pipeline.update_columns(status: "running")
|
|
30
|
+
step_a = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
31
|
+
step_b = build_step(pipeline, key: "step_b", job_class: "TranscodeJob")
|
|
32
|
+
|
|
33
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step_a.id, step_b.id])
|
|
34
|
+
|
|
35
|
+
refute_equal step_a.reload.good_job_batch_id, step_b.reload.good_job_batch_id
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def test_good_job_id_points_to_real_job_record
|
|
39
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
40
|
+
pipeline.update_columns(status: "running")
|
|
41
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
42
|
+
|
|
43
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
44
|
+
|
|
45
|
+
step.reload
|
|
46
|
+
good_job = GoodJob::Job.find_by(id: step.good_job_id)
|
|
47
|
+
|
|
48
|
+
refute_nil good_job, "good_job_id should point to a real GoodJob::Job record"
|
|
49
|
+
assert_equal step.good_job_batch_id, good_job.batch_id
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# --- batch callback setup ---
|
|
53
|
+
|
|
54
|
+
def test_batch_has_step_finished_callback
|
|
55
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
56
|
+
pipeline.update_columns(status: "running")
|
|
57
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
58
|
+
|
|
59
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
60
|
+
|
|
61
|
+
batch_record = GoodJob::BatchRecord.find(step.reload.good_job_batch_id)
|
|
62
|
+
|
|
63
|
+
assert_equal "GoodPipeline::StepFinishedJob", batch_record.on_finish
|
|
64
|
+
assert_equal({ step_id: step.id }, batch_record.properties)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# --- enqueue_options ---
|
|
68
|
+
|
|
69
|
+
def test_respects_queue_and_priority_options
|
|
70
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
71
|
+
pipeline.update_columns(status: "running")
|
|
72
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob",
|
|
73
|
+
enqueue_options: { "queue" => "critical", "priority" => 3 })
|
|
74
|
+
|
|
75
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
76
|
+
|
|
77
|
+
good_job = GoodJob::Job.find_by(id: step.reload.good_job_id)
|
|
78
|
+
|
|
79
|
+
assert_equal "critical", good_job.queue_name
|
|
80
|
+
assert_equal 3, good_job.priority
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def test_respects_wait_option
|
|
84
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
85
|
+
pipeline.update_columns(status: "running")
|
|
86
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob",
|
|
87
|
+
enqueue_options: { "wait" => 300 })
|
|
88
|
+
|
|
89
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
90
|
+
|
|
91
|
+
good_job = GoodJob::Job.find_by(id: step.reload.good_job_id)
|
|
92
|
+
|
|
93
|
+
refute_nil good_job.scheduled_at
|
|
94
|
+
assert_in_delta 300, good_job.scheduled_at - good_job.created_at, 5
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def test_passes_step_params_to_job
|
|
98
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
99
|
+
pipeline.update_columns(status: "running")
|
|
100
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob",
|
|
101
|
+
params: { "video_id" => 42 })
|
|
102
|
+
|
|
103
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
104
|
+
|
|
105
|
+
good_job = GoodJob::Job.find_by(id: step.reload.good_job_id)
|
|
106
|
+
arguments = good_job.serialized_params["arguments"]
|
|
107
|
+
|
|
108
|
+
assert_equal 42, arguments.first["video_id"]
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def test_handles_empty_params
|
|
112
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
113
|
+
pipeline.update_columns(status: "running")
|
|
114
|
+
step = build_step(pipeline, key: "step_a", job_class: "DownloadJob", params: {})
|
|
115
|
+
|
|
116
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step.id])
|
|
117
|
+
|
|
118
|
+
assert_equal "enqueued", step.reload.coordination_status
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# --- guard clauses ---
|
|
122
|
+
|
|
123
|
+
def test_skips_non_pending_steps
|
|
124
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
125
|
+
pipeline.update_columns(status: "running")
|
|
126
|
+
step_a = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
127
|
+
step_a.update_columns(coordination_status: "enqueued")
|
|
128
|
+
step_b = build_step(pipeline, key: "step_b", job_class: "TranscodeJob")
|
|
129
|
+
|
|
130
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step_a.id, step_b.id])
|
|
131
|
+
|
|
132
|
+
assert_equal "enqueued", step_b.reload.coordination_status
|
|
133
|
+
refute_nil step_b.good_job_id
|
|
134
|
+
assert_nil step_a.reload.good_job_id, "Non-pending step should not have been re-enqueued"
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def test_skips_steps_with_good_job_id
|
|
138
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
139
|
+
pipeline.update_columns(status: "running")
|
|
140
|
+
step_a = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
141
|
+
existing_job_id = SecureRandom.uuid
|
|
142
|
+
step_a.update_columns(good_job_id: existing_job_id)
|
|
143
|
+
step_b = build_step(pipeline, key: "step_b", job_class: "TranscodeJob")
|
|
144
|
+
|
|
145
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([step_a.id, step_b.id])
|
|
146
|
+
|
|
147
|
+
assert_equal "enqueued", step_b.reload.coordination_status
|
|
148
|
+
assert_equal existing_job_id, step_a.reload.good_job_id, "Step with good_job_id should be left alone"
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def test_handles_empty_array
|
|
152
|
+
# Should not raise
|
|
153
|
+
result = GoodPipeline::Coordinator.bulk_enqueue_steps([])
|
|
154
|
+
assert_nil result
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# --- branch step fallback ---
|
|
158
|
+
|
|
159
|
+
def test_falls_back_to_try_enqueue_step_for_branch_steps
|
|
160
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
161
|
+
pipeline.update_columns(status: "running")
|
|
162
|
+
branch_step = build_step(pipeline, key: "format_check",
|
|
163
|
+
job_class: GoodPipeline::BRANCH_JOB_CLASS)
|
|
164
|
+
branch_step.update_columns(branch: { "decides" => "pick_format", "empty_arms" => %w[hd sd] })
|
|
165
|
+
normal_step = build_step(pipeline, key: "step_a", job_class: "DownloadJob")
|
|
166
|
+
|
|
167
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([branch_step.id, normal_step.id])
|
|
168
|
+
|
|
169
|
+
assert_equal "enqueued", normal_step.reload.coordination_status
|
|
170
|
+
refute_nil normal_step.good_job_id
|
|
171
|
+
|
|
172
|
+
branch_step.reload
|
|
173
|
+
refute_equal "pending", branch_step.coordination_status,
|
|
174
|
+
"Branch step should have been processed by try_enqueue_step fallback"
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# --- error handling ---
|
|
178
|
+
|
|
179
|
+
def test_invalid_job_class_fails_step_without_blocking_others
|
|
180
|
+
pipeline = create_pipeline(on_failure_strategy: "halt")
|
|
181
|
+
pipeline.update_columns(status: "running")
|
|
182
|
+
bad_step = build_step(pipeline, key: "bad_step", job_class: "NonExistentJob")
|
|
183
|
+
good_step = build_step(pipeline, key: "good_step", job_class: "DownloadJob")
|
|
184
|
+
|
|
185
|
+
GoodPipeline::Coordinator.bulk_enqueue_steps([bad_step.id, good_step.id])
|
|
186
|
+
|
|
187
|
+
assert_equal "enqueued", good_step.reload.coordination_status
|
|
188
|
+
refute_nil good_step.good_job_id
|
|
189
|
+
|
|
190
|
+
assert_equal "failed", bad_step.reload.coordination_status
|
|
191
|
+
assert_equal "GoodPipeline::ConfigurationError", bad_step.error_class
|
|
192
|
+
end
|
|
193
|
+
end
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class TestQueueConfiguration < ActiveSupport::TestCase
|
|
6
|
+
teardown do
|
|
7
|
+
GoodPipeline.coordination_queue_name = nil
|
|
8
|
+
GoodPipeline.callback_queue_name = nil
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
# --- global defaults ---
|
|
12
|
+
|
|
13
|
+
def test_default_coordination_queue_name
|
|
14
|
+
assert_equal "good_pipeline_coordination", GoodPipeline.coordination_queue_name
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_default_callback_queue_name
|
|
18
|
+
assert_equal "good_pipeline_callbacks", GoodPipeline.callback_queue_name
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# --- global override ---
|
|
22
|
+
|
|
23
|
+
def test_global_coordination_queue_override
|
|
24
|
+
GoodPipeline.coordination_queue_name = "custom_coordination"
|
|
25
|
+
|
|
26
|
+
assert_equal "custom_coordination", GoodPipeline.coordination_queue_name
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_global_callback_queue_override
|
|
30
|
+
GoodPipeline.callback_queue_name = "custom_callbacks"
|
|
31
|
+
|
|
32
|
+
assert_equal "custom_callbacks", GoodPipeline.callback_queue_name
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# --- pipeline DSL ---
|
|
36
|
+
|
|
37
|
+
def test_pipeline_dsl_coordination_queue
|
|
38
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
39
|
+
coordination_queue_name "pipeline_coordination"
|
|
40
|
+
def configure(**) = run(:a, DownloadJob)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
assert_equal "pipeline_coordination", klass.coordination_queue_name
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_pipeline_dsl_callback_queue
|
|
47
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
48
|
+
callback_queue_name "pipeline_callbacks"
|
|
49
|
+
def configure(**) = run(:a, DownloadJob)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
assert_equal "pipeline_callbacks", klass.callback_queue_name
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# --- pipeline DSL fallback to global ---
|
|
56
|
+
|
|
57
|
+
def test_pipeline_without_dsl_uses_global_config
|
|
58
|
+
GoodPipeline.coordination_queue_name = "global_coordination"
|
|
59
|
+
|
|
60
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
61
|
+
def configure(**) = run(:a, DownloadJob)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
assert_equal "global_coordination", klass.coordination_queue_name
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def test_pipeline_without_dsl_or_global_uses_default
|
|
68
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
69
|
+
def configure(**) = run(:a, DownloadJob)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
assert_equal "good_pipeline_coordination", klass.coordination_queue_name
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# --- pipeline DSL overrides global ---
|
|
76
|
+
|
|
77
|
+
def test_pipeline_dsl_overrides_global
|
|
78
|
+
GoodPipeline.coordination_queue_name = "global_coordination"
|
|
79
|
+
|
|
80
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
81
|
+
coordination_queue_name "pipeline_coordination"
|
|
82
|
+
def configure(**) = run(:a, DownloadJob)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
assert_equal "pipeline_coordination", klass.coordination_queue_name
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# --- inheritance ---
|
|
89
|
+
|
|
90
|
+
def test_pipeline_inherits_queue_from_parent
|
|
91
|
+
parent = Class.new(GoodPipeline::Pipeline) do
|
|
92
|
+
coordination_queue_name "parent_coordination"
|
|
93
|
+
callback_queue_name "parent_callbacks"
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
child = Class.new(parent) do
|
|
97
|
+
def configure(**) = run(:a, DownloadJob)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
assert_equal "parent_coordination", child.coordination_queue_name
|
|
101
|
+
assert_equal "parent_callbacks", child.callback_queue_name
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# --- step batch gets coordination queue ---
|
|
105
|
+
|
|
106
|
+
def test_step_batch_gets_coordination_queue
|
|
107
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
108
|
+
coordination_queue_name "step_coordination"
|
|
109
|
+
def configure(**) = run(:a, DownloadJob)
|
|
110
|
+
end
|
|
111
|
+
klass.define_singleton_method(:name) { "StepBatchQueueTestPipeline" }
|
|
112
|
+
Object.const_set(:StepBatchQueueTestPipeline, klass) unless defined?(::StepBatchQueueTestPipeline)
|
|
113
|
+
|
|
114
|
+
pipeline_record = StepBatchQueueTestPipeline.run
|
|
115
|
+
|
|
116
|
+
step = pipeline_record.steps.first
|
|
117
|
+
batch_record = GoodJob::BatchRecord.find(step.good_job_batch_id)
|
|
118
|
+
|
|
119
|
+
assert_equal "step_coordination", batch_record.callback_queue_name
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# --- pipeline batch gets coordination queue ---
|
|
123
|
+
|
|
124
|
+
def test_pipeline_batch_gets_coordination_queue
|
|
125
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
126
|
+
coordination_queue_name "pipeline_coordination"
|
|
127
|
+
def configure(**) = run(:a, DownloadJob)
|
|
128
|
+
end
|
|
129
|
+
klass.define_singleton_method(:name) { "PipelineBatchQueueTestPipeline" }
|
|
130
|
+
Object.const_set(:PipelineBatchQueueTestPipeline, klass) unless defined?(::PipelineBatchQueueTestPipeline)
|
|
131
|
+
|
|
132
|
+
pipeline_record = PipelineBatchQueueTestPipeline.run
|
|
133
|
+
|
|
134
|
+
actual_record = GoodPipeline::PipelineRecord.find(pipeline_record.id)
|
|
135
|
+
batch_record = GoodJob::BatchRecord.find(actual_record.good_job_batch_id)
|
|
136
|
+
|
|
137
|
+
assert_equal "pipeline_coordination", batch_record.callback_queue_name
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# --- PipelineCallbackJob gets callback queue ---
|
|
141
|
+
|
|
142
|
+
def test_callback_job_gets_callback_queue
|
|
143
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
144
|
+
callback_queue_name "my_callbacks"
|
|
145
|
+
def configure(**) = run(:a, DownloadJob)
|
|
146
|
+
end
|
|
147
|
+
klass.define_singleton_method(:name) { "CallbackQueueTestPipeline" }
|
|
148
|
+
Object.const_set(:CallbackQueueTestPipeline, klass) unless defined?(::CallbackQueueTestPipeline)
|
|
149
|
+
|
|
150
|
+
pipeline_record = CallbackQueueTestPipeline.run
|
|
151
|
+
run_pipeline_to_completion(pipeline_record)
|
|
152
|
+
|
|
153
|
+
callback_job = GoodJob::Job.where(job_class: "GoodPipeline::PipelineCallbackJob").last
|
|
154
|
+
|
|
155
|
+
assert_equal "my_callbacks", callback_job.queue_name
|
|
156
|
+
end
|
|
157
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class TestBulkEnqueueEndToEnd < ActiveSupport::TestCase
|
|
6
|
+
def test_fan_in_pipeline_with_multiple_root_steps_succeeds
|
|
7
|
+
pipeline_class = Class.new(GoodPipeline::Pipeline) do
|
|
8
|
+
failure_strategy :halt
|
|
9
|
+
|
|
10
|
+
define_method(:configure) do |**_kwargs|
|
|
11
|
+
run :root_a, DownloadJob
|
|
12
|
+
run :root_b, TranscodeJob
|
|
13
|
+
run :root_c, ThumbnailJob
|
|
14
|
+
run :collector, PublishJob, after: %i[root_a root_b root_c]
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
Object.const_set(:FanInBulkTestPipeline, pipeline_class) unless defined?(::FanInBulkTestPipeline)
|
|
18
|
+
|
|
19
|
+
pipeline_record = FanInBulkTestPipeline.run
|
|
20
|
+
|
|
21
|
+
# All 3 root steps should have been enqueued with distinct batches
|
|
22
|
+
root_steps = pipeline_record.steps.where(key: %w[root_a root_b root_c])
|
|
23
|
+
root_steps.each do |step|
|
|
24
|
+
refute_equal "pending", step.coordination_status,
|
|
25
|
+
"Root step #{step.key} should have been enqueued"
|
|
26
|
+
refute_nil step.good_job_batch_id
|
|
27
|
+
refute_nil step.good_job_id
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
batch_ids = root_steps.pluck(:good_job_batch_id).uniq
|
|
31
|
+
assert_equal 3, batch_ids.size, "Each root step should have a unique batch"
|
|
32
|
+
|
|
33
|
+
result = run_pipeline_to_completion(pipeline_record)
|
|
34
|
+
|
|
35
|
+
assert_equal "succeeded", result.status
|
|
36
|
+
assert(result.steps.all? { |step| step.coordination_status == "succeeded" })
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_all_root_steps_pipeline_succeeds
|
|
40
|
+
pipeline_class = Class.new(GoodPipeline::Pipeline) do
|
|
41
|
+
failure_strategy :continue
|
|
42
|
+
|
|
43
|
+
define_method(:configure) do |**_kwargs|
|
|
44
|
+
run :step_a, DownloadJob
|
|
45
|
+
run :step_b, TranscodeJob
|
|
46
|
+
run :step_c, ThumbnailJob
|
|
47
|
+
run :step_d, PublishJob
|
|
48
|
+
run :step_e, CleanupJob
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
Object.const_set(:AllRootsBulkTestPipeline, pipeline_class) unless defined?(::AllRootsBulkTestPipeline)
|
|
52
|
+
|
|
53
|
+
pipeline_record = AllRootsBulkTestPipeline.run
|
|
54
|
+
result = run_pipeline_to_completion(pipeline_record)
|
|
55
|
+
|
|
56
|
+
assert_equal "succeeded", result.status
|
|
57
|
+
|
|
58
|
+
result.steps.each do |step|
|
|
59
|
+
assert_equal "succeeded", step.coordination_status
|
|
60
|
+
refute_nil step.good_job_batch_id
|
|
61
|
+
refute_nil step.good_job_id
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_fan_in_with_failing_root_step_halts
|
|
66
|
+
pipeline_class = Class.new(GoodPipeline::Pipeline) do
|
|
67
|
+
failure_strategy :halt
|
|
68
|
+
|
|
69
|
+
define_method(:configure) do |**_kwargs|
|
|
70
|
+
run :root_a, DownloadJob
|
|
71
|
+
run :root_b, FailingJob
|
|
72
|
+
run :root_c, ThumbnailJob
|
|
73
|
+
run :collector, PublishJob, after: %i[root_a root_b root_c]
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
Object.const_set(:FanInFailBulkTestPipeline, pipeline_class) unless defined?(::FanInFailBulkTestPipeline)
|
|
77
|
+
|
|
78
|
+
pipeline_record = FanInFailBulkTestPipeline.run
|
|
79
|
+
result = run_pipeline_to_completion(pipeline_record)
|
|
80
|
+
|
|
81
|
+
assert_equal "halted", result.status
|
|
82
|
+
assert_equal "failed", result.steps.find_by(key: "root_b").coordination_status
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def test_enqueue_options_forwarded_to_good_job
|
|
86
|
+
pipeline_class = Class.new(GoodPipeline::Pipeline) do
|
|
87
|
+
failure_strategy :halt
|
|
88
|
+
|
|
89
|
+
define_method(:configure) do |**_kwargs|
|
|
90
|
+
run :step_a, DownloadJob, enqueue: { queue: "critical", priority: 1 }
|
|
91
|
+
run :step_b, TranscodeJob, enqueue: { queue: "low", priority: 10 }
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
Object.const_set(:EnqueueOptionsBulkTestPipeline, pipeline_class) unless defined?(::EnqueueOptionsBulkTestPipeline)
|
|
95
|
+
|
|
96
|
+
pipeline_record = EnqueueOptionsBulkTestPipeline.run
|
|
97
|
+
|
|
98
|
+
step_a = pipeline_record.steps.find_by(key: "step_a")
|
|
99
|
+
step_b = pipeline_record.steps.find_by(key: "step_b")
|
|
100
|
+
|
|
101
|
+
good_job_a = GoodJob::Job.find_by(id: step_a.good_job_id)
|
|
102
|
+
good_job_b = GoodJob::Job.find_by(id: step_b.good_job_id)
|
|
103
|
+
|
|
104
|
+
assert_equal "critical", good_job_a.queue_name
|
|
105
|
+
assert_equal 1, good_job_a.priority
|
|
106
|
+
assert_equal "low", good_job_b.queue_name
|
|
107
|
+
assert_equal 10, good_job_b.priority
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -3,21 +3,6 @@
|
|
|
3
3
|
require "test_helper"
|
|
4
4
|
|
|
5
5
|
class TestEndToEnd < ActiveSupport::TestCase
|
|
6
|
-
def run_pipeline_to_completion(pipeline_record, timeout: 15)
|
|
7
|
-
deadline = Time.current + timeout
|
|
8
|
-
loop do
|
|
9
|
-
perform_enqueued_jobs_inline
|
|
10
|
-
pipeline_record.reload
|
|
11
|
-
return pipeline_record if pipeline_record.terminal?
|
|
12
|
-
|
|
13
|
-
if Time.current > deadline
|
|
14
|
-
raise "Pipeline did not reach terminal state within #{timeout}s (status: #{pipeline_record.status})"
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
sleep 0.05
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
6
|
def test_full_pipeline_succeeds
|
|
22
7
|
pipeline_record = VideoProcessingPipeline.run(video_id: 123)
|
|
23
8
|
|
|
@@ -3,19 +3,6 @@
|
|
|
3
3
|
require "test_helper"
|
|
4
4
|
|
|
5
5
|
class TestHaltExecution < ActiveSupport::TestCase
|
|
6
|
-
def run_pipeline_to_completion(pipeline_record, timeout: 15)
|
|
7
|
-
deadline = Time.current + timeout
|
|
8
|
-
loop do
|
|
9
|
-
perform_enqueued_jobs_inline
|
|
10
|
-
pipeline_record.reload
|
|
11
|
-
return pipeline_record if pipeline_record.terminal?
|
|
12
|
-
|
|
13
|
-
raise "Pipeline did not reach terminal state within #{timeout}s (status: #{pipeline_record.status})" if Time.current > deadline
|
|
14
|
-
|
|
15
|
-
sleep 0.05
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
6
|
def test_halt_pipeline_marks_step_halted
|
|
20
7
|
pipeline_class = Class.new(GoodPipeline::Pipeline) do
|
|
21
8
|
failure_strategy :halt
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "test_helper"
|
|
4
|
+
|
|
5
|
+
class TestQueueConfigurationEndToEnd < ActiveSupport::TestCase
|
|
6
|
+
teardown do
|
|
7
|
+
GoodPipeline.coordination_queue_name = nil
|
|
8
|
+
GoodPipeline.callback_queue_name = nil
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_full_pipeline_with_custom_queues
|
|
12
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
13
|
+
failure_strategy :halt
|
|
14
|
+
coordination_queue_name "e2e_coordination"
|
|
15
|
+
callback_queue_name "e2e_callbacks"
|
|
16
|
+
|
|
17
|
+
define_method(:configure) do |**_kwargs|
|
|
18
|
+
run :step_a, DownloadJob
|
|
19
|
+
run :step_b, TranscodeJob
|
|
20
|
+
run :step_c, PublishJob, after: %i[step_a step_b]
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
klass.define_singleton_method(:name) { "QueueE2ETestPipeline" }
|
|
24
|
+
Object.const_set(:QueueE2ETestPipeline, klass) unless defined?(::QueueE2ETestPipeline)
|
|
25
|
+
|
|
26
|
+
pipeline_record = QueueE2ETestPipeline.run
|
|
27
|
+
|
|
28
|
+
# Verify step batch queue names
|
|
29
|
+
pipeline_record.steps.each do |step|
|
|
30
|
+
next unless step.good_job_batch_id
|
|
31
|
+
|
|
32
|
+
batch_record = GoodJob::BatchRecord.find(step.good_job_batch_id)
|
|
33
|
+
|
|
34
|
+
assert_equal "e2e_coordination", batch_record.callback_queue_name,
|
|
35
|
+
"Step #{step.key} batch should use coordination queue"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Verify pipeline batch queue name
|
|
39
|
+
actual_record = GoodPipeline::PipelineRecord.find(pipeline_record.id)
|
|
40
|
+
pipeline_batch = GoodJob::BatchRecord.find(actual_record.good_job_batch_id)
|
|
41
|
+
|
|
42
|
+
assert_equal "e2e_coordination", pipeline_batch.callback_queue_name
|
|
43
|
+
|
|
44
|
+
# Run to completion and verify callback job queue
|
|
45
|
+
result = run_pipeline_to_completion(pipeline_record)
|
|
46
|
+
|
|
47
|
+
assert_equal "succeeded", result.status
|
|
48
|
+
|
|
49
|
+
callback_job = GoodJob::Job.where(job_class: "GoodPipeline::PipelineCallbackJob").last
|
|
50
|
+
|
|
51
|
+
assert_equal "e2e_callbacks", callback_job.queue_name
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def test_global_config_applies_when_no_dsl
|
|
55
|
+
GoodPipeline.coordination_queue_name = "global_coord"
|
|
56
|
+
GoodPipeline.callback_queue_name = "global_cb"
|
|
57
|
+
|
|
58
|
+
klass = Class.new(GoodPipeline::Pipeline) do
|
|
59
|
+
failure_strategy :halt
|
|
60
|
+
define_method(:configure) do |**_kwargs|
|
|
61
|
+
run :step_a, DownloadJob
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
klass.define_singleton_method(:name) { "GlobalQueueTestPipeline" }
|
|
65
|
+
Object.const_set(:GlobalQueueTestPipeline, klass) unless defined?(::GlobalQueueTestPipeline)
|
|
66
|
+
|
|
67
|
+
pipeline_record = GlobalQueueTestPipeline.run
|
|
68
|
+
|
|
69
|
+
step = pipeline_record.steps.first
|
|
70
|
+
step_batch = GoodJob::BatchRecord.find(step.good_job_batch_id)
|
|
71
|
+
|
|
72
|
+
assert_equal "global_coord", step_batch.callback_queue_name
|
|
73
|
+
|
|
74
|
+
result = run_pipeline_to_completion(pipeline_record)
|
|
75
|
+
|
|
76
|
+
assert_equal "succeeded", result.status
|
|
77
|
+
|
|
78
|
+
callback_job = GoodJob::Job.where(job_class: "GoodPipeline::PipelineCallbackJob").last
|
|
79
|
+
|
|
80
|
+
assert_equal "global_cb", callback_job.queue_name
|
|
81
|
+
end
|
|
82
|
+
end
|
data/demo/test/test_helper.rb
CHANGED
|
@@ -33,6 +33,21 @@ module ActiveSupport
|
|
|
33
33
|
GoodJob.perform_inline
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
def run_pipeline_to_completion(pipeline_record, timeout: 15)
|
|
37
|
+
deadline = Time.current + timeout
|
|
38
|
+
loop do
|
|
39
|
+
perform_enqueued_jobs_inline
|
|
40
|
+
pipeline_record.reload
|
|
41
|
+
return pipeline_record if pipeline_record.terminal?
|
|
42
|
+
|
|
43
|
+
if Time.current > deadline
|
|
44
|
+
raise "Pipeline did not reach terminal state within #{timeout}s (status: #{pipeline_record.status})"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
sleep 0.05
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
36
51
|
def wait_until(timeout: 10, interval: 0.1)
|
|
37
52
|
deadline = Time.current + timeout
|
|
38
53
|
loop do
|
data/docs/architecture.md
CHANGED
|
@@ -180,3 +180,11 @@ This ensures a step is never prematurely marked `failed` on attempt 1 of 5.
|
|
|
180
180
|
7. Separate atomic units per transaction boundary to minimize lock contention
|
|
181
181
|
8. DAG validation runs at instantiation, before any database writes
|
|
182
182
|
9. `failure_strategy` and `on_failure` are distinct concepts -- strategy vs. callback, no naming collision
|
|
183
|
+
|
|
184
|
+
## Why these tradeoffs
|
|
185
|
+
|
|
186
|
+
GoodPipeline is intentionally GoodJob-specific and Postgres-only. This is what enables atomic enqueue transactions — step status transitions and GoodJob record inserts happen in a single database transaction, eliminating an entire class of partial-state bugs that adapter-agnostic gems must work around.
|
|
187
|
+
|
|
188
|
+
The DAG execution model (vs. strictly sequential steps) adds coordination complexity — row locks, atomic counters, fan-in race prevention — but unlocks parallel execution of independent steps. For workflows where steps have no dependency on each other, this means wall-clock time is bounded by the longest path through the graph, not the sum of all steps.
|
|
189
|
+
|
|
190
|
+
The four-table data model (pipelines, steps, dependencies, chains) is more tables than a two-table approach, but dedicated dependency and chain tables enable efficient graph queries and keep the step table free of self-referential joins.
|
data/docs/branching.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Conditional branching lets a pipeline take different paths at runtime based on application state. The dashboard renders branches as diamond decision nodes.
|
|
4
4
|
|
|
5
|
+
Runtime branching with `branch` blocks is uncommon among Ruby workflow gems — most offer only `skip_if` conditions on individual steps. GoodPipeline's branching evaluates a decision method when the branch is reached, runs the matching arm, marks non-matching arms as `skipped_by_branch`, and lets downstream steps wait on whichever arm was chosen.
|
|
6
|
+
|
|
5
7
|
## Defining a branch
|
|
6
8
|
|
|
7
9
|
Use `branch` inside `configure` to define a decision point. The `by:` option names a method on your pipeline class that returns the arm to execute:
|
data/docs/callbacks.md
CHANGED
|
@@ -46,6 +46,8 @@ Note: `on_failure` does **not** fire for `skipped` pipelines. Being skipped by a
|
|
|
46
46
|
|
|
47
47
|
Callbacks are dispatched via `PipelineCallbackJob`, a GoodJob job enqueued after the terminal state transaction commits. A slow external call (Slack, webhooks) cannot stall the coordinator, callback execution cannot corrupt pipeline state, and callbacks get GoodJob's retry mechanism if they fail.
|
|
48
48
|
|
|
49
|
+
`PipelineCallbackJob` runs on the queue configured by `callback_queue_name` (default: `"good_pipeline_callbacks"`). This is separate from `coordination_queue_name` which controls the coordination jobs (`StepFinishedJob`, `PipelineReconciliationJob`), so slow callbacks don't block pipeline progression. See [Defining Pipelines](/defining-pipelines) for configuration options.
|
|
50
|
+
|
|
49
51
|
## Exactly-once guarantee
|
|
50
52
|
|
|
51
53
|
The callback bundle (`on_complete` + one of `on_success`/`on_failure`) is dispatched as a **single unit**. A `callbacks_dispatched_at` timestamp is set atomically inside a `FOR UPDATE` locked transaction, ensuring the bundle fires exactly once even if `recompute_pipeline_status` is called from multiple code paths (coordinator or batch reconciliation).
|
data/docs/defining-pipelines.md
CHANGED
|
@@ -8,6 +8,8 @@ Every pipeline is a subclass of `GoodPipeline::Pipeline` that implements `config
|
|
|
8
8
|
class VideoProcessingPipeline < GoodPipeline::Pipeline
|
|
9
9
|
description "Downloads, transcodes and publishes a video"
|
|
10
10
|
failure_strategy :halt
|
|
11
|
+
coordination_queue_name "video_coordination"
|
|
12
|
+
callback_queue_name "video_callbacks"
|
|
11
13
|
|
|
12
14
|
on_complete :notify
|
|
13
15
|
on_success :celebrate
|
|
@@ -39,6 +41,8 @@ end
|
|
|
39
41
|
| `on_complete` | Callback for any terminal state | `nil` |
|
|
40
42
|
| `on_success` | Callback for succeeded | `nil` |
|
|
41
43
|
| `on_failure` | Callback for failed or halted | `nil` |
|
|
44
|
+
| `coordination_queue_name` | Queue for `StepFinishedJob` and `PipelineReconciliationJob` | `"good_pipeline_coordination"` |
|
|
45
|
+
| `callback_queue_name` | Queue for `PipelineCallbackJob` | `"good_pipeline_callbacks"` |
|
|
42
46
|
|
|
43
47
|
## DSL verbs
|
|
44
48
|
|
data/docs/getting-started.md
CHANGED
|
@@ -36,6 +36,18 @@ GoodJob.preserve_job_records = true
|
|
|
36
36
|
|
|
37
37
|
GoodPipeline will raise `GoodPipeline::ConfigurationError` at boot if this is not set.
|
|
38
38
|
|
|
39
|
+
## Configure queue names (optional)
|
|
40
|
+
|
|
41
|
+
GoodPipeline routes its internal jobs to dedicated queues by default. You can override them globally:
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
# config/initializers/good_pipeline.rb
|
|
45
|
+
GoodPipeline.coordination_queue_name = "pipeline_coordination" # StepFinishedJob, PipelineReconciliationJob
|
|
46
|
+
GoodPipeline.callback_queue_name = "pipeline_callbacks" # PipelineCallbackJob
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Defaults are `"good_pipeline_coordination"` and `"good_pipeline_callbacks"`. Per-pipeline overrides are also available via the class DSL — see [Defining Pipelines](/defining-pipelines).
|
|
50
|
+
|
|
39
51
|
## Mount the dashboard (optional)
|
|
40
52
|
|
|
41
53
|
```ruby
|
data/docs/index.md
CHANGED
|
@@ -17,7 +17,7 @@ features:
|
|
|
17
17
|
- title: Postgres only
|
|
18
18
|
details: All state lives in Postgres. No Redis, no external dependencies. Step transitions and job enqueues happen in a single database transaction.
|
|
19
19
|
- title: DAG orchestration
|
|
20
|
-
details: Define pipelines as directed acyclic graphs
|
|
20
|
+
details: Define pipelines as directed acyclic graphs — not just linear chains. Steps run in parallel when possible, wait for dependencies automatically, and take different paths based on runtime decisions. Fan-out, fan-in, branching, and chaining are all first-class.
|
|
21
21
|
- title: Web dashboard
|
|
22
22
|
details: A mountable Rails engine with pipeline executions, step details, DAG visualization, and a pipeline definitions catalog. No build step.
|
|
23
23
|
---
|
data/docs/introduction.md
CHANGED
|
@@ -23,6 +23,24 @@ GoodJob's Batch feature fires a single `on_finish` callback when all jobs in a b
|
|
|
23
23
|
|
|
24
24
|
GoodPipeline adds a coordination state machine, DAG validation, and atomic step transitions on top of Batch.
|
|
25
25
|
|
|
26
|
+
### vs. Active Job Continuation (Rails 8.1)
|
|
27
|
+
|
|
28
|
+
Rails 8.1 ships with `ActiveJob::Continuable`, which lets a single job define sequential steps with cursor-based progress tracking. If a deploy kills the process, the job resumes from its last checkpoint instead of restarting from scratch.
|
|
29
|
+
|
|
30
|
+
This solves a different problem than GoodPipeline. Continuation makes one long-running job resilient to interruption. GoodPipeline orchestrates multiple independent jobs as a DAG with parallel execution, fan-out/fan-in, branching, and pipeline-level failure strategies.
|
|
31
|
+
|
|
32
|
+
The two are complementary: a GoodPipeline step that processes millions of records could use `Continuable` internally for checkpoint/resume, while GoodPipeline handles the higher-level orchestration around it.
|
|
33
|
+
|
|
34
|
+
### vs. Geneva Drive
|
|
35
|
+
|
|
36
|
+
[Geneva Drive](https://github.com/julik/geneva_drive) is a durable workflow framework that executes steps strictly sequentially — one step at a time, like the mechanical gear it's named after. It works with any ActiveJob adapter (Sidekiq, Solid Queue, GoodJob) and supports PostgreSQL, MySQL, and SQLite.
|
|
37
|
+
|
|
38
|
+
Geneva Drive is a strong choice for linear, long-lived workflows that need pause/resume, human-in-the-loop recovery, and per-hero workflow uniqueness constraints. Its layered exception policy system is particularly sophisticated.
|
|
39
|
+
|
|
40
|
+
GoodPipeline takes a different approach: workflows are DAGs, not linear chains. Independent steps run in parallel across workers. Fan-out, fan-in, conditional branching, and pipeline chaining are first-class primitives. The tradeoff is that GoodPipeline requires GoodJob and PostgreSQL specifically, while Geneva Drive is adapter- and database-agnostic.
|
|
41
|
+
|
|
42
|
+
Choose Geneva Drive when your workflow is inherently sequential and you need pause/resume or adapter flexibility. Choose GoodPipeline when steps can run concurrently, your workflow has branching or fan-in topology, or you want a built-in dashboard with DAG visualization.
|
|
43
|
+
|
|
26
44
|
## Features
|
|
27
45
|
|
|
28
46
|
- `run` and `branch` DSL for defining step dependencies and conditional paths
|
|
@@ -39,4 +57,4 @@ GoodPipeline adds a coordination state machine, DAG validation, and atomic step
|
|
|
39
57
|
- Ruby >= 3.2
|
|
40
58
|
- Rails >= 7.1
|
|
41
59
|
- PostgreSQL
|
|
42
|
-
- GoodJob >=
|
|
60
|
+
- GoodJob >= 4.14 with `preserve_job_records = true`
|
data/docs/pipeline-chaining.md
CHANGED
|
@@ -79,6 +79,8 @@ GoodPipeline.run(
|
|
|
79
79
|
|
|
80
80
|
Both pipelines start immediately. `MergeMediaPipeline` waits for both to succeed.
|
|
81
81
|
|
|
82
|
+
Pipeline chaining is a first-class primitive — upstream/downstream relationships are tracked in a dedicated database table with atomic state propagation, rather than manually creating the next workflow in the last step of the current one.
|
|
83
|
+
|
|
82
84
|
## How `.then` works internally
|
|
83
85
|
|
|
84
86
|
`.then` returns a `GoodPipeline::Chain` object which:
|
|
@@ -42,12 +42,8 @@ module GoodPipeline
|
|
|
42
42
|
|
|
43
43
|
# Transitions the pipeline into the running state and bulk-enqueues all
# root steps — those with no upstream dependencies — in one operation.
def start_pipeline(pipeline_record)
  pipeline_record.transition_to!(:running)

  Coordinator.bulk_enqueue_steps(
    pipeline_record.steps.where.missing(:upstream_dependencies).pluck(:id)
  )
end
|
|
52
48
|
end
|
|
53
49
|
end
|
|
@@ -49,6 +49,23 @@ module GoodPipeline
|
|
|
49
49
|
step_was_enqueued || downstream_enqueued
|
|
50
50
|
end
|
|
51
51
|
|
|
52
|
+
# Enqueues multiple steps in bulk using Batch.enqueue_all.
# Intended for root steps during pipeline startup where no concurrent
# enqueue risk exists and no upstream checks are needed. Branch steps are
# excluded from the bulk path and routed through try_enqueue_step so
# their decision logic still runs.
def bulk_enqueue_steps(step_ids)
  return if step_ids.empty?

  candidates = StepRecord
               .where(id: step_ids, coordination_status: "pending")
               .where(good_job_id: nil)
               .to_a

  branch_candidates, regular_candidates = candidates.partition(&:branch_step?)

  bulk_enqueue_user_jobs(regular_candidates) unless regular_candidates.empty?

  branch_candidates.each { |candidate| try_enqueue_step(candidate.id) }
end
|
|
68
|
+
|
|
52
69
|
def recompute_pipeline_status(pipeline, has_active_steps: nil, has_downstream_chains: nil) # rubocop:disable Metrics/MethodLength
|
|
53
70
|
return if pipeline.terminal?
|
|
54
71
|
|
|
@@ -73,7 +90,8 @@ module GoodPipeline
|
|
|
73
90
|
|
|
74
91
|
return if rows_updated.zero?
|
|
75
92
|
|
|
76
|
-
|
|
93
|
+
queue = pipeline.type.constantize.callback_queue_name
|
|
94
|
+
PipelineCallbackJob.set(queue: queue).perform_later(pipeline.id, new_status.to_s)
|
|
77
95
|
end
|
|
78
96
|
end
|
|
79
97
|
|
|
@@ -129,6 +147,18 @@ module GoodPipeline
|
|
|
129
147
|
scope.update_all(coordination_status: "skipped")
|
|
130
148
|
end
|
|
131
149
|
|
|
150
|
+
# Returns a Set of ids for every step transitively downstream of +step+,
# via breadth-first traversal of the dependency table.
#
# Improvement over the per-node version: dependencies are fetched one
# *frontier* (BFS level) at a time instead of one query per visited step,
# turning N queries into one query per graph level. The resulting set is
# identical.
def transitive_downstream_ids(step)
  visited = Set.new
  frontier = step.downstream_steps.pluck(:id)

  until frontier.empty?
    # Drop ids we've already seen (also guards against cycles in bad data).
    frontier = frontier.uniq - visited.to_a
    break if frontier.empty?

    visited.merge(frontier)
    frontier = DependencyRecord.where(depends_on_step_id: frontier).pluck(:step_id)
  end

  visited
end
|
|
161
|
+
|
|
132
162
|
def unblock_downstream_steps(step)
|
|
133
163
|
sql = <<~SQL
|
|
134
164
|
UPDATE good_pipeline_steps
|
|
@@ -226,6 +256,7 @@ module GoodPipeline
|
|
|
226
256
|
# Builds (without saving) a GoodJob::Batch for a single step. The batch
# finishes into StepFinishedJob on the pipeline class's configured
# coordination queue, carrying the step id in its properties.
def build_step_batch(step)
  GoodJob::Batch.new.tap do |batch|
    batch.on_finish = "GoodPipeline::StepFinishedJob"
    batch.callback_queue_name = step.pipeline.type.constantize.coordination_queue_name
    batch.properties = { step_id: step.id }
  end
end
|
|
@@ -236,6 +267,14 @@ module GoodPipeline
|
|
|
236
267
|
enqueued_job.provider_job_id || enqueued_job.job_id
|
|
237
268
|
end
|
|
238
269
|
|
|
270
|
+
# Marks +step+ as failed and records the error's class and message on the
# row. update_columns deliberately skips validations/callbacks — the state
# machine transition above is the authoritative status change.
def fail_step_with_error(step, error)
  step.transition_coordination_status_to!(:failed)
  step.update_columns(error_class: error.class.name, error_message: error.message)
end
|
|
277
|
+
|
|
239
278
|
def derive_terminal_status(pipeline)
|
|
240
279
|
has_failures = pipeline.steps.where(coordination_status: "failed").exists?
|
|
241
280
|
|
|
@@ -245,24 +284,66 @@ module GoodPipeline
|
|
|
245
284
|
:failed
|
|
246
285
|
end
|
|
247
286
|
|
|
248
|
-
def
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
287
|
+
# Bulk-enqueues user jobs for the given steps: builds one GoodJob::Batch
# per step, enqueues them all via Batch.enqueue_all, then marks the step
# rows "enqueued" in the same transaction so status and job records can
# never diverge.
#
# Steps whose job_class cannot be constantized are collected and, in the
# ensure block, failed with a ConfigurationError (and halt is propagated
# when the pipeline's strategy calls for it) — even if the transaction
# itself raises.
#
# Fix: guard against an empty +steps+ array. Previously
# `steps.first.pipeline` would raise NoMethodError on nil when called
# with no steps; callers guarded this, but the method is now safe on its
# own.
def bulk_enqueue_user_jobs(steps) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength,Metrics/CyclomaticComplexity
  return if steps.empty?

  batch_job_pairs = []
  step_metadata = {}
  failed_steps = []
  # All steps in one bulk call belong to the same pipeline, so the
  # coordination queue is resolved once from the first step.
  coordination_queue = steps.first.pipeline.type.constantize.coordination_queue_name

  steps.each do |step|
    job_class = begin
      step.job_class.constantize
    rescue NameError => error
      failed_steps << [step, ConfigurationError.new(error.message)]
      next
    end

    batch = GoodJob::Batch.new
    batch.on_finish = "GoodPipeline::StepFinishedJob"
    batch.callback_queue_name = coordination_queue
    batch.properties = { step_id: step.id }

    active_job = job_class.new(**step.params.symbolize_keys)
    apply_enqueue_options(active_job, step.enqueue_options.symbolize_keys)

    batch_job_pairs << [batch, [active_job]]
    step_metadata[step.id] = { batch: batch, active_job: active_job }
  end

  StepRecord.transaction do
    GoodJob::Batch.enqueue_all(batch_job_pairs) if batch_job_pairs.any?

    now = Time.current
    step_metadata.each do |step_id, metadata|
      StepRecord.where(id: step_id).update_all(
        coordination_status: "enqueued",
        good_job_batch_id: metadata[:batch].id,
        good_job_id: metadata[:active_job].provider_job_id || metadata[:active_job].job_id,
        updated_at: now
      )
    end
  end
ensure
  failed_steps.each do |step, error|
    fail_step_with_error(step, error)
    propagate_halt(step) if step.pipeline.halt?
  end
end
|
|
259
332
|
|
|
260
|
-
def
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
333
|
+
# Applies per-step enqueue options onto an ActiveJob instance prior to
# bulk enqueue. GoodJob-specific options (labels, notify) are applied
# only when the job class exposes the corresponding writer; queue,
# priority, and a relative +wait+ delay map to standard ActiveJob
# attributes.
def apply_enqueue_options(active_job, options) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  return if options.blank?

  labels = options[:good_job_labels]
  if labels && active_job.respond_to?(:good_job_labels=)
    active_job.good_job_labels = Array(labels)
  end

  # key? (not truthiness) so an explicit `good_job_notify: false` is honored.
  if active_job.respond_to?(:good_job_notify=) && options.key?(:good_job_notify)
    active_job.good_job_notify = options[:good_job_notify]
  end

  queue = options[:queue]
  active_job.queue_name = queue.to_s if queue

  priority = options[:priority]
  active_job.priority = priority if priority

  wait = options[:wait]
  active_job.scheduled_at = Time.current + wait if wait
end
|
|
267
348
|
|
|
268
349
|
def effective_failure_strategy(step)
|
|
@@ -5,7 +5,16 @@ module GoodPipeline
|
|
|
5
5
|
|
|
6
6
|
class Pipeline # rubocop:disable Metrics/ClassLength
|
|
7
7
|
VALID_FAILURE_STRATEGIES = %i[halt continue ignore].freeze
|
|
8
|
-
DSL_ATTRIBUTES = %i[
|
|
8
|
+
DSL_ATTRIBUTES = %i[
|
|
9
|
+
display_name
|
|
10
|
+
description
|
|
11
|
+
failure_strategy
|
|
12
|
+
on_complete
|
|
13
|
+
on_success
|
|
14
|
+
on_failure
|
|
15
|
+
coordination_queue_name
|
|
16
|
+
callback_queue_name
|
|
17
|
+
].freeze
|
|
9
18
|
|
|
10
19
|
# --- Class-level DSL ---
|
|
11
20
|
|
|
@@ -58,6 +67,18 @@ module GoodPipeline
|
|
|
58
67
|
@on_failure = method_name
|
|
59
68
|
end
|
|
60
69
|
|
|
70
|
+
# Combined getter/setter DSL. Called with no argument, returns this
# class's coordination queue name, falling back to the global
# GoodPipeline.coordination_queue_name default. Called with a name,
# records a per-class override. The :__unset__ sentinel distinguishes
# "no argument" from legitimate values like nil.
def coordination_queue_name(name = :__unset__)
  if name == :__unset__
    @coordination_queue_name || GoodPipeline.coordination_queue_name
  else
    @coordination_queue_name = name
  end
end
|
|
75
|
+
|
|
76
|
+
# Combined getter/setter DSL. Called with no argument, returns this
# class's callback queue name, falling back to the global
# GoodPipeline.callback_queue_name default. Called with a name, records
# a per-class override. The :__unset__ sentinel distinguishes "no
# argument" from legitimate values like nil.
def callback_queue_name(name = :__unset__)
  if name == :__unset__
    @callback_queue_name || GoodPipeline.callback_queue_name
  else
    @callback_queue_name = name
  end
end
|
|
81
|
+
|
|
61
82
|
alias build new
|
|
62
83
|
|
|
63
84
|
def run(**)
|
|
@@ -95,6 +116,8 @@ module GoodPipeline
|
|
|
95
116
|
def on_complete_callback = self.class.on_complete
|
|
96
117
|
def on_success_callback = self.class.on_success
|
|
97
118
|
def on_failure_callback = self.class.on_failure
|
|
119
|
+
def coordination_queue_name = self.class.coordination_queue_name
|
|
120
|
+
def callback_queue_name = self.class.callback_queue_name
|
|
98
121
|
|
|
99
122
|
def initialize(**kwargs) # rubocop:disable Metrics/MethodLength
|
|
100
123
|
@params = kwargs.freeze
|
data/lib/good_pipeline/runner.rb
CHANGED
|
@@ -32,6 +32,7 @@ module GoodPipeline
|
|
|
32
32
|
def create_pipeline_batch(pipeline_id)
|
|
33
33
|
batch = GoodJob::Batch.new
|
|
34
34
|
batch.on_finish = "GoodPipeline::PipelineReconciliationJob"
|
|
35
|
+
batch.callback_queue_name = @pipeline.coordination_queue_name
|
|
35
36
|
batch.properties = { pipeline_id: pipeline_id }
|
|
36
37
|
batch.save
|
|
37
38
|
batch
|
|
@@ -81,9 +82,8 @@ module GoodPipeline
|
|
|
81
82
|
end
|
|
82
83
|
|
|
83
84
|
# Resolves the persisted record ids of the pipeline's root step
# definitions and hands them to the coordinator for bulk enqueueing.
def enqueue_root_steps(step_id_by_key)
  ids = @pipeline.root_steps.map { |definition| step_id_by_key[definition.key] }
  Coordinator.bulk_enqueue_steps(ids)
end
|
|
88
88
|
|
|
89
89
|
def resolve_job_class(step_definition)
|
data/lib/good_pipeline.rb
CHANGED
|
@@ -18,6 +18,21 @@ require_relative "good_pipeline/chain"
|
|
|
18
18
|
require_relative "good_pipeline/engine" if defined?(Rails::Engine)
|
|
19
19
|
|
|
20
20
|
module GoodPipeline
|
|
21
|
+
  # Default queue names for GoodPipeline's internal jobs. Overridable
  # globally via the writers below, or per pipeline class via the DSL.
  DEFAULT_COORDINATION_QUEUE_NAME = "good_pipeline_coordination"
  DEFAULT_CALLBACK_QUEUE_NAME = "good_pipeline_callbacks"

  class << self
    # Global overrides, typically set from an initializer:
    #   GoodPipeline.coordination_queue_name = "pipeline_coordination"
    attr_writer :coordination_queue_name, :callback_queue_name

    # Queue used for coordination jobs; falls back to the default when
    # no global override has been set.
    def coordination_queue_name
      @coordination_queue_name || DEFAULT_COORDINATION_QUEUE_NAME
    end

    # Queue used for PipelineCallbackJob; falls back to the default when
    # no global override has been set.
    def callback_queue_name
      @callback_queue_name || DEFAULT_CALLBACK_QUEUE_NAME
    end
  end
|
|
35
|
+
|
|
21
36
|
def self.run(*pipeline_configs)
|
|
22
37
|
pipeline_records = pipeline_configs.map do |config|
|
|
23
38
|
pipeline_class, pipeline_params = extract_pipeline_config(config)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: good_pipeline
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ali Hamdi Ali Fadel
|
|
@@ -29,14 +29,14 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '
|
|
32
|
+
version: '4.14'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '
|
|
39
|
+
version: '4.14'
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: railties
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -120,6 +120,7 @@ files:
|
|
|
120
120
|
- demo/db/seeds.rb
|
|
121
121
|
- demo/docs/screenshots/definitions.png
|
|
122
122
|
- demo/docs/screenshots/show.png
|
|
123
|
+
- demo/test/good_pipeline/test_bulk_enqueue.rb
|
|
123
124
|
- demo/test/good_pipeline/test_chain_record.rb
|
|
124
125
|
- demo/test/good_pipeline/test_cleanup.rb
|
|
125
126
|
- demo/test/good_pipeline/test_coordinator.rb
|
|
@@ -129,10 +130,12 @@ files:
|
|
|
129
130
|
- demo/test/good_pipeline/test_pipeline_callback_job.rb
|
|
130
131
|
- demo/test/good_pipeline/test_pipeline_reconciliation_job.rb
|
|
131
132
|
- demo/test/good_pipeline/test_pipeline_record.rb
|
|
133
|
+
- demo/test/good_pipeline/test_queue_configuration.rb
|
|
132
134
|
- demo/test/good_pipeline/test_runner.rb
|
|
133
135
|
- demo/test/good_pipeline/test_step_finished_job.rb
|
|
134
136
|
- demo/test/good_pipeline/test_step_record.rb
|
|
135
137
|
- demo/test/integration/test_branch_execution.rb
|
|
138
|
+
- demo/test/integration/test_bulk_enqueue_end_to_end.rb
|
|
136
139
|
- demo/test/integration/test_concurrent_fan_in.rb
|
|
137
140
|
- demo/test/integration/test_end_to_end.rb
|
|
138
141
|
- demo/test/integration/test_enqueue_atomicity.rb
|
|
@@ -142,6 +145,7 @@ files:
|
|
|
142
145
|
- demo/test/integration/test_late_chain_registration.rb
|
|
143
146
|
- demo/test/integration/test_missing_decision_method.rb
|
|
144
147
|
- demo/test/integration/test_pipeline_chaining.rb
|
|
148
|
+
- demo/test/integration/test_queue_configuration.rb
|
|
145
149
|
- demo/test/integration/test_retry_scenarios.rb
|
|
146
150
|
- demo/test/integration/test_sequential_branches.rb
|
|
147
151
|
- demo/test/integration/test_step_finished_idempotency.rb
|