job-workflow 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -2
- data/CHANGELOG.md +30 -0
- data/README.md +1 -1
- data/app/controllers/job_workflow/monitoring/application_controller.rb +11 -0
- data/app/controllers/job_workflow/monitoring/executions_controller.rb +28 -0
- data/app/controllers/job_workflow/monitoring/workflows_controller.rb +11 -0
- data/app/views/job_workflow/monitoring/executions/index.html.erb +57 -0
- data/app/views/job_workflow/monitoring/executions/show.html.erb +200 -0
- data/app/views/job_workflow/monitoring/workflows/index.html.erb +39 -0
- data/app/views/layouts/job_workflow/monitoring/application.html.erb +117 -0
- data/config/routes.rb +8 -0
- data/guides/API_REFERENCE.md +79 -6
- data/guides/DEPENDENCY_WAIT.md +9 -5
- data/guides/MONITORING_UI.md +74 -0
- data/guides/PARALLEL_PROCESSING.md +33 -21
- data/guides/PRODUCTION_DEPLOYMENT.md +1 -1
- data/guides/README.md +6 -1
- data/guides/THROTTLING.md +24 -0
- data/guides/WORKFLOW_STATUS_QUERY.md +7 -1
- data/lib/job_workflow/context.rb +68 -6
- data/lib/job_workflow/dsl.rb +1 -5
- data/lib/job_workflow/instrumentation/opentelemetry_subscriber.rb +1 -1
- data/lib/job_workflow/instrumentation.rb +14 -14
- data/lib/job_workflow/job_status.rb +16 -1
- data/lib/job_workflow/monitoring/dag_layout.rb +186 -0
- data/lib/job_workflow/monitoring/engine.rb +15 -0
- data/lib/job_workflow/monitoring/execution_page.rb +16 -0
- data/lib/job_workflow/monitoring/execution_registry.rb +50 -0
- data/lib/job_workflow/monitoring/execution_view_model.rb +262 -0
- data/lib/job_workflow/monitoring/parameter_filter.rb +37 -0
- data/lib/job_workflow/monitoring/workflow_definition.rb +24 -0
- data/lib/job_workflow/monitoring/workflow_registry.rb +24 -0
- data/lib/job_workflow/monitoring.rb +120 -0
- data/lib/job_workflow/queue_adapters/abstract.rb +7 -2
- data/lib/job_workflow/queue_adapters/null_adapter.rb +12 -1
- data/lib/job_workflow/queue_adapters/solid_queue_adapter.rb +42 -12
- data/lib/job_workflow/railtie.rb +12 -0
- data/lib/job_workflow/runner.rb +38 -15
- data/lib/job_workflow/sub_task_job.rb +93 -0
- data/lib/job_workflow/task.rb +7 -0
- data/lib/job_workflow/task_enqueue.rb +19 -12
- data/lib/job_workflow/version.rb +1 -1
- data/lib/job_workflow/workflow_status.rb +20 -1
- data/lib/job_workflow.rb +5 -1
- data/sig/generated/job_workflow/context.rbs +31 -7
- data/sig/generated/job_workflow/instrumentation/opentelemetry_subscriber.rbs +0 -1
- data/sig/generated/job_workflow/instrumentation.rbs +28 -28
- data/sig/generated/job_workflow/job_status.rbs +5 -2
- data/sig/generated/job_workflow/monitoring/dag_layout.rbs +80 -0
- data/sig/generated/job_workflow/monitoring/engine.rbs +8 -0
- data/sig/generated/job_workflow/monitoring/execution_page.rbs +14 -0
- data/sig/generated/job_workflow/monitoring/execution_registry.rbs +21 -0
- data/sig/generated/job_workflow/monitoring/execution_view_model.rbs +111 -0
- data/sig/generated/job_workflow/monitoring/parameter_filter.rbs +16 -0
- data/sig/generated/job_workflow/monitoring/workflow_definition.rbs +18 -0
- data/sig/generated/job_workflow/monitoring/workflow_registry.rbs +13 -0
- data/sig/generated/job_workflow/monitoring.rbs +38 -0
- data/sig/generated/job_workflow/queue_adapters/abstract.rbs +7 -4
- data/sig/generated/job_workflow/queue_adapters/null_adapter.rbs +5 -2
- data/sig/generated/job_workflow/queue_adapters/solid_queue_adapter.rbs +18 -6
- data/sig/generated/job_workflow/railtie.rbs +6 -0
- data/sig/generated/job_workflow/runner.rbs +8 -5
- data/sig/generated/job_workflow/sub_task_job.rbs +40 -0
- data/sig/generated/job_workflow/task.rbs +5 -0
- data/sig/generated/job_workflow/task_enqueue.rbs +5 -8
- data/sig/generated/job_workflow/workflow_status.rbs +6 -0
- data/sig-private/job-workflow.rbs +11 -0
- data/sig-private/rails.rbs +5 -0
- metadata +34 -1
data/guides/API_REFERENCE.md
CHANGED
|
@@ -18,10 +18,9 @@ task(name, **options, &block)
|
|
|
18
18
|
- `depends_on` (Symbol | Array[Symbol]): Dependent tasks
|
|
19
19
|
- `each` (Proc): Proc that returns an enumerable for map task execution
|
|
20
20
|
- `enqueue` (Hash | Proc | bool): Controls whether task iterations are enqueued as sub-jobs
|
|
21
|
-
- Hash format (recommended): `{ condition: Proc, queue: String
|
|
21
|
+
- Hash format (recommended): `{ condition: Proc, queue: String }`
|
|
22
22
|
- `condition` (Proc | bool): Determines if task should be enqueued (default: true if Hash is not empty)
|
|
23
23
|
- `queue` (String): Custom queue name for the task (optional)
|
|
24
|
-
- `concurrency` (Integer): Concurrency limit for parallel processing (default: unlimited)
|
|
25
24
|
- Proc format (legacy): Proc that returns boolean
|
|
26
25
|
- bool format: true/false for simple cases
|
|
27
26
|
- Default: nil (synchronous execution)
|
|
@@ -75,7 +74,8 @@ end
|
|
|
75
74
|
# Parallel processing with collection
|
|
76
75
|
task :process_items,
|
|
77
76
|
each: ->(ctx) { ctx.arguments.items },
|
|
78
|
-
enqueue:
|
|
77
|
+
enqueue: true,
|
|
78
|
+
throttle: 5,
|
|
79
79
|
output: { result: "String" } do |ctx|
|
|
80
80
|
item = ctx.each_value
|
|
81
81
|
{ result: ProcessService.handle(item) }
|
|
@@ -84,6 +84,76 @@ end
|
|
|
84
84
|
|
|
85
85
|
**Map Task Output**: When `each:` is specified, outputs are automatically collected as an array.
|
|
86
86
|
|
|
87
|
+
### Task continuation helpers
|
|
88
|
+
|
|
89
|
+
Inside a task body, you can read the current task cursor, store a new cursor, and create interruption points through the task context.
|
|
90
|
+
|
|
91
|
+
#### Regular task example
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
task :sync_pages, output: { processed: "Integer" } do |ctx|
|
|
95
|
+
page = ctx.cursor || 1
|
|
96
|
+
result = ExternalAPI.fetch(page:)
|
|
97
|
+
|
|
98
|
+
ctx.set_cursor!(page + 1) if result.next_page?
|
|
99
|
+
|
|
100
|
+
{ processed: result.items.size }
|
|
101
|
+
end
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
- `ctx.cursor` returns the current task cursor, or `nil` when no cursor has been stored
|
|
105
|
+
- `ctx.set_cursor!(value)` validates that `value` is ActiveJob-serializable, stores it in the current continuation step, and checkpoints the job through Active Job continuation
|
|
106
|
+
- `ctx.checkpoint!` creates a checkpoint without changing the public cursor value
|
|
107
|
+
- Call `ctx.set_cursor!` when you want to change the public cursor value and create a checkpoint at the same time
|
|
108
|
+
- Call `ctx.checkpoint!` when you want the current task execution to become interruptible without changing the public cursor value
|
|
109
|
+
- Outside task execution, `ctx.cursor` returns `nil`, and `ctx.set_cursor!` / `ctx.checkpoint!` raise an error
|
|
110
|
+
|
|
111
|
+
For regular tasks, a cursor is only persisted when you call `ctx.set_cursor!(value)` explicitly.
|
|
112
|
+
|
|
113
|
+
#### Checkpoint without changing the cursor
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
task :publish_report do |ctx|
|
|
117
|
+
report = build_report
|
|
118
|
+
ctx.checkpoint!
|
|
119
|
+
deliver_report(report)
|
|
120
|
+
end
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
#### Repeating work inside a task
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
task :sync_users do |ctx|
|
|
127
|
+
start_index = ctx.cursor || 0
|
|
128
|
+
|
|
129
|
+
ctx.arguments.user_ids.drop(start_index).each_with_index do |user_id, offset|
|
|
130
|
+
sync_user(user_id)
|
|
131
|
+
ctx.set_cursor!(start_index + offset + 1)
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
This pattern is useful when a single task iterates over an Enumerable internally and you want to resume from the last completed item after an interruption.
|
|
137
|
+
|
|
138
|
+
#### `each:` task example
|
|
139
|
+
|
|
140
|
+
```ruby
|
|
141
|
+
task :sync_users, each: ->(ctx) { ctx.arguments.user_ids } do |ctx|
|
|
142
|
+
next_cursor = ExternalAPI.sync_user(
|
|
143
|
+
user_id: ctx.each_value,
|
|
144
|
+
cursor: ctx.cursor
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
ctx.set_cursor!(next_cursor) unless next_cursor.nil?
|
|
148
|
+
end
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
For `each:` tasks, JobWorkflow keeps the existing integer resume behavior for completed iterations.
|
|
152
|
+
|
|
153
|
+
- If an `each:` task is interrupted after calling `ctx.set_cursor!`, JobWorkflow resumes with both the current iteration index and the saved task cursor
|
|
154
|
+
- If an iteration completes normally, the resume state advances to the next integer index
|
|
155
|
+
- In other words, a custom cursor in an `each:` task is meant for resuming work inside the current iteration, not for replacing the completion index of finished iterations
|
|
156
|
+
|
|
87
157
|
### workflow_concurrency
|
|
88
158
|
|
|
89
159
|
Configure job-level concurrency limits with workflow-aware context.
|
|
@@ -117,7 +187,8 @@ class ImportJob < ApplicationJob
|
|
|
117
187
|
|
|
118
188
|
task :process,
|
|
119
189
|
each: ->(ctx) { ctx.arguments.items },
|
|
120
|
-
enqueue:
|
|
190
|
+
enqueue: true,
|
|
191
|
+
throttle: 5,
|
|
121
192
|
output: { result: "String" } do |ctx|
|
|
122
193
|
{ result: handle(ctx.each_value) }
|
|
123
194
|
end
|
|
@@ -141,7 +212,8 @@ class BatchImportJob < ApplicationJob
|
|
|
141
212
|
|
|
142
213
|
task :process,
|
|
143
214
|
each: ->(ctx) { ctx.arguments.items },
|
|
144
|
-
enqueue:
|
|
215
|
+
enqueue: true,
|
|
216
|
+
throttle: 5,
|
|
145
217
|
output: { result: "String" } do |ctx|
|
|
146
218
|
{ result: handle(ctx.each_value) }
|
|
147
219
|
end
|
|
@@ -158,7 +230,8 @@ argument :items, "Array[String]"
|
|
|
158
230
|
|
|
159
231
|
task :process_items,
|
|
160
232
|
each: ->(ctx) { ctx.arguments.items },
|
|
161
|
-
enqueue:
|
|
233
|
+
enqueue: true,
|
|
234
|
+
throttle: 5,
|
|
162
235
|
output: { result: "String", status: "Symbol" } do |ctx|
|
|
163
236
|
item = ctx.each_value
|
|
164
237
|
{
|
data/guides/DEPENDENCY_WAIT.md
CHANGED
|
@@ -14,7 +14,8 @@ class ExampleJob < ApplicationJob
|
|
|
14
14
|
|
|
15
15
|
task :process_items,
|
|
16
16
|
each: ->(ctx) { ctx.arguments.items },
|
|
17
|
-
enqueue:
|
|
17
|
+
enqueue: true,
|
|
18
|
+
throttle: 5,
|
|
18
19
|
output: { result: "Integer" } do |ctx|
|
|
19
20
|
# This creates many sub-jobs
|
|
20
21
|
{ result: ctx.each_value * 2 }
|
|
@@ -172,7 +173,8 @@ class DataPipelineJob < ApplicationJob
|
|
|
172
173
|
# Extract data from multiple sources in parallel
|
|
173
174
|
task :extract_data,
|
|
174
175
|
each: ->(ctx) { %w[users orders products inventory] },
|
|
175
|
-
enqueue:
|
|
176
|
+
enqueue: true,
|
|
177
|
+
throttle: 4,
|
|
176
178
|
output: { source: "String", count: "Integer" } do |ctx|
|
|
177
179
|
source = ctx.each_value
|
|
178
180
|
data = DataSource.fetch(source, date: ctx.arguments.date)
|
|
@@ -210,10 +212,11 @@ class APIAggregatorJob < ApplicationJob
|
|
|
210
212
|
|
|
211
213
|
argument :user_ids, "Array[Integer]"
|
|
212
214
|
|
|
213
|
-
# Fetch user data with rate limiting
|
|
215
|
+
# Fetch user data with rate limiting.
|
|
216
|
+
# Async fan-out (enqueue: true) is unbounded here; concurrent execution is capped by the throttle limit of 5 below.
|
|
214
217
|
task :fetch_users,
|
|
215
218
|
each: ->(ctx) { ctx.arguments.user_ids },
|
|
216
|
-
enqueue:
|
|
219
|
+
enqueue: true,
|
|
217
220
|
throttle: { key: "external_api", limit: 5 },
|
|
218
221
|
output: { user_id: "Integer", data: "Hash" } do |ctx|
|
|
219
222
|
user_id = ctx.each_value
|
|
@@ -270,7 +273,8 @@ dependency_wait: { poll_timeout: 60, reschedule_delay: 10 }
|
|
|
270
273
|
# ✅ Good: dependency_wait with parallel sub-jobs
|
|
271
274
|
task :process,
|
|
272
275
|
each: ->(ctx) { ctx.arguments.items },
|
|
273
|
-
enqueue:
|
|
276
|
+
enqueue: true,
|
|
277
|
+
throttle: 10 do |ctx|
|
|
274
278
|
heavy_process(ctx.each_value)
|
|
275
279
|
end
|
|
276
280
|
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Monitoring UI
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
JobWorkflow ships with a workflow-oriented monitoring UI. Instead of listing mixed job rows first, the UI starts
|
|
6
|
+
from workflow definitions, then lets you drill into one workflow's root executions and finally into one execution's
|
|
7
|
+
DAG state.
|
|
8
|
+
|
|
9
|
+
This view is intended to answer workflow-level questions such as:
|
|
10
|
+
|
|
11
|
+
- which workflow is currently stuck
|
|
12
|
+
- which task is running or failed
|
|
13
|
+
- how `each` fan-out is progressing
|
|
14
|
+
- which arguments and outputs shaped the current execution
|
|
15
|
+
|
|
16
|
+
## What the UI shows
|
|
17
|
+
|
|
18
|
+
The current scope includes:
|
|
19
|
+
|
|
20
|
+
- workflow definition list
|
|
21
|
+
- paginated root execution list per workflow
|
|
22
|
+
- execution detail with a DAG overview, task state, arguments, outputs, and failed task
|
|
23
|
+
- fan-out progress and sub-task job links into Mission Control Jobs
|
|
24
|
+
|
|
25
|
+
History analytics, retries, and dry-run launch flows are out of scope for now.
|
|
26
|
+
|
|
27
|
+
## Navigation
|
|
28
|
+
|
|
29
|
+
The UI is organized around workflows rather than a cross-workflow execution feed:
|
|
30
|
+
|
|
31
|
+
```text
|
|
32
|
+
workflow definitions
|
|
33
|
+
└─ one workflow's root executions
|
|
34
|
+
└─ one root execution with sub-task-job detail
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The UI is intentionally scoped to one workflow at a time, so the first screen stays focused on definitions and the
|
|
38
|
+
execution list stays easy to scan.
|
|
39
|
+
|
|
40
|
+
## Mounting the engine
|
|
41
|
+
|
|
42
|
+
Add the engine to your application's routes:
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
# config/routes.rb
|
|
46
|
+
mount JobWorkflow::Monitoring::Engine => "/job_workflow"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
After mounting, open `/job_workflow` to browse workflow definitions and executions.
|
|
50
|
+
|
|
51
|
+
## Authentication and controller inheritance
|
|
52
|
+
|
|
53
|
+
By default, monitoring controllers inherit from `ApplicationController`. If you already use a dedicated authenticated
|
|
54
|
+
controller for admin tooling, configure monitoring to inherit from it:
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
config.job_workflow.monitoring.base_controller_class = "AdminController"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
If `config.job_workflow.monitoring.base_controller_class` is not set and `MissionControl::Jobs` is installed,
|
|
61
|
+
monitoring falls back to `MissionControl::Jobs.base_controller_class`.
|
|
62
|
+
|
|
63
|
+
## Root executions and sub-task jobs
|
|
64
|
+
|
|
65
|
+
Execution lists show **root jobs only**. `SubTaskJob` rows do not appear in the workflow execution list. Instead, the
|
|
66
|
+
detail page for a root execution is where sub-task job state is shown.
|
|
67
|
+
|
|
68
|
+
This keeps the list view focused on workflow-level monitoring while still preserving the full fan-out story on the
|
|
69
|
+
detail page.
|
|
70
|
+
|
|
71
|
+
## Query behavior
|
|
72
|
+
|
|
73
|
+
Root executions are paginated with a cursor and scoped by workflow class. As a user, this means the execution list is
|
|
74
|
+
ordered newest-first within the workflow you selected, without mixing in unrelated job rows.
|
|
@@ -54,9 +54,9 @@ task :process_items,
|
|
|
54
54
|
end
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
### Asynchronous Execution with Concurrency
|
|
57
|
+
### Asynchronous Execution with Throttled Concurrency
|
|
58
58
|
|
|
59
|
-
To execute map task iterations in separate sub-jobs
|
|
59
|
+
To execute map task iterations in separate sub-jobs, enable `enqueue:` and use `throttle:` when you need to cap concurrent execution:
|
|
60
60
|
|
|
61
61
|
```ruby
|
|
62
62
|
# Simplest form: enable parallel execution with default settings
|
|
@@ -69,21 +69,23 @@ end
|
|
|
69
69
|
# Process up to 10 items concurrently in sub-jobs
|
|
70
70
|
task :process_items,
|
|
71
71
|
each: ->(ctx) { ctx.arguments.items },
|
|
72
|
-
enqueue:
|
|
72
|
+
enqueue: true,
|
|
73
|
+
throttle: 10 do |ctx|
|
|
73
74
|
process_item(ctx.each_value)
|
|
74
75
|
end
|
|
75
76
|
|
|
76
|
-
#
|
|
77
|
+
# Conditional enqueue with a concurrency cap
|
|
77
78
|
task :process_items,
|
|
78
79
|
each: ->(ctx) { ctx.arguments.items },
|
|
79
|
-
enqueue: {
|
|
80
|
+
enqueue: { condition: ->(ctx) { ctx.arguments.use_async? } },
|
|
81
|
+
throttle: 10 do |ctx|
|
|
80
82
|
process_item(ctx.each_value)
|
|
81
83
|
end
|
|
82
84
|
|
|
83
85
|
# When enqueue is enabled:
|
|
84
86
|
# - Each iteration is executed in a separate sub-job
|
|
85
87
|
# - Sub-jobs are created via perform_all_later
|
|
86
|
-
# -
|
|
88
|
+
# - `throttle` controls how many iterations can execute concurrently
|
|
87
89
|
# - Parent job waits for all sub-jobs to complete before continuing
|
|
88
90
|
# - Outputs from sub-jobs are automatically collected
|
|
89
91
|
```
|
|
@@ -99,14 +101,18 @@ The `enqueue:` option determines how map task iterations are executed:
|
|
|
99
101
|
|
|
100
102
|
- **`enqueue: true`**: Each iteration is enqueued as a separate sub-job with default settings
|
|
101
103
|
- Simplest way to enable parallel execution
|
|
102
|
-
- No
|
|
104
|
+
- No throttle limit unless `throttle:` is also set (executes as fast as workers allow)
|
|
103
105
|
- Good for I/O-bound operations with many workers
|
|
104
106
|
|
|
105
|
-
- **`enqueue: { condition:
|
|
106
|
-
- Enables true parallel execution across multiple workers
|
|
107
|
+
- **`enqueue: { condition: ... }`**: Each iteration is enqueued as a separate sub-job when the condition passes
|
|
107
108
|
- Better for I/O-bound operations (API calls, database queries)
|
|
108
|
-
- Can accept dynamic condition: `enqueue: { condition: ->(ctx) { ctx.arguments.
|
|
109
|
-
- Supports `queue:` option for custom queue: `enqueue: { queue: "critical"
|
|
109
|
+
- Can accept dynamic condition: `enqueue: { condition: ->(ctx) { ctx.arguments.use_async? } }`
|
|
110
|
+
- Supports `queue:` option for custom queue: `enqueue: { queue: "critical" }`
|
|
111
|
+
|
|
112
|
+
- **`throttle: 10`**: Limits how many task executions can run concurrently
|
|
113
|
+
- This is the official way to cap async map task parallelism
|
|
114
|
+
- It is enforced at perform time via JobWorkflow semaphores
|
|
115
|
+
- It does not use SolidQueue `ready` / `blocked` dispatch-state controls
|
|
110
116
|
|
|
111
117
|
**Note**: `enqueue:` works with both regular tasks and map tasks. For map tasks, it enables asynchronous sub-job execution. For regular tasks, it allows conditional enqueueing as a separate job. Legacy syntax (`enqueue: ->(_ctx) { true }` as a Proc) is still supported for backward compatibility.
|
|
112
118
|
|
|
@@ -129,7 +135,8 @@ class ImportJob < ApplicationJob
|
|
|
129
135
|
|
|
130
136
|
task :process_items,
|
|
131
137
|
each: ->(ctx) { ctx.arguments.items },
|
|
132
|
-
enqueue:
|
|
138
|
+
enqueue: true,
|
|
139
|
+
throttle: 5,
|
|
133
140
|
output: { result: "String" } do |ctx|
|
|
134
141
|
{ result: process(ctx.each_value) }
|
|
135
142
|
end
|
|
@@ -238,7 +245,8 @@ class DataProcessingJob < ApplicationJob
|
|
|
238
245
|
task :process_by_region,
|
|
239
246
|
each: ->(ctx) { ctx.arguments.regions },
|
|
240
247
|
output: { region: "String", results: "Array[Hash]" },
|
|
241
|
-
enqueue:
|
|
248
|
+
enqueue: true,
|
|
249
|
+
throttle: 5 do |ctx|
|
|
242
250
|
region = ctx.each_value
|
|
243
251
|
# This will create sub-tasks for each region
|
|
244
252
|
{ region: region, results: [] }
|
|
@@ -261,7 +269,8 @@ class DataProcessingJob < ApplicationJob
|
|
|
261
269
|
},
|
|
262
270
|
depends_on: [:process_by_region],
|
|
263
271
|
output: { region: "String", data_type: "String", result: "Hash" },
|
|
264
|
-
enqueue:
|
|
272
|
+
enqueue: true,
|
|
273
|
+
throttle: 10 do |ctx|
|
|
265
274
|
item = ctx.each_value
|
|
266
275
|
region = item[:region]
|
|
267
276
|
data_type = item[:data_type]
|
|
@@ -290,7 +299,7 @@ DataProcessingJob.perform_later(
|
|
|
290
299
|
regions: ["us-east-1", "us-west-1", "eu-west-1"],
|
|
291
300
|
data_types: ["user", "order", "product"]
|
|
292
301
|
)
|
|
293
|
-
# => 3 regions × 3 data types = 9 parallel iterations (with concurrency
|
|
302
|
+
# => 3 regions × 3 data types = 9 parallel iterations (with throttled concurrency)
|
|
294
303
|
```
|
|
295
304
|
|
|
296
305
|
### Advanced Matrix with Filtering
|
|
@@ -318,7 +327,8 @@ task :process_filtered_matrix,
|
|
|
318
327
|
end
|
|
319
328
|
},
|
|
320
329
|
output: { region: "String", data_type: "String", status: "Symbol" },
|
|
321
|
-
enqueue:
|
|
330
|
+
enqueue: true,
|
|
331
|
+
throttle: 10 do |ctx|
|
|
322
332
|
combo = ctx.each_value
|
|
323
333
|
region = combo[:region]
|
|
324
334
|
data_type = combo[:data_type]
|
|
@@ -335,9 +345,9 @@ end
|
|
|
335
345
|
|
|
336
346
|
When implementing matrix processing:
|
|
337
347
|
|
|
338
|
-
1. **Concurrency Control**: Set appropriate `
|
|
339
|
-
- High
|
|
340
|
-
- Low
|
|
348
|
+
1. **Concurrency Control**: Set appropriate `throttle:` limits to avoid overwhelming workers
|
|
349
|
+
- High throttle (20+): Suitable for I/O-bound operations (API calls, database queries)
|
|
350
|
+
- Low throttle (2-5): Better for CPU-bound operations or rate-limited APIs
|
|
341
351
|
|
|
342
352
|
2. **Output Size**: Watch out for large output collections
|
|
343
353
|
- With N×M combinations, the output array will have N×M elements
|
|
@@ -348,7 +358,8 @@ When implementing matrix processing:
|
|
|
348
358
|
task :process_matrix,
|
|
349
359
|
each: ->(_ctx) { combinations },
|
|
350
360
|
timeout: 300.seconds, # 5 minutes per iteration
|
|
351
|
-
enqueue:
|
|
361
|
+
enqueue: true,
|
|
362
|
+
throttle: 5 do |ctx|
|
|
352
363
|
# ...
|
|
353
364
|
end
|
|
354
365
|
```
|
|
@@ -358,7 +369,8 @@ When implementing matrix processing:
|
|
|
358
369
|
task :process_matrix,
|
|
359
370
|
each: ->(_ctx) { combinations },
|
|
360
371
|
retry: { count: 3, strategy: :exponential },
|
|
361
|
-
enqueue:
|
|
372
|
+
enqueue: true,
|
|
373
|
+
throttle: 5 do |ctx|
|
|
362
374
|
# ...
|
|
363
375
|
end
|
|
364
376
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Production Deployment
|
|
2
2
|
|
|
3
|
-
> ⚠️ **Early Stage (v0.
|
|
3
|
+
> ⚠️ **Early Stage (v0.6.0):** JobWorkflow is still in early development. While this section outlines potential deployment patterns, please thoroughly test in your specific environment and monitor for any issues before relying on JobWorkflow in critical production systems.
|
|
4
4
|
|
|
5
5
|
This section covers suggested settings and patterns for running JobWorkflow in production-like environments.
|
|
6
6
|
|
data/guides/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# JobWorkflow Guides
|
|
2
2
|
|
|
3
|
-
> ⚠️ **Early Stage (v0.
|
|
3
|
+
> ⚠️ **Early Stage (v0.6.0):** JobWorkflow is in active development. APIs and features may change. The following guides provide patterns and examples for building workflows, but be aware that implementations may need adjustment as the library evolves.
|
|
4
4
|
|
|
5
5
|
Welcome to the JobWorkflow documentation! This directory contains comprehensive guides to help you build robust workflows with JobWorkflow.
|
|
6
6
|
|
|
@@ -127,6 +127,11 @@ Production deployment and operations:
|
|
|
127
127
|
- Accessing arguments, outputs, and job status
|
|
128
128
|
- Building dashboards and APIs
|
|
129
129
|
|
|
130
|
+
- **[MONITORING_UI.md](MONITORING_UI.md)** - Monitoring UI for workflow execution
|
|
131
|
+
- Mounting the engine
|
|
132
|
+
- Navigating workflow definitions and executions
|
|
133
|
+
- Viewing DAG state, arguments, outputs, and fan-out progress
|
|
134
|
+
|
|
130
135
|
- **[TESTING_STRATEGY.md](TESTING_STRATEGY.md)** - Testing your workflows
|
|
131
136
|
- Unit testing individual tasks
|
|
132
137
|
- Integration testing workflows
|
data/guides/THROTTLING.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
JobWorkflow provides semaphore-based throttling to handle external API rate limits and protect shared resources. Throttling works across multiple jobs and workers, ensuring system-wide rate limiting.
|
|
4
4
|
|
|
5
|
+
For async map tasks (`enqueue: true`), `throttle` is also the official way to cap concurrent sub-job execution. This limit is enforced at perform time by JobWorkflow semaphores, not by SolidQueue's ready/blocked dispatch-state controls.
|
|
6
|
+
|
|
5
7
|
## Task-Level Throttling
|
|
6
8
|
|
|
7
9
|
### Simple Integer Syntax (Recommended)
|
|
@@ -114,6 +116,28 @@ end
|
|
|
114
116
|
# → Max 5 concurrent API calls at any time
|
|
115
117
|
```
|
|
116
118
|
|
|
119
|
+
### Throttling Async Sub-Tasks
|
|
120
|
+
|
|
121
|
+
When a map task runs as sub-jobs, combine `enqueue: true` with `throttle`:
|
|
122
|
+
|
|
123
|
+
```ruby
|
|
124
|
+
class AsyncBatchFetchJob < ApplicationJob
|
|
125
|
+
include JobWorkflow::DSL
|
|
126
|
+
|
|
127
|
+
argument :ids, "Array[Integer]"
|
|
128
|
+
|
|
129
|
+
task :fetch_all,
|
|
130
|
+
each: ->(ctx) { ctx.arguments.ids },
|
|
131
|
+
enqueue: true,
|
|
132
|
+
throttle: 5,
|
|
133
|
+
output: { data: "Hash" } do |ctx|
|
|
134
|
+
{ data: RateLimitedAPI.fetch(ctx.each_value) }
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
This keeps sub-job fan-out while ensuring only 5 iterations execute at the same time across workers.
|
|
140
|
+
|
|
117
141
|
## Runtime Throttling
|
|
118
142
|
|
|
119
143
|
For fine-grained control within a task, use the `ctx.throttle` method to wrap specific code blocks. This method can only be called inside a task block; calling it outside will raise an error.
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
JobWorkflow provides a robust API for querying the execution status of workflows. This allows you to monitor running workflows, inspect their state, and build observability dashboards.
|
|
4
4
|
|
|
5
|
+
`JobWorkflow::WorkflowStatus.find` and `find_by` are root workflow APIs. Pass the root workflow `job_id` only. Async sub-job IDs created by `enqueue: true` are intentionally excluded; inspect those via `JobWorkflow::JobStatus`.
|
|
6
|
+
|
|
5
7
|
## Basic Usage
|
|
6
8
|
|
|
7
9
|
### Finding a Workflow
|
|
@@ -16,6 +18,10 @@ return unless status
|
|
|
16
18
|
|
|
17
19
|
# Check workflow status
|
|
18
20
|
status.status # => :pending, :running, :succeeded, or :failed
|
|
21
|
+
|
|
22
|
+
# Sub-job IDs are excluded from WorkflowStatus
|
|
23
|
+
JobWorkflow::WorkflowStatus.find_by(job_id: "sub-job-123")
|
|
24
|
+
# => nil
|
|
19
25
|
```
|
|
20
26
|
|
|
21
27
|
### Status Check Methods
|
|
@@ -251,7 +257,7 @@ end
|
|
|
251
257
|
|
|
252
258
|
### NotFoundError
|
|
253
259
|
|
|
254
|
-
When using `find`, a `JobWorkflow::WorkflowStatus::NotFoundError` is raised if the job is not found
|
|
260
|
+
When using `find`, a `JobWorkflow::WorkflowStatus::NotFoundError` is raised if the job is not found. The same applies if you pass a sub-job `job_id` instead of a root workflow `job_id`:
|
|
255
261
|
|
|
256
262
|
```ruby
|
|
257
263
|
begin
|
data/lib/job_workflow/context.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module JobWorkflow
|
|
4
4
|
class Context # rubocop:disable Metrics/ClassLength
|
|
5
|
+
EACH_TASK_CURSOR_MARKER = "__job_workflow_each_cursor__"
|
|
6
|
+
|
|
5
7
|
attr_reader :workflow #: Workflow
|
|
6
8
|
attr_reader :arguments #: Arguments
|
|
7
9
|
attr_reader :output #: Output
|
|
@@ -50,10 +52,12 @@ module JobWorkflow
|
|
|
50
52
|
# task_context: TaskContext,
|
|
51
53
|
# output: Output,
|
|
52
54
|
# job_status: JobStatus,
|
|
53
|
-
# ?job:
|
|
55
|
+
# ?job: _JobInterface?
|
|
54
56
|
# ) -> void
|
|
55
|
-
def initialize(workflow:, arguments:, task_context:, output:, job_status:, job: nil) # rubocop:disable Metrics/ParameterLists
|
|
56
|
-
|
|
57
|
+
def initialize(workflow:, arguments:, task_context:, output:, job_status:, job: nil) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize, Metrics/MethodLength
|
|
58
|
+
if job&.class.respond_to?(:_workflow) && job.class._workflow != workflow
|
|
59
|
+
raise "job does not match the provided workflow"
|
|
60
|
+
end
|
|
57
61
|
|
|
58
62
|
self.job = job
|
|
59
63
|
self.workflow = workflow
|
|
@@ -64,6 +68,8 @@ module JobWorkflow
|
|
|
64
68
|
self.enabled_with_each_value = false
|
|
65
69
|
self.throttle_index = 0
|
|
66
70
|
self.skip_in_dry_run_index = 0
|
|
71
|
+
self.current_step = nil
|
|
72
|
+
self.current_cursor = nil
|
|
67
73
|
end
|
|
68
74
|
|
|
69
75
|
#: () -> Hash[String, untyped]
|
|
@@ -77,12 +83,37 @@ module JobWorkflow
|
|
|
77
83
|
self
|
|
78
84
|
end
|
|
79
85
|
|
|
80
|
-
#: (
|
|
86
|
+
#: () -> untyped
|
|
87
|
+
def cursor
|
|
88
|
+
return if current_step.nil?
|
|
89
|
+
|
|
90
|
+
current_cursor
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
#: (untyped) -> void
|
|
94
|
+
def set_cursor!(value)
|
|
95
|
+
step = current_step || (raise "set_cursor! can be called only in task")
|
|
96
|
+
|
|
97
|
+
ActiveJob::Arguments.serialize([value])
|
|
98
|
+
self.current_cursor = value
|
|
99
|
+
step.set!(build_step_cursor(value))
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
#: () -> void
|
|
103
|
+
def checkpoint!
|
|
104
|
+
step = current_step || (raise "checkpoint! can be called only in task")
|
|
105
|
+
|
|
106
|
+
return step.checkpoint! unless each_task?
|
|
107
|
+
|
|
108
|
+
step.set!(build_step_cursor(current_cursor))
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
#: (_JobInterface) -> void
|
|
81
112
|
def _job=(job)
|
|
82
113
|
self.job = job
|
|
83
114
|
end
|
|
84
115
|
|
|
85
|
-
#: () ->
|
|
116
|
+
#: () -> _JobInterface?
|
|
86
117
|
def _job
|
|
87
118
|
job
|
|
88
119
|
end
|
|
@@ -220,6 +251,18 @@ module JobWorkflow
|
|
|
220
251
|
task_context
|
|
221
252
|
end
|
|
222
253
|
|
|
254
|
+
#: (ActiveJob::Continuation::Step, ?cursor: untyped) { () -> void } -> void
|
|
255
|
+
def _with_current_step(step, cursor: nil)
|
|
256
|
+
previous_step = current_step
|
|
257
|
+
previous_cursor = current_cursor
|
|
258
|
+
self.current_step = step
|
|
259
|
+
self.current_cursor = cursor
|
|
260
|
+
yield
|
|
261
|
+
ensure
|
|
262
|
+
self.current_step = previous_step
|
|
263
|
+
self.current_cursor = previous_cursor
|
|
264
|
+
end
|
|
265
|
+
|
|
223
266
|
#: (TaskOutput) -> void
|
|
224
267
|
def _add_task_output(task_output)
|
|
225
268
|
output.add_task_output(task_output)
|
|
@@ -236,7 +279,7 @@ module JobWorkflow
|
|
|
236
279
|
|
|
237
280
|
private
|
|
238
281
|
|
|
239
|
-
attr_accessor :job #:
|
|
282
|
+
attr_accessor :job #: _JobInterface?
|
|
240
283
|
attr_writer :workflow #: Workflow
|
|
241
284
|
attr_writer :arguments #: Arguments
|
|
242
285
|
attr_writer :output #: Output
|
|
@@ -245,12 +288,31 @@ module JobWorkflow
|
|
|
245
288
|
attr_accessor :enabled_with_each_value #: bool
|
|
246
289
|
attr_accessor :throttle_index #: Integer
|
|
247
290
|
attr_accessor :skip_in_dry_run_index #: Integer
|
|
291
|
+
attr_accessor :current_step #: ActiveJob::Continuation::Step?
|
|
292
|
+
attr_accessor :current_cursor #: untyped
|
|
248
293
|
|
|
249
294
|
#: () -> String
|
|
250
295
|
def parent_job_id
|
|
251
296
|
_task_context.parent_job_id || job_id
|
|
252
297
|
end
|
|
253
298
|
|
|
299
|
+
#: () -> bool
|
|
300
|
+
def each_task?
|
|
301
|
+
task_context.task.each?
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
#: (untyped) -> untyped
|
|
305
|
+
def build_step_cursor(value)
|
|
306
|
+
return value unless each_task?
|
|
307
|
+
return task_context.index if value.nil?
|
|
308
|
+
|
|
309
|
+
{
|
|
310
|
+
EACH_TASK_CURSOR_MARKER => true,
|
|
311
|
+
"index" => task_context.index,
|
|
312
|
+
"cursor" => value
|
|
313
|
+
}
|
|
314
|
+
end
|
|
315
|
+
|
|
254
316
|
#: () -> Hash[String, untyped]
|
|
255
317
|
def serialize_for_job
|
|
256
318
|
{
|
data/lib/job_workflow/dsl.rb
CHANGED
|
@@ -128,7 +128,7 @@ module JobWorkflow
|
|
|
128
128
|
# ) { (untyped) -> void } -> void
|
|
129
129
|
def task(
|
|
130
130
|
task_name,
|
|
131
|
-
each:
|
|
131
|
+
each: Task::DEFAULT_EACH,
|
|
132
132
|
enqueue: nil,
|
|
133
133
|
retry: 0,
|
|
134
134
|
output: {},
|
|
@@ -156,10 +156,6 @@ module JobWorkflow
|
|
|
156
156
|
dry_run:
|
|
157
157
|
)
|
|
158
158
|
_workflow.add_task(new_task)
|
|
159
|
-
if new_task.enqueue.should_limits_concurrency? # rubocop:disable Style/GuardClause
|
|
160
|
-
concurrency = new_task.enqueue.concurrency #: Integer
|
|
161
|
-
workflow_concurrency(to: concurrency, key: :concurrency_key.to_proc)
|
|
162
|
-
end
|
|
163
159
|
end
|
|
164
160
|
# rubocop:enable Metrics/ParameterLists
|
|
165
161
|
|
|
@@ -18,7 +18,7 @@ module JobWorkflow
|
|
|
18
18
|
#
|
|
19
19
|
# @note This subscriber requires the opentelemetry-api gem to be installed.
|
|
20
20
|
# If not available, subscription will be silently skipped.
|
|
21
|
-
class OpenTelemetrySubscriber
|
|
21
|
+
class OpenTelemetrySubscriber
|
|
22
22
|
module Attributes
|
|
23
23
|
JOB_NAME = "#{NAMESPACE}.job.name".freeze #: String
|
|
24
24
|
JOB_ID = "#{NAMESPACE}.job.id".freeze #: String
|