job-workflow 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +91 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +55 -0
- data/Steepfile +10 -0
- data/guides/API_REFERENCE.md +112 -0
- data/guides/BEST_PRACTICES.md +113 -0
- data/guides/CACHE_STORE_INTEGRATION.md +145 -0
- data/guides/CONDITIONAL_EXECUTION.md +66 -0
- data/guides/DEPENDENCY_WAIT.md +386 -0
- data/guides/DRY_RUN.md +390 -0
- data/guides/DSL_BASICS.md +216 -0
- data/guides/ERROR_HANDLING.md +187 -0
- data/guides/GETTING_STARTED.md +524 -0
- data/guides/INSTRUMENTATION.md +131 -0
- data/guides/LIFECYCLE_HOOKS.md +415 -0
- data/guides/NAMESPACES.md +75 -0
- data/guides/OPENTELEMETRY_INTEGRATION.md +86 -0
- data/guides/PARALLEL_PROCESSING.md +302 -0
- data/guides/PRODUCTION_DEPLOYMENT.md +110 -0
- data/guides/QUEUE_MANAGEMENT.md +141 -0
- data/guides/README.md +174 -0
- data/guides/SCHEDULED_JOBS.md +165 -0
- data/guides/STRUCTURED_LOGGING.md +268 -0
- data/guides/TASK_OUTPUTS.md +240 -0
- data/guides/TESTING_STRATEGY.md +56 -0
- data/guides/THROTTLING.md +198 -0
- data/guides/TROUBLESHOOTING.md +53 -0
- data/guides/WORKFLOW_COMPOSITION.md +675 -0
- data/guides/WORKFLOW_STATUS_QUERY.md +288 -0
- data/lib/job-workflow.rb +3 -0
- data/lib/job_workflow/argument_def.rb +16 -0
- data/lib/job_workflow/arguments.rb +40 -0
- data/lib/job_workflow/auto_scaling/adapter/aws_adapter.rb +66 -0
- data/lib/job_workflow/auto_scaling/adapter.rb +31 -0
- data/lib/job_workflow/auto_scaling/configuration.rb +85 -0
- data/lib/job_workflow/auto_scaling/executor.rb +43 -0
- data/lib/job_workflow/auto_scaling.rb +69 -0
- data/lib/job_workflow/cache_store_adapters.rb +46 -0
- data/lib/job_workflow/context.rb +352 -0
- data/lib/job_workflow/dry_run_config.rb +31 -0
- data/lib/job_workflow/dsl.rb +236 -0
- data/lib/job_workflow/error_hook.rb +24 -0
- data/lib/job_workflow/hook.rb +24 -0
- data/lib/job_workflow/hook_registry.rb +66 -0
- data/lib/job_workflow/instrumentation/log_subscriber.rb +194 -0
- data/lib/job_workflow/instrumentation/opentelemetry_subscriber.rb +221 -0
- data/lib/job_workflow/instrumentation.rb +257 -0
- data/lib/job_workflow/job_status.rb +92 -0
- data/lib/job_workflow/logger.rb +86 -0
- data/lib/job_workflow/namespace.rb +36 -0
- data/lib/job_workflow/output.rb +81 -0
- data/lib/job_workflow/output_def.rb +14 -0
- data/lib/job_workflow/queue.rb +74 -0
- data/lib/job_workflow/queue_adapter.rb +38 -0
- data/lib/job_workflow/queue_adapters/abstract.rb +87 -0
- data/lib/job_workflow/queue_adapters/null_adapter.rb +127 -0
- data/lib/job_workflow/queue_adapters/solid_queue_adapter.rb +224 -0
- data/lib/job_workflow/runner.rb +173 -0
- data/lib/job_workflow/schedule.rb +46 -0
- data/lib/job_workflow/semaphore.rb +71 -0
- data/lib/job_workflow/task.rb +83 -0
- data/lib/job_workflow/task_callable.rb +43 -0
- data/lib/job_workflow/task_context.rb +70 -0
- data/lib/job_workflow/task_dependency_wait.rb +66 -0
- data/lib/job_workflow/task_enqueue.rb +50 -0
- data/lib/job_workflow/task_graph.rb +43 -0
- data/lib/job_workflow/task_job_status.rb +70 -0
- data/lib/job_workflow/task_output.rb +51 -0
- data/lib/job_workflow/task_retry.rb +64 -0
- data/lib/job_workflow/task_throttle.rb +46 -0
- data/lib/job_workflow/version.rb +5 -0
- data/lib/job_workflow/workflow.rb +87 -0
- data/lib/job_workflow/workflow_status.rb +112 -0
- data/lib/job_workflow.rb +59 -0
- data/rbs_collection.lock.yaml +172 -0
- data/rbs_collection.yaml +14 -0
- data/sig/generated/job-workflow.rbs +2 -0
- data/sig/generated/job_workflow/argument_def.rbs +14 -0
- data/sig/generated/job_workflow/arguments.rbs +26 -0
- data/sig/generated/job_workflow/auto_scaling/adapter/aws_adapter.rbs +32 -0
- data/sig/generated/job_workflow/auto_scaling/adapter.rbs +22 -0
- data/sig/generated/job_workflow/auto_scaling/configuration.rbs +50 -0
- data/sig/generated/job_workflow/auto_scaling/executor.rbs +29 -0
- data/sig/generated/job_workflow/auto_scaling.rbs +47 -0
- data/sig/generated/job_workflow/cache_store_adapters.rbs +28 -0
- data/sig/generated/job_workflow/context.rbs +155 -0
- data/sig/generated/job_workflow/dry_run_config.rbs +16 -0
- data/sig/generated/job_workflow/dsl.rbs +117 -0
- data/sig/generated/job_workflow/error_hook.rbs +18 -0
- data/sig/generated/job_workflow/hook.rbs +18 -0
- data/sig/generated/job_workflow/hook_registry.rbs +47 -0
- data/sig/generated/job_workflow/instrumentation/log_subscriber.rbs +102 -0
- data/sig/generated/job_workflow/instrumentation/opentelemetry_subscriber.rbs +113 -0
- data/sig/generated/job_workflow/instrumentation.rbs +138 -0
- data/sig/generated/job_workflow/job_status.rbs +46 -0
- data/sig/generated/job_workflow/logger.rbs +56 -0
- data/sig/generated/job_workflow/namespace.rbs +24 -0
- data/sig/generated/job_workflow/output.rbs +39 -0
- data/sig/generated/job_workflow/output_def.rbs +12 -0
- data/sig/generated/job_workflow/queue.rbs +49 -0
- data/sig/generated/job_workflow/queue_adapter.rbs +18 -0
- data/sig/generated/job_workflow/queue_adapters/abstract.rbs +56 -0
- data/sig/generated/job_workflow/queue_adapters/null_adapter.rbs +73 -0
- data/sig/generated/job_workflow/queue_adapters/solid_queue_adapter.rbs +111 -0
- data/sig/generated/job_workflow/runner.rbs +66 -0
- data/sig/generated/job_workflow/schedule.rbs +34 -0
- data/sig/generated/job_workflow/semaphore.rbs +37 -0
- data/sig/generated/job_workflow/task.rbs +60 -0
- data/sig/generated/job_workflow/task_callable.rbs +30 -0
- data/sig/generated/job_workflow/task_context.rbs +52 -0
- data/sig/generated/job_workflow/task_dependency_wait.rbs +42 -0
- data/sig/generated/job_workflow/task_enqueue.rbs +27 -0
- data/sig/generated/job_workflow/task_graph.rbs +27 -0
- data/sig/generated/job_workflow/task_job_status.rbs +42 -0
- data/sig/generated/job_workflow/task_output.rbs +29 -0
- data/sig/generated/job_workflow/task_retry.rbs +30 -0
- data/sig/generated/job_workflow/task_throttle.rbs +20 -0
- data/sig/generated/job_workflow/version.rbs +5 -0
- data/sig/generated/job_workflow/workflow.rbs +48 -0
- data/sig/generated/job_workflow/workflow_status.rbs +55 -0
- data/sig/generated/job_workflow.rbs +8 -0
- data/sig-private/activejob.rbs +35 -0
- data/sig-private/activesupport.rbs +23 -0
- data/sig-private/aws.rbs +32 -0
- data/sig-private/opentelemetry.rbs +40 -0
- data/sig-private/solid_queue.rbs +108 -0
- data/tmp/.keep +0 -0
- metadata +190 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
# Parallel Processing
|
|
2
|
+
|
|
3
|
+
JobWorkflow enables parallel processing of collection elements by specifying the `each:` option in a `task` definition. Based on the Fork-Join pattern, it provides efficient and safe parallel execution.
|
|
4
|
+
|
|
5
|
+
## Collection Task Basics
|
|
6
|
+
|
|
7
|
+
### Simple Parallel Processing
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
class BatchProcessingJob < ApplicationJob
|
|
11
|
+
include JobWorkflow::DSL
|
|
12
|
+
|
|
13
|
+
argument :user_ids, "Array[Integer]", default: []
|
|
14
|
+
|
|
15
|
+
# Prepare user IDs
|
|
16
|
+
task :fetch_user_ids, output: { ids: "Array[Integer]" } do |ctx|
|
|
17
|
+
{ ids: User.active.pluck(:id) }
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Process each user in parallel
|
|
21
|
+
task :process_users,
|
|
22
|
+
each: ->(ctx) { ctx.arguments.user_ids },
|
|
23
|
+
depends_on: [:fetch_user_ids],
|
|
24
|
+
output: { user_id: "Integer", status: "Symbol" } do |ctx|
|
|
25
|
+
user_id = ctx.each_value
|
|
26
|
+
user = User.find(user_id)
|
|
27
|
+
{
|
|
28
|
+
user_id: user_id,
|
|
29
|
+
status: user.process!
|
|
30
|
+
}
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Aggregate results
|
|
34
|
+
task :aggregate_results, depends_on: [:process_users] do |ctx|
|
|
35
|
+
results = ctx.output[:process_users]
|
|
36
|
+
puts "Processed #{results.size} users"
|
|
37
|
+
# => [{ user_id: 1, status: :ok }, { user_id: 2, status: :ok }, ...]
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Controlling Concurrency
|
|
43
|
+
|
|
44
|
+
### Synchronous Execution (Default)
|
|
45
|
+
|
|
46
|
+
By default, map tasks execute synchronously (in-process):
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
# Synchronous map task (default)
|
|
50
|
+
# All iterations execute sequentially in the current job
|
|
51
|
+
task :process_items,
|
|
52
|
+
each: ->(ctx) { ctx.arguments.items } do |ctx|
|
|
53
|
+
process_item(ctx.each_value)
|
|
54
|
+
end
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Asynchronous Execution with Concurrency
|
|
58
|
+
|
|
59
|
+
To execute map task iterations in separate sub-jobs with concurrency control, use the `enqueue:` option with a Hash containing `condition:` and `concurrency:`:
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
# Simplest form: enable parallel execution with default settings
|
|
63
|
+
task :process_items,
|
|
64
|
+
each: ->(ctx) { ctx.arguments.items },
|
|
65
|
+
enqueue: true do |ctx|
|
|
66
|
+
process_item(ctx.each_value)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Process up to 10 items concurrently in sub-jobs
|
|
70
|
+
task :process_items,
|
|
71
|
+
each: ->(ctx) { ctx.arguments.items },
|
|
72
|
+
enqueue: { condition: ->(_ctx) { true }, concurrency: 10 } do |ctx|
|
|
73
|
+
process_item(ctx.each_value)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Simplified syntax when condition is implicitly true
|
|
77
|
+
task :process_items,
|
|
78
|
+
each: ->(ctx) { ctx.arguments.items },
|
|
79
|
+
enqueue: { concurrency: 10 } do |ctx|
|
|
80
|
+
process_item(ctx.each_value)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# When enqueue is enabled:
|
|
84
|
+
# - Each iteration is executed in a separate sub-job
|
|
85
|
+
# - Sub-jobs are created via perform_all_later
|
|
86
|
+
# - Concurrency limit controls how many sub-jobs run in parallel
|
|
87
|
+
# - Parent job waits for all sub-jobs to complete before continuing
|
|
88
|
+
# - Outputs from sub-jobs are automatically collected
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Understanding `enqueue` Option
|
|
92
|
+
|
|
93
|
+
The `enqueue:` option determines how map task iterations are executed:
|
|
94
|
+
|
|
95
|
+
- **`enqueue:` is nil/false (default)**: Iterations execute synchronously in the current job
|
|
96
|
+
- Simple and fast for small datasets
|
|
97
|
+
- Good for CPU-bound operations
|
|
98
|
+
- No network overhead
|
|
99
|
+
|
|
100
|
+
- **`enqueue: true`**: Each iteration is enqueued as a separate sub-job with default settings
|
|
101
|
+
- Simplest way to enable parallel execution
|
|
102
|
+
- No concurrency limit (executes as fast as workers allow)
|
|
103
|
+
- Good for I/O-bound operations with many workers
|
|
104
|
+
|
|
105
|
+
- **`enqueue: { condition: ->(_ctx) { true }, concurrency: 10 }`**: Each iteration is enqueued as a separate sub-job
|
|
106
|
+
- Enables true parallel execution across multiple workers
|
|
107
|
+
- Better for I/O-bound operations (API calls, database queries)
|
|
108
|
+
- Can accept dynamic condition: `enqueue: { condition: ->(ctx) { ctx.arguments.use_concurrency? } }`
|
|
109
|
+
- Supports `queue:` option for custom queue: `enqueue: { queue: "critical", concurrency: 5 }`
|
|
110
|
+
|
|
111
|
+
**Note**: `enqueue:` works with both regular tasks and map tasks. For map tasks, it enables asynchronous sub-job execution. For regular tasks, it allows conditional enqueueing as a separate job. Legacy syntax (`enqueue: ->(_ctx) { true }` as a Proc) is still supported for backward compatibility.
|
|
112
|
+
|
|
113
|
+
## Fork-Join Pattern
|
|
114
|
+
|
|
115
|
+
### Context Isolation
|
|
116
|
+
|
|
117
|
+
Each parallel task has access to the same Context instance. Arguments are immutable and outputs should be returned:
|
|
118
|
+
|
|
119
|
+
```ruby
|
|
120
|
+
argument :items, "Array[Hash]"
|
|
121
|
+
argument :shared_config, "Hash"
|
|
122
|
+
|
|
123
|
+
task :parallel_processing,
|
|
124
|
+
each: ->(ctx) { ctx.arguments.items },
|
|
125
|
+
output: { item_result: "String" } do |ctx|
|
|
126
|
+
# Access current element via ctx.each_value
|
|
127
|
+
item = ctx.each_value
|
|
128
|
+
|
|
129
|
+
# Can read arguments (immutable)
|
|
130
|
+
config = ctx.arguments.shared_config
|
|
131
|
+
|
|
132
|
+
# Return output for this iteration
|
|
133
|
+
{ item_result: process(item, config) }
|
|
134
|
+
end
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Accessing Current Element
|
|
138
|
+
|
|
139
|
+
When using `each:` option, access the current element via `ctx.each_value`:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
task :process_items,
|
|
143
|
+
each: ->(ctx) { ctx.arguments.items } do |ctx|
|
|
144
|
+
item = ctx.each_value # Get current element
|
|
145
|
+
process(item)
|
|
146
|
+
end
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Important**: `ctx.each_value` can only be called within Map Tasks (tasks with `each:` option). Calling it in regular tasks will raise an error:
|
|
150
|
+
|
|
151
|
+
```ruby
|
|
152
|
+
task :regular_task do |ctx|
|
|
153
|
+
ctx.each_value # ❌ Error: "each_value can be called only within each_values block"
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
task :map_task,
|
|
157
|
+
each: ->(ctx) { ctx.arguments.items } do |ctx|
|
|
158
|
+
ctx.each_value # ✅ OK: Returns current element
|
|
159
|
+
end
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Matrix Processing (Multi-Axis Parallelization)
|
|
163
|
+
|
|
164
|
+
### Cartesian Product Execution
|
|
165
|
+
|
|
166
|
+
For scenarios where you need to process all combinations of multiple dimensions (e.g., all regions × all data types), write an `each:` lambda that generates the Cartesian product of the dimensions (for example with `flat_map`), creating one iteration per combination:
|
|
167
|
+
|
|
168
|
+
```ruby
|
|
169
|
+
class DataProcessingJob < ApplicationJob
|
|
170
|
+
include JobWorkflow::DSL
|
|
171
|
+
|
|
172
|
+
argument :regions, "Array[String]"
|
|
173
|
+
argument :data_types, "Array[String]"
|
|
174
|
+
|
|
175
|
+
# First axis: process each region
|
|
176
|
+
task :process_by_region,
|
|
177
|
+
each: ->(ctx) { ctx.arguments.regions },
|
|
178
|
+
output: { region: "String", results: "Array[Hash]" },
|
|
179
|
+
enqueue: { concurrency: 5 } do |ctx|
|
|
180
|
+
region = ctx.each_value
|
|
181
|
+
# This will create sub-tasks for each region
|
|
182
|
+
{ region: region, results: [] }
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Second axis: for each region result, process each data type
|
|
186
|
+
# This creates a nested loop: 3 regions × 3 data types = 9 combinations
|
|
187
|
+
task :process_matrix,
|
|
188
|
+
each: ->(ctx) {
|
|
189
|
+
# Create combinations from first task's output
|
|
190
|
+
regions_data = ctx.output[:process_by_region]
|
|
191
|
+
data_types = ctx.arguments.data_types
|
|
192
|
+
|
|
193
|
+
# Generate all combinations
|
|
194
|
+
regions_data.flat_map do |region_result|
|
|
195
|
+
data_types.map do |data_type|
|
|
196
|
+
{ region: region_result[:region], data_type: data_type }
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
},
|
|
200
|
+
depends_on: [:process_by_region],
|
|
201
|
+
output: { region: "String", data_type: "String", result: "Hash" },
|
|
202
|
+
enqueue: { concurrency: 10 } do |ctx|
|
|
203
|
+
item = ctx.each_value
|
|
204
|
+
region = item[:region]
|
|
205
|
+
data_type = item[:data_type]
|
|
206
|
+
|
|
207
|
+
# Process specific region + data_type combination
|
|
208
|
+
processed = process_data(region, data_type)
|
|
209
|
+
|
|
210
|
+
{
|
|
211
|
+
region: region,
|
|
212
|
+
data_type: data_type,
|
|
213
|
+
result: processed
|
|
214
|
+
}
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Aggregate all results
|
|
218
|
+
task :aggregate_matrix_results, depends_on: [:process_matrix] do |ctx|
|
|
219
|
+
results = ctx.output[:process_matrix]
|
|
220
|
+
# results is an array of 9 hashes, one per combination
|
|
221
|
+
summary = results.group_by { |r| r[:region] }
|
|
222
|
+
{ summary: summary }
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
# Execution example
|
|
227
|
+
DataProcessingJob.perform_later(
|
|
228
|
+
regions: ["us-east-1", "us-west-1", "eu-west-1"],
|
|
229
|
+
data_types: ["user", "order", "product"]
|
|
230
|
+
)
|
|
231
|
+
# => 3 regions × 3 data types = 9 parallel iterations (with concurrency limits)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Advanced Matrix with Filtering
|
|
235
|
+
|
|
236
|
+
When certain combinations should be excluded (e.g., avoid processing "legacy" data in "us-west-1"), filter the combinations:
|
|
237
|
+
|
|
238
|
+
```ruby
|
|
239
|
+
argument :regions, "Array[String]"
|
|
240
|
+
argument :data_types, "Array[String]"
|
|
241
|
+
|
|
242
|
+
task :process_filtered_matrix,
|
|
243
|
+
each: ->(ctx) {
|
|
244
|
+
regions = ctx.arguments.regions
|
|
245
|
+
data_types = ctx.arguments.data_types
|
|
246
|
+
|
|
247
|
+
# Create combinations with explicit filtering
|
|
248
|
+
combinations = regions.flat_map do |region|
|
|
249
|
+
data_types.map { |data_type| { region: region, data_type: data_type } }
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# Exclude specific combinations
|
|
253
|
+
combinations.reject do |combo|
|
|
254
|
+
(combo[:region] == "us-west-1" && combo[:data_type] == "legacy") ||
|
|
255
|
+
(combo[:region] == "eu-west-1" && combo[:data_type] == "beta")
|
|
256
|
+
end
|
|
257
|
+
},
|
|
258
|
+
output: { region: "String", data_type: "String", status: "Symbol" },
|
|
259
|
+
enqueue: { concurrency: 10 } do |ctx|
|
|
260
|
+
combo = ctx.each_value
|
|
261
|
+
region = combo[:region]
|
|
262
|
+
data_type = combo[:data_type]
|
|
263
|
+
|
|
264
|
+
{
|
|
265
|
+
region: region,
|
|
266
|
+
data_type: data_type,
|
|
267
|
+
status: process(region, data_type)
|
|
268
|
+
}
|
|
269
|
+
end
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Performance Considerations
|
|
273
|
+
|
|
274
|
+
When implementing matrix processing:
|
|
275
|
+
|
|
276
|
+
1. **Concurrency Control**: Set appropriate `concurrency:` limits to avoid overwhelming workers
|
|
277
|
+
- High concurrency (20+): Suitable for I/O-bound operations (API calls, database queries)
|
|
278
|
+
- Low concurrency (2-5): Better for CPU-bound operations or rate-limited APIs
|
|
279
|
+
|
|
280
|
+
2. **Output Size**: Watch out for large output collections
|
|
281
|
+
- With N×M combinations, the output array will have N×M elements
|
|
282
|
+
- Consider using a storage adapter (see [TASK_OUTPUTS.md](TASK_OUTPUTS.md#storage-adapters)) for large datasets
|
|
283
|
+
|
|
284
|
+
3. **Timeout Settings**: Increase timeout for complex matrix operations
|
|
285
|
+
```ruby
|
|
286
|
+
task :process_matrix,
|
|
287
|
+
each: ->(_ctx) { combinations },
|
|
288
|
+
timeout: 300.seconds, # 5 minutes per iteration
|
|
289
|
+
enqueue: { concurrency: 5 } do |ctx|
|
|
290
|
+
# ...
|
|
291
|
+
end
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
4. **Error Handling**: Consider retry strategies for flaky matrix operations
|
|
295
|
+
```ruby
|
|
296
|
+
task :process_matrix,
|
|
297
|
+
each: ->(_ctx) { combinations },
|
|
298
|
+
retry: { count: 3, strategy: :exponential },
|
|
299
|
+
enqueue: { concurrency: 5 } do |ctx|
|
|
300
|
+
# ...
|
|
301
|
+
end
|
|
302
|
+
```
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Production Deployment
|
|
2
|
+
|
|
3
|
+
> ⚠️ **Early Stage (v0.1.3):** JobWorkflow is still in early development. While this section outlines potential deployment patterns, please thoroughly test in your specific environment and monitor for any issues before relying on JobWorkflow in critical production systems.
|
|
4
|
+
|
|
5
|
+
This section covers suggested settings and patterns for running JobWorkflow in production-like environments.
|
|
6
|
+
|
|
7
|
+
## SolidQueue Configuration
|
|
8
|
+
|
|
9
|
+
### Basic Configuration
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# config/application.rb
|
|
13
|
+
config.active_job.queue_adapter = :solid_queue
|
|
14
|
+
|
|
15
|
+
# config/queue.yml
|
|
16
|
+
production:
|
|
17
|
+
dispatchers:
|
|
18
|
+
- polling_interval: 1
|
|
19
|
+
batch_size: 500
|
|
20
|
+
workers:
|
|
21
|
+
- queues: default
|
|
22
|
+
threads: 5
|
|
23
|
+
processes: 3
|
|
24
|
+
polling_interval: 0.1
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Optimizing Worker Processes
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
# config/queue.yml
|
|
31
|
+
production:
|
|
32
|
+
dispatchers:
|
|
33
|
+
- polling_interval: 1
|
|
34
|
+
batch_size: 500
|
|
35
|
+
|
|
36
|
+
workers:
|
|
37
|
+
# High priority queue (orchestrator)
|
|
38
|
+
- queues: orchestrator
|
|
39
|
+
threads: 3
|
|
40
|
+
processes: 2
|
|
41
|
+
polling_interval: 0.1
|
|
42
|
+
|
|
43
|
+
# Normal priority queue (child jobs)
|
|
44
|
+
- queues: default
|
|
45
|
+
threads: 10
|
|
46
|
+
processes: 5
|
|
47
|
+
polling_interval: 0.5
|
|
48
|
+
|
|
49
|
+
# Low priority queue (batch processing)
|
|
50
|
+
- queues: batch
|
|
51
|
+
threads: 5
|
|
52
|
+
processes: 2
|
|
53
|
+
polling_interval: 1
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Auto Scaling (AWS ECS)
|
|
57
|
+
|
|
58
|
+
JobWorkflow provides a simple autoscaling helper that updates an AWS ECS service `desired_count` based on queue latency.
|
|
59
|
+
|
|
60
|
+
### Prerequisites
|
|
61
|
+
|
|
62
|
+
- Currently supports **AWS ECS only** via `JobWorkflow::AutoScaling::Adapter::AwsAdapter`.
|
|
63
|
+
- The autoscaling job must run **inside an ECS task** (uses ECS metadata via `ECS_CONTAINER_METADATA_URI_V4`).
|
|
64
|
+
- Latency is read via `JobWorkflow::Queue.latency` which uses the configured queue adapter.
|
|
65
|
+
- Scheduling (how often you evaluate scaling) is **out of scope**: enqueue this job periodically from your app/ops tooling.
|
|
66
|
+
|
|
67
|
+
### Usage
|
|
68
|
+
|
|
69
|
+
Create a job for autoscaling and configure it via `include JobWorkflow::AutoScaling`.
|
|
70
|
+
|
|
71
|
+
```ruby
|
|
72
|
+
class MyAutoScalingJob < ApplicationJob
|
|
73
|
+
include JobWorkflow::AutoScaling
|
|
74
|
+
|
|
75
|
+
# Target queue name
|
|
76
|
+
target_queue_name "default"
|
|
77
|
+
|
|
78
|
+
# desired_count range
|
|
79
|
+
min_count 2
|
|
80
|
+
max_count 10
|
|
81
|
+
|
|
82
|
+
# Scale step (e.g. 2 => 2,4,6...)
|
|
83
|
+
step_count 2
|
|
84
|
+
|
|
85
|
+
# Max latency (seconds). Scaling reaches max_count around this value.
|
|
86
|
+
max_latency 1800
|
|
87
|
+
end
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Scaling model
|
|
91
|
+
|
|
92
|
+
- Queue latency is bucketed into the range `0..max_latency` and scaled from `min_count` to `max_count` by `step_count`.
|
|
93
|
+
- Latency is retrieved via `JobWorkflow::Queue.latency(queue_name)`, which delegates to the configured queue adapter.
|
|
94
|
+
|
|
95
|
+
## SolidCache Configuration
|
|
96
|
+
|
|
97
|
+
### Basic Configuration
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
# config/environments/production.rb
|
|
101
|
+
config.cache_store = :solid_cache_store, {
|
|
102
|
+
expires_in: 1.day,
|
|
103
|
+
namespace: "myapp_production",
|
|
104
|
+
error_handler: ->(method:, returning:, exception:) {
|
|
105
|
+
Rails.logger.error "[SolidCache] Error in #{method}: #{exception.message}"
|
|
106
|
+
# Send to your error tracking service
|
|
107
|
+
ErrorTracker.capture(exception)
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
```
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Queue Management
|
|
2
|
+
|
|
3
|
+
JobWorkflow provides a unified interface for managing job queues through `JobWorkflow::Queue`. This abstraction works with any supported queue adapter (SolidQueue, etc.) and provides operations for monitoring and controlling queue behavior.
|
|
4
|
+
|
|
5
|
+
## Basic Queue Operations
|
|
6
|
+
|
|
7
|
+
### Checking Queue Status
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
# Get current latency (time oldest job has been waiting)
|
|
11
|
+
latency = JobWorkflow::Queue.latency(:default) # => 5.2 (seconds)
|
|
12
|
+
|
|
13
|
+
# Get queue size (number of pending jobs)
|
|
14
|
+
size = JobWorkflow::Queue.size(:default) # => 42
|
|
15
|
+
|
|
16
|
+
# Clear all jobs from a queue (use with caution!)
|
|
17
|
+
JobWorkflow::Queue.clear(:batch_processing)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Queue Pause/Resume
|
|
21
|
+
|
|
22
|
+
You can pause and resume job processing at the queue level. This is useful for:
|
|
23
|
+
- Maintenance windows
|
|
24
|
+
- Emergency stops when downstream services are unavailable
|
|
25
|
+
- Controlled deployment rollouts
|
|
26
|
+
|
|
27
|
+
### Pausing a Queue
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
# Pause a queue - new jobs will be enqueued but not processed
|
|
31
|
+
JobWorkflow::Queue.pause(:default)
|
|
32
|
+
|
|
33
|
+
# Check if a queue is paused
|
|
34
|
+
JobWorkflow::Queue.paused?(:default) # => true
|
|
35
|
+
|
|
36
|
+
# List all paused queues
|
|
37
|
+
JobWorkflow::Queue.paused_queues # => [:default]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Resuming a Queue
|
|
41
|
+
|
|
42
|
+
```ruby
|
|
43
|
+
# Resume processing
|
|
44
|
+
JobWorkflow::Queue.resume(:default)
|
|
45
|
+
|
|
46
|
+
JobWorkflow::Queue.paused?(:default) # => false
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Instrumentation Events
|
|
50
|
+
|
|
51
|
+
Queue pause/resume operations emit instrumentation events that you can subscribe to:
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
# Events emitted:
|
|
55
|
+
# - queue.pause.job_workflow (when a queue is paused)
|
|
56
|
+
# - queue.resume.job_workflow (when a queue is resumed)
|
|
57
|
+
|
|
58
|
+
# Example: Custom notification on pause
|
|
59
|
+
ActiveSupport::Notifications.subscribe("queue.pause.job_workflow") do |event|
|
|
60
|
+
SlackNotifier.notify("Queue #{event.payload[:queue_name]} has been paused")
|
|
61
|
+
end
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Finding Workflows by Queue
|
|
65
|
+
|
|
66
|
+
You can discover which workflow classes are configured to use a specific queue:
|
|
67
|
+
|
|
68
|
+
```ruby
|
|
69
|
+
# Get all workflow classes that use the :default queue
|
|
70
|
+
workflows = JobWorkflow::Queue.workflows(:default)
|
|
71
|
+
# => [OrderProcessingJob, UserRegistrationJob, ...]
|
|
72
|
+
|
|
73
|
+
# Useful for impact analysis before pausing a queue
|
|
74
|
+
JobWorkflow::Queue.workflows(:batch).each do |workflow_class|
|
|
75
|
+
puts "#{workflow_class.name} uses the batch queue"
|
|
76
|
+
end
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Production Considerations
|
|
80
|
+
|
|
81
|
+
### Pause/Resume Best Practices
|
|
82
|
+
|
|
83
|
+
1. **Always notify stakeholders** before pausing production queues
|
|
84
|
+
2. **Monitor queue size** while paused to avoid backlog buildup
|
|
85
|
+
3. **Use instrumentation** to track pause/resume events in your observability stack
|
|
86
|
+
4. **Test resume behavior** - ensure workers pick up jobs promptly after resume
|
|
87
|
+
|
|
88
|
+
### Queue Design Patterns
|
|
89
|
+
|
|
90
|
+
```ruby
|
|
91
|
+
# Separate queues for different reliability requirements
|
|
92
|
+
class CriticalPaymentJob < ApplicationJob
|
|
93
|
+
include JobWorkflow::DSL
|
|
94
|
+
queue_as :payments # High-priority, rarely paused
|
|
95
|
+
# ...
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
class BatchReportJob < ApplicationJob
|
|
99
|
+
include JobWorkflow::DSL
|
|
100
|
+
queue_as :batch # Low-priority, can be paused during peak hours
|
|
101
|
+
# ...
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Maintenance script example
|
|
105
|
+
class MaintenanceService
|
|
106
|
+
def self.pause_non_critical_queues
|
|
107
|
+
[:batch, :reports, :notifications].each do |queue|
|
|
108
|
+
JobWorkflow::Queue.pause(queue)
|
|
109
|
+
Rails.logger.info "Paused queue: #{queue}"
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def self.resume_all_queues
|
|
114
|
+
JobWorkflow::Queue.paused_queues.each do |queue|
|
|
115
|
+
JobWorkflow::Queue.resume(queue)
|
|
116
|
+
Rails.logger.info "Resumed queue: #{queue}"
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Monitoring Paused Queues
|
|
123
|
+
|
|
124
|
+
```ruby
|
|
125
|
+
# Health check endpoint
|
|
126
|
+
class HealthController < ApplicationController
|
|
127
|
+
def queues
|
|
128
|
+
critical_queues = [:default, :payments]
|
|
129
|
+
paused_critical = critical_queues & JobWorkflow::Queue.paused_queues
|
|
130
|
+
|
|
131
|
+
if paused_critical.any?
|
|
132
|
+
render json: {
|
|
133
|
+
status: "warning",
|
|
134
|
+
paused_critical_queues: paused_critical
|
|
135
|
+
}, status: :service_unavailable
|
|
136
|
+
else
|
|
137
|
+
render json: { status: "ok" }
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
```
|