concurrent_pipeline 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +457 -91
- data/concurrent_pipeline.gemspec +5 -3
- data/lib/concurrent_pipeline/errors.rb +6 -0
- data/lib/concurrent_pipeline/pipeline.rb +5 -9
- data/lib/concurrent_pipeline/pipelines/processors/asynchronous.rb +81 -18
- data/lib/concurrent_pipeline/pipelines/processors/locker.rb +3 -3
- data/lib/concurrent_pipeline/pipelines/processors/result.rb +11 -0
- data/lib/concurrent_pipeline/pipelines/processors/synchronous.rb +74 -5
- data/lib/concurrent_pipeline/pipelines/schema.rb +39 -13
- data/lib/concurrent_pipeline/store.rb +160 -57
- data/lib/concurrent_pipeline/stores/schema.rb +57 -19
- data/lib/concurrent_pipeline/version.rb +1 -1
- data/lib/concurrent_pipeline.rb +1 -1
- metadata +44 -17
- data/lib/concurrent_pipeline/stores/schema/record.rb +0 -47
- data/lib/concurrent_pipeline/stores/storage/yaml/fs.rb +0 -140
- data/lib/concurrent_pipeline/stores/storage/yaml.rb +0 -196
data/README.md
CHANGED
|
@@ -31,27 +31,31 @@ Define a store with records, create a pipeline with processing steps, and run it
|
|
|
31
31
|
```ruby
|
|
32
32
|
require "concurrent_pipeline"
|
|
33
33
|
|
|
34
|
-
# Define your data store
|
|
35
|
-
store = ConcurrentPipeline
|
|
36
|
-
|
|
34
|
+
# Define your data store with inline schema definitions
|
|
35
|
+
store = ConcurrentPipeline.store do
|
|
36
|
+
dir("/tmp/my_pipeline")
|
|
37
37
|
|
|
38
38
|
record(:user) do
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
schema(:users) do |t|
|
|
40
|
+
t.string(:name)
|
|
41
|
+
t.boolean(:processed, default: false)
|
|
42
|
+
end
|
|
41
43
|
end
|
|
42
44
|
end
|
|
43
45
|
|
|
44
46
|
# Create some data
|
|
45
|
-
store.
|
|
46
|
-
store.create(
|
|
47
|
+
store.transaction do
|
|
48
|
+
store.user.create!(name: "Alice")
|
|
49
|
+
store.user.create!(name: "Bob")
|
|
50
|
+
end
|
|
47
51
|
|
|
48
52
|
# Define processing pipeline
|
|
49
|
-
pipeline = ConcurrentPipeline
|
|
53
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
50
54
|
processor(:sync) # Run sequentially
|
|
51
55
|
|
|
52
|
-
process(
|
|
56
|
+
process(store.user.where(processed: false)) do |user|
|
|
53
57
|
puts "Processing #{user.name}"
|
|
54
|
-
|
|
58
|
+
user.update!(processed: true)
|
|
55
59
|
end
|
|
56
60
|
end
|
|
57
61
|
|
|
@@ -59,18 +63,57 @@ end
|
|
|
59
63
|
pipeline.process(store)
|
|
60
64
|
```
|
|
61
65
|
|
|
66
|
+
### Defining Record Schemas
|
|
67
|
+
|
|
68
|
+
The recommended approach is to define schemas directly inside record blocks. This keeps your table schema, custom methods, and validations all in one place:
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
store = ConcurrentPipeline.store do
|
|
72
|
+
dir("/tmp/my_pipeline")
|
|
73
|
+
|
|
74
|
+
record(:user) do
|
|
75
|
+
schema(:users) do |t|
|
|
76
|
+
t.string(:first_name)
|
|
77
|
+
t.string(:last_name)
|
|
78
|
+
t.integer(:age)
|
|
79
|
+
t.boolean(:processed, default: false)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
validates :first_name, presence: true
|
|
83
|
+
|
|
84
|
+
def full_name
|
|
85
|
+
"#{first_name} #{last_name}"
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def adult?
|
|
89
|
+
age >= 18
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
store.transaction do
|
|
95
|
+
store.user.create!(first_name: "Alice", last_name: "Smith", age: 25)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
user = store.user.first
|
|
99
|
+
puts user.full_name # => "Alice Smith"
|
|
100
|
+
puts user.adult? # => true
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
This approach keeps related code together - the table schema, custom methods, and validations all in one record definition.
|
|
104
|
+
|
|
62
105
|
### Async Processing
|
|
63
106
|
|
|
64
107
|
Use `:async` processor to run steps concurrently:
|
|
65
108
|
|
|
66
109
|
```ruby
|
|
67
|
-
pipeline = ConcurrentPipeline
|
|
110
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
68
111
|
processor(:async) # Run concurrently
|
|
69
112
|
|
|
70
|
-
process(
|
|
113
|
+
process(store.user.where(processed: false)) do |user|
|
|
71
114
|
# Each user processed in parallel
|
|
72
115
|
sleep 1
|
|
73
|
-
|
|
116
|
+
user.update!(processed: true)
|
|
74
117
|
end
|
|
75
118
|
end
|
|
76
119
|
```
|
|
@@ -78,31 +121,79 @@ end
|
|
|
78
121
|
Control concurrency and polling with optional parameters:
|
|
79
122
|
|
|
80
123
|
```ruby
|
|
81
|
-
pipeline = ConcurrentPipeline
|
|
124
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
82
125
|
# concurrency: max parallel tasks (default: 5)
|
|
83
126
|
# enqueue_seconds: sleep between checking for new work (default: 0.1)
|
|
84
127
|
processor(:async, concurrency: 10, enqueue_seconds: 0.5)
|
|
85
128
|
|
|
86
|
-
process(
|
|
129
|
+
process(store.user.where(processed: false)) do |user|
|
|
87
130
|
# Up to 10 users processed concurrently
|
|
88
131
|
expensive_api_call(user)
|
|
89
|
-
|
|
132
|
+
user.update!(processed: true)
|
|
90
133
|
end
|
|
91
134
|
end
|
|
92
135
|
```
|
|
93
136
|
|
|
137
|
+
### Using migrate for Schema Modifications
|
|
138
|
+
|
|
139
|
+
The `migrate` method is used when you need to modify an existing schema, such as when restoring from a previous version of your store and adding new columns or tables. Migrations defined this way are placed after inline schema definitions:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
store = ConcurrentPipeline.store do
|
|
143
|
+
dir("/tmp/my_pipeline")
|
|
144
|
+
|
|
145
|
+
# Existing record with inline schema
|
|
146
|
+
record(:user) do
|
|
147
|
+
schema(:users) do |t|
|
|
148
|
+
t.string(:name)
|
|
149
|
+
t.boolean(:processed, default: false)
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# Later, you need to add a new column to the existing table
|
|
154
|
+
# This is useful when working with an existing database
|
|
155
|
+
migrate do
|
|
156
|
+
add_column(:users, :email, :string)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# Or add a completely new table not associated with a record
|
|
160
|
+
migrate do
|
|
161
|
+
create_table(:audit_logs) do |t|
|
|
162
|
+
t.string(:action)
|
|
163
|
+
t.integer(:user_id)
|
|
164
|
+
t.timestamps
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**When to use `migrate`:**
|
|
171
|
+
- Adding columns to tables that were created in a prior version of your script
|
|
172
|
+
- Removing or modifying columns in existing tables
|
|
173
|
+
- Creating additional tables not associated with a primary record
|
|
174
|
+
- Running data migrations or other one-time schema changes
|
|
175
|
+
|
|
176
|
+
**Migration order:**
|
|
177
|
+
- Inline `schema` calls are always processed first (prepended to the migration list)
|
|
178
|
+
- `migrate` calls are processed after all schemas (appended to the migration list)
|
|
179
|
+
- Each migration is tracked and only runs once
|
|
180
|
+
|
|
181
|
+
For new record definitions, prefer using the inline `schema` approach to keep related code together.
|
|
182
|
+
|
|
94
183
|
### Custom Methods on Records
|
|
95
184
|
|
|
96
|
-
Records can have custom methods defined in the
|
|
185
|
+
Records can have custom methods defined alongside their schema. This was already shown in the "Defining Record Schemas" section above, but here's another example:
|
|
97
186
|
|
|
98
187
|
```ruby
|
|
99
|
-
store = ConcurrentPipeline
|
|
100
|
-
|
|
188
|
+
store = ConcurrentPipeline.store do
|
|
189
|
+
dir("/tmp/my_pipeline")
|
|
101
190
|
|
|
102
191
|
record(:user) do
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
192
|
+
schema(:users) do |t|
|
|
193
|
+
t.string(:first_name)
|
|
194
|
+
t.string(:last_name)
|
|
195
|
+
t.integer(:age)
|
|
196
|
+
end
|
|
106
197
|
|
|
107
198
|
def full_name
|
|
108
199
|
"#{first_name} #{last_name}"
|
|
@@ -114,38 +205,175 @@ store = ConcurrentPipeline::Store.define do
|
|
|
114
205
|
end
|
|
115
206
|
end
|
|
116
207
|
|
|
117
|
-
store.
|
|
118
|
-
|
|
208
|
+
store.transaction do
|
|
209
|
+
store.user.create!(first_name: "Alice", last_name: "Smith", age: 25)
|
|
210
|
+
end
|
|
211
|
+
user = store.user.first
|
|
119
212
|
puts user.full_name # => "Alice Smith"
|
|
120
213
|
puts user.adult? # => true
|
|
121
214
|
```
|
|
122
215
|
|
|
123
|
-
###
|
|
216
|
+
### Inline Schema Definitions
|
|
124
217
|
|
|
125
|
-
|
|
218
|
+
You can define a record's schema directly inside the record block, combining the schema and custom methods in one place. Schema migrations defined this way are automatically placed at the front of the migration queue and use the table name as the migration version:
|
|
126
219
|
|
|
127
220
|
```ruby
|
|
128
|
-
|
|
129
|
-
|
|
221
|
+
store = ConcurrentPipeline.store do
|
|
222
|
+
dir("/tmp/my_pipeline")
|
|
130
223
|
|
|
131
|
-
#
|
|
132
|
-
|
|
133
|
-
|
|
224
|
+
# Define schema inline with the record
|
|
225
|
+
record(:user) do
|
|
226
|
+
schema(:users) do |t|
|
|
227
|
+
t.string(:first_name)
|
|
228
|
+
t.string(:last_name)
|
|
229
|
+
t.integer(:age)
|
|
230
|
+
t.boolean(:processed, default: false)
|
|
231
|
+
end
|
|
134
232
|
|
|
135
|
-
|
|
136
|
-
|
|
233
|
+
def full_name
|
|
234
|
+
"#{first_name} #{last_name}"
|
|
235
|
+
end
|
|
137
236
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
237
|
+
def adult?
|
|
238
|
+
age >= 18
|
|
239
|
+
end
|
|
240
|
+
end
|
|
141
241
|
|
|
142
|
-
#
|
|
143
|
-
|
|
144
|
-
|
|
242
|
+
# Regular migrations are placed after inline schemas
|
|
243
|
+
migrate do
|
|
244
|
+
create_table(:other_table) do |t|
|
|
245
|
+
t.string(:data)
|
|
246
|
+
end
|
|
145
247
|
end
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Use the record normally
|
|
251
|
+
store.transaction do
|
|
252
|
+
store.user.create!(first_name: "Alice", last_name: "Smith", age: 25)
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
user = store.user.first
|
|
256
|
+
puts user.full_name # => "Alice Smith"
|
|
257
|
+
puts user.adult? # => true
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
**Migration Order:**
|
|
261
|
+
- Inline `schema` calls are prepended (added to the front of the migration list)
|
|
262
|
+
- Regular `migrate` calls are appended (added to the end of the migration list)
|
|
263
|
+
- Schema migrations use the table name as their version identifier
|
|
264
|
+
|
|
265
|
+
This approach keeps related code together - the table schema, custom methods, and validations all in one record definition.
|
|
266
|
+
|
|
267
|
+
### Defining Associations (belongs_to, has_many)
|
|
268
|
+
|
|
269
|
+
ConcurrentPipeline supports ActiveRecord associations like `belongs_to` and `has_many`. Because the store creates versioned copies of your data, association class names must be dynamically generated to work across different store versions. This is handled automatically through the `class_name` helper.
|
|
270
|
+
|
|
271
|
+
**Important:** When defining associations, you must explicitly specify the `class_name` and `inverse_of` options, and for `has_many`, the `foreign_key` as well:
|
|
272
|
+
|
|
273
|
+
```ruby
|
|
274
|
+
store = ConcurrentPipeline.store do
|
|
275
|
+
dir("/tmp/my_pipeline")
|
|
276
|
+
|
|
277
|
+
# Parent record
|
|
278
|
+
record(:author) do
|
|
279
|
+
schema(:authors) do |t|
|
|
280
|
+
t.string(:name)
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
# has_many association
|
|
284
|
+
has_many(
|
|
285
|
+
:posts,
|
|
286
|
+
foreign_key: :author_id, # Required: specify the foreign key column
|
|
287
|
+
class_name: class_name(:post), # Required: dynamic class name for versions
|
|
288
|
+
inverse_of: :author # Required: bidirectional association
|
|
289
|
+
)
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# Child record
|
|
293
|
+
record(:post) do
|
|
294
|
+
schema(:posts) do |t|
|
|
295
|
+
t.string(:title)
|
|
296
|
+
t.text(:content)
|
|
297
|
+
t.integer(:author_id) # Foreign key column
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
# belongs_to association
|
|
301
|
+
belongs_to(
|
|
302
|
+
:author,
|
|
303
|
+
class_name: class_name(:author), # Required: dynamic class name for versions
|
|
304
|
+
inverse_of: :posts # Required: bidirectional association
|
|
305
|
+
)
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
# Create data with associations
|
|
310
|
+
author = store.author.transaction do
|
|
311
|
+
store.author.create!(name: "Jane Doe")
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
post1 = store.post.transaction do
|
|
315
|
+
store.post.create!(
|
|
316
|
+
title: "First Post",
|
|
317
|
+
content: "Hello, World!",
|
|
318
|
+
author_id: author.id
|
|
319
|
+
)
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
post2 = store.post.transaction do
|
|
323
|
+
store.post.create!(
|
|
324
|
+
title: "Second Post",
|
|
325
|
+
content: "More content",
|
|
326
|
+
author_id: author.id
|
|
327
|
+
)
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
# Use the associations
|
|
331
|
+
reloaded_author = store.author.find(author.id)
|
|
332
|
+
puts reloaded_author.posts.count # => 2
|
|
333
|
+
puts reloaded_author.posts.first.title # => "First Post"
|
|
334
|
+
|
|
335
|
+
reloaded_post = store.post.find(post1.id)
|
|
336
|
+
puts reloaded_post.author.name # => "Jane Doe"
|
|
337
|
+
|
|
338
|
+
# Associations work across versions too!
|
|
339
|
+
v0_author = store.versions[0].author.find(author.id)
|
|
340
|
+
puts v0_author.posts.count # => 0 (no posts yet in version 0)
|
|
341
|
+
|
|
342
|
+
v1_author = store.versions[1].author.find(author.id)
|
|
343
|
+
puts v1_author.posts.count # => 1 (one post in version 1)
|
|
344
|
+
|
|
345
|
+
v2_author = store.versions[2].author.find(author.id)
|
|
346
|
+
puts v2_author.posts.count # => 2 (both posts in version 2)
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
**Why dynamic class names?** The store creates immutable snapshots of your data at each version. Each version needs its own set of model classes to prevent data from different versions from interfering with each other. The `class_name` helper generates the correct class name for each version automatically, allowing associations to work seamlessly across all versions of your data.
|
|
350
|
+
|
|
351
|
+
**Required Association Options:**
|
|
352
|
+
- `foreign_key`: The database column name storing the foreign key (must be explicitly specified on `has_many`; `belongs_to` infers it from the association name, as shown above)
|
|
353
|
+
- `class_name`: Use `class_name(:record_name)` helper to generate the correct versioned class name
|
|
354
|
+
- `inverse_of`: Specifies the reverse association for bidirectional relationships
|
|
146
355
|
|
|
147
|
-
|
|
148
|
-
|
|
356
|
+
### Filtering Records
|
|
357
|
+
|
|
358
|
+
Use ActiveRecord `where` to filter records:
|
|
359
|
+
|
|
360
|
+
```ruby
|
|
361
|
+
# ActiveRecord where clauses
|
|
362
|
+
pending_users = store.user.where(processed: false, active: true)
|
|
363
|
+
|
|
364
|
+
# Complex queries with ActiveRecord syntax
|
|
365
|
+
even_ids = store.user.where("id % 2 = 0")
|
|
366
|
+
adults = store.user.where("age >= ?", 18)
|
|
367
|
+
|
|
368
|
+
# Chain conditions
|
|
369
|
+
active_adults = store.user.where(active: true).where("age >= ?", 18)
|
|
370
|
+
|
|
371
|
+
# Use in pipeline
|
|
372
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
373
|
+
processor(:sync)
|
|
374
|
+
|
|
375
|
+
# Pass ActiveRecord relation directly
|
|
376
|
+
process(store.user.where(processed: false, active: true)) do |user|
|
|
149
377
|
# ...
|
|
150
378
|
end
|
|
151
379
|
end
|
|
@@ -156,12 +384,12 @@ end
|
|
|
156
384
|
When errors occur during async processing, they're collected and the pipeline returns `false`:
|
|
157
385
|
|
|
158
386
|
```ruby
|
|
159
|
-
pipeline = ConcurrentPipeline
|
|
387
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
160
388
|
processor(:async)
|
|
161
389
|
|
|
162
|
-
process(
|
|
390
|
+
process(store.user.where(processed: false)) do |user|
|
|
163
391
|
raise "Something went wrong with #{user.name}" if user.name == "Bob"
|
|
164
|
-
|
|
392
|
+
user.update!(processed: true)
|
|
165
393
|
end
|
|
166
394
|
end
|
|
167
395
|
|
|
@@ -173,32 +401,167 @@ unless result
|
|
|
173
401
|
end
|
|
174
402
|
```
|
|
175
403
|
|
|
404
|
+
### Assertions for Exit Conditions
|
|
405
|
+
|
|
406
|
+
Use the `assert` method within process blocks to verify exit conditions and protect against infinite loops. This is especially useful when delegating work to other classes:
|
|
407
|
+
|
|
408
|
+
```ruby
|
|
409
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
410
|
+
processor(:async)
|
|
411
|
+
|
|
412
|
+
process(MyRecord.where(status: "ready")) do |record|
|
|
413
|
+
# Delegate processing to another class
|
|
414
|
+
SomeOtherClass.call(record)
|
|
415
|
+
|
|
416
|
+
# Assert that the record's state actually changed
|
|
417
|
+
# This protects against infinite loops if SomeOtherClass fails silently
|
|
418
|
+
assert(record.status != "ready")
|
|
419
|
+
end
|
|
420
|
+
end
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
**When to use assertions:**
|
|
424
|
+
- Verifying that external services or classes actually performed expected operations
|
|
425
|
+
- Preventing infinite loops when a record's state must change for processing to continue
|
|
426
|
+
- Catching silent failures in delegated code
|
|
427
|
+
- Ensuring critical invariants are maintained during processing
|
|
428
|
+
|
|
429
|
+
Failed assertions raise `ConcurrentPipeline::Errors::AssertionFailure` and stop processing for that record.
|
|
430
|
+
|
|
431
|
+
### Progress Tracking
|
|
432
|
+
|
|
433
|
+
Use the `before_process` hook to monitor pipeline execution in real-time. The hook receives a `step` object with information about each record being processed:
|
|
434
|
+
|
|
435
|
+
```ruby
|
|
436
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
437
|
+
processor(:async)
|
|
438
|
+
|
|
439
|
+
before_process do |step|
|
|
440
|
+
puts "Processing: #{step.value.inspect}"
|
|
441
|
+
puts "Queue size: #{step.queue_size}"
|
|
442
|
+
puts "Label: #{step.label}" if step.label
|
|
443
|
+
end
|
|
444
|
+
|
|
445
|
+
process(store.user.where(processed: false)) do |user|
|
|
446
|
+
user.update!(processed: true)
|
|
447
|
+
end
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
pipeline.process(store)
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
**Step attributes:**
|
|
454
|
+
- `step.value` - The record being processed
|
|
455
|
+
- `step.queue_size` - Number of items remaining in the queue for this process step
|
|
456
|
+
- `step.label` - Optional label assigned to the process step
|
|
457
|
+
|
|
458
|
+
Use labels to distinguish between different processing steps:
|
|
459
|
+
|
|
460
|
+
```ruby
|
|
461
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
462
|
+
processor(:async)
|
|
463
|
+
|
|
464
|
+
before_process do |step|
|
|
465
|
+
puts "Processing: #{step.label}"
|
|
466
|
+
puts "#{step.queue_size} items remaining in this step"
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
process(store.company.where(fetched: false), label: "fetch_companies") do |company|
|
|
470
|
+
employees = api_fetch_employees(company.name)
|
|
471
|
+
employees.each { |emp| store.employee.create!(company_name: company.name, name: emp) }
|
|
472
|
+
company.update!(fetched: true)
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
process(store.employee.where(processed: false), label: "process_employees") do |employee|
|
|
476
|
+
send_welcome_email(employee.name)
|
|
477
|
+
employee.update!(processed: true)
|
|
478
|
+
end
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
pipeline.process(store)
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
The `before_process` hook is called before each record is processed, making it ideal for:
|
|
485
|
+
- Logging progress
|
|
486
|
+
- Updating progress bars
|
|
487
|
+
- Sending status updates
|
|
488
|
+
- Monitoring queue sizes
|
|
489
|
+
- Debugging pipeline execution
|
|
490
|
+
|
|
491
|
+
### Periodic Timer Hook
|
|
492
|
+
|
|
493
|
+
Use the `timer` hook to execute code periodically during pipeline processing. This is useful for status updates, logging, or monitoring:
|
|
494
|
+
|
|
495
|
+
```ruby
|
|
496
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
497
|
+
processor(:async)
|
|
498
|
+
|
|
499
|
+
# Quick status updates every 2 seconds
|
|
500
|
+
timer(2) do |stats|
|
|
501
|
+
puts "Progress: #{stats.completed} completed, #{stats.queue_size} in queue"
|
|
502
|
+
puts "Runtime: #{stats.time.round(2)} seconds"
|
|
503
|
+
end
|
|
504
|
+
|
|
505
|
+
# Detailed report every 30 seconds
|
|
506
|
+
timer(30) do |stats|
|
|
507
|
+
puts "\n=== Pipeline Status Report ==="
|
|
508
|
+
puts "Completed: #{stats.completed}"
|
|
509
|
+
puts "Queue size: #{stats.queue_size}"
|
|
510
|
+
puts "Runtime: #{stats.time.round(2)}s"
|
|
511
|
+
puts "============================\n"
|
|
512
|
+
end
|
|
513
|
+
|
|
514
|
+
process(store.user.where(processed: false)) do |user|
|
|
515
|
+
expensive_operation(user)
|
|
516
|
+
user.update!(processed: true)
|
|
517
|
+
end
|
|
518
|
+
end
|
|
519
|
+
|
|
520
|
+
pipeline.process(store)
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
**Timer receives a Stats object with:**
|
|
524
|
+
- `stats.queue_size` - Number of items currently in the queue waiting to be processed
|
|
525
|
+
- `stats.completed` - Total number of steps that have been completed
|
|
526
|
+
- `stats.time` - Number of seconds the pipeline has been running (as a Float)
|
|
527
|
+
|
|
528
|
+
**Timer behavior:**
|
|
529
|
+
- Timers run on separate threads/fibers and don't block processing
|
|
530
|
+
- Timer errors are silently caught to prevent pipeline interruption
|
|
531
|
+
- Timers automatically stop when the pipeline completes
|
|
532
|
+
- Works with both `:sync` and `:async` processors
|
|
533
|
+
|
|
176
534
|
### Recovering from Failures
|
|
177
535
|
|
|
178
536
|
The store automatically versions your data. If processing fails, fix your code and restore from where you left off:
|
|
179
537
|
|
|
180
538
|
```ruby
|
|
181
539
|
# First run - fails partway through
|
|
182
|
-
store = ConcurrentPipeline
|
|
183
|
-
|
|
540
|
+
store = ConcurrentPipeline.store do
|
|
541
|
+
dir("/tmp/my_pipeline")
|
|
184
542
|
|
|
185
543
|
record(:user) do
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
544
|
+
schema(:users) do |t|
|
|
545
|
+
t.string(:name)
|
|
546
|
+
t.string(:email)
|
|
547
|
+
t.boolean(:email_sent, default: false)
|
|
548
|
+
end
|
|
189
549
|
end
|
|
190
550
|
end
|
|
551
|
+
end
|
|
191
552
|
|
|
192
|
-
|
|
553
|
+
store.transaction do
|
|
554
|
+
5.times { |i| store.user.create!(name: "User#{i}") }
|
|
555
|
+
end
|
|
193
556
|
|
|
194
|
-
pipeline = ConcurrentPipeline
|
|
557
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
195
558
|
processor(:async)
|
|
196
559
|
|
|
197
|
-
process(
|
|
560
|
+
process(store.user.where(email_sent: false)) do |user|
|
|
198
561
|
# Oops, forgot to handle missing emails
|
|
199
562
|
email = fetch_email_for(user.name) # Might return nil!
|
|
200
563
|
send_email(email) # This will fail if email is nil
|
|
201
|
-
|
|
564
|
+
user.update!(email: email, email_sent: true)
|
|
202
565
|
end
|
|
203
566
|
end
|
|
204
567
|
|
|
@@ -206,44 +569,41 @@ pipeline.process(store) # Some succeed, some fail
|
|
|
206
569
|
|
|
207
570
|
# Check what versions exist
|
|
208
571
|
store.versions.each_with_index do |version, i|
|
|
209
|
-
puts "Version #{i}: #{version.
|
|
572
|
+
puts "Version #{i}: #{version.user.where(email_sent: true).count} emails sent"
|
|
210
573
|
end
|
|
211
574
|
|
|
212
|
-
#
|
|
213
|
-
|
|
214
|
-
restored_store = last_version.restore
|
|
575
|
+
# Restore from a previous version
|
|
576
|
+
store.restore_version(store.versions[1])
|
|
215
577
|
|
|
216
578
|
# Now run with fixed logic
|
|
217
|
-
pipeline = ConcurrentPipeline
|
|
579
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
218
580
|
processor(:async)
|
|
219
581
|
|
|
220
|
-
process(
|
|
582
|
+
process(store.user.where(email_sent: false)) do |user|
|
|
221
583
|
email = fetch_email_for(user.name) || "default@example.com" # Fixed!
|
|
222
584
|
send_email(email)
|
|
223
|
-
|
|
585
|
+
user.update!(email: email, email_sent: true)
|
|
224
586
|
end
|
|
225
587
|
end
|
|
226
588
|
|
|
227
|
-
pipeline.process(
|
|
589
|
+
pipeline.process(store) # Only processes remaining users
|
|
228
590
|
```
|
|
229
591
|
|
|
230
592
|
### Storage Structure
|
|
231
593
|
|
|
232
|
-
|
|
594
|
+
The store uses SQLite databases for storage:
|
|
233
595
|
|
|
234
596
|
```
|
|
235
597
|
/tmp/my_pipeline/
|
|
236
|
-
├──
|
|
598
|
+
├── db.sqlite3 # Current state database
|
|
237
599
|
└── versions/
|
|
238
|
-
├──
|
|
239
|
-
├── 0002.yml # Historical version 2
|
|
240
|
-
└── 0003.yml # Historical version 3
|
|
600
|
+
├── {timestamp}.sqlite3 # Historical version backups
|
|
241
601
|
```
|
|
242
602
|
|
|
243
|
-
- **`
|
|
244
|
-
- **`versions/`**: Contains
|
|
603
|
+
- **`db.sqlite3`**: Contains the current state of your data with full ActiveRecord capabilities.
|
|
604
|
+
- **`versions/`**: Contains complete database snapshots taken at each version point.
|
|
245
605
|
|
|
246
|
-
|
|
606
|
+
Versions are automatically created during pipeline processing, allowing you to inspect historical states and restore if needed. Each version is a complete, independent SQLite database.
|
|
247
607
|
|
|
248
608
|
### Running Shell Commands
|
|
249
609
|
|
|
@@ -252,16 +612,16 @@ The `Shell` class helps run external commands within your pipeline. It exists be
|
|
|
252
612
|
Available in process blocks via the `shell` helper:
|
|
253
613
|
|
|
254
614
|
```ruby
|
|
255
|
-
pipeline = ConcurrentPipeline
|
|
615
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
256
616
|
processor(:sync)
|
|
257
617
|
|
|
258
|
-
process(
|
|
618
|
+
process(store.repository.where(cloned: false)) do |repo|
|
|
259
619
|
# Shell.run returns a Result with stdout, stderr, success?, command
|
|
260
620
|
result = shell.run("git clone #{repo.url} /tmp/#{repo.name}")
|
|
261
621
|
|
|
262
622
|
if result.success?
|
|
263
623
|
puts result.stdout
|
|
264
|
-
|
|
624
|
+
repo.update!(cloned: true)
|
|
265
625
|
else
|
|
266
626
|
puts "Failed: #{result.stderr}"
|
|
267
627
|
end
|
|
@@ -272,21 +632,21 @@ end
|
|
|
272
632
|
Use `run!` to raise on failure:
|
|
273
633
|
|
|
274
634
|
```ruby
|
|
275
|
-
process(
|
|
635
|
+
process(store.repository.where(cloned: false)) do |repo|
|
|
276
636
|
# Raises error if command fails, returns stdout if success
|
|
277
637
|
output = shell.run!("git clone #{repo.url} /tmp/#{repo.name}")
|
|
278
|
-
|
|
638
|
+
repo.update!(cloned: true, output: output)
|
|
279
639
|
end
|
|
280
640
|
```
|
|
281
641
|
|
|
282
642
|
Stream output in real-time with a block:
|
|
283
643
|
|
|
284
644
|
```ruby
|
|
285
|
-
process(
|
|
645
|
+
process(store.project.where(built: false)) do |project|
|
|
286
646
|
shell.run("npm run build") do |stream, line|
|
|
287
647
|
puts "[#{stream}] #{line}"
|
|
288
648
|
end
|
|
289
|
-
|
|
649
|
+
project.update!(built: true)
|
|
290
650
|
end
|
|
291
651
|
```
|
|
292
652
|
|
|
@@ -307,40 +667,46 @@ puts version # => "ruby 3.2.9 ..."
|
|
|
307
667
|
Chain multiple steps together - each step processes what the previous step created:
|
|
308
668
|
|
|
309
669
|
```ruby
|
|
310
|
-
store = ConcurrentPipeline
|
|
311
|
-
|
|
670
|
+
store = ConcurrentPipeline.store do
|
|
671
|
+
dir("/tmp/my_pipeline")
|
|
312
672
|
|
|
313
673
|
record(:company) do
|
|
314
|
-
|
|
315
|
-
|
|
674
|
+
schema(:companies) do |t|
|
|
675
|
+
t.string(:name)
|
|
676
|
+
t.boolean(:fetched, default: false)
|
|
677
|
+
end
|
|
316
678
|
end
|
|
317
679
|
|
|
318
680
|
record(:employee) do
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
681
|
+
schema(:employees) do |t|
|
|
682
|
+
t.string(:company_name)
|
|
683
|
+
t.string(:name)
|
|
684
|
+
t.boolean(:processed, default: false)
|
|
685
|
+
end
|
|
322
686
|
end
|
|
323
687
|
end
|
|
324
688
|
|
|
325
|
-
store.
|
|
326
|
-
store.create(
|
|
689
|
+
store.transaction do
|
|
690
|
+
store.company.create!(name: "Acme Corp")
|
|
691
|
+
store.company.create!(name: "Tech Inc")
|
|
692
|
+
end
|
|
327
693
|
|
|
328
|
-
pipeline = ConcurrentPipeline
|
|
694
|
+
pipeline = ConcurrentPipeline.pipeline do
|
|
329
695
|
processor(:async)
|
|
330
696
|
|
|
331
697
|
# Step 1: Fetch employees for each company
|
|
332
|
-
process(
|
|
698
|
+
process(store.company.where(fetched: false)) do |company|
|
|
333
699
|
employees = api_fetch_employees(company.name)
|
|
334
700
|
employees.each do |emp|
|
|
335
|
-
store.create(
|
|
701
|
+
store.employee.create!(company_name: company.name, name: emp)
|
|
336
702
|
end
|
|
337
|
-
|
|
703
|
+
company.update!(fetched: true)
|
|
338
704
|
end
|
|
339
705
|
|
|
340
706
|
# Step 2: Process each employee
|
|
341
|
-
process(
|
|
707
|
+
process(store.employee.where(processed: false)) do |employee|
|
|
342
708
|
send_welcome_email(employee.name)
|
|
343
|
-
|
|
709
|
+
employee.update!(processed: true)
|
|
344
710
|
end
|
|
345
711
|
end
|
|
346
712
|
|