concurrent_pipeline 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +9 -0
- data/.ruby-version +1 -1
- data/README.md +232 -353
- data/Rakefile +4 -2
- data/concurrent_pipeline.gemspec +3 -1
- data/lib/concurrent_pipeline/pipeline.rb +14 -201
- data/lib/concurrent_pipeline/pipelines/processors/asynchronous.rb +92 -0
- data/lib/concurrent_pipeline/pipelines/processors/locker.rb +28 -0
- data/lib/concurrent_pipeline/pipelines/processors/synchronous.rb +50 -0
- data/lib/concurrent_pipeline/pipelines/schema.rb +56 -0
- data/lib/concurrent_pipeline/store.rb +88 -13
- data/lib/concurrent_pipeline/stores/schema/record.rb +47 -0
- data/lib/concurrent_pipeline/stores/schema.rb +35 -0
- data/lib/concurrent_pipeline/stores/storage/yaml/fs.rb +140 -0
- data/lib/concurrent_pipeline/stores/storage/yaml.rb +196 -0
- data/lib/concurrent_pipeline/version.rb +1 -1
- data/lib/concurrent_pipeline.rb +13 -9
- metadata +40 -14
- data/.rubocop.yml +0 -14
- data/lib/concurrent_pipeline/changeset.rb +0 -133
- data/lib/concurrent_pipeline/model.rb +0 -31
- data/lib/concurrent_pipeline/processors/actor_processor.rb +0 -363
- data/lib/concurrent_pipeline/producer.rb +0 -156
- data/lib/concurrent_pipeline/read_only_store.rb +0 -22
- data/lib/concurrent_pipeline/registry.rb +0 -36
- data/lib/concurrent_pipeline/stores/versioned.rb +0 -24
- data/lib/concurrent_pipeline/stores/yaml/db.rb +0 -110
- data/lib/concurrent_pipeline/stores/yaml/history.rb +0 -67
- data/lib/concurrent_pipeline/stores/yaml.rb +0 -40
data/README.md
CHANGED
|
@@ -18,456 +18,335 @@ This code I've just written is already legacy code. Good luck!
|
|
|
18
18
|
|
|
19
19
|
### License
|
|
20
20
|
|
|
21
|
-
WTFPL
|
|
21
|
+
[WTFPL](https://www.wtfpl.net/txt/copying/)
|
|
22
22
|
|
|
23
23
|
## Guide and Code Examples
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
The text above was written by a human. The text below was written by Monsieur Claude. Is it correct? Yeah, I guess probably, sure, let's go with "yep" ok?
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
### Basic Example
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
# Define your producer:
|
|
29
|
+
Define a store with records, create a pipeline with processing steps, and run it:
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
steps(:step_1, :step_2)
|
|
31
|
+
```ruby
|
|
32
|
+
require "concurrent_pipeline"
|
|
35
33
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
# Define your data store
|
|
35
|
+
store = ConcurrentPipeline::Store.define do
|
|
36
|
+
storage(:yaml, dir: "/tmp/my_pipeline")
|
|
39
37
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
38
|
+
record(:user) do
|
|
39
|
+
attribute(:name)
|
|
40
|
+
attribute(:processed, default: false)
|
|
43
41
|
end
|
|
44
42
|
end
|
|
45
43
|
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# hi from step_1
|
|
50
|
-
# hi from step_2
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
Wow! What a convoluted way to just run two methods!
|
|
44
|
+
# Create some data
|
|
45
|
+
store.create(:user, name: "Alice")
|
|
46
|
+
store.create(:user, name: "Bob")
|
|
54
47
|
|
|
55
|
-
|
|
48
|
+
# Define processing pipeline
|
|
49
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
50
|
+
processor(:sync) # Run sequentially
|
|
56
51
|
|
|
57
|
-
|
|
52
|
+
process(:user, processed: false) do |user|
|
|
53
|
+
puts "Processing #{user.name}"
|
|
54
|
+
store.update(user, processed: true)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
58
57
|
|
|
59
|
-
|
|
58
|
+
# Run it
|
|
59
|
+
pipeline.process(store)
|
|
60
|
+
```
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
- `Pipeline#changeset`: returns a Changeset
|
|
63
|
-
- `Pipeline#stream`: returns a Stream (covered in a later example)
|
|
62
|
+
### Async Processing
|
|
64
63
|
|
|
65
|
-
|
|
64
|
+
Use the `:async` processor to run steps concurrently:
|
|
66
65
|
|
|
67
66
|
```ruby
|
|
68
|
-
|
|
67
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
68
|
+
processor(:async) # Run concurrently
|
|
69
69
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
70
|
+
process(:user, processed: false) do |user|
|
|
71
|
+
# Each user processed in parallel
|
|
72
|
+
sleep 1
|
|
73
|
+
store.update(user, processed: true)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
```
|
|
74
77
|
|
|
75
|
-
|
|
76
|
-
# models are immutable. If you update an
|
|
77
|
-
# attribute here it will be forgotten at the
|
|
78
|
-
# end of the step. All models are re-created
|
|
79
|
-
# from the store for every step.
|
|
78
|
+
Control concurrency and polling with optional parameters:
|
|
80
79
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
```ruby
|
|
81
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
82
|
+
# concurrency: max parallel tasks (default: 5)
|
|
83
|
+
# enqueue_seconds: sleep between checking for new work (default: 0.1)
|
|
84
|
+
processor(:async, concurrency: 10, enqueue_seconds: 0.5)
|
|
85
|
+
|
|
86
|
+
process(:user, processed: false) do |user|
|
|
87
|
+
# Up to 10 users processed concurrently
|
|
88
|
+
expensive_api_call(user)
|
|
89
|
+
store.update(user, processed: true)
|
|
84
90
|
end
|
|
91
|
+
end
|
|
92
|
+
```
|
|
85
93
|
|
|
86
|
-
|
|
87
|
-
steps(:step_1, :step_2)
|
|
94
|
+
### Custom Methods on Records
|
|
88
95
|
|
|
89
|
-
|
|
90
|
-
# An :id will automatically be created or you can
|
|
91
|
-
# pass your own:
|
|
92
|
-
changeset.create(:my_model, id: 1, status: "created")
|
|
93
|
-
end
|
|
96
|
+
Records can have custom methods defined in the record block:
|
|
94
97
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
+
```ruby
|
|
99
|
+
store = ConcurrentPipeline::Store.define do
|
|
100
|
+
storage(:yaml, dir: "/tmp/my_pipeline")
|
|
98
101
|
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
record(:user) do
|
|
103
|
+
attribute(:first_name)
|
|
104
|
+
attribute(:last_name)
|
|
105
|
+
attribute(:age)
|
|
101
106
|
|
|
102
|
-
|
|
107
|
+
def full_name
|
|
108
|
+
"#{first_name} #{last_name}"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def adult?
|
|
112
|
+
age >= 18
|
|
103
113
|
end
|
|
104
114
|
end
|
|
105
115
|
end
|
|
106
116
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
#
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
# view results:
|
|
113
|
-
puts producer.data
|
|
114
|
-
# {
|
|
115
|
-
# my_model: [
|
|
116
|
-
# { id: 1, status: "updated" },
|
|
117
|
-
# ]
|
|
118
|
-
# }
|
|
117
|
+
store.create(:user, first_name: "Alice", last_name: "Smith", age: 25)
|
|
118
|
+
user = store.all(:user).first
|
|
119
|
+
puts user.full_name # => "Alice Smith"
|
|
120
|
+
puts user.adult? # => true
|
|
119
121
|
```
|
|
120
122
|
|
|
121
|
-
|
|
123
|
+
### Filtering Records
|
|
122
124
|
|
|
123
|
-
|
|
125
|
+
Use `where` to filter records, or pass filters directly to `process`:
|
|
124
126
|
|
|
125
|
-
|
|
127
|
+
```ruby
|
|
128
|
+
# Manual filtering
|
|
129
|
+
pending_users = store.where(:user, processed: false, active: true)
|
|
126
130
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
131
|
+
# Filter with lambdas/procs for custom logic
|
|
132
|
+
even_ids = store.where(:user, id: ->(id) { id.to_i.even? })
|
|
133
|
+
adults = store.where(:user, age: ->(age) { age >= 18 })
|
|
130
134
|
|
|
131
|
-
|
|
135
|
+
# Combine regular values with lambda filters
|
|
136
|
+
active_adults = store.where(:user, active: true, age: ->(age) { age >= 18 })
|
|
132
137
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
attribute :id # an :id attribute is always required!
|
|
137
|
-
attribute :status
|
|
138
|
-
end
|
|
138
|
+
# Or use filters in pipeline
|
|
139
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
140
|
+
processor(:sync)
|
|
139
141
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
#
|
|
143
|
-
# finished successfully
|
|
144
|
-
steps(
|
|
145
|
-
:step_1,
|
|
146
|
-
[:step_2, :step_3],
|
|
147
|
-
:step_4
|
|
148
|
-
)
|
|
149
|
-
|
|
150
|
-
# noops since we're just demonstrating usage here.
|
|
151
|
-
def step_1; end
|
|
152
|
-
def step_2; end
|
|
153
|
-
def step_3; end
|
|
154
|
-
def step_4; end
|
|
142
|
+
# Old style: pass a lambda
|
|
143
|
+
process(-> { store.all(:user).select(&:active?) }) do |user|
|
|
144
|
+
# ...
|
|
155
145
|
end
|
|
156
146
|
|
|
157
|
-
#
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
steps(:step_1)
|
|
161
|
-
def step_1; end
|
|
162
|
-
end
|
|
163
|
-
|
|
164
|
-
# passing `each:` to the Pipeline indicates that it
|
|
165
|
-
# should be run for every record of that type. When
|
|
166
|
-
# `each:` is specified, the record can be accessed
|
|
167
|
-
# using the `record` method.
|
|
168
|
-
#
|
|
169
|
-
# Note: every record will be processed concurrently.
|
|
170
|
-
# You can limit concurrency by passing the
|
|
171
|
-
# `concurrency: {integer}` option. The default
|
|
172
|
-
# concurrency is Infinite! INFINIIIIITE!!1!11!!!1!
|
|
173
|
-
pipeline(each: :my_model, concurrency: 3) do
|
|
174
|
-
steps(:process)
|
|
175
|
-
|
|
176
|
-
def process
|
|
177
|
-
changeset.update(record, status: "processed")
|
|
178
|
-
end
|
|
147
|
+
# New style: pass record name and filters
|
|
148
|
+
process(:user, processed: false, active: true) do |user|
|
|
149
|
+
# ...
|
|
179
150
|
end
|
|
180
151
|
end
|
|
181
|
-
|
|
182
|
-
# Lets Pass some initial data:
|
|
183
|
-
initial_data = {
|
|
184
|
-
my_model: [
|
|
185
|
-
{ id: 1, status: "waiting" },
|
|
186
|
-
{ id: 2, status: "waiting" },
|
|
187
|
-
{ id: 3, status: "waiting" },
|
|
188
|
-
]
|
|
189
|
-
}
|
|
190
|
-
producer = MyProducer.new(data: initial_data)
|
|
191
|
-
|
|
192
|
-
# invoke it:
|
|
193
|
-
producer.call
|
|
194
|
-
|
|
195
|
-
# view results:
|
|
196
|
-
puts producer.data
|
|
197
|
-
# {
|
|
198
|
-
# my_model: [
|
|
199
|
-
# { id: 1, status: "processed" },
|
|
200
|
-
# { id: 2, status: "processed" },
|
|
201
|
-
# { id: 3, status: "processed" },
|
|
202
|
-
# ]
|
|
203
|
-
# }
|
|
204
152
|
```
|
|
205
153
|
|
|
206
|
-
###
|
|
207
|
-
|
|
208
|
-
A version is created each time a record is updated. This example shows how to view and rerun with a prior version.
|
|
154
|
+
### Error Handling
|
|
209
155
|
|
|
210
|
-
|
|
156
|
+
When errors occur during async processing, they're collected and the pipeline returns `false`:
|
|
211
157
|
|
|
212
158
|
```ruby
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
attribute :id # an :id attribute is always required!
|
|
216
|
-
attribute :status
|
|
217
|
-
end
|
|
159
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
160
|
+
processor(:async)
|
|
218
161
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
def process
|
|
223
|
-
changeset.update(record, status: "processed")
|
|
224
|
-
end
|
|
162
|
+
process(:user, processed: false) do |user|
|
|
163
|
+
raise "Something went wrong with #{user.name}" if user.name == "Bob"
|
|
164
|
+
store.update(user, processed: true)
|
|
225
165
|
end
|
|
226
166
|
end
|
|
227
167
|
|
|
228
|
-
|
|
229
|
-
my_model: [
|
|
230
|
-
{ id: 1, status: "waiting" },
|
|
231
|
-
{ id: 2, status: "waiting" },
|
|
232
|
-
]
|
|
233
|
-
}
|
|
234
|
-
producer = MyProducer.new(data: initial_data)
|
|
235
|
-
producer.call
|
|
236
|
-
|
|
237
|
-
# access the versions like so:
|
|
238
|
-
puts producer.history.versions.count
|
|
239
|
-
# 5
|
|
240
|
-
|
|
241
|
-
# A version can tell you what diff it applied.
|
|
242
|
-
# Notice here, the :PipelineStep record, that
|
|
243
|
-
# is how the progress is tracked internally.
|
|
244
|
-
puts producer.history.versions[3].diff
|
|
245
|
-
# {
|
|
246
|
-
# changes: [
|
|
247
|
-
# {
|
|
248
|
-
# :action: :update,
|
|
249
|
-
# id: 1,
|
|
250
|
-
# type: :my_model,
|
|
251
|
-
# delta: {:status: "processed"}
|
|
252
|
-
# },
|
|
253
|
-
# {
|
|
254
|
-
# action: :update,
|
|
255
|
-
# id: "5d02ca83-0435-49b5-a812-d4da4eef080e",
|
|
256
|
-
# type: :PipelineStep,
|
|
257
|
-
# delta: {
|
|
258
|
-
# :completed_at: "2024-05-10T18:44:04+00:00",
|
|
259
|
-
# result: :success
|
|
260
|
-
# }
|
|
261
|
-
# }
|
|
262
|
-
# ]
|
|
263
|
-
# }
|
|
264
|
-
|
|
265
|
-
# Let's re-process using a previous version:
|
|
266
|
-
# This will just pick up where it was left off
|
|
267
|
-
re_producer = MyProducer.new(
|
|
268
|
-
store: producer.history.versions[3].store
|
|
269
|
-
)
|
|
270
|
-
re_producer.call
|
|
271
|
-
|
|
272
|
-
# If you need to change the code, you'd probably
|
|
273
|
-
# want to write the data to disk and then read
|
|
274
|
-
# it the next time you run:
|
|
275
|
-
|
|
276
|
-
File.write(
|
|
277
|
-
"last_good_version.yml",
|
|
278
|
-
producer.history.versions[3].store.data.to_yaml
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
# And then next time, load it like so:
|
|
282
|
-
re_producer = MyProducer.new(
|
|
283
|
-
data: YAML.unsafe_load_file("last_good_version.yml")
|
|
284
|
-
)
|
|
285
|
-
```
|
|
168
|
+
result = pipeline.process(store)
|
|
286
169
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
170
|
+
unless result
|
|
171
|
+
puts "Pipeline failed!"
|
|
172
|
+
pipeline.errors.each { |error| puts error.message }
|
|
173
|
+
end
|
|
174
|
+
```
|
|
290
175
|
|
|
291
|
-
|
|
176
|
+
### Recovering from Failures
|
|
292
177
|
|
|
293
|
-
|
|
178
|
+
The store automatically versions your data. If processing fails, fix your code and restore from where you left off:
|
|
294
179
|
|
|
295
180
|
```ruby
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
181
|
+
# First run - fails partway through
|
|
182
|
+
store = ConcurrentPipeline::Store.define do
|
|
183
|
+
storage(:yaml, dir: "/tmp/my_pipeline")
|
|
184
|
+
|
|
185
|
+
record(:user) do
|
|
186
|
+
attribute(:name)
|
|
187
|
+
attribute(:email)
|
|
188
|
+
attribute(:email_sent, default: false)
|
|
189
|
+
end
|
|
190
|
+
end
|
|
304
191
|
|
|
305
|
-
|
|
306
|
-
# Audio.play(:jeopardy_music)
|
|
307
|
-
end
|
|
192
|
+
5.times { |i| store.create(:user, name: "User#{i}") }
|
|
308
193
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
#
|
|
312
|
-
# Streams are really about monitoring progress,
|
|
313
|
-
# so mutating state here is probably recipe for
|
|
314
|
-
# chaos and darkness, but hey, it's your code
|
|
315
|
-
# and I say fortune favors the bold (I've never
|
|
316
|
-
# actually said that until now).
|
|
317
|
-
some_other_object.reverse!
|
|
318
|
-
end
|
|
319
|
-
end
|
|
194
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
195
|
+
processor(:async)
|
|
320
196
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
# type: A symbol
|
|
327
|
-
# payload: any object, go crazy...
|
|
328
|
-
# ...but remember...concurrency...
|
|
329
|
-
stream.push(:start, "some_object!")
|
|
330
|
-
sleep 1
|
|
331
|
-
stream.push(:progress, {slept: 1 })
|
|
332
|
-
sleep 1
|
|
333
|
-
stream.push(:progress, { slept: 2 })
|
|
334
|
-
changeset.update(record, status: "processed")
|
|
335
|
-
|
|
336
|
-
# Don't feel pressured into sending an object
|
|
337
|
-
# if you don't feel like it.
|
|
338
|
-
stream.push(:finished)
|
|
339
|
-
end
|
|
197
|
+
process(:user, email_sent: false) do |user|
|
|
198
|
+
# Oops, forgot to handle missing emails
|
|
199
|
+
email = fetch_email_for(user.name) # Might return nil!
|
|
200
|
+
send_email(email) # This will fail if email is nil
|
|
201
|
+
store.update(user, email: email, email_sent: true)
|
|
340
202
|
end
|
|
341
203
|
end
|
|
342
204
|
|
|
343
|
-
|
|
205
|
+
pipeline.process(store) # Some succeed, some fail
|
|
344
206
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
puts
|
|
207
|
+
# Check what versions exist
|
|
208
|
+
store.versions.each_with_index do |version, i|
|
|
209
|
+
puts "Version #{i}: #{version.all(:user).count { |u| u.email_sent }} emails sent"
|
|
210
|
+
end
|
|
348
211
|
|
|
349
|
-
#
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
# [3, 2, 1]
|
|
353
|
-
```
|
|
212
|
+
# Fix the code and restore from last version
|
|
213
|
+
last_version = store.versions.first
|
|
214
|
+
restored_store = last_version.restore
|
|
354
215
|
|
|
355
|
-
|
|
216
|
+
# Now run with fixed logic
|
|
217
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
218
|
+
processor(:async)
|
|
356
219
|
|
|
357
|
-
|
|
220
|
+
process(:user, email_sent: false) do |user|
|
|
221
|
+
email = fetch_email_for(user.name) || "default@example.com" # Fixed!
|
|
222
|
+
send_email(email)
|
|
223
|
+
restored_store.update(user, email: email, email_sent: true)
|
|
224
|
+
end
|
|
225
|
+
end
|
|
358
226
|
|
|
359
|
-
|
|
227
|
+
pipeline.process(restored_store) # Only processes remaining users
|
|
228
|
+
```
|
|
360
229
|
|
|
361
|
-
|
|
230
|
+
### Storage Structure
|
|
362
231
|
|
|
363
|
-
|
|
232
|
+
When using YAML storage, data is stored in a simple, human-readable file structure:
|
|
364
233
|
|
|
365
|
-
```
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
234
|
+
```
|
|
235
|
+
/tmp/my_pipeline/
|
|
236
|
+
├── data.yml # Current state (always up-to-date)
|
|
237
|
+
└── versions/
|
|
238
|
+
├── 0001.yml # Historical version 1
|
|
239
|
+
├── 0002.yml # Historical version 2
|
|
240
|
+
└── 0003.yml # Historical version 3
|
|
241
|
+
```
|
|
371
242
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
attribute :processed
|
|
375
|
-
end
|
|
243
|
+
- **`data.yml`**: Contains the most recent state of your data. You can inspect this file at any time to see the current state.
|
|
244
|
+
- **`versions/`**: Contains snapshots of previous versions. Each file is a complete snapshot at that point in time.
|
|
376
245
|
|
|
377
|
-
|
|
378
|
-
# we close this pipeline as soon as we've found at least
|
|
379
|
-
# three valid :model_one records. Note that because of
|
|
380
|
-
# concurrency, we might not be able to stop at *exactly*
|
|
381
|
-
# three valid models!
|
|
382
|
-
open { store.all(:model_one).select(&:valid).count < 3 }
|
|
246
|
+
When you restore to a previous version, that version is copied to `data.yml` and any versions after it are deleted. You can then continue working from that restored state.
|
|
383
247
|
|
|
384
|
-
|
|
248
|
+
### Running Shell Commands
|
|
385
249
|
|
|
386
|
-
|
|
387
|
-
sleep(rand(4))
|
|
388
|
-
changeset.update(record, valid: true)
|
|
389
|
-
end
|
|
390
|
-
end
|
|
250
|
+
The `Shell` class helps run external commands within your pipeline. It exists because running shell commands in Ruby can be tedious - you need to capture stdout, stderr, check exit status, and handle failures. Shell simplifies this.
|
|
391
251
|
|
|
392
|
-
|
|
393
|
-
open { store.all(:model_one).select(&:valid).count >= 3 }
|
|
252
|
+
It is available in process blocks via the `shell` helper:
|
|
394
253
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
254
|
+
```ruby
|
|
255
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
256
|
+
processor(:sync)
|
|
257
|
+
|
|
258
|
+
process(:repository, cloned: false) do |repo|
|
|
259
|
+
# Shell.run returns a Result with stdout, stderr, success?, command
|
|
260
|
+
result = shell.run("git clone #{repo.url} /tmp/#{repo.name}")
|
|
261
|
+
|
|
262
|
+
if result.success?
|
|
263
|
+
puts result.stdout
|
|
264
|
+
store.update(repo, cloned: true)
|
|
265
|
+
else
|
|
266
|
+
puts "Failed: #{result.stderr}"
|
|
399
267
|
end
|
|
400
268
|
end
|
|
269
|
+
end
|
|
270
|
+
```
|
|
401
271
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
272
|
+
Use `run!` to raise on failure:
|
|
273
|
+
|
|
274
|
+
```ruby
|
|
275
|
+
process(:repository, cloned: false) do |repo|
|
|
276
|
+
# Raises error if command fails, returns stdout if success
|
|
277
|
+
output = shell.run!("git clone #{repo.url} /tmp/#{repo.name}")
|
|
278
|
+
store.update(repo, cloned: true, output: output)
|
|
279
|
+
end
|
|
280
|
+
```
|
|
411
281
|
|
|
412
|
-
|
|
282
|
+
Stream output in real time with a block:
|
|
413
283
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
284
|
+
```ruby
|
|
285
|
+
process(:project, built: false) do |project|
|
|
286
|
+
shell.run("npm run build") do |stream, line|
|
|
287
|
+
puts "[#{stream}] #{line}"
|
|
417
288
|
end
|
|
289
|
+
store.update(project, built: true)
|
|
418
290
|
end
|
|
419
|
-
|
|
420
|
-
initial_data = {
|
|
421
|
-
model_one: [
|
|
422
|
-
{ id: 1, valid: false },
|
|
423
|
-
{ id: 2, valid: false },
|
|
424
|
-
{ id: 3, valid: false },
|
|
425
|
-
{ id: 4, valid: false },
|
|
426
|
-
{ id: 5, valid: false },
|
|
427
|
-
],
|
|
428
|
-
model_two: [
|
|
429
|
-
{ id: 1, processed: false }
|
|
430
|
-
]
|
|
431
|
-
}
|
|
432
|
-
producer = MyProducer.new(data: initial_data)
|
|
433
|
-
producer.call
|
|
434
291
|
```
|
|
435
292
|
|
|
436
|
-
|
|
293
|
+
Use `Shell` outside of pipelines by calling it directly:
|
|
437
294
|
|
|
438
|
-
|
|
295
|
+
```ruby
|
|
296
|
+
# Check if a command succeeds
|
|
297
|
+
result = ConcurrentPipeline::Shell.run("which docker")
|
|
298
|
+
docker_installed = result.success?
|
|
439
299
|
|
|
440
|
-
|
|
300
|
+
# Get output or raise
|
|
301
|
+
version = ConcurrentPipeline::Shell.run!("ruby --version")
|
|
302
|
+
puts version # => "ruby 3.2.9 ..."
|
|
303
|
+
```
|
|
441
304
|
|
|
442
|
-
|
|
305
|
+
### Multiple Processing Steps
|
|
443
306
|
|
|
444
|
-
|
|
307
|
+
Chain multiple steps together - each step processes what the previous step created:
|
|
445
308
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
309
|
+
```ruby
|
|
310
|
+
store = ConcurrentPipeline::Store.define do
|
|
311
|
+
storage(:yaml, dir: "/tmp/my_pipeline")
|
|
449
312
|
|
|
450
|
-
|
|
313
|
+
record(:company) do
|
|
314
|
+
attribute(:name)
|
|
315
|
+
attribute(:fetched, default: false)
|
|
316
|
+
end
|
|
451
317
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
318
|
+
record(:employee) do
|
|
319
|
+
attribute(:company_name)
|
|
320
|
+
attribute(:name)
|
|
321
|
+
attribute(:processed, default: false)
|
|
456
322
|
end
|
|
457
323
|
end
|
|
458
|
-
```
|
|
459
324
|
|
|
460
|
-
|
|
325
|
+
store.create(:company, name: "Acme Corp")
|
|
326
|
+
store.create(:company, name: "Tech Inc")
|
|
461
327
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
328
|
+
pipeline = ConcurrentPipeline::Pipeline.define do
|
|
329
|
+
processor(:async)
|
|
330
|
+
|
|
331
|
+
# Step 1: Fetch employees for each company
|
|
332
|
+
process(:company, fetched: false) do |company|
|
|
333
|
+
employees = api_fetch_employees(company.name)
|
|
334
|
+
employees.each do |emp|
|
|
335
|
+
store.create(:employee, company_name: company.name, name: emp)
|
|
336
|
+
end
|
|
337
|
+
store.update(company, fetched: true)
|
|
338
|
+
end
|
|
468
339
|
|
|
469
|
-
|
|
340
|
+
# Step 2: Process each employee
|
|
341
|
+
process(:employee, processed: false) do |employee|
|
|
342
|
+
send_welcome_email(employee.name)
|
|
343
|
+
store.update(employee, processed: true)
|
|
344
|
+
end
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
pipeline.process(store)
|
|
348
|
+
```
|
|
470
349
|
|
|
471
|
-
|
|
350
|
+
### Final words
|
|
472
351
|
|
|
473
|
-
|
|
352
|
+
That's it, you've reached THE END OF THE INTERNET.
|