scout-gear 10.8.4 → 10.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +13 -0
- data/README.md +352 -0
- data/VERSION +1 -1
- data/doc/Association.md +288 -0
- data/doc/Entity.md +296 -0
- data/doc/KnowledgeBase.md +433 -0
- data/doc/Persist.md +356 -0
- data/doc/Semaphore.md +171 -0
- data/doc/TSV.md +449 -0
- data/doc/WorkQueue.md +359 -0
- data/doc/Workflow.md +586 -0
- data/lib/scout/association.rb +4 -2
- data/lib/scout/entity/identifiers.rb +1 -1
- data/lib/scout/entity/object.rb +1 -1
- data/lib/scout/entity/property.rb +5 -5
- data/lib/scout/entity.rb +1 -1
- data/lib/scout/knowledge_base/description.rb +1 -1
- data/lib/scout/knowledge_base/list.rb +7 -2
- data/lib/scout/knowledge_base/registry.rb +2 -2
- data/lib/scout/knowledge_base.rb +20 -2
- data/lib/scout/monitor.rb +10 -6
- data/lib/scout/persist/engine/packed_index.rb +2 -2
- data/lib/scout/persist/engine/sharder.rb +1 -1
- data/lib/scout/persist/tsv.rb +1 -0
- data/lib/scout/semaphore.rb +1 -1
- data/lib/scout/tsv/dumper.rb +3 -3
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +1 -1
- data/lib/scout/tsv/transformer.rb +1 -0
- data/lib/scout/tsv/util.rb +2 -2
- data/lib/scout/work_queue/socket.rb +1 -1
- data/lib/scout/work_queue/worker.rb +7 -5
- data/lib/scout/workflow/entity.rb +22 -1
- data/lib/scout/workflow/step/config.rb +3 -3
- data/lib/scout/workflow/step/file.rb +4 -0
- data/lib/scout/workflow/step/info.rb +8 -2
- data/lib/scout/workflow/step.rb +10 -5
- data/lib/scout/workflow/task/inputs.rb +1 -1
- data/lib/scout/workflow/usage.rb +3 -2
- data/lib/scout/workflow/util.rb +22 -0
- data/scout-gear.gemspec +16 -5
- data/scout_commands/cat +86 -0
- data/scout_commands/doc +3 -1
- data/scout_commands/entity +151 -0
- data/scout_commands/system/status +238 -0
- data/scout_commands/workflow/info +23 -10
- data/scout_commands/workflow/install +1 -1
- data/test/scout/entity/test_property.rb +1 -1
- data/test/scout/knowledge_base/test_registry.rb +19 -0
- data/test/scout/test_work_queue.rb +1 -1
- data/test/scout/work_queue/test_worker.rb +12 -10
- metadata +15 -4
- data/doc/lib/scout/path.md +0 -35
- data/doc/lib/scout/workflow/task.md +0 -13
data/doc/WorkQueue.md
ADDED
@@ -0,0 +1,359 @@

# WorkQueue

WorkQueue is a lightweight, multi-process work pipeline that uses forked workers and IPC pipes to process a stream of objects in parallel. It provides:

- A queue with input/output sockets guarded by semaphores for safe concurrent access across processes.
- Worker processes that loop over input items, apply a user function, and emit results.
- Clean shutdown via sentinels; dynamic scaling (add/remove workers).
- Robust error propagation from workers to the main process.
- Efficient serialization (optimized for Strings/Integers, Marshal for general objects and annotated objects).

Core components:
- WorkQueue — orchestrates sockets, workers, reader/waiter threads, and lifecycle.
- WorkQueue::Worker — manages a single forked worker's lifecycle and processing loop.
- WorkQueue::Socket — typed pipe with serialization and semaphores.
- Exceptions: the DoneProcessing sentinel and the WorkerException wrapper.

---

## Quick start

Process items with multiple workers and collect results:

```ruby
num_workers = 10

q = WorkQueue.new(num_workers) do |obj|
  # User function runs in each worker process
  [Process.pid.to_s, obj.to_s] * " "
end

results = []
q.process do |out|
  results << out # Outgoing results seen in the parent
end

# Enqueue work (can be done from other threads or processes)
1_000.times { |i| q.write i }

q.close # Signal no more input (sends a sentinel for each worker)
q.join  # Wait for completion and cleanup

puts results.length # => 1000
```

Ignore outputs:

```ruby
# Either return :ignore from the worker proc
q = WorkQueue.new(10) { |obj| :ignore }

# Or mark workers to ignore outputs
q.ignore_ouput
```

Scale workers dynamically:

```ruby
# Remove workers gracefully (one at a time)
3.times { q.remove_one_worker }

# Add another worker with a different function
q.add_worker { |obj| "SPECIAL: #{obj}" }
```

---

## WorkQueue

Constructor:
- WorkQueue.new(workers = 0, &worker_proc)
  - workers: initial number of workers (Integer/String).
  - worker_proc: block run in each worker to process input objects.

Core attributes and methods:
- queue_id -> a stable identifier "[object_id]@[pid]".
- process(&callback)
  - Starts workers with worker_proc, a background reader to consume output and call the callback, and a waiter to reap pids.
  - Returns immediately; threads run in the background until join.
- write(obj)
  - Enqueue an object into input; serialized and delivered to workers.
  - If an input-side exception was recorded (via input socket abort), write re-raises it.
- close
  - Signal end-of-input: sends a DoneProcessing sentinel per worker.
- join(clean = true)
  - Wait for reader/waiter threads to finish. When clean=true, closes sockets and removes semaphores.
- abort
  - Stop all workers (kill INT), post semaphores to unblock waiters, and mark the queue aborted.
- add_worker(&block)
  - Add a new worker dynamically; the block overrides worker_proc for that worker.
- remove_one_worker
  - Gracefully remove one worker by sending a DoneProcessing sentinel into the input.
- ignore_ouput
  - Mark all current workers to not write results (sic: the method name is misspelled to match the implementation).
- remove_worker(pid)
  - Internal bookkeeping after a worker exits; prunes @workers and tracks removed pids (used to compute the done count).
- clean
  - Join the waiter (if any) and clean both sockets/semaphores.

Lifecycle notes:
- process spawns:
  - A reader thread: pops output items and calls your callback; finishes when all workers signal DoneProcessing.
  - A waiter thread: waits on worker pids and removes them from the pool.

Error behavior:
- If a worker raises, it is wrapped in WorkerException and written to output. The reader thread logs, aborts the queue, forwards the original exception via input abort, and raises the underlying exception in the parent.
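
Putting these calls together, a minimal lifecycle sketch that skips the automatic cleanup in join and calls clean explicitly (expensive is a hypothetical per-item function):

```ruby
# Lifecycle sketch: construct, process, feed, close, join without auto-clean, then clean.
q = WorkQueue.new(4) { |obj| expensive(obj) } # expensive is a placeholder

collected = []
q.process { |res| collected << res } # the reader thread invokes this callback

100.times { |i| q.write i } # safe to call while processing runs

q.close       # sends one DoneProcessing sentinel per worker
q.join(false) # wait for reader/waiter threads, but skip automatic cleanup
q.clean       # close sockets and remove semaphores explicitly
```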

---

## Worker

A managed forked worker that can run a block or a processing loop on sockets.

- WorkQueue::Worker.new(ignore_output = false)
  - ignore_output controls whether the worker writes results back (also controlled globally via queue.ignore_ouput).
- run { ... }
  - Forks; in the child, sets an INT trap to exit(-1), logs the start, evals the block, then exits(0).
- process(input, output) { |obj| ... }
  - Forks; in the child:
    - Purges inherited pipes (Open.purge_pipes); closes the queue's write end if provided, to avoid deadlocks.
    - Loop: obj = input.read until EOF or sentinel:
      - If obj is DoneProcessing, writes DoneProcessing to output and exits cleanly.
      - Else, res = block.call(obj); writes res to output unless ignore_output or res == :ignore.
    - On exceptions (other than DoneProcessing/Interrupt), writes WorkerException($!, pid) to output and exits(-1).
- abort
  - Sends INT to the worker pid (best-effort).
- join
  - Wait for this worker's pid to exit.
- Worker.join([workers])
  - Wait for all provided workers until no child remains.

Identifiers:
- worker_short_id — object_id@pid
- worker_id — worker_short_id->queue_id (after queue assignment)
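
For one-off background jobs, run executes a block in a forked child without any sockets; a minimal sketch:

```ruby
# Fork a child with Worker#run and wait for it to exit.
w = WorkQueue::Worker.new
w.run do
  # This block runs in the forked child process
  sleep 1
  puts "child #{Process.pid} finished"
end
w.join # wait on the child pid
```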

---

## Socket

A typed, semaphore-protected pipe abstraction used by the queue and workers.

Construction:
- WorkQueue::Socket.new(serializer = Marshal)
  - serializer must respond to dump/load; default Marshal.
  - Creates an IO.pipe pair (sread, swrite) and two named semaphores (write_sem, read_sem).

Serialization protocol (length-prefixed):
- Integer: pack with code "I" → faster than Marshal for counters.
- nil: code "N".
- String: code "C", payload is the raw string.
- Annotation::AnnotatedObject or general Ruby object: code "S", payload is serializer.dump(obj).
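
The exact frame layout is an implementation detail of WorkQueue::Socket, but the idea behind the per-type codes can be sketched roughly as follows (illustrative only, not the actual wire format):

```ruby
# Illustrative framing sketch: a type code plus a length-prefixed payload.
def frame(obj, serializer = Marshal)
  code, payload =
    case obj
    when Integer then ["I", [obj].pack("l")]      # integers packed, cheaper than Marshal
    when nil     then ["N", ""]
    when String  then ["C", obj]                  # raw string payload
    else              ["S", serializer.dump(obj)] # general/annotated objects
    end
  [payload.bytesize].pack("N") + code + payload
end
```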

API:
- push(obj) / write(obj)
  - Serialize and write atomically under the write semaphore.
- pop / read
  - Read one object atomically under the read semaphore. If it is ClosedStream, raises ClosedStream (ended writer). If DoneProcessing is received, returns the sentinel, which the worker and queue treat specially.
- close_write
  - Writes the ClosedStream sentinel, then closes swrite; subsequent reads will raise ClosedStream.
- close_read
  - Closes sread.
- clean
  - Close both ends (if open) and delete the semaphores.
- abort(exception)
  - Record an exception on the socket; future writes (queue.write) will raise it.

Notes:
- Semaphores (ScoutSemaphore) ensure that concurrent processes/threads do not interleave frames when reading/writing on shared pipes.
- Open.read_stream ensures bounded reads of exact sizes.
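
A small sketch of using a Socket directly across a fork, assuming ClosedStream is the exception class raised on reads once the writer has closed (as described above):

```ruby
# Parent writes a few frames; the forked child reads until the writer closes.
sock = WorkQueue::Socket.new

pid = fork do
  begin
    loop { puts sock.read } # Strings and Integers come back as written
  rescue ClosedStream
    # the write end was closed; exit cleanly
  end
  exit 0
end

sock.write "hello"
sock.write 42
sock.close_write # writes the ClosedStream sentinel and closes swrite

Process.waitpid pid
sock.clean # remove the named semaphores
```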

---

## Exceptions

- DoneProcessing < Exception
  - Sentinel signaling that a worker has finished processing input. Carries the pid in its message and as an attribute. Workers pass it through from input to output on shutdown.
- WorkerException < ScoutException
  - Wraps a worker-side exception and its pid; emitted on output when a worker fails.

Main-thread behavior:
- The reader thread re-raises WorkerException#worker_exception (after aborting the queue), surfacing the original cause.
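
A sketch of how a consumer of a raw output socket can interpret these objects, mirroring what the queue's reader thread does (handle_result is a hypothetical placeholder):

```ruby
# Distinguish regular results from the two special objects on an output socket.
obj = output.read
case obj
when DoneProcessing
  puts "worker #{obj.pid} finished" # the sentinel carries the worker pid
when WorkerException
  raise obj.worker_exception        # surface the original worker-side error
else
  handle_result(obj)                # hypothetical handler for a normal result
end
```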

---

## Patterns and recommendations

- Standard parallel map:

```ruby
q = WorkQueue.new(Etc.nprocessors) { |obj| compute(obj) }
out = []
q.process { |res| out << res }
items.each { |i| q.write i }
q.close
q.join
```

- Fire-and-forget additions during processing:
  - You can call q.write while the queue is processing; input is thread-safe.
  - Adding/removing workers works concurrently.

- Optional filtering:
  - Return :ignore from the worker proc to skip writing an output for an item.
  - Or call q.ignore_ouput to silence outputs from all workers.

- Graceful resize:
  - q.remove_one_worker injects DoneProcessing, causing one worker to exit cleanly.
  - You can repeat to downscale; add_worker to scale up again.

- Error handling:
  - If workers raise, expect a WorkerException to be emitted; q.process rescues, logs and aborts, and re-raises in the parent.
  - If the main callback raises, join will propagate (see tests); wrap the q.process body if you want to handle errors yourself (see the sketch after this list).

- Cleanup:
  - Always call q.close followed by q.join to ensure sockets and semaphores are cleaned.
  - On error, ensure q.clean is called (join(clean=false) lets the caller decide).
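
For the error-handling note above, a sketch that shields the queue from callback-side failures by rescuing inside the callback (risky_consume is a hypothetical consumer):

```ruby
# Handle callback errors yourself so a bad result does not take down the queue.
q.process do |res|
  begin
    risky_consume(res) # hypothetical consumer that may raise
  rescue StandardError => e
    warn "skipping result: #{e.message}"
  end
end
```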

---

## Advanced usage

- Custom serializer:

```ruby
require 'oj'
sock = WorkQueue::Socket.new(Oj) # Oj must implement dump/load
```

WorkQueue itself constructs sockets with Marshal; roll your own workers/sockets if you need alternative serialization.

- Raw Worker + Sockets:

```ruby
input = WorkQueue::Socket.new
output = WorkQueue::Socket.new
workers = 4.times.map { WorkQueue::Worker.new }
workers.each do |w|
  w.process(input, output) { |obj| obj.to_s.reverse }
end
# write/read; send DoneProcessing 4 times; close; join (see the sketch after this list)
```

- IPC correctness:
  - Workers call Open.purge_pipes in the child to avoid inherited descriptors interfering with semaphores/IO.
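
To complete the raw Worker + Sockets bullet above, a sketch of the driving loop, continuing from the variables defined in that block: feed the input, send one DoneProcessing per worker, and count the sentinels coming back.

```ruby
# Drive the four raw workers: one sentinel per worker, read until all report done.
feeder = Thread.new do
  100.times { |i| input.write i }
  4.times { input.write DoneProcessing.new }
end

results = []
done = 0
while done < 4
  v = output.read
  if DoneProcessing === v
    done += 1
  else
    results << v
  end
end

feeder.join
WorkQueue::Worker.join(workers) # wait until no child remains
input.clean
output.clean
```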

---

## API quick reference

WorkQueue:
- new(workers = 0, &worker_proc)
- process(&callback)
- write(obj)
- close
- join(clean = true)
- abort
- add_worker(&block)
- remove_one_worker
- ignore_ouput # sic: misspelled in code
- clean
- queue_id

Worker:
- new(ignore_output = false)
- run { ... }
- process(input, output) { |obj| ... }
- abort
- join
- self.join(workers)
- worker_short_id / worker_id
- pid, queue_id accessors

Socket:
- new(serializer = Marshal)
- write(obj) / push(obj)
- read / pop
- close_write / close_read
- clean
- abort(exception)
- socket_id, sread, swrite, write_sem, read_sem accessors

Exceptions:
- DoneProcessing.new(pid = Process.pid)
- WorkerException.new(worker_exception, pid)

---

## Examples

Remove workers, then add a special worker mid-flight:

```ruby
num = 10
reps = 10_000

q = WorkQueue.new(num) { |obj| "#{Process.pid} #{obj}" }

output = []
q.process { |out| output << out }

reps.times { |i| q.write i }

(num - 1).times { q.remove_one_worker } # shrink to one worker

q.add_worker { |obj| "SPECIAL" } # extra worker with unique behavior

reps.times { |i| q.write i + reps }

q.close
q.join

output.include?("SPECIAL") # => true
```

Handle errors from workers:

```ruby
q = WorkQueue.new(5) do |_|
  raise ScoutException, "worker failure"
end

q.process { |_| } # not used; will error before callback

begin
  100.times { |i| q.write i }
  q.close
  q.join(false)
rescue ScoutException
  # original worker exception surfaced here
ensure
  q.clean
end
```

Use a worker directly:

```ruby
input = WorkQueue::Socket.new
output = WorkQueue::Socket.new

w = WorkQueue::Worker.new
w.process(input, output) { |x| x * 2 }

Thread.new do
  10.times { |i| input.write i }
  input.write DoneProcessing.new
end

vals = []
loop do
  v = output.read
  break if DoneProcessing === v
  vals << v
end

w.join
input.clean
output.clean
```

---

WorkQueue focuses on simple, robust multi-process parallelism: feed a stream of items, process them in forked workers, get results back, and shut down cleanly. Use it when you need fast, CPU-parallel pipelines with minimal overhead and clear failure propagation.