scout-gear 10.8.3 → 10.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +17 -0
  3. data/README.md +352 -0
  4. data/Rakefile +1 -0
  5. data/VERSION +1 -1
  6. data/doc/Association.md +288 -0
  7. data/doc/Entity.md +296 -0
  8. data/doc/KnowledgeBase.md +433 -0
  9. data/doc/Persist.md +356 -0
  10. data/doc/Semaphore.md +171 -0
  11. data/doc/TSV.md +449 -0
  12. data/doc/WorkQueue.md +359 -0
  13. data/doc/Workflow.md +586 -0
  14. data/lib/scout/association.rb +4 -2
  15. data/lib/scout/entity/identifiers.rb +1 -1
  16. data/lib/scout/entity/object.rb +1 -1
  17. data/lib/scout/entity/property.rb +5 -5
  18. data/lib/scout/entity.rb +1 -1
  19. data/lib/scout/knowledge_base/description.rb +1 -1
  20. data/lib/scout/knowledge_base/list.rb +7 -2
  21. data/lib/scout/knowledge_base/registry.rb +2 -2
  22. data/lib/scout/knowledge_base.rb +20 -2
  23. data/lib/scout/monitor.rb +300 -0
  24. data/lib/scout/persist/engine/packed_index.rb +2 -2
  25. data/lib/scout/persist/engine/sharder.rb +1 -1
  26. data/lib/scout/persist/tsv.rb +1 -0
  27. data/lib/scout/semaphore.rb +1 -1
  28. data/lib/scout/tsv/dumper.rb +3 -3
  29. data/lib/scout/tsv/open.rb +1 -0
  30. data/lib/scout/tsv/parser.rb +1 -1
  31. data/lib/scout/tsv/transformer.rb +1 -0
  32. data/lib/scout/tsv/util.rb +2 -2
  33. data/lib/scout/work_queue/socket.rb +1 -1
  34. data/lib/scout/work_queue/worker.rb +7 -5
  35. data/lib/scout/workflow/documentation.rb +1 -1
  36. data/lib/scout/workflow/entity.rb +22 -1
  37. data/lib/scout/workflow/step/config.rb +3 -3
  38. data/lib/scout/workflow/step/file.rb +4 -0
  39. data/lib/scout/workflow/step/info.rb +8 -2
  40. data/lib/scout/workflow/step.rb +10 -5
  41. data/lib/scout/workflow/task/inputs.rb +1 -1
  42. data/lib/scout/workflow/usage.rb +3 -2
  43. data/lib/scout/workflow/util.rb +22 -0
  44. data/scout-gear.gemspec +20 -6
  45. data/scout_commands/cat +86 -0
  46. data/scout_commands/doc +3 -1
  47. data/scout_commands/entity +151 -0
  48. data/scout_commands/system/clean +146 -0
  49. data/scout_commands/system/status +238 -0
  50. data/scout_commands/workflow/info +23 -10
  51. data/scout_commands/workflow/install +1 -1
  52. data/scout_commands/workflow/task +1 -1
  53. data/test/scout/entity/test_property.rb +1 -1
  54. data/test/scout/knowledge_base/test_registry.rb +19 -0
  55. data/test/scout/test_work_queue.rb +1 -1
  56. data/test/scout/work_queue/test_worker.rb +12 -10
  57. metadata +32 -5
  58. data/doc/lib/scout/path.md +0 -35
  59. data/doc/lib/scout/workflow/task.md +0 -13
data/doc/Workflow.md ADDED
@@ -0,0 +1,586 @@
1
+ # Workflow
2
+
3
+ The Workflow module implements a lightweight, annotation-based workflow engine. It lets you:
4
+
5
+ - Define workflows composed of named tasks with typed inputs, defaults and dependencies.
6
+ - Instantiate jobs (Steps), run them synchronously or as streams, track provenance, and persist results atomically.
7
+ - Override dependencies, archive inputs/outputs, and relocate jobs across path maps.
8
+ - Orchestrate multiple jobs under resource constraints.
9
+ - Attach helper methods to workflows and reuse them in task code.
10
+ - Generate usage and task documentation automatically.
11
+ - Extend workflows with entity-oriented helpers (EntityWorkflow).
12
+
13
+ It integrates with core modules: Annotation, IndiferentHash, Path, Open, Persist, Log, ConcurrentStream, and SOPT.
14
+
15
+ Sections:
16
+ - Defining workflows and helpers
17
+ - Inputs, tasks and dependencies
18
+ - Jobs (Step): execution, streaming, info, files, provenance
19
+ - Orchestrator: scheduling with resource rules
20
+ - Task aliases and overrides
21
+ - Usage and documentation
22
+ - Entity workflows
23
+ - Persist helper
24
+ - Path integration (step files)
25
+ - Queue helpers
26
+ - API quick reference
27
+ - CLI: scout workflow commands
28
+ - Examples
29
+
30
+ ---
31
+
32
+ ## Defining workflows and helpers
33
+
34
+ Create a module and extend Workflow. Set a name (used in job storage paths and provenance). Optionally add helper methods reusable in tasks:
35
+
36
+ ```ruby
37
+ module Baking
38
+ extend Workflow
39
+ self.name = "Baking"
40
+
41
+ helper :whisk do |eggs|
42
+ "Whisking eggs from #{eggs}"
43
+ end
44
+
45
+ helper :mix do |base, mixer|
46
+ "Mixing base (#{base}) with mixer (#{mixer})"
47
+ end
48
+
49
+ helper :bake do |batter|
50
+ "Baking batter (#{batter})"
51
+ end
52
+ end
53
+ ```
54
+
55
+ Helpers:
56
+ - Define with `helper(:name) { ... }` (or `helper :name do ... end`) to register.
57
+ - Invoke inside tasks simply as method calls.
58
+ - Outside task contexts, call `workflow.helper(:name, args...)`.
59
+
60
+ Directory:
61
+ - `Workflow.directory` defaults to Path "var/jobs".
62
+ - Each workflow has a per-name subdir: `workflow.directory # => var/jobs/<WorkflowName>`.
63
+ - Set it via `workflow.directory = Path.setup("tmp/var/jobs/<name>")`.
64
+
65
+ Anonymous workflows:
66
+ ```ruby
67
+ wf = Workflow.annonymous_workflow "MyWF" do
68
+ input :string, :string
69
+ task :length => :integer do |s|
70
+ s.length
71
+ end
72
+ end
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Inputs, tasks and dependencies
78
+
79
+ Inputs are declared before tasks:
80
+
81
+ ```ruby
82
+ input :name, :string, "Name to call", nil, jobname: true
83
+ input :count, :integer, "Times", 1, required: false
84
+ ```
85
+
86
+ - Signature: input(name, type = nil, [description], [default], [options = {}])
87
+ - Common options:
88
+ - jobname: true — this input sets the job identifier if provided.
89
+ - required: true — missing or nil values raise ParameterException.
90
+ - shortcut — preferred CLI short option letter (SOPT).
91
+
92
+ Task definitions:
93
+
94
+ ```ruby
95
+ task :call_name => :string do |name|
96
+ "Hi #{name}"
97
+ end
98
+ ```
99
+
100
+ - Signature: task(name_and_type, &block)
101
+ - name_and_type can be a Hash ({name => type}) or a Symbol/String (in which case the type defaults to :binary).
102
+ - Supported types: :string, :integer, :float, :boolean, :array, :yaml, :json, :marshal, :tsv, :binary, etc.
103
+ - Implicit inputs: the block parameters match declared inputs in order.
104
+ - Description and metadata:
105
+ - desc "..." — description shown in usage.
106
+ - returns(type) — annotate return type (already in the task type).
107
+ - extension("ext" or :dep_task) — filename extension for jobs of this task. When :dep_task, extension is inferred from aliased dependency.
108
+
109
+ Dependencies:
110
+
111
+ ```ruby
112
+ dep :prepare_batter
113
+ dep :whisk_eggs
114
+ task :bake_muffin_tray => :string do
115
+ bake(step(:prepare_batter).load)
116
+ end
117
+ ```
118
+
119
+ - dep signatures:
120
+ - dep(workflow, task, options = {}, &block)
121
+ - dep(task, options = {}, &block) — workflow self
122
+ - dep({ ... }) — pass only options
123
+ - Options map dependency inputs and behavior:
124
+ - Symbols reference previous dependencies or provided inputs by name.
125
+ - :jobname to set child jobname; `jobname: nil` to reset and use parent id where applicable.
126
+ - :compute flags: :canfail, :stream, :produce (also available at top level via block return).
127
+ - Block form receives `(jobname, options, dependencies)` and returns:
128
+ - Step — explicit dep
129
+ - Hash — merged into options (keys: :inputs, :jobname, :compute, :produce, :stream, :canfail)
130
+ - Array of Hash/Step — multiple deps
131
+
132
+ Dependency input resolution:
133
+ - Symbol value v in options tries, in order: a dep with name v, a provided input v, or the current options[v].
134
+
135
+ Recursive inputs and overrides:
136
+ - `task.recursive_inputs` merges required inputs from its dep tree, honoring local overrides.
137
+ - Override any dependency at job instantiation using keys "Workflow#task" => step_or_path:
138
+ ```ruby
139
+ base = wf.job(:step1, input1: 6)
140
+ job = wf.job(:step2, "Workflow#step1" => base)
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Jobs (Step)
146
+
147
+ Create jobs from tasks:
148
+
149
+ ```ruby
150
+ job = wf.job(:call_name, "Miguel") # jobname from jobname input
151
+ job = wf.job(:call_name, nil, name: "Cleia") # pass inputs explicitly
152
+ ```
153
+
154
+ Step basics:
155
+ - `step.run(stream = false | :no_load | :stream)`:
156
+ - false (default): computes and returns the Ruby object (non-streaming) or stored result if present.
157
+ - true or :stream: run and return a streaming IO; producer is a ConcurrentStream; join or read to EOF to finish.
158
+ - :no_load: run but return nil (useful when only persisting).
159
+ - `step.exec`: execute task block directly in-process without persistence (converts child Step return types if needed).
160
+ - `step.join`: wait for completion and raise on error; re-raises job exception from info.
161
+ - `step.path`: persisted path (Path or String).
162
+ - `step.files_dir`: companion directory `<path>.files` holding auxiliary files.
163
+ - `step.file("name")`: file helper within files_dir.
164
+ - `step.info`: IndiferentHash with status, pid, start/end times, messages, inputs, dependencies, etc. Stored at `<path>.info` (JSON by default).
165
+ - `step.log(status, [message_or_block])`: set info status and message (block timed).
166
+ - Status helpers: `done?`, `error?`, `aborted?`, `running?`, `waiting?`, `updated?`, `dirty?`, `started?`, `recoverable_error?`.
167
+ - Cleanup: `clean`, `recursive_clean`, `produce(with_fork: false)`.
168
+ - Dependency helpers: `dependencies`, `input_dependencies` (Steps in inputs), `rec_dependencies(connected=false)`.
169
+ - Provenance: `Step.prov_report(step)` returns colorized tree as text.
170
+ - Progress: `progress_bar(desc, options) { |bar| ... }`; `traverse(obj, desc:, **kwargs, &block)` integrates TSV.traverse with a bar.
171
+ - Child processes and commands:
172
+ - `child { ... }` — fork a child process and track pid in info[:children_pids].
173
+ - `cmd("shell args", log:, pipe: true, ...)` — run external command streaming stdout into process logs; ties child pid to this step.
174
+
175
+ Streaming pipelines:
176
+ - If a dep is marked for streaming (compute includes :stream) and SCOUT_EXPLICIT_STREAMING is set, you can consume child streams while they are produced.
177
+ - `step.stream` returns the next available stream copy; reading to EOF auto-joins producers (ConcurrentStream autojoin if set).
178
+ - `consume_all_streams` drains internal tees when streaming.
179
+
180
+ Saving and loading inputs:
181
+ - `task.save_inputs(dir, provided_inputs)` writes inputs to files (including file/array/file_array handling).
182
+ - `task.load_inputs(dir)` reconstructs input hash; supports .as_file/.as_path/.as_step markers and tar.gz bundles (auto-extracted).
183
+ - `step.save_inputs(dir_or_tar_gz_path)` convenience to export job inputs; `save_input_bundle` writes a tarball.
184
+
185
+ Archiving:
186
+ - `step.archive_deps` stores dependency info/inputs under `info[:archived_info]` and `info[:archived_dependencies]`.
187
+ - `step.archived_info`/`step.archived_inputs` read back archived data.
188
+
189
+ Relocation:
190
+ - `Step.load(path)` reconstructs a job, relocating to alternative maps if necessary (Path.relocate heuristics, including var/jobs/<wf>/<task>/...).
191
+
192
+ ---
193
+
194
+ ## Orchestrator: scheduling with resource rules
195
+
196
+ Workflow::Orchestrator runs sets of jobs respecting resource constraints (cpus, IO, etc.) with periodic scheduling:
197
+
198
+ ```ruby
199
+ rules = YAML.load <<-YAML
200
+ defaults:
201
+ log: 4
202
+ default_resources:
203
+ IO: 1
204
+ MyWF:
205
+ a: { resources: { cpus: 7 } }
206
+ b: { resources: { cpus: 2 } }
207
+ c: { resources: { cpus: 10 } }
208
+ d: { resources: { cpus: 15 } }
209
+ YAML
210
+
211
+ orchestrator = Workflow::Orchestrator.new(0.1, "cpus" => 30, "IO" => 10)
212
+ orchestrator.process(rules, jobs)
213
+ ```
214
+
215
+ Features:
216
+ - Builds a workload graph from jobs and their dependencies (including input_dependencies).
217
+ - Selects runnable candidates (deps done/updated, not running/error), purges duplicates.
218
+ - Applies resource requests from `rules[workflow][task]["resources"]`; tracks requested vs available; delays jobs exceeding limits.
219
+ - Runs jobs (spawn via `job.fork`) with per-job Log severity (`rules.defaults.log` or overrides).
220
+ - Handles recoverable errors (non-ScoutException): retries once after clean; on non-recoverable or repeated failures it logs the error and continues with the remaining jobs.
221
+ - Erases dependency artifacts when rules specify `erase: true` for the dep task and top-level jobs are unaffected; archives dep info to parent (see tests).
222
+ - Workflow helpers:
223
+ - `Workflow.produce(jobs, produce_cpus:, produce_timer:)` — run one or more jobs under Orchestrator.
224
+ - `Workflow.produce_dependencies(jobs, tasks, produce_cpus:, produce_timer:)` — pre-produce specific dependency tasks for given jobs.
225
+
226
+ ---
227
+
228
+ ## Task aliases and overrides
229
+
230
+ Create a task that aliases another task's output:
231
+
232
+ ```ruby
233
+ task_alias :say_hello, self, :say, name: "Miguel"
234
+ # alias name => inferred type, returns and extension from :say
235
+ ```
236
+
237
+ Behavior:
238
+ - The alias depends on the original task; upon completion:
239
+ - With config forget/remove enabled (see below), the alias job archives dependency info and either hard-links, copies, or removes dep artifacts.
240
+ - Otherwise links dep files_dir and result file directly (or copies for remote steps).
241
+ - Control via config or environment:
242
+ - SCOUT_FORGET_TASK_ALIAS / SCOUT_FORGET_DEP_TASKS (true) to forget deps on alias (also RBBT_ variants).
243
+ - SCOUT_REMOVE_TASK_ALIAS / SCOUT_REMOVE_DEP_TASKS (= 'true' or 'recursive') to remove dep files.
244
+ - The alias keeps the dep extension (`extension :dep_task` when not set).
245
+ - Mark alias as not overridden by inputs via option `:not_overriden => true`.
246
+
247
+ Overriding dependencies at job time:
248
+ - Pass `"Workflow#task" => Step_or_Path` in job inputs; the system marks dep as overridden, adjusts naming, and uses provided artifact.
249
+
250
+ ---
251
+
252
+ ## Usage and documentation
253
+
254
+ Workflow usage:
255
+ - Set `self.title`, `self.description` on the workflow.
256
+ - Provide per-task descriptions via `desc "..."` immediately before task.
257
+ - If the workflow repo includes `workflow.md` or `README.md`, it is parsed to fill title/description and to attach extended descriptions to tasks (via parse_workflow_doc).
258
+
259
+ Programmatic usage:
260
+ - `workflow.usage` — prints a summary of tasks and their short descriptions plus abridged dependency sequences.
261
+ - `workflow.usage(task)` — detailed task usage:
262
+ - Shows inputs (types, defaults) and their CLI flags (from SOPT metadata).
263
+ - Lists inherited inputs from dependencies (those not fixed by dep options).
264
+ - Explains list/file conventions for array/file inputs.
265
+ - Shows Returns type and an abridged dependency graph.
266
+
267
+ Task usage:
268
+ - `task.usage(workflow)` — render usage of a single task (See Usage tests).
269
+
270
+ SOPT integration:
271
+ - For CLI, a task generates option descriptors from recursive inputs:
272
+ - `task.get_SOPT` returns parsed `--input` options from ARGV.
273
+ - Boolean inputs render as `--flag`; string-like inputs accept `--key=value` or `--key value`.
274
+ - Array inputs accept comma-separated values; file/path arrays resolve files.
275
+
276
+ ---
277
+
278
+ ## Entity workflows
279
+
280
+ EntityWorkflow extends Workflow with an entity pattern, where tasks operate on one or many “entities” (strings or annotated objects):
281
+
282
+ ```ruby
283
+ module People
284
+ extend EntityWorkflow
285
+ self.name = 'People'
286
+ self.entity_name = 'person' # default: 'entity'
287
+
288
+ property :introduction do
289
+ "My name is #{self}"
290
+ end
291
+
292
+ entity_task hi: :string do
293
+ "Hi. #{entity.introduction}"
294
+ end
295
+
296
+ list_task group_hi: :string do
297
+ "Here is the group: " + entity_list.hi * "; "
298
+ end
299
+ end
300
+
301
+ People.setup("Miki").hi # => "Hi. My name is Miki"
302
+ People.setup(%w[Miki Clei]).group_hi
303
+ ```
304
+
305
+ - entity_task / list_task / multiple_task define tasks and matching convenience properties.
306
+ - For properties, call `.property task_name => property_type` to define accessors that trigger jobs and return run values.
307
+ - `annotation_input(name, type, desc, default, options)` declares entity annotation-sourced inputs; automatically wired as inputs for property tasks.
308
+
309
+ ---
310
+
311
+ ## Persist helper
312
+
313
+ Inside Workflow modules:
314
+ - `persist(name, type = :serializer, options = {}, &block)` — thin wrapper around Persist.persist with default dir under `Scout.var.workflows[workflow.name].persist`.
315
+
316
+ ---
317
+
318
+ ## Path integration (step files)
319
+
320
+ `Path.step_file?(path)` identifies paths under `.files` for step artifacts and generates a compact digest string. Path#digest_str is overridden to render step file info: "Step file: <Workflow>/<task>/<...>.files/...".
321
+
322
+ ---
323
+
324
+ ## Queue helpers
325
+
326
+ The queue subsystem lets you enqueue jobs (inputs saved on disk) and process them:
327
+
328
+ - `Workflow.queue_job(file)` — build a job from a queue file path `.../<workflow>/<task>/<name>...`:
329
+ - If the path is a directory or a non-empty file, parses inputs via Task.load_inputs.
330
+ - Infers clean job name (jobname input) from file base if present.
331
+ - `Workflow.unqueue(file)` — lock, run job, and remove queue file.
332
+ - CLI `scout workflow process` processes queue entries continuously or once (see below).
333
+
334
+ ---
335
+
336
+ ## API quick reference
337
+
338
+ Workflow (module-level and instance):
339
+ - Workflow.annonymous_workflow(name=nil) { ... } => Module (extends Workflow)
340
+ - Workflow.require_workflow(name) => Module (loads from workflows/<name>/workflow.rb or autoinstalls)
341
+ - Workflow.install_workflow(name[, base_repo_url]) and update_workflow_dir
342
+ - workflow.name / directory / directory= — job storage
343
+ - helper(:name) { ... } and helper(:name, args...) to call
344
+ - input(name, type, desc=nil, default=nil, options={})
345
+ - dep(workflow, task, options={}, &block) | dep(task, options={}, &block)
346
+ - desc, returns, extension
347
+ - task(name => type, &block)
348
+ - task_alias(name, workflow, original_task, options={}, &block) — alias dep_task
349
+ - job(task_name, jobname=nil, provided_inputs={}) => Step
350
+ - find_in_dependencies(name, dependencies) — find dep with name
351
+ - Documentation:
352
+ - title, description, documentation (parse_workflow_doc)
353
+ - usage([task], abridge=false)
354
+ - task_info(task_name) => hash of inputs, defaults, returns, deps, extension
355
+ - Orchestration:
356
+ - Workflow.produce(jobs, produce_cpus:, produce_timer:)
357
+ - Workflow.produce_dependencies(jobs, tasks, produce_cpus:, produce_timer:)
358
+ - Persist: persist(name, type, options) { ... }
359
+
360
+ Task:
361
+ - Task.setup(&block) — create a task proc with annotation attributes
362
+ - annotation attrs: name, type, inputs, deps, directory, description, returns, extension, workflow
363
+ - inputs — array of [name, type, desc, default, options]
364
+ - job(id=nil, provided_inputs=nil) => Step
365
+ - exec_on(binding, *inputs) — eval block on a binding (obj) with inputs
366
+ - assign_inputs(provided_inputs, id=nil) => [input_array, non_default_inputs, jobname_input?]
367
+ - process_inputs(provided_inputs, id=nil) => [input_array, non_default_inputs, digest_str]
368
+ - dependencies(id, provided_inputs, non_default_inputs, compute) => [Step...]
369
+ - recursive_inputs(overridden=[]) => inputs array
370
+ - save_inputs(dir, provided_inputs) and load_inputs(dir)
371
+
372
+ Step:
373
+ - run(stream = false | :no_load | :stream), exec, join, stream, consume_all_streams
374
+ - status: done?, error?, aborted?, running?, waiting?, updated?, dirty?, started?, recoverable_error?
375
+ - info (load_info, save_info, set_info, merge_info), info_file, messages, log
376
+ - paths: path, tmp_path, files_dir, file, files, bundle_files
377
+ - deps: dependencies, input_dependencies, rec_dependencies(connected=false), all_dependencies
378
+ - provenance: Step.prov_report(step, ...)
379
+ - resolving/relocating: Step.load(path), Step.relocate(path)
380
+ - concurrency: child { ... }, cmd(...), progress_bar, traverse
381
+ - cleaning: clean, recursive_clean, produce(with_fork: false), grace, terminated?
382
+ - overriden?: overriden_task, overriden_workflow, overriden_deps, recursive_overriden_deps
383
+ - digest_str, fingerprint, short_path, task_signature, alias?, step(:task_name)
384
+
385
+ Orchestrator:
386
+ - Orchestrator.new(timer=5, available_resources={cpus: Etc.nprocessors})
387
+ - process(rules, jobs), candidates, job_workload, workload, job_rules, job_resources
388
+ - release_resources, check_resources (internal)
389
+
390
+ ---
391
+
392
+ ## Command Line Interface (scout workflow)
393
+
394
+ The scout command discovers and runs scripts under scout_commands using the Path subsystem. For Workflow:
395
+
396
+ - General dispatcher:
397
+ - scout workflow cmd <workflow> [<subcommand> ...]
398
+ - Navigates <workflow>/share/scout_commands/<subcommand> (nested).
399
+ - If a directory is selected, lists available subcommands.
400
+ - If a file is found, it is executed; remaining ARGV parsed with SOPT.
401
+
402
+ - List installed workflows:
403
+ - scout workflow list
404
+
405
+ - Install or update workflows:
406
+ - scout workflow install <WorkflowName> [<repo_base_url>]
407
+ - workflow can be 'all' to update all installed workflows.
408
+ - Autoinstall on demand is enabled by SCOUT_WORKFLOW_AUTOINSTALL=true.
409
+ - Defaults repo base to 'https://github.com/Scout-Workflows/' (or config).
410
+
411
+ - Run a workflow task:
412
+ - scout workflow task <workflow> <task> [--jobname NAME] [--deploy serial|local|queue|SLURM|<server>|<server-slurm>] [--fork] [--nostream] [--update] [--load_inputs DIR|TAR] [--save_inputs DIR|TAR] [--printpath] [--provenance] [--clean] [--recursive_clean] [--clean_task task[,task2,...]] [--override_deps Workflow#task=path[,...]] [task-input-options...]
413
+ - Input options are auto-generated from task recursive inputs (e.g. --name, --count, etc.).
414
+ - --nostream disables streaming (writes file then prints content).
415
+ - --update recomputes if deps are newer.
416
+ - --deploy:
417
+ - serial — run in current process and stream output.
418
+ - local — run with local Orchestrator (uses cpus = Misc.processors).
419
+ - queue — save inputs to queue dir and exit.
420
+ - SLURM — submit via SLURM (requires rbbt-scout/hpc).
421
+ - <server> or <server-slurm> — offsite execution helpers (if configured).
422
+ - --fork — fork and return the job path immediately.
423
+ - --load_inputs — load inputs from a directory or tar.gz bundle (see save_inputs).
424
+ - --save_inputs — save current inputs to directory or tar.gz bundle and exit.
425
+ - --printpath — print step path after completion.
426
+ - --provenance — print provenance report and exit.
427
+ - --clean, --recursive_clean — cleanup artifacts.
428
+ - --clean_task — clean matching dependency tasks (optionally qualified as Workflow#task).
429
+ - --override_deps — override specific dependencies with paths.
430
+
431
+ Examples:
432
+ - scout workflow task Baking bake_muffin_tray --add_bluberries
433
+ - scout workflow task UsageWorkflow step2 --array "a,b" --float 1.5
434
+ - scout workflow task MyWF my_task --override_deps "MyWF#dep1=/path/result1,OtherWF#depX=/path/resultX"
435
+
436
+ - Job info:
437
+ - scout workflow info <step_path> [-i|--inputs | -ri|--recursive_inputs]
438
+ - Without flags prints the job info hash (status, pid, times, messages, inputs, dependencies).
439
+ - --inputs prints input_names and inputs.
440
+ - --recursive_inputs prints all inputs (including propagated).
441
+
442
+ - Provenance:
443
+ - scout workflow prov <step_path> [-p|--plot file.png] [-i inputs,csv] [--info_fields fields,csv] [-t|--touch] [-e|--expand_repeats]
444
+ - Prints a colorized dependency tree or plots a graph (requires R/igraph).
445
+ - --touch updates mtimes of parents consistent with deps.
446
+
447
+ - Execution trace:
448
+ - scout workflow trace <job-result> [options]
449
+ - Options: --fix_gap, --report_keys key1,key2,..., --plot file.png, --width N, --height N, --size N, --plot_data
450
+ - Prints a per-task summary by default (calls, avg time, total time).
451
+ - With --plot_data prints a per-step table with start/end offsets since first start.
452
+ - Accepts multiple jobs; includes archived info when available.
453
+
454
+ - Queue processing:
455
+ - scout workflow process [<workflow> [<task> [<name>]] | <queue_file>] [--list] [--continuous] [--produce_timer N] [--produce_cpus N] [-r|--requires file1,file2]
456
+ - Without args, processes all queued jobs under var/queue.
457
+ - --list lists matched queue files and exits.
458
+ - --continuous loops and re-checks for new jobs.
459
+ - --requires auto-requires Ruby files before processing.
460
+ - Produces jobs via Orchestrator with given cpus/timer.
461
+
462
+ - Write info:
463
+ - scout workflow write_info <job-result> <key> <value> [--force] [--recursive] [--check_pid]
464
+ - Sets an info key/value for a job and optionally all deps (and archived deps), respecting pid/host filters if requested.
465
+ - Use value 'DELETE' or 'nil' to remove a key (forces).
466
+
467
+ CLI discovery:
468
+ - All workflow CLI scripts live under <workflow>/share/scout_commands/workflow/*.
469
+ - The dispatcher `scout workflow cmd` allows invoking any custom scripts shipped with a workflow package.
470
+ - If you specify a directory rather than a script, the CLI lists available subcommands.
471
+
472
+ ---
473
+
474
+ ## Examples
475
+
476
+ Define a workflow with inputs, deps, and tasks:
477
+
478
+ ```ruby
479
+ module Pantry
480
+ extend Resource
481
+ self.subdir = 'share/pantry'
482
+
483
+ claim(Pantry.eggs, :proc) { "Eggs" }
484
+ claim(Pantry.flour, :proc) { "Flour" }
485
+ claim(Pantry.blueberries, :proc) { "Blueberries" }
486
+ end
487
+
488
+ module Baking
489
+ extend Workflow
490
+ self.name = "Baking"
491
+
492
+ helper(:whisk) { |eggs| "Whisking eggs from #{eggs}" }
493
+ helper(:mix) { |base, mixer| "Mixing base (#{base}) with mixer (#{mixer})" }
494
+ helper(:bake) { |batter| "Baking batter (#{batter})" }
495
+
496
+ task :whisk_eggs => :string do
497
+ whisk(Pantry.eggs.produce)
498
+ end
499
+
500
+ dep :whisk_eggs
501
+ input :add_bluberries, :boolean
502
+ task :prepare_batter => :string do |add_bluberries|
503
+ whisked = step(:whisk_eggs).load
504
+ batter = mix(whisked, Pantry.flour.produce)
505
+ batter = mix(batter, Pantry.blueberries.produce) if add_bluberries
506
+ batter
507
+ end
508
+
509
+ dep :prepare_batter
510
+ task :bake_muffin_tray => :string do
511
+ bake(step(:prepare_batter).load)
512
+ end
513
+ end
514
+
515
+ # Run
516
+ Baking.directory = Path.setup("tmp/var/jobs/baking")
517
+ Baking.job(:bake_muffin_tray, "Blueberry muffin", add_bluberries: true).run
518
+ # => "Baking batter (Mixing base (Mixing base (Whisking eggs from share/pantry/eggs) with mixer (share/pantry/flour)) with mixer (share/pantry/blueberries))"
519
+ ```
520
+
521
+ Streaming dependent steps:
522
+
523
+ ```ruby
524
+ times = 1000
525
+ producer = wf.task(:producer => :array) do |n|
526
+ Open.open_pipe do |sin|
527
+ n.times { |i| sin.puts "line-#{i}" }
528
+ end
529
+ end
530
+
531
+ consumer = wf.task(:consumer => :array) do
532
+ p = dependencies.first
533
+ stream = p.stream
534
+ Open.open_pipe do |sin|
535
+ while line = stream.gets
536
+ sin.puts line if line.split("-").last.to_i.even?
537
+ end
538
+ end
539
+ end
540
+
541
+ s1 = producer.job(nil, n: times)
542
+ s2 = consumer.job(nil, inputs: {}) # using dep
543
+ s2.dependencies = [s1]
544
+
545
+ io = s2.run(true)
546
+ lines = io.read.split("\n")
547
+ io.join
548
+ # lines.length == times/2
549
+ ```
550
+
551
+ Orchestrate with resource rules:
552
+
553
+ ```ruby
554
+ jobs = 6.times.map { |i| Baking.job(:bake_muffin_tray, "Job #{i}") }
555
+ rules = { "defaults" => { "log" => 4 }, "default_resources" => { "IO" => 1 }, "Baking" => { "bake_muffin_tray" => { "resources" => { "cpus" => 4 } } } }
556
+ Workflow::Orchestrator.new(0.1, "cpus" => 8, "IO" => 4).process(rules, jobs)
557
+ ```
558
+
559
+ Task aliases and cleanup:
560
+
561
+ ```ruby
562
+ module Greeter
563
+ extend Workflow
564
+ self.name = "Greeter"
565
+
566
+ input :name, :string, jobname: true
567
+ task :say => :string do |name| "Hi #{name}" end
568
+
569
+ task_alias :say_miguel, self, :say, name: "Miguel"
570
+ end
571
+
572
+ Greeter.job(:say_miguel).run # => "Hi Miguel"
573
+ ```
574
+
575
+ Provenance:
576
+
577
+ ```ruby
578
+ job = Baking.job(:bake_muffin_tray, "Normal", add_bluberries: false).run
579
+ puts Step.prov_report(job)
580
+ # or via CLI:
581
+ # scout workflow prov var/jobs/Baking/bake_muffin_tray/<id>
582
+ ```
583
+
584
+ ---
585
+
586
+ This document covers the Workflow engine: defining tasks and dependencies, creating and running jobs, streaming, info management, orchestration, documentation, and CLI integration. Use it to build reproducible pipelines with safe persistence and rich provenance.
@@ -113,7 +113,9 @@ module Association
113
113
  if target_index
114
114
  if Array === v[0]
115
115
  v[0] = target_index.values_at(*v[0])
116
- v = v.reject{|l| l[0].nil? || l[0].empty?}
116
+ non_nil_pos = []
117
+ v[0].each_with_index{|e,i| non_nil_pos << i unless e.nil? || (String === e) && e.empty? }
118
+ v = v.collect{|l| l.values_at *non_nil_pos}
117
119
  else
118
120
  v[0] = target_index[v[0]]
119
121
  next if v[0].nil? or v[0].empty?
@@ -129,7 +131,7 @@ module Association
129
131
  persist_options = IndiferentHash.pull_keys kwargs, :persist
130
132
 
131
133
  database_persist_options = IndiferentHash.add_defaults persist_options.dup, persist: true,
132
- prefix: "Association::Index", serializer: :double,
134
+ prefix: "Association::Index", serializer: :double, update: true,
133
135
  other_options: kwargs
134
136
 
135
137
  Persist.tsv(file, kwargs, engine: "BDB", persist_options: database_persist_options) do |data|
@@ -50,7 +50,7 @@ module Entity
50
50
  return [] if files.nil?
51
51
  files.collect!{|f| f.annotate f.gsub(/\b#{NAMESPACE_TAG}\b/, namespace.to_s) } if annotation_hash.include? :namespace and self.namespace
52
52
  if files.select{|f| f =~ /\b#{NAMESPACE_TAG}\b/ }.any?
53
- Log.warn "Rejecting some identifier files for lack of 'namespace': " << files.select{|f| f =~ /\b#{NAMESPACE_TAG}\b/ } * ", "
53
+ Log.warn "Rejecting some identifier files for lack of 'namespace': " + files.select{|f| f =~ /\b#{NAMESPACE_TAG}\b/ } * ", "
54
54
  end
55
55
  files.reject!{|f| f =~ /\b#{NAMESPACE_TAG}\b/ }
56
56
  files
@@ -14,7 +14,7 @@ module Entity
14
14
  end
15
15
 
16
16
  def all_properties
17
- entity_classes.inject([]){|acc,e| acc.concat(e.properties) }
17
+ entity_classes.inject([]){|acc,e| acc.concat(e.properties.keys) }
18
18
  end
19
19
  end
20
20
  end
@@ -60,7 +60,7 @@ module Entity
60
60
  raise "Type of property unknown #{type}"
61
61
  end
62
62
 
63
- properties.push name
63
+ properties[name] = block.parameters
64
64
 
65
65
  entity_class = self
66
66
  if type == :multiple
@@ -75,7 +75,7 @@ module Entity
75
75
  responses = {}
76
76
  self.each do |item|
77
77
  begin
78
- responses[item] = Entity::Property.persist(name, item, type, options) do
78
+ responses[item] = Entity::Property.persist(name, item, type, options.merge(other: {args: args, kwargs: kwargs})) do
79
79
  raise MultipleEntityProperty
80
80
  end
81
81
  rescue MultipleEntityProperty
@@ -89,7 +89,7 @@ module Entity
89
89
  new_responses = missing.instance_exec(*args, **kwargs, &block)
90
90
 
91
91
  missing.each do |item|
92
- responses[item] = Entity::Property.persist(name, item, type, options) do
92
+ responses[item] = Entity::Property.persist(name, item, type, options.merge(other: {args: args, kwargs: kwargs})) do
93
93
  Array === new_responses ? new_responses[item.container_index] : new_responses[item]
94
94
  end
95
95
  end
@@ -105,7 +105,7 @@ module Entity
105
105
  type, options = nil, {persist: false}
106
106
  end
107
107
 
108
- Entity::Property.persist(name, self, type, options) do
108
+ Entity::Property.persist(name, self, type, options.merge(other: {args: args, kwargs: kwargs})) do
109
109
  self.instance_exec(*args, **kwargs, &block)
110
110
  end
111
111
  end
@@ -138,7 +138,7 @@ module Entity
138
138
  res = (self.container._ary_property_cache[cache_code] ||= self.container.send(real_method, *args, **kwargs))
139
139
  Array === res ? res[self.container_index] : res[self]
140
140
  else
141
- res = self.make_array.send(real_method)
141
+ res = self.make_array.send(real_method, *args, **kwargs)
142
142
  Array === res ? res[0] : res[self]
143
143
  end
144
144
  end
data/lib/scout/entity.rb CHANGED
@@ -9,7 +9,7 @@ module Entity
9
9
  def self.extended(base)
10
10
  base.extend Annotation
11
11
  base.extend Entity::Property
12
- base.instance_variable_set(:@properties, [])
12
+ base.instance_variable_set(:@properties, {})
13
13
  base.instance_variable_set(:@persisted_methods, {})
14
14
  base.include Entity::Object
15
15
  base.include AnnotatedArray
@@ -64,7 +64,7 @@ class KnowledgeBase
64
64
 
65
65
  full_description = []
66
66
  empty_line = ''
67
- full_description << ("# " << Misc.humanize(name))
67
+ full_description << ("# " + Misc.humanize(name))
68
68
  full_description << empty_line
69
69
 
70
70
  source_formats = begin