scout-gear 10.11.10 → 10.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +6 -0
- data/VERSION +1 -1
- data/doc/Workflow.md +159 -1
- data/lib/scout/association/index.rb +4 -1
- data/lib/scout/association.rb +1 -1
- data/lib/scout/knowledge_base/entity.rb +2 -2
- data/lib/scout/knowledge_base/query.rb +3 -1
- data/lib/scout/work_queue/socket.rb +7 -3
- data/lib/scout/workflow/deployment/local.rb +25 -13
- data/lib/scout/workflow/documentation.rb +3 -1
- data/lib/scout/workflow/step/info.rb +7 -1
- data/lib/scout/workflow/step/inputs.rb +1 -3
- data/lib/scout/workflow/step/status.rb +1 -1
- data/lib/scout/workflow/step.rb +10 -11
- data/lib/scout/workflow/task.rb +1 -1
- data/lib/scout/workflow.rb +1 -0
- data/scout-gear.gemspec +4 -3
- data/scout_commands/purge +170 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cae9e2eae34c1d25717ba78283cca252e5b677c289fbe16e733ba03f6a438e02
|
|
4
|
+
data.tar.gz: 91c3f4c813c2fb493ebaf7d795e3085c0959e8a45719a6caf7961f27fa92f814
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b3133d93ece825983115375d519daea394914f4c70b083fc2f6f6d1c28605422129e31170becd5da4d338e8a1623ac9685855eed24b15752b31ba261417de3f7
|
|
7
|
+
data.tar.gz: 8f8bc352a274b6564360fce1a79b86f268fa88a8f22854393bb1aa773a04247ac35ed78cd22d81842a4d5a1ac04f3a1ce3ab3c292db2109ba7c05196c658533c
|
data/.vimproject
CHANGED
|
@@ -2,6 +2,11 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
|
2
2
|
Rakefile
|
|
3
3
|
README.md
|
|
4
4
|
chats=chats filter="*"{
|
|
5
|
+
|
|
6
|
+
purge
|
|
7
|
+
|
|
8
|
+
update_workflow_doc
|
|
9
|
+
|
|
5
10
|
job_chains
|
|
6
11
|
|
|
7
12
|
|
|
@@ -179,6 +184,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
|
179
184
|
alias
|
|
180
185
|
entity
|
|
181
186
|
find
|
|
187
|
+
purge
|
|
182
188
|
cat
|
|
183
189
|
glob
|
|
184
190
|
log
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
10.
|
|
1
|
+
10.12.0
|
data/doc/Workflow.md
CHANGED
|
@@ -89,6 +89,75 @@ input :count, :integer, "Times", 1, required: false
|
|
|
89
89
|
- required: true — missing or nil values raise ParameterException.
|
|
90
90
|
- shortcut — preferred CLI short option letter (SOPT).
|
|
91
91
|
|
|
92
|
+
Important (common pitfall): inputs and other annotations are **queued for the next task definition**.
|
|
93
|
+
|
|
94
|
+
In implementation terms, `input`, `dep`, `desc`, `returns` and `extension` call `annotate_next_task(...)` and
|
|
95
|
+
their annotations are consumed by the next call to `task(...)` **or** `task_alias(...)`/`dep_task(...)`.
|
|
96
|
+
After that, the annotation queue is cleared.
|
|
97
|
+
|
|
98
|
+
This is the most frequent source of confusion when you introduce intermediate `task_alias` helpers.
|
|
99
|
+
|
|
100
|
+
Bad (inputs attach to the alias, not to `analysis`):
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
input :top_k, :integer, "How many states", 5
|
|
104
|
+
task_alias :backend, self, :tool_run, mode: :fast
|
|
105
|
+
|
|
106
|
+
dep :backend
|
|
107
|
+
task :analysis => :json do |top_k|
|
|
108
|
+
# top_k will be nil here (it was attached to :backend)
|
|
109
|
+
end
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Good (define alias first, then analysis-specific inputs, then the task):
|
|
113
|
+
|
|
114
|
+
```ruby
|
|
115
|
+
task_alias :backend, self, :tool_run, mode: :fast
|
|
116
|
+
dep :backend
|
|
117
|
+
|
|
118
|
+
input :top_k, :integer, "How many states", 5
|
|
119
|
+
task :analysis => :json do |top_k|
|
|
120
|
+
# ok
|
|
121
|
+
end
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Common gotchas
|
|
125
|
+
|
|
126
|
+
These are the failure modes that most often bite first-time workflow authors:
|
|
127
|
+
|
|
128
|
+
- **Annotations attach to the next task definition**: `input`, `dep`, `desc`, `returns`, `extension` are queued and consumed by the next `task(...)` *or* `task_alias(...)` call.
|
|
129
|
+
- If you use `task_alias` as a convenience backend, define the alias *first*, then define analysis-only inputs, then define the analysis task.
|
|
130
|
+
|
|
131
|
+
- **`return` cannot be used inside tasks**: because of how tasks are implemented, you need to use `next` to abort execution
|
|
132
|
+
and return a value. Just replace `return` with `next` inside a task block.
|
|
133
|
+
|
|
134
|
+
- **`dep_task` is just an alias for `task_alias`**: it defines a task alias; it does not mean “declare a dependency”. You still need `dep :that_alias` if you want it to be a dependency.
|
|
135
|
+
|
|
136
|
+
- **`step(:name)` only finds declared dependencies**: inside a task, `step(:x)` returns a dependency Step whose `task_name` is `:x`.
|
|
137
|
+
- If you forgot `dep :x`, then `step(:x)` will be `nil`.
|
|
138
|
+
|
|
139
|
+
- **On-disk layout is `<job>`, `<job>.info`, `<job>.files/`**:
|
|
140
|
+
- `.info` is a file (JSON metadata), and `.files/` is a directory.
|
|
141
|
+
- There is no `.info.files/` path.
|
|
142
|
+
|
|
143
|
+
- **Caching and recomputation**:
|
|
144
|
+
- Jobs are cached by *non-default inputs* plus a digest of their dependency tree.
|
|
145
|
+
- Changing task code does not automatically invalidate old results; use `--update` to recompute when dependencies are newer,
|
|
146
|
+
or `--clean`/`--recursive_clean` to remove cached outputs.
|
|
147
|
+
- When debugging, it can be useful to change the job name (`--jobname`) or set an explicit input so you get a fresh job directory.
|
|
148
|
+
|
|
149
|
+
- **Array/list CLI inputs**: prefer a single comma-separated flag (e.g. `--nodes A,B,C`) rather than repeating the same flag many times.
|
|
150
|
+
|
|
151
|
+
- **Introspection vs execution helpers**:
|
|
152
|
+
- `Task#dependencies(...)` is an internal constructor used during job creation and requires arguments.
|
|
153
|
+
- For introspection, use `task.deps`, `workflow.usage(task)`, or `workflow.dep_tree(task)`.
|
|
154
|
+
|
|
155
|
+
- **Tool invocation**:
|
|
156
|
+
- `CMD.cmd('MyTool', ...)` runs a binary from PATH.
|
|
157
|
+
- `CMD.cmd(:mytool, ...)` only works if that tool symbol is registered in CMD’s tool registry.
|
|
158
|
+
- For details, see the CMD documentation (in scout-essentials: `doc/CMD.md`).
|
|
159
|
+
|
|
160
|
+
|
|
92
161
|
Task definitions:
|
|
93
162
|
|
|
94
163
|
```ruby
|
|
@@ -162,6 +231,17 @@ Step basics:
|
|
|
162
231
|
- `step.files_dir`: companion directory `<path>.files` holding auxiliary files.
|
|
163
232
|
- `step.file("name")`: file helper within files_dir.
|
|
164
233
|
- `step.info`: IndiferentHash with status, pid, start/end times, messages, inputs, dependencies, etc. Stored at `<path>.info` (JSON by default).
|
|
234
|
+
|
|
235
|
+
On disk you will typically see:
|
|
236
|
+
|
|
237
|
+
```text
|
|
238
|
+
var/jobs/<Workflow>/<task>/<jobname>.<ext> # main result
|
|
239
|
+
var/jobs/<Workflow>/<task>/<jobname>.<ext>.info # JSON info (status, inputs, deps, messages, exceptions)
|
|
240
|
+
var/jobs/<Workflow>/<task>/<jobname>.<ext>.files/ # auxiliary files created by the task
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
There is **no** `...<job>.info.files/` directory; `.info` is a file alongside the `.files/` directory.
|
|
244
|
+
|
|
165
245
|
- `step.log(status, [message_or_block])`: set info status and message (block timed).
|
|
166
246
|
- Status helpers: `done?`, `error?`, `aborted?`, `running?`, `waiting?`, `updated?`, `dirty?`, `started?`, `recoverable_error?`.
|
|
167
247
|
- Cleanup: `clean`, `recursive_clean`, `produce(with_fork: false)`.
|
|
@@ -234,6 +314,14 @@ task_alias :say_hello, self, :say, name: "Miguel"
|
|
|
234
314
|
# alias name => inferred type, returns and extension from :say
|
|
235
315
|
```
|
|
236
316
|
|
|
317
|
+
Notes:
|
|
318
|
+
- `dep_task` is an alias for `task_alias` (same method).
|
|
319
|
+
- The `workflow` argument should be a Workflow module (often `self` inside the workflow module, or an explicit module name).
|
|
320
|
+
There is no special `Self` constant.
|
|
321
|
+
- `task_alias` is itself a task definition, so any queued `input` / `dep` / `desc` / `returns` / `extension` immediately preceding it
|
|
322
|
+
are consumed by the alias (not by the following task).
|
|
323
|
+
|
|
324
|
+
|
|
237
325
|
Behavior:
|
|
238
326
|
- The alias depends on the original task; upon completion:
|
|
239
327
|
- With config forget/remove enabled (see below), the alias job archives dependency info and either hard-links, copies, or removes dep artifacts.
|
|
@@ -247,6 +335,73 @@ Behavior:
|
|
|
247
335
|
Overriding dependencies at job time:
|
|
248
336
|
- Pass `"Workflow#task" => Step_or_Path` in job inputs; the system marks dep as overridden, adjusts naming, and uses provided artifact.
|
|
249
337
|
|
|
338
|
+
### Pattern: backend + analysis tasks (wrapping external tools)
|
|
339
|
+
|
|
340
|
+
When wrapping external command-line tools (any CLI program), prefer a two-layer design:
|
|
341
|
+
|
|
342
|
+
1) **Backend task**: runs the tool, writes full outputs to `step.files_dir`, and returns a small JSON document
|
|
343
|
+
describing what was produced (paths, key parameters, summary stats).
|
|
344
|
+
|
|
345
|
+
2) **Analysis task(s)**: `dep` on the backend task and parse its outputs into compact, LLM-friendly summaries.
|
|
346
|
+
|
|
347
|
+
This pattern keeps caching/reproducibility correct (because the backend inputs are part of the dependency graph)
|
|
348
|
+
and avoids blowing up the CLI / LLM context window with large outputs.
|
|
349
|
+
|
|
350
|
+
Example skeleton:
|
|
351
|
+
|
|
352
|
+
```ruby
|
|
353
|
+
# backend
|
|
354
|
+
input :network, :text, required: true
|
|
355
|
+
input :seed, :integer, 0
|
|
356
|
+
task :tool_run => :json do |network, seed|
|
|
357
|
+
Open.write(file('input.txt'), network)
|
|
358
|
+
io = CMD.cmd('SomeTool', "--seed #{seed} '#{file('input.txt')}'", log: true, save_stderr: true)
|
|
359
|
+
raise ScoutException, io.read + "\n" + io.std_err if io.exit_status != 0
|
|
360
|
+
|
|
361
|
+
{
|
|
362
|
+
"files" => Dir.glob(file('out').to_s + '*'),
|
|
363
|
+
"params" => {"seed" => seed}
|
|
364
|
+
}.to_json
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
# analysis
|
|
368
|
+
dep :tool_run
|
|
369
|
+
input :top_k, :integer, 5
|
|
370
|
+
task :tool_summary => :json do |top_k|
|
|
371
|
+
info = JSON.parse(step(:tool_run).load)
|
|
372
|
+
# parse info["files"] ...
|
|
373
|
+
end
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
Notes:
|
|
377
|
+
- Use `step.file('name')`/`file('name')` to ensure artifacts land in the step `.files` directory.
|
|
378
|
+
- For binaries that are not registered in CMD's tool registry, use `CMD.cmd('BinaryName', ...)` (string),
|
|
379
|
+
not `CMD.cmd(:BinaryName, ...)` (symbol).
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
### Designing tasks for interactive/agent use
|
|
383
|
+
|
|
384
|
+
Many users (and autonomous agents) cannot afford to load large tool outputs into memory or into a chat context window.
|
|
385
|
+
A robust pattern is:
|
|
386
|
+
|
|
387
|
+
- **Persist the full output to disk** (in `step.files_dir`) and return only a *small* summary object.
|
|
388
|
+
- Prefer returning `:json`/`:text` with a compact JSON document.
|
|
389
|
+
|
|
390
|
+
- **Echo analysis parameters** in the returned JSON.
|
|
391
|
+
- This makes it obvious what was actually used when debugging caching, CLI parsing, or defaults.
|
|
392
|
+
|
|
393
|
+
- **Separate “run” from “summarize”**.
|
|
394
|
+
- Backend task: run tool, write outputs, return metadata + file list.
|
|
395
|
+
- Analysis task(s): parse, aggregate, downsample, and return small summaries.
|
|
396
|
+
|
|
397
|
+
- **Use task_alias for common presets**.
|
|
398
|
+
- e.g. a `*_final_run` alias that fixes `final: true`, or a `*_trajectory_run` alias that fixes `format: 'csv'`.
|
|
399
|
+
|
|
400
|
+
- **Keep results stable and machine-readable**.
|
|
401
|
+
- Prefer JSON hashes/arrays over ad-hoc human-readable text; add derived fields (like expression strings) for convenience.
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
|
|
250
405
|
---
|
|
251
406
|
|
|
252
407
|
## Usage and documentation
|
|
@@ -272,6 +427,7 @@ SOPT integration:
|
|
|
272
427
|
- `task.get_SOPT` returns parsed `--input` options from ARGV.
|
|
273
428
|
- Boolean inputs render as `--flag`; string-like inputs accept `--key=value` or `--key value`.
|
|
274
429
|
- Array inputs accept comma-separated values; file/path arrays resolve files.
|
|
430
|
+
- Tip: prefer a single flag with comma-separated values (e.g. `--nodes A,B,C`) over repeating the same flag multiple times.
|
|
275
431
|
|
|
276
432
|
---
|
|
277
433
|
|
|
@@ -366,6 +522,8 @@ Task:
|
|
|
366
522
|
- assign_inputs(provided_inputs, id=nil) => [input_array, non_default_inputs, jobname_input?]
|
|
367
523
|
- process_inputs(provided_inputs, id=nil) => [input_array, non_default_inputs, digest_str]
|
|
368
524
|
- dependencies(id, provided_inputs, non_default_inputs, compute) => [Step...]
|
|
525
|
+
- Note: `Task#dependencies` is an internal constructor used during job creation and requires arguments.
|
|
526
|
+
For introspection, use `task.deps` (declared dependency annotations) or `workflow.usage(task)` / `workflow.dep_tree(...)`.
|
|
369
527
|
- recursive_inputs(overridden=[]) => inputs array
|
|
370
528
|
- save_inputs(dir, provided_inputs) and load_inputs(dir)
|
|
371
529
|
|
|
@@ -583,4 +741,4 @@ puts Step.prov_report(job)
|
|
|
583
741
|
|
|
584
742
|
---
|
|
585
743
|
|
|
586
|
-
This document covers the Workflow engine: defining tasks and dependencies, creating and running jobs, streaming, info management, orchestration, documentation, and CLI integration. Use it to build reproducible pipelines with safe persistence and rich provenance.
|
|
744
|
+
This document covers the Workflow engine: defining tasks and dependencies, creating and running jobs, streaming, info management, orchestration, documentation, and CLI integration. Use it to build reproducible pipelines with safe persistence and rich provenance.
|
|
@@ -29,8 +29,9 @@ module Association
|
|
|
29
29
|
if database.type == :double
|
|
30
30
|
transformer.traverse do |source,value_list|
|
|
31
31
|
res = []
|
|
32
|
-
NamedArray.zip_fields(value_list).
|
|
32
|
+
NamedArray.zip_fields(value_list).each do |values|
|
|
33
33
|
target, *info = values
|
|
34
|
+
next if source.nil? or target.nil?
|
|
34
35
|
key = [source, target] * "~"
|
|
35
36
|
res << [key, info]
|
|
36
37
|
if undirected
|
|
@@ -45,6 +46,7 @@ module Association
|
|
|
45
46
|
res = []
|
|
46
47
|
res.extend MultipleResult
|
|
47
48
|
targets.each do |target|
|
|
49
|
+
next if source.nil? or target.nil?
|
|
48
50
|
key = [source, target] * "~"
|
|
49
51
|
res << [key, []]
|
|
50
52
|
if undirected
|
|
@@ -59,6 +61,7 @@ module Association
|
|
|
59
61
|
res = []
|
|
60
62
|
res.extend MultipleResult
|
|
61
63
|
target, *info = values
|
|
64
|
+
next if source.nil? or target.nil?
|
|
62
65
|
key = [source, target] * "~"
|
|
63
66
|
res << [key, info]
|
|
64
67
|
if undirected
|
data/lib/scout/association.rb
CHANGED
|
@@ -131,7 +131,7 @@ module Association
|
|
|
131
131
|
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
|
132
132
|
|
|
133
133
|
database_persist_options = IndiferentHash.add_defaults persist_options.dup, persist: true,
|
|
134
|
-
prefix: "Association::Index", serializer: :double,
|
|
134
|
+
prefix: "Association::Index", serializer: :double,
|
|
135
135
|
other_options: kwargs
|
|
136
136
|
|
|
137
137
|
Persist.tsv(file, kwargs, engine: "BDB", persist_options: database_persist_options) do |data|
|
|
@@ -91,7 +91,7 @@ class KnowledgeBase
|
|
|
91
91
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
|
92
92
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
|
|
93
93
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
|
94
|
-
TSV.translation_index identifier_files, nil, source(name), :persist => true
|
|
94
|
+
TSV.translation_index identifier_files.uniq, nil, source(name), :persist => true
|
|
95
95
|
end
|
|
96
96
|
end
|
|
97
97
|
|
|
@@ -114,7 +114,7 @@ class KnowledgeBase
|
|
|
114
114
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
|
|
115
115
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
|
|
116
116
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
|
117
|
-
TSV.translation_index identifier_files, nil, target(name), :persist => true
|
|
117
|
+
TSV.translation_index identifier_files.uniq, nil, target(name), :persist => true
|
|
118
118
|
end
|
|
119
119
|
end
|
|
120
120
|
|
|
@@ -75,7 +75,9 @@ class KnowledgeBase
|
|
|
75
75
|
entity = identify_target(name, entity)
|
|
76
76
|
matches = _parents(name, entity)
|
|
77
77
|
#matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
|
|
78
|
-
setup(name, matches, true)
|
|
78
|
+
items = setup(name, matches, true)
|
|
79
|
+
items = items.invert unless undirected(name)
|
|
80
|
+
items
|
|
79
81
|
end
|
|
80
82
|
|
|
81
83
|
def _neighbours(name, entity)
|
|
@@ -44,16 +44,19 @@ class WorkQueue
|
|
|
44
44
|
str = size_head
|
|
45
45
|
when Annotation::AnnotatedObject
|
|
46
46
|
payload = @serializer.dump(obj)
|
|
47
|
+
payload.force_encoding("BINARY")
|
|
47
48
|
size_head = [payload.bytesize,"S"].pack 'La'
|
|
48
|
-
str = size_head
|
|
49
|
+
str = size_head + payload
|
|
49
50
|
when String
|
|
50
51
|
payload = obj
|
|
51
52
|
size_head = [payload.bytesize,"C"].pack 'La'
|
|
52
|
-
|
|
53
|
+
payload.force_encoding("BINARY")
|
|
54
|
+
str = size_head + payload
|
|
53
55
|
else
|
|
54
56
|
payload = @serializer.dump(obj)
|
|
57
|
+
payload.force_encoding("BINARY")
|
|
55
58
|
size_head = [payload.bytesize,"S"].pack 'La'
|
|
56
|
-
str = size_head
|
|
59
|
+
str = size_head + payload
|
|
57
60
|
end
|
|
58
61
|
|
|
59
62
|
write_length = str.length
|
|
@@ -82,6 +85,7 @@ class WorkQueue
|
|
|
82
85
|
raise $!
|
|
83
86
|
end
|
|
84
87
|
when "C"
|
|
88
|
+
payload.force_encoding('UTF-8')
|
|
85
89
|
payload
|
|
86
90
|
end
|
|
87
91
|
rescue TryAgain
|
|
@@ -6,13 +6,10 @@ class Workflow::LocalExecutor
|
|
|
6
6
|
self.new.process(*args)
|
|
7
7
|
end
|
|
8
8
|
|
|
9
|
-
def self.produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
|
|
9
|
+
def self.produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1, bar: nil)
|
|
10
10
|
jobs = [jobs] unless Array === jobs
|
|
11
11
|
orchestrator = self.new produce_timer.to_f, cpus: produce_cpus.to_i
|
|
12
|
-
|
|
13
|
-
orchestrator.process(rules, jobs)
|
|
14
|
-
rescue self::NoWork
|
|
15
|
-
end
|
|
12
|
+
orchestrator.process(rules, jobs, bar: bar)
|
|
16
13
|
end
|
|
17
14
|
|
|
18
15
|
def self.produce_dependencies(jobs, tasks, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
|
|
@@ -59,6 +56,7 @@ class Workflow::LocalExecutor
|
|
|
59
56
|
bar.pos batches.select{|b| Workflow::Orchestrator.done_batch?(b) }.length if bar
|
|
60
57
|
|
|
61
58
|
candidates = Workflow::LocalExecutor.candidates(batches)
|
|
59
|
+
candidates = candidates.reject{|batch| failed_jobs.include? batch[:top_level] }
|
|
62
60
|
top_level_jobs = candidates.collect{|batch| batch[:top_level] }
|
|
63
61
|
|
|
64
62
|
raise NoWork, "No candidates and no running jobs #{Log.fingerprint batches}" if resources_used.empty? && top_level_jobs.empty?
|
|
@@ -148,7 +146,7 @@ class Workflow::LocalExecutor
|
|
|
148
146
|
}
|
|
149
147
|
end
|
|
150
148
|
|
|
151
|
-
def process(rules, jobs = nil)
|
|
149
|
+
def process(rules, jobs = nil, bar: nil)
|
|
152
150
|
jobs, rules = rules, {} if jobs.nil?
|
|
153
151
|
|
|
154
152
|
if Step === jobs
|
|
@@ -157,11 +155,19 @@ class Workflow::LocalExecutor
|
|
|
157
155
|
|
|
158
156
|
batches = Workflow::Orchestrator.job_batches(rules, jobs)
|
|
159
157
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
158
|
+
bar = case bar
|
|
159
|
+
when true
|
|
160
|
+
true
|
|
161
|
+
when Log::ProgressBar
|
|
162
|
+
bar.max = batches.length
|
|
163
|
+
bar
|
|
164
|
+
when nil
|
|
165
|
+
if jobs.length == 1
|
|
166
|
+
jobs.first.progress_bar("Processing batches for #{jobs.first.short_path}", max: batches.length)
|
|
167
|
+
else
|
|
168
|
+
true
|
|
169
|
+
end
|
|
170
|
+
end
|
|
165
171
|
|
|
166
172
|
batches.each do |batch|
|
|
167
173
|
rules = IndiferentHash.setup batch[:rules]
|
|
@@ -172,7 +178,14 @@ class Workflow::LocalExecutor
|
|
|
172
178
|
batch[:rules] = rules
|
|
173
179
|
end
|
|
174
180
|
|
|
175
|
-
|
|
181
|
+
begin
|
|
182
|
+
process_batches(batches, bar: bar)
|
|
183
|
+
rescue NoWork
|
|
184
|
+
batches.each do |batch|
|
|
185
|
+
job = batch[:top_level]
|
|
186
|
+
raise job.exception if job.error? && ! job.recoverable_error?
|
|
187
|
+
end
|
|
188
|
+
end
|
|
176
189
|
end
|
|
177
190
|
|
|
178
191
|
def release_resources(job)
|
|
@@ -309,7 +322,6 @@ class Workflow::LocalExecutor
|
|
|
309
322
|
end
|
|
310
323
|
|
|
311
324
|
def self.candidates(batches)
|
|
312
|
-
|
|
313
325
|
leaf_nodes = batches.select{|b| b[:deps].empty? }
|
|
314
326
|
|
|
315
327
|
leaf_nodes.reject!{|b| Workflow::Orchestrator.done_batch?(b) }
|
|
@@ -29,7 +29,9 @@ module Workflow
|
|
|
29
29
|
|
|
30
30
|
def self.parse_workflow_doc(doc)
|
|
31
31
|
title = doc_parse_first_line doc
|
|
32
|
-
description,
|
|
32
|
+
description, task_info_and_extra = doc_parse_up_to doc, /^# Tasks/i
|
|
33
|
+
task_info, extra = doc_parse_up_to task_info_and_extra, /^#[^#]/i, true
|
|
34
|
+
|
|
33
35
|
task_description, tasks = doc_parse_up_to task_info, /^##/, true
|
|
34
36
|
tasks = doc_parse_chunks tasks, /^## (.*)/
|
|
35
37
|
{:title => title.strip, :description => description.strip, :task_description => task_description.strip, :tasks => tasks}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
require 'time'
|
|
2
2
|
require 'scout/config'
|
|
3
|
+
require "json/add/exception"
|
|
4
|
+
|
|
3
5
|
class Step
|
|
4
6
|
SERIALIZER = Scout::Config.get(:serializer, :step_info, :info, :step, env: "SCOUT_SERIALIZER", default: :json)
|
|
5
7
|
def info_file
|
|
@@ -186,10 +188,14 @@ class Step
|
|
|
186
188
|
! (done? && status == :done) && (info[:pid] && Misc.pid_alive?(info[:pid]))
|
|
187
189
|
end
|
|
188
190
|
|
|
191
|
+
def self.encode_exception(e)
|
|
192
|
+
return e.to_json
|
|
193
|
+
end
|
|
194
|
+
|
|
189
195
|
def exception
|
|
190
196
|
return nil unless info[:exception]
|
|
191
197
|
begin
|
|
192
|
-
|
|
198
|
+
JSON.parse(info[:exception], create_additions: true)
|
|
193
199
|
rescue
|
|
194
200
|
Log.exception $!
|
|
195
201
|
return Exception.new messages.last
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
class Step
|
|
2
2
|
def save_inputs(inputs_dir)
|
|
3
|
-
if
|
|
4
|
-
#hash = name[clean_name.length..-1]
|
|
5
|
-
#inputs_dir += hash
|
|
3
|
+
if provided_inputs.any?
|
|
6
4
|
Log.medium "Saving job inputs to: #{Log.fingerprint inputs_dir} #{Log.fingerprint provided_inputs}"
|
|
7
5
|
self.task.save_inputs(inputs_dir, provided_inputs)
|
|
8
6
|
else
|
|
@@ -41,7 +41,7 @@ class Step
|
|
|
41
41
|
|
|
42
42
|
def updated?
|
|
43
43
|
return false if self.error? && self.recoverable_error?
|
|
44
|
-
return true if (self.done? || (self.error? && ! self.recoverable_error?)) &&
|
|
44
|
+
return true if (self.done? || (self.error? && ! self.recoverable_error?)) && ENV["SCOUT_UPDATE"].to_s.downcase != 'true'
|
|
45
45
|
newer = newer_dependencies
|
|
46
46
|
cleaned = cleaned_dependencies
|
|
47
47
|
|
data/lib/scout/workflow/step.rb
CHANGED
|
@@ -192,7 +192,13 @@ class Step
|
|
|
192
192
|
|
|
193
193
|
return @result || self.load if done?
|
|
194
194
|
|
|
195
|
-
|
|
195
|
+
begin
|
|
196
|
+
prepare_dependencies
|
|
197
|
+
rescue => e
|
|
198
|
+
exception_encoded = Step.encode_exception e
|
|
199
|
+
merge_info :status => :error, :exception => exception_encoded, :end => Time.now, :backtrace => e.backtrace, :message => "#{e.class}: #{e.message}"
|
|
200
|
+
raise $!
|
|
201
|
+
end
|
|
196
202
|
|
|
197
203
|
begin
|
|
198
204
|
|
|
@@ -242,17 +248,10 @@ class Step
|
|
|
242
248
|
rescue Exception => e
|
|
243
249
|
begin
|
|
244
250
|
begin
|
|
245
|
-
|
|
246
|
-
s = e.concurrent_stream
|
|
247
|
-
e.concurrent_stream = nil
|
|
248
|
-
exception_encoded = Base64.encode64(Marshal.dump(e))
|
|
249
|
-
e.concurrent_stream = s
|
|
250
|
-
else
|
|
251
|
-
exception_encoded = Base64.encode64(Marshal.dump(e))
|
|
252
|
-
end
|
|
251
|
+
exception_encoded = Step.encode_exception e
|
|
253
252
|
merge_info :status => :error, :exception => exception_encoded, :end => Time.now, :backtrace => e.backtrace, :message => "#{e.class}: #{e.message}"
|
|
254
|
-
rescue Exception
|
|
255
|
-
exception_encoded =
|
|
253
|
+
rescue Exception => e
|
|
254
|
+
exception_encoded = Step.encode_exception e
|
|
256
255
|
merge_info :status => :error, :exception => exception_encoded, :end => Time.now, :backtrace => e.backtrace, :message => "#{e.class}: #{e.message}"
|
|
257
256
|
end
|
|
258
257
|
|
data/lib/scout/workflow/task.rb
CHANGED
data/lib/scout/workflow.rb
CHANGED
data/scout-gear.gemspec
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: scout-gear 10.
|
|
5
|
+
# stub: scout-gear 10.12.0 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "scout-gear".freeze
|
|
9
|
-
s.version = "10.
|
|
9
|
+
s.version = "10.12.0".freeze
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib".freeze]
|
|
@@ -161,6 +161,7 @@ Gem::Specification.new do |s|
|
|
|
161
161
|
"scout_commands/kb/show",
|
|
162
162
|
"scout_commands/kb/traverse",
|
|
163
163
|
"scout_commands/log",
|
|
164
|
+
"scout_commands/purge",
|
|
164
165
|
"scout_commands/rbbt",
|
|
165
166
|
"scout_commands/resource/produce",
|
|
166
167
|
"scout_commands/resource/sync",
|
|
@@ -284,7 +285,7 @@ Gem::Specification.new do |s|
|
|
|
284
285
|
]
|
|
285
286
|
s.homepage = "http://github.com/mikisvaz/scout-gear".freeze
|
|
286
287
|
s.licenses = ["MIT".freeze]
|
|
287
|
-
s.rubygems_version = "3.7.
|
|
288
|
+
s.rubygems_version = "3.7.0.dev".freeze
|
|
288
289
|
s.summary = "basic gear for scouts".freeze
|
|
289
290
|
|
|
290
291
|
s.specification_version = 4
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# scout purge – delete files by their last access time.
|
|
4
|
+
#
|
|
5
|
+
# Supports three operations:
|
|
6
|
+
# * --before <specifier> – delete all files accessed before the given
|
|
7
|
+
# timestamp. The specifier can be a relative description such as
|
|
8
|
+
# `last_week`, `3_day` or an absolute ISO date.
|
|
9
|
+
# * --older <file> – delete all files older than the last access time of
|
|
10
|
+
# the reference file.
|
|
11
|
+
# * --save <N> – keep the N most‑recently accessed files; delete the
|
|
12
|
+
# rest. Must be combined with ``--before`` or ``--older``.
|
|
13
|
+
#
|
|
14
|
+
# The command uses the Scout option parser and prints messages using the
|
|
15
|
+
# standard Log facility. It works on the working directory when called
|
|
16
|
+
# directly.
|
|
17
|
+
|
|
18
|
+
require 'scout'
|
|
19
|
+
require 'fileutils'
|
|
20
|
+
require 'time'
|
|
21
|
+
require 'pathname'
|
|
22
|
+
|
|
23
|
+
# $0 handling used by other Scout commands.
|
|
24
|
+
$0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{File.basename(__FILE__)}" if $previous_commands
|
|
25
|
+
|
|
26
|
+
# ---- Options ------------------------------------------------------------
|
|
27
|
+
options = SOPT.setup <<EOF
|
|
28
|
+
|
|
29
|
+
Delete files by last access time.
|
|
30
|
+
|
|
31
|
+
$ #{$0} <directory> [<options>]
|
|
32
|
+
|
|
33
|
+
You need to specify the 'directory', at least one of the two parameters
|
|
34
|
+
'before' or 'older'. I will find all the files under 'directory' that match the
|
|
35
|
+
criteria and delete them, but only if the flag 'delete' is set, otherwise it
|
|
36
|
+
will only list them. It only works with files; directories are ignored so that
|
|
37
|
+
the directory structure is preserved.
|
|
38
|
+
|
|
39
|
+
-b--before* Delete files accessed for the last time before the given time
|
|
40
|
+
-o--older* Delete files whose atime is older than the reference file
|
|
41
|
+
-s--save* Keep the N most recently accessed files, delete the rest
|
|
42
|
+
-d--delete Delete the files
|
|
43
|
+
-h--help Print this help
|
|
44
|
+
EOF
|
|
45
|
+
|
|
46
|
+
if options[:help]
|
|
47
|
+
if defined? scout_usage
|
|
48
|
+
scout_usage
|
|
49
|
+
else
|
|
50
|
+
puts SOPT.doc
|
|
51
|
+
end
|
|
52
|
+
exit 0
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# ---- Validate directory ---------------------------------------------------
|
|
56
|
+
dir = ARGV[0]
|
|
57
|
+
raise MissingParameterException, "'directory' required" if dir.nil?
|
|
58
|
+
|
|
59
|
+
dir = Path.setup dir.dup
|
|
60
|
+
|
|
61
|
+
raise ParameterException, "'directory' does not exist" unless dir.exists?
|
|
62
|
+
|
|
63
|
+
# ---- Helper -------------------------------------------------------------
|
|
64
|
+
# Translate a time specifier into the absolute Time it designates.
#
# Accepted forms:
#   * last_day / last_week / last_month / last_year  - one period before now
#   * <N>_day / <N>_week / <N>_month / <N>_year      - N periods before now
#   * any other <digits><word characters> string     - handed to Misc.timespan
#   * a string starting with YYYY-MM-DD              - parsed with Time.parse
#
# Months are approximated as 30 days and years as 365 days.
#
# spec - String specifier, typically the value of the --before option.
#
# Returns a Time.
# Raises ParameterException when the specifier is not understood.
def time_from_specifier(spec)
  day = 24 * 60 * 60
  # Single lookup table shared by the "last_<period>" and "<N>_<period>" forms,
  # so both forms always agree on how long a period is.
  period_seconds = {
    'day'   => day,
    'week'  => 7 * day,
    'month' => 30 * day,
    'year'  => 365 * day
  }

  case spec
  when /\Alast_(day|week|month|year)\z/
    Time.now - period_seconds.fetch($1)
  when /\A(\d+)_(day|week|month|year)\z/
    Time.now - $1.to_i * period_seconds.fetch($2)
  when /\A\d+\w+\z/
    Time.now - Misc.timespan(spec)
  when /\A\d{4}-\d{2}-\d{2}/
    # Absolute ISO date or date-time, as advertised in the command header.
    begin
      Time.parse(spec)
    rescue ArgumentError
      raise ParameterException, "unable to parse time spec '#{spec}'"
    end
  else
    raise ParameterException, "unable to parse time spec '#{spec}'"
  end
end
|
|
92
|
+
|
|
93
|
+
# ---- Gather files --------------------------------------------------------
|
|
94
|
+
dir = Path.setup(dir.dup)
|
|
95
|
+
dir = dir.find
|
|
96
|
+
Log.info "Purging #{dir}"
|
|
97
|
+
all_files = {}
|
|
98
|
+
# Include dot files, ignore directories
|
|
99
|
+
dir.glob('**/*').each do |p|
|
|
100
|
+
next if File.directory?(p)
|
|
101
|
+
all_files[p] = begin
|
|
102
|
+
File.atime(p)
|
|
103
|
+
rescue
|
|
104
|
+
Log.warn $!.message
|
|
105
|
+
next
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
if all_files.empty?
|
|
110
|
+
puts 'no files found'
|
|
111
|
+
exit 0
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# ---- Resolve options -----------------------------------------------------
|
|
115
|
+
if options[:before] && options[:older]
|
|
116
|
+
Log.warn 'error: --before and --older are mutually exclusive'
|
|
117
|
+
exit 1
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
before_threshold = options[:before] ? time_from_specifier(options[:before]) : nil
|
|
121
|
+
older_path = options[:older]
|
|
122
|
+
keep_count = options[:save] ? options[:save].to_i : nil
|
|
123
|
+
delete = options[:delete]
|
|
124
|
+
reference_at = nil
|
|
125
|
+
if older_path
|
|
126
|
+
older_path = File.expand_path(older_path)
|
|
127
|
+
unless File.exist?(older_path)
|
|
128
|
+
Log.warn "reference file #{older_path.inspect} does not exist"
|
|
129
|
+
exit 1
|
|
130
|
+
end
|
|
131
|
+
reference_at = File.atime(older_path)
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# ---- Decide which files to delete ---------------------------------------
|
|
135
|
+
target_files = if before_threshold
|
|
136
|
+
all_files.select { |_, at| at < before_threshold }.keys
|
|
137
|
+
elsif reference_at
|
|
138
|
+
all_files.select { |_, at| at < reference_at }.keys
|
|
139
|
+
else
|
|
140
|
+
raise ParameterException, 'no criteria to select files'
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Apply --save if provided
|
|
144
|
+
if keep_count && keep_count.positive?
|
|
145
|
+
sorted = all_files.sort_by { |_, at| -at.to_f }
|
|
146
|
+
keep = sorted.take(keep_count).map(&:first)
|
|
147
|
+
target_files -= keep
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
if target_files.empty?
|
|
151
|
+
Log.warn 'no files matched deletion criteria'
|
|
152
|
+
exit 0
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
if delete
|
|
156
|
+
target_files.each do |file|
|
|
157
|
+
begin
|
|
158
|
+
Log.info "Delete #{file} atime: #{File.atime(file)}"
|
|
159
|
+
FileUtils.rm(file)
|
|
160
|
+
rescue => e
|
|
161
|
+
Log.warn "failed to delete #{file}: #{e.message}"
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
else
|
|
165
|
+
target_files.each do |file|
|
|
166
|
+
Log.debug "atime: #{File.atime(file)} #{file}"
|
|
167
|
+
puts file
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
# ---- Perform or preview -------------------------------------------------
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: scout-gear
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 10.
|
|
4
|
+
version: 10.12.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
@@ -244,6 +244,7 @@ files:
|
|
|
244
244
|
- scout_commands/kb/show
|
|
245
245
|
- scout_commands/kb/traverse
|
|
246
246
|
- scout_commands/log
|
|
247
|
+
- scout_commands/purge
|
|
247
248
|
- scout_commands/rbbt
|
|
248
249
|
- scout_commands/resource/produce
|
|
249
250
|
- scout_commands/resource/sync
|
|
@@ -382,7 +383,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
382
383
|
- !ruby/object:Gem::Version
|
|
383
384
|
version: '0'
|
|
384
385
|
requirements: []
|
|
385
|
-
rubygems_version: 3.7.
|
|
386
|
+
rubygems_version: 3.7.0.dev
|
|
386
387
|
specification_version: 4
|
|
387
388
|
summary: basic gear for scouts
|
|
388
389
|
test_files: []
|