okstra 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.kr.md +15 -0
- package/README.md +15 -0
- package/docs/kr/architecture.md +2 -6
- package/docs/kr/cli.md +40 -6
- package/docs/kr/performance-improvement-plan-v2.md +23 -0
- package/docs/kr/performance-improvement-plan.md +22 -0
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/bin/okstra.sh +0 -1
- package/runtime/prompts/profiles/_common-contract.md +25 -1
- package/runtime/prompts/profiles/error-analysis.md +12 -0
- package/runtime/prompts/profiles/implementation-planning.md +20 -0
- package/runtime/prompts/profiles/requirements-discovery.md +20 -0
- package/runtime/python/lib/okstra/cli.sh +1 -7
- package/runtime/python/lib/okstra/globals.sh +0 -1
- package/runtime/python/lib/okstra/usage.sh +1 -4
- package/runtime/python/okstra_ctl/render.py +3 -0
- package/runtime/python/okstra_ctl/run.py +0 -6
- package/runtime/python/okstra_ctl/run_context.py +1 -1
- package/runtime/python/okstra_ctl/wizard.py +25 -2
- package/runtime/python/okstra_token_usage/blocks.py +5 -1
- package/runtime/python/okstra_token_usage/claude.py +16 -1
- package/runtime/python/okstra_token_usage/collect.py +17 -3
- package/runtime/python/okstra_token_usage/pricing.py +159 -24
- package/runtime/skills/okstra-brief/SKILL.md +532 -65
- package/runtime/skills/okstra-context-loader/SKILL.md +25 -11
- package/runtime/skills/okstra-convergence/SKILL.md +37 -13
- package/runtime/skills/okstra-history/SKILL.md +68 -37
- package/runtime/skills/okstra-logs/SKILL.md +26 -4
- package/runtime/skills/okstra-report-finder/SKILL.md +49 -22
- package/runtime/skills/okstra-report-writer/SKILL.md +59 -64
- package/runtime/skills/okstra-run/SKILL.md +35 -34
- package/runtime/skills/okstra-schedule/SKILL.md +51 -20
- package/runtime/skills/okstra-setup/SKILL.md +31 -12
- package/runtime/skills/okstra-status/SKILL.md +20 -8
- package/runtime/skills/okstra-team-contract/SKILL.md +27 -15
- package/runtime/skills/okstra-time-summary/SKILL.md +53 -16
- package/runtime/templates/reports/settings.template.json +7 -4
- package/runtime/validators/lib/fixtures.sh +10 -2
- package/runtime/validators/lib/validate-assets.sh +50 -24
- package/runtime/validators/validate-brief.py +385 -0
- package/runtime/validators/validate-brief.sh +35 -0
- package/runtime/validators/validate-workflow.sh +7 -33
|
@@ -40,7 +40,7 @@ parse and reuse it instead of re-resolving in the steps below.
|
|
|
40
40
|
|
|
41
41
|
## Step 1: Overall Project Status
|
|
42
42
|
|
|
43
|
-
To view the overall project status,
|
|
43
|
+
To view the overall project status, read `.project-docs/okstra/discovery/task-catalog.json`. The catalog is the authoritative source — every field listed below (including `workStatus`, `workStatusUpdatedAt`, `workStatusNote`) is projected directly from each `task-manifest.json` by `scripts/okstra_ctl/render.py :: render_task_catalog_discovery`. Do NOT re-open individual manifests for the overview.
|
|
44
44
|
|
|
45
45
|
Extract the following fields from each task.
|
|
46
46
|
|
|
@@ -68,13 +68,19 @@ Sort by:
|
|
|
68
68
|
|
|
69
69
|
Output format:
|
|
70
70
|
|
|
71
|
+
The overview table is intentionally narrow so it renders cleanly in a terminal. Only six columns are shown; for any task that needs a closer look (phase state, routing, approval gate, last run status, resume path, etc.) tell the user to run `okstra status <task-key>` for the detail view in Step 2.
|
|
72
|
+
|
|
73
|
+
If `awaitingApproval` is true OR `routingStatus == "pending"`, append a `*` to the `Next` cell as a visual marker and explain the marker once below the table.
|
|
74
|
+
|
|
71
75
|
```markdown
|
|
72
76
|
## okstra Status — <project-id>
|
|
73
77
|
|
|
74
|
-
| # | Task Key | Category | Phase |
|
|
75
|
-
|
|
76
|
-
| 1 | proj:group:id | bugfix | error-analysis |
|
|
77
|
-
| 2 | proj:group:id2 | feature | requirements-discovery |
|
|
78
|
+
| # | Task Key | Category | Phase | workStatus | Next |
|
|
79
|
+
|---|----------|----------|-------|------------|------|
|
|
80
|
+
| 1 | proj:group:id | bugfix | error-analysis | in-progress | implementation-planning |
|
|
81
|
+
| 2 | proj:group:id2 | feature | requirements-discovery | done | pending-routing-decision* |
|
|
82
|
+
|
|
83
|
+
`*` = awaiting user approval or pending routing decision. Run `okstra status <task-key>` for details.
|
|
78
84
|
```
|
|
79
85
|
|
|
80
86
|
## Step 2: Specific Task Status
|
|
@@ -153,10 +159,12 @@ The status response always includes one of the following options:
|
|
|
153
159
|
2. **Restart current phase**
|
|
154
160
|
- Indicates whether the task can be re-run with the same `task-key` and the current `taskType`.
|
|
155
161
|
3. **Start next phase**
|
|
156
|
-
- If `workflow.nextRecommendedPhase` is one of `error-analysis`, `implementation-planning`, `final-verification`, or `release-handoff`, that phase is proposed as the next candidate for the Okstra run.
|
|
162
|
+
- If `workflow.nextRecommendedPhase` is one of `error-analysis`, `implementation-planning`, `implementation`, `final-verification`, or `release-handoff`, that phase is proposed as the next candidate for the Okstra run.
|
|
157
163
|
- If `nextRecommendedPhase` is `pending-release-handoff`, the prior `final-verification` run completed but its verdict still has to be inspected before entering `release-handoff` — surface this as a verdict-gated next step and present `release-handoff` as a candidate only when the cited verdict is `accepted`.
|
|
158
164
|
4. **Need more information**
|
|
159
165
|
- If `nextRecommendedPhase` is `pending-routing-decision` or `routingStatus` is `pending`, this indicates that additional information is required.
|
|
166
|
+
5. **Task complete (terminal)**
|
|
167
|
+
- If `nextRecommendedPhase` is `done-or-follow-up`, the task lifecycle has reached its terminal signal. This is **not** a "next phase" — do not propose a new Okstra run. Surface the latest report and ask the user whether any follow-up task should be opened separately.
|
|
160
168
|
|
|
161
169
|
## Step 4: Update workStatus
|
|
162
170
|
|
|
@@ -195,7 +203,7 @@ Accepted `<status>` values: `todo`, `in-progress`, `blocked`, `done`.
|
|
|
195
203
|
Stop without modifying any file.
|
|
196
204
|
- If no match → output `<TASK-ID>를 찾을 수 없습니다.` and stop.
|
|
197
205
|
|
|
198
|
-
3. **Open the matching `task-manifest.json`** at `.project-docs/okstra/tasks/<task-group-segment>/<task-id-segment>/task-manifest.json`.
|
|
206
|
+
3. **Open the matching `task-manifest.json`** at `.project-docs/okstra/tasks/<task-group-segment>/<task-id-segment>/task-manifest.json`. Assemble the path from the catalog entry's `taskGroupPathSegment` and `taskIdPathSegment` fields (these are the filesystem-safe segments emitted by the renderer) — NOT from the raw user-supplied `<task-group>` / `<task-id>` strings, which may differ in case or contain characters that were normalized when the manifest was created. As a defensive shortcut, prefer the `taskManifestPath` field directly when present.
|
|
199
207
|
|
|
200
208
|
4. **Update fields at the manifest root**:
|
|
201
209
|
- `workStatus` ← new status value
|
|
@@ -224,7 +232,7 @@ If `workStatus` is missing or empty in any manifest, infer the display value fro
|
|
|
224
232
|
|
|
225
233
|
| Manifest state | Inferred display |
|
|
226
234
|
|---|---|
|
|
227
|
-
| `currentStatus == "completed"` AND `workflow.nextRecommendedPhase
|
|
235
|
+
| `currentStatus == "completed"` AND `workflow.nextRecommendedPhase == "done-or-follow-up"` | `done` (inferred) |
|
|
228
236
|
| `currentStatus == "completed"` AND `workflow.currentPhaseState == "completed"` | `phase-done` (inferred) |
|
|
229
237
|
| `currentStatus == "contract-violated"` OR `workflow.currentPhaseState == "blocked"` | `blocked` (inferred) |
|
|
230
238
|
| anything else | `in-progress` (default) |
|
|
@@ -244,3 +252,7 @@ This skill updates `task-manifest.json` only. `discovery/task-catalog.json` may
|
|
|
244
252
|
- If there is no recent report, display `--`.
|
|
245
253
|
- If a specific task does not exist, explicitly state that it cannot be found based on `task-catalog.json`.
|
|
246
254
|
- If `awaitingApproval` is true, clearly indicate that the task is awaiting user approval.
|
|
255
|
+
|
|
256
|
+
## Out-of-Scope Backlog
|
|
257
|
+
|
|
258
|
+
- **Step 0 boilerplate duplication.** The `ensure-installed` + `paths --shell` + `check-project --json` preamble is byte-identical across every user-facing okstra skill. The Claude Code skill framework has no include/snippet mechanism today, so each skill duplicates the block. A future change should either (a) extract the preamble into a single `okstra preflight` subcommand the skill can call in one line, or (b) ship the block as a shared SKILL fragment if the framework gains include support. Not actionable inside this skill alone.
|
|
@@ -11,6 +11,8 @@ user-invocable: false
|
|
|
11
11
|
- When verifying worker team composition and operational rules
|
|
12
12
|
- When applying model assignment rules
|
|
13
13
|
|
|
14
|
+
**Not applicable to `release-handoff`** — that profile is lead-only and intentionally has no `Required workers:` block (see `prompts/profiles/release-handoff.md`). The worker-dispatch contract in this document does not engage during `release-handoff` runs.
|
|
15
|
+
|
|
14
16
|
## Team Structure
|
|
15
17
|
|
|
16
18
|
okstra tasks are always operated using the `Claude lead` + required worker team structure.
|
|
@@ -19,24 +21,27 @@ okstra tasks are always operated using the `Claude lead` + required worker team
|
|
|
19
21
|
|
|
20
22
|
**All analysis workers (Claude / Codex / Gemini) share an identical core responsibility.** Specialization is additive — it lives in optional Section 6 of the worker output, NOT in differentiated core questions. This is intentional: cross-verification only converges if all three workers are answering the same questions against the same brief. Disjoint per-worker scopes produce union-of-perspectives, not triangulation.
|
|
21
23
|
|
|
22
|
-
| Role | Core responsibility | Specialization lens (Section 6 only) |
|
|
23
|
-
|
|
24
|
-
| Claude lead | orchestration + convergence supervision + final-report review/approval | — |
|
|
25
|
-
| Claude worker | Answer every brief question across feasibility, requirement interpretation, hidden assumptions, and alternatives — with file:line evidence | broad reasoning depth, hidden assumptions, execution-risk surfacing |
|
|
26
|
-
| Codex worker | Same core responsibility as Claude worker — identical questions, identical sections 1–5 | implementation realism, code-path implications, edge cases, technical trade-offs |
|
|
27
|
-
| Gemini worker | Same core responsibility as Claude worker — identical questions, identical sections 1–5 | requirement interpretation, consistency, safety, alternative viewpoints |
|
|
28
|
-
| Report writer worker | **Authors** the final-report file in Phase 6. NOT an analysis worker. | — |
|
|
24
|
+
| Role | Core responsibility | Specialization lens (Section 6 only) | subagent_type | Notes |
|
|
25
|
+
|------|------|------|---------------|------|
|
|
26
|
+
| Claude lead | orchestration + convergence supervision + final-report review/approval | — | -- | Does NOT author the final-report file when `Report writer worker` is in the roster |
|
|
27
|
+
| Claude worker | Answer every brief question across feasibility, requirement interpretation, hidden assumptions, and alternatives — with file:line evidence | broad reasoning depth, hidden assumptions, execution-risk surfacing | claude-worker | `agents/claude-worker.md` |
|
|
28
|
+
| Codex worker | Same core responsibility as Claude worker — identical questions, identical sections 1–5 | implementation realism, code-path implications, edge cases, technical trade-offs | codex-worker | `agents/codex-worker.md` |
|
|
29
|
+
| Gemini worker | Same core responsibility as Claude worker — identical questions, identical sections 1–5 | requirement interpretation, consistency, safety, alternative viewpoints | gemini-worker | `agents/gemini-worker.md` |
|
|
30
|
+
| Report writer worker | **Authors** the final-report file in Phase 6. NOT an analysis worker. | — | report-writer-worker | `agents/report-writer-worker.md`. Excluded from Phase 4/5 and convergence |
|
|
31
|
+
|
|
32
|
+
**Model assignment has no default.** The model for every role comes from `resultContract.requiredWorkerRoles[*].modelExecutionValue` in `task-manifest.json` (and lead model metadata). There is no per-role hard-coded fallback — see "Model Assignment Rules" below.
|
|
29
33
|
|
|
30
34
|
**Dispatch-prompt invariant (BLOCKING).** Lead's dispatch prompt body for Claude / Codex / Gemini workers MUST be byte-identical except for the role label and any wrapper-specific path headers (e.g. `**Worktree:**`, `**Errors sidecar path:**`). Lead MUST NOT bias the brief by inserting per-worker emphasis sentences ("you focus on X") into the body. Bias-by-prompt reproduces the historical failure mode where Claude commented only on assumptions, Codex only on code paths, and Gemini only on requirements — leaving convergence with nothing to converge on.
|
|
31
35
|
|
|
32
36
|
### Model Assignment Rules
|
|
33
37
|
|
|
34
|
-
1.
|
|
35
|
-
2. If
|
|
36
|
-
3. If `modelExecutionValue` differs from `model`, use `modelExecutionValue` during execution.
|
|
38
|
+
1. `resultContract.requiredWorkerRoles` in `task-manifest.json` (and the lead model metadata) is the canonical source. There is no role-level fallback — a missing assignment is a manifest defect, not a license to invent one.
|
|
39
|
+
2. If `modelExecutionValue` differs from `model`, use `modelExecutionValue` during execution.
|
|
37
40
|
|
|
38
41
|
### Dynamic Worker Role Determination
|
|
39
42
|
|
|
43
|
+
**Roster canonical-source rule.** The profile's `Required workers:` block (in `prompts/profiles/<phase>.md`) is the **static roster definition** — the set of roles legal for that phase. `resultContract.requiredWorkerRoles` in `task-manifest.json` is the **per-run instance** — the actual roster materialized for this run, after recommendation, user selection, and any post-recommendation overrides. **On conflict, the task-manifest wins** — it is what the run was actually launched with, and what lead must dispatch against.
|
|
44
|
+
|
|
40
45
|
Only workers selected from `recommendedWorkers` in `task-manifest.json` and `resultContract.requiredWorkerRoles` become required roles.
|
|
41
46
|
|
|
42
47
|
- If one worker is selected: "`<role>` is the required worker role for this run."
|
|
@@ -251,7 +256,11 @@ The same frontmatter contract applies to the `Report writer worker`'s final-repo
|
|
|
251
256
|
|
|
252
257
|
A successful worker result must include the following sections in this exact order, beneath the frontmatter block:
|
|
253
258
|
|
|
254
|
-
0. **Reading Confirmation** — one short line per input file
|
|
259
|
+
0. **Reading Confirmation** — one short line per input file stating that the worker read it end-to-end. Each line takes the form `- Read <file-name> end-to-end (<line-count> lines).`. The enumerated files are audience-scoped — they MUST match the recipient's row in the "Audience-scoped enumeration" table above:
|
|
260
|
+
- **Claude / Codex / Gemini analysis workers**: `task-brief.md`, `analysis-profile.md`, `analysis-material.md` (if present), `reference-expectations.md`, `clarification-response.md` (if a carry-in was provided). Analysis workers MUST NOT include `final-report-template.md` — it is not in their `[Required reading]` block.
|
|
261
|
+
- **Report writer worker (Phase 6)**: all of the above **plus** `final-report-template.md`.
|
|
262
|
+
|
|
263
|
+
If a file was skipped or only partially read, the worker MUST NOT produce sections 1–5; instead it records a `tool-failure` in the errors sidecar and stops. This section exists specifically to counteract the common failure mode where workers skim long inputs because they share structure with the file the run will eventually write into.
|
|
255
264
|
1. Findings
|
|
256
265
|
2. Missing Information or Assumptions
|
|
257
266
|
3. Safe or Reasonable Areas
|
|
@@ -341,7 +350,7 @@ without proceeding — this is the contractual replacement for the previous
|
|
|
341
350
|
empty run-level error logs in production.
|
|
342
351
|
|
|
343
352
|
- `cli-failure` events are recorded by the wrapper subagent itself (Codex / Gemini), but **directly to the run-level error log** via `okstra-error-log.py append-observed --error-type cli-failure ...` — NOT via the sidecar. The sidecar is an in-process tool-failure channel only.
|
|
344
|
-
- **Wrapper invocation arity.** Both `okstra-codex-exec.sh` and `okstra-gemini-exec.sh` accept four positional arguments
|
|
353
|
+
- **Wrapper invocation arity.** Both `okstra-codex-exec.sh` and `okstra-gemini-exec.sh` accept four required positional arguments plus an optional fifth `<role>`: `<project-root> <model> <prompt-path> <worktree-path> [<role>]`. The fourth (worktree) argument is **mandatory for implementation phase** and optional otherwise. For codex it becomes `--add-dir <worktree>` (sandbox write access); for gemini it is appended to `--include-directories`. Omitting it during implementation causes the codex sandbox to reject every Edit/Write targeting the worktree with EPERM. Workers extract the path from the `**Worktree:**` / `EXECUTOR_WORKTREE_PATH` / `cwd for every mutating command:` line in the lead prompt. The optional fifth `<role>` is the trace-pane label suffix (e.g. `codex-<role>-trace`); always pass the literal string `worker` so the dispatch is self-describing (the wrapper defaults to `worker` if omitted).
|
|
345
354
|
- **Background dispatch + polling contract (Codex / Gemini wrappers).** Both wrapper subagents MUST dispatch `okstra-codex-exec.sh` / `okstra-gemini-exec.sh` via `Bash(run_in_background: true)` and poll with `BashOutput(bash_id)` until the shell reports `status == "completed"`, capped at 30 minutes (1800s) of wall-clock elapsed time. `BashOutput` itself is the wait primitive — call it back-to-back; do NOT insert a standalone `sleep` between polls. The Claude Code harness blocks `sleep` calls of 5 seconds or longer as a circumvention vector and explicitly forbids chaining shorter sleeps inside until-loops to work around the block. Workers that hit the contract bug must NOT self-recover with `until ...; do sleep 2; done` wrappers — that path violates the harness anti-circumvention rule, even though it superficially "works". The legacy "single foreground `Bash` with 120000ms timeout" rule, and the subsequent "60-second cadence with `sleep 60` between polls" rule, are both retired. The current rule applies in **every phase** (analysis runs typically complete in 1–2 `BashOutput` calls, so there is no regression for short jobs). Recording responsibilities:
|
|
346
355
|
- Successful completion: return the wrapper's accumulated stdout from the final `BashOutput`. No log entry.
|
|
347
356
|
- Non-zero `exit_code` reported by `BashOutput`: record a `cli-failure` to the run-level error log with the real `exit_code` and observed `duration-ms`.
|
|
@@ -356,7 +365,7 @@ empty run-level error logs in production.
|
|
|
356
365
|
2. Re-verification workers follow a constrained response format (verdict + brief explanation).
|
|
357
366
|
3. Workers cannot vote on their own findings (only verify other workers’ work).
|
|
358
367
|
4. The `report writer worker` does not participate in re-verification voting. It is responsible only for generating the final report.
|
|
359
|
-
5.
|
|
368
|
+
5. Division of labor: the Claude lead performs **finding-to-finding matching** (deciding which worker-A finding maps to which worker-B finding for cross-review) and mediates the round protocol; **workers cast the AGREE / DISAGREE / SUPPLEMENT votes** that determine consensus. The lead does NOT vote on substance and does NOT collapse worker disagreements by fiat — disagreements flow into the `contested` / `partial-consensus` classifications defined in `skills/okstra-convergence/SKILL.md`.
|
|
360
369
|
6. Batch processing is performed with one spawn per worker per round (not one spawn per finding).
|
|
361
370
|
7. These rules do not apply if Convergence is disabled.
|
|
362
371
|
|
|
@@ -376,10 +385,13 @@ Every worker result file under `worker-results/` must begin with a standardized
|
|
|
376
385
|
# <Role> Analysis — <task-key>
|
|
377
386
|
|
|
378
387
|
**Task:** <task-type>
|
|
388
|
+
**Target:** <path or scope> <!-- OPTIONAL: include when the run is scoped to a specific file/module -->
|
|
379
389
|
**Date:** <YYYY-MM-DD>
|
|
380
390
|
**Model:** <Role>, <AI model>
|
|
381
391
|
```
|
|
382
392
|
|
|
393
|
+
The `Target:` line is optional. Include it when the run is scoped to a specific path or module; omit it when the run spans the whole project. When included, place it between `Task:` and `Date:` as shown.
|
|
394
|
+
|
|
383
395
|
Examples:
|
|
384
396
|
|
|
385
397
|
```markdown
|
|
@@ -422,12 +434,12 @@ Token usage is collected from agent session transcripts after the run, NOT from
|
|
|
422
434
|
At the **start of Phase 7** (persistence), run the helper script with the path to this run's `team-state.json`:
|
|
423
435
|
|
|
424
436
|
```bash
|
|
425
|
-
python3
|
|
437
|
+
python3 "$HOME/.okstra/lib/python/okstra-token-usage.py" \
|
|
426
438
|
<runDirectoryPath>/state/team-state-<task-type>-<seq>.json \
|
|
427
439
|
--write --summary
|
|
428
440
|
```
|
|
429
441
|
|
|
430
|
-
|
|
442
|
+
The script is installed at `$HOME/.okstra/lib/python/okstra-token-usage.py` by `okstra install`. The previous repo-relative path (`scripts/okstra-token-usage.py`) only exists in a working clone of the okstra repo and is not appropriate for end-user-deployed runs.
|
|
431
443
|
|
|
432
444
|
The script reads:
|
|
433
445
|
- `~/.claude/projects/<encoded-cwd>/<sessionId>.jsonl` for the lead and every Claude-side worker (Claude worker, Report writer worker, plus the Claude wrappers around Codex/Gemini workers). Sessions are discovered by `teamName: okstra-<task-id>`, lead is identified by `lead.sessionId`, and other workers are identified by `agentName` (e.g. `claude-worker`, `codex-worker`, `gemini-worker`, `report-writer`).
|
|
@@ -6,7 +6,7 @@ user-invocable: false
|
|
|
6
6
|
|
|
7
7
|
# OKSTRA Time Summary
|
|
8
8
|
|
|
9
|
-
Aggregate elapsed work time for a given task, grouped by **task type** and broken down by **worker** (lead,
|
|
9
|
+
Aggregate elapsed work time for a given task, grouped by **task type** and broken down by **worker** (lead, claude, codex, gemini, report-writer).
|
|
10
10
|
|
|
11
11
|
## When to Use
|
|
12
12
|
|
|
@@ -19,7 +19,7 @@ Aggregate elapsed work time for a given task, grouped by **task type** and broke
|
|
|
19
19
|
Two sources, both already collected by `okstra`:
|
|
20
20
|
|
|
21
21
|
1. `.project-docs/okstra/tasks/<task-group>/<task-id>/history/timeline.json`
|
|
22
|
-
— lists every run with `runTimestamp`, `taskType`, `status`, `teamStatePath`.
|
|
22
|
+
— lists every run with `runTimestamp`, `taskType`, `status`, `teamStatePath`, and `taskRootPath`. `taskRootPath` is the task root relative to the project root; `teamStatePath` may be either project-root-relative or task-root-relative depending on which version of `render.py` wrote the manifest.
|
|
23
23
|
2. Each run's `.../runs/<task-type>/state/team-state-<suffix>.json`
|
|
24
24
|
— populated by `scripts/okstra-token-usage.py` at Phase 7. Contains:
|
|
25
25
|
- `leadUsage.{startedAt, endedAt, durationMs}`
|
|
@@ -64,12 +64,20 @@ If `task-catalog.json` is missing, respond: "No okstra history found. Run `scrip
|
|
|
64
64
|
|
|
65
65
|
For each entry in `timeline.json`'s `runs` array:
|
|
66
66
|
|
|
67
|
-
1.
|
|
67
|
+
1. Resolve the `team-state` file using a two-step lookup:
|
|
68
|
+
a. First try `<projectRoot>/<teamStatePath>`.
|
|
69
|
+
b. If that file does not exist, fall back to `<projectRoot>/<taskRootPath>/<teamStatePath>` (the manifest's `taskRootPath` field is the task-root relative to project root; `teamStatePath` written by `render.py` is task-root-relative in many runs).
|
|
70
|
+
Either path satisfies the lookup. If neither resolves to an existing file, treat the run as `unavailable`.
|
|
68
71
|
2. Extract:
|
|
69
72
|
- `taskType` from the timeline entry (authoritative).
|
|
70
73
|
- `leadUsage.durationMs` and `leadUsage.{startedAt,endedAt}`.
|
|
71
74
|
- For each `worker` in `workers[]`: `workerId`, `agent`, `usage.durationMs`.
|
|
72
|
-
|
|
75
|
+
Read defensively. `usage` (and `leadUsage`) may be:
|
|
76
|
+
- a normal `usage_block` with `durationMs >= 0`,
|
|
77
|
+
- a `na_block` with `{"source": "unavailable", "durationMs": 0, "note": ...}` when Phase 7 collection failed,
|
|
78
|
+
- missing entirely (older team-state files), or JSON `null` (`None` after parsing).
|
|
79
|
+
Always normalize via `(block or {}).get("durationMs", 0) or 0`, and treat a `source == "unavailable"` block as zero contribution.
|
|
80
|
+
3. If the team-state file is missing, or every `durationMs` for the run is `0`/absent (i.e. `leadUsage` and every `workers[].usage` are zero or unavailable), record the run under `unavailable` with its `runTimestamp` and `taskType`.
|
|
73
81
|
|
|
74
82
|
## Step 3: Aggregate
|
|
75
83
|
|
|
@@ -82,20 +90,50 @@ For each distinct `taskType` across runs:
|
|
|
82
90
|
| Column | Computation |
|
|
83
91
|
|--------|-------------|
|
|
84
92
|
| `Runs` | count of runs of that task type that contributed any duration |
|
|
85
|
-
| `
|
|
93
|
+
| `CPU sum` | sum of (lead + all workers) across those runs — see note below |
|
|
86
94
|
| `Lead` | sum of `leadUsage.durationMs` |
|
|
87
95
|
| `Workers` | sum of all `workers[].usage.durationMs` |
|
|
88
96
|
|
|
89
97
|
Add a final `Grand total` row.
|
|
90
98
|
|
|
99
|
+
**Note on `CPU sum` vs wall-clock**: workers run as children of the lead session, so the lead's `durationMs` window OVERLAPS its workers' windows. `CPU sum = Lead + Workers` is therefore an additive CPU-style sum, not the wall-clock elapsed time the user actually waited.
|
|
100
|
+
|
|
101
|
+
Worked example for one run with three concurrent workers:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
lead [================================] durationMs = 1800000 (30:00)
|
|
105
|
+
claude [============] durationMs = 720000 (12:00)
|
|
106
|
+
codex [==============] durationMs = 840000 (14:00)
|
|
107
|
+
gemini [========] durationMs = 480000 (08:00)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
- `CPU sum` for the run = `1800000 + 720000 + 840000 + 480000` = `3840000` (`01:04:00`)
|
|
111
|
+
- Wall-clock for the run = `max(endedAt) − min(startedAt)` ≈ `30:00`
|
|
112
|
+
|
|
113
|
+
Always report `CPU sum` in the by-task-type table. If the user explicitly asks for wall-clock, compute it per run as `max(leadUsage.endedAt, max(workers[].usage.endedAt)) − min(leadUsage.startedAt, min(workers[].usage.startedAt))` and surface it separately — never silently substitute it for `CPU sum`.
|
|
114
|
+
|
|
91
115
|
### B. Per worker breakdown (per task type)
|
|
92
116
|
|
|
93
|
-
For each task type, list one row per `workerId` actually present, plus `lead
|
|
117
|
+
For each task type, list one row per `workerId` actually present, plus `lead`. Aggregate `durationMs` across all runs of that task type.
|
|
94
118
|
|
|
95
119
|
| Worker | Runs | Total | Avg/run |
|
|
96
120
|
|--------|------|-------|---------|
|
|
97
121
|
|
|
98
|
-
|
|
122
|
+
- `Runs` denominator = number of runs of this task type in which this worker recorded a **nonzero** `durationMs`. A run where the worker's block was `na_block`, missing, or `0` does NOT count.
|
|
123
|
+
- If `Runs == 0` for a worker, **omit the row entirely** rather than dividing by zero.
|
|
124
|
+
- `Avg/run = Total / Runs` (integer ms, then format to `HH:MM:SS`).
|
|
125
|
+
|
|
126
|
+
Use the `workerId` from team-state. The valid worker enum is `lead, claude, codex, gemini, report-writer`.
|
|
127
|
+
|
|
128
|
+
Display rule for `workerId` vs `agent`:
|
|
129
|
+
- If every run of this task type used `agent == workerId` for this row, display the bare `workerId` (e.g. `claude`).
|
|
130
|
+
- If `agent` differs from `workerId` (e.g. a `claude` worker slot ran with `agent == "sonnet-eval"`), display `workerId (agent)` — and if multiple distinct agents were used across runs, comma-join them: `claude (sonnet-eval, opus-eval)`.
|
|
131
|
+
|
|
132
|
+
Never write `claude (claude)` — the parenthesized agent is shown only when it adds information.
|
|
133
|
+
|
|
134
|
+
### Timestamp parsing
|
|
135
|
+
|
|
136
|
+
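When you need `startedAt` / `endedAt` (e.g. for wall-clock or chronological sort within a task type), normalize the ISO-8601 string before comparing: replace a trailing `Z` with `+00:00`, accept explicit offsets as-is, and parse via `datetime.fromisoformat(s.replace("Z", "+00:00"))`. Strings without an offset parse as naive `datetime` objects; treat them as UTC by attaching the zone explicitly (`dt.replace(tzinfo=timezone.utc)` when `dt.tzinfo is None`), because Python raises `TypeError` when a naive and an aware `datetime` are compared. Mixed-form comparisons must be done as `datetime` objects, never as raw strings.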
|
|
99
137
|
|
|
100
138
|
## Step 4: Format output
|
|
101
139
|
|
|
@@ -110,19 +148,20 @@ Use the `workerId` from team-state (e.g. `claude`, `codex`, `gemini`, `report-wr
|
|
|
110
148
|
|
|
111
149
|
### By task type
|
|
112
150
|
|
|
113
|
-
| Task type | Runs |
|
|
114
|
-
|
|
115
|
-
| requirements-discovery | 2 | 00:
|
|
116
|
-
| error-analysis | 1 | 00:18:45 | 00:08:11 |
|
|
117
|
-
| implementation | 3 | 02:11:09 | 00:45:30 |
|
|
118
|
-
| **Grand total** | 6 | **03:
|
|
151
|
+
| Task type | Runs | CPU sum | Lead | Workers |
|
|
152
|
+
|------------------------|------|-----------|----------|----------|
|
|
153
|
+
| requirements-discovery | 2 | 00:33:12 | 00:12:08 | 00:21:04 |
|
|
154
|
+
| error-analysis | 1 | 00:18:45 | 00:08:11 | 00:10:34 |
|
|
155
|
+
| implementation | 3 | 02:11:09 | 00:45:30 | 01:25:39 |
|
|
156
|
+
| **Grand total** | 6 | **03:03:06** | 01:05:49 | 01:57:17 |
|
|
157
|
+
|
|
158
|
+
`CPU sum` adds the lead window to each worker window even though they overlap; it is not a wall-clock total.
|
|
119
159
|
|
|
120
160
|
### Per worker — requirements-discovery
|
|
121
161
|
|
|
122
162
|
| Worker | Runs | Total | Avg/run |
|
|
123
163
|
|----------------|------|----------|----------|
|
|
124
164
|
| lead | 2 | 00:12:08 | 00:06:04 |
|
|
125
|
-
| intake | 1 | 00:01:00 | 00:01:00 |
|
|
126
165
|
| claude | 2 | 00:09:12 | 00:04:36 |
|
|
127
166
|
| codex | 2 | 00:07:40 | 00:03:50 |
|
|
128
167
|
| gemini | 2 | 00:03:12 | 00:01:36 |
|
|
@@ -134,8 +173,6 @@ Use the `workerId` from team-state (e.g. `claude`, `codex`, `gemini`, `report-wr
|
|
|
134
173
|
> Unavailable: 1 run (implementation / 2026-04-30_03-03-48) — team-state has no durationMs (Phase 7 not reached)
|
|
135
174
|
```
|
|
136
175
|
|
|
137
|
-
If the `Intake` column is all zero across every task type, omit that column entirely.
|
|
138
|
-
|
|
139
176
|
## Output Rules
|
|
140
177
|
|
|
141
178
|
- Always render durations as `HH:MM:SS`; never raw milliseconds.
|
|
@@ -131,14 +131,14 @@
|
|
|
131
131
|
"Bash(codex exec:*)",
|
|
132
132
|
"Bash(okstra)",
|
|
133
133
|
"Bash(okstra:*)",
|
|
134
|
+
"Bash(npx okstra@latest:*)",
|
|
135
|
+
"Bash(npx -y okstra@latest:*)",
|
|
134
136
|
"Bash($HOME/.okstra/bin/:*)",
|
|
137
|
+
"Bash(STATE_FILE=:*)",
|
|
135
138
|
|
|
136
139
|
"Bash(gemini)",
|
|
137
140
|
"Bash(gemini:*)",
|
|
138
141
|
|
|
139
|
-
"Bash($HOME/.okstra/bin/okstra-trace-cleanup.sh)",
|
|
140
|
-
"Bash($HOME/.okstra/bin/okstra-trace-cleanup.sh:*)",
|
|
141
|
-
|
|
142
142
|
"Bash(claude)",
|
|
143
143
|
"Bash(claude:*)",
|
|
144
144
|
|
|
@@ -150,7 +150,10 @@
|
|
|
150
150
|
"SessionEnd": [
|
|
151
151
|
{
|
|
152
152
|
"hooks": [
|
|
153
|
-
{
|
|
153
|
+
{
|
|
154
|
+
"type": "command",
|
|
155
|
+
"command": "$HOME/.okstra/bin/okstra-trace-cleanup.sh"
|
|
156
|
+
}
|
|
154
157
|
]
|
|
155
158
|
}
|
|
156
159
|
]
|
|
@@ -51,8 +51,7 @@ write_validation_brief() {
|
|
|
51
51
|
|
|
52
52
|
- Config file: \`.claude/settings.json\`
|
|
53
53
|
- Expected values:
|
|
54
|
-
-
|
|
55
|
-
- refresh should occur only when \`--refresh-assets\` is used
|
|
54
|
+
- installed okstra Claude assets must remain discoverable under \`~/.claude/skills/\` and \`~/.claude/agents/\` (managed by \`okstra install\`)
|
|
56
55
|
- Config file: \`.project-docs/okstra/discovery/latest-task.json\`
|
|
57
56
|
- Expected values:
|
|
58
57
|
- latest prepared task pointer must include the current task key
|
|
@@ -257,6 +256,15 @@ if isinstance(lead, dict):
|
|
|
257
256
|
lead["status"] = "completed"
|
|
258
257
|
team_state["workflowState"] = "worker-results-collected"
|
|
259
258
|
|
|
259
|
+
# validate-run.py requires team-state.teamCreate.attempted=true with a
|
|
260
|
+
# status of ok|error once any worker has been dispatched (see
|
|
261
|
+
# validators/validate-run.py:334-337). Mirror that here so the fixture
|
|
262
|
+
# represents a valid post-Phase-3 state.
|
|
263
|
+
team_state["teamCreate"] = {
|
|
264
|
+
"attempted": True,
|
|
265
|
+
"status": "ok",
|
|
266
|
+
}
|
|
267
|
+
|
|
260
268
|
# Phase 7 token-usage collection is normally produced by okstra-token-usage.py.
|
|
261
269
|
# The validator (`team-state.usageSummary is empty`) treats absence as a contract
|
|
262
270
|
# violation, so the fixture must mirror that step with a synthetic-but-valid object.
|
|
@@ -1,40 +1,66 @@
|
|
|
1
1
|
# shellcheck shell=bash
|
|
2
2
|
|
|
3
|
+
# Verify that the npm build output under `runtime/` is fresh and matches the
|
|
4
|
+
# source files that `okstra install` (src/install.mjs) copies into the user's
|
|
5
|
+
# `~/.claude/skills/`, `~/.claude/agents/`, and `~/.okstra/` directories.
|
|
6
|
+
#
|
|
7
|
+
# Historically this validator checked a *project-local* `.claude/skills/` +
|
|
8
|
+
# `.claude/agents/` tree that `okstra.sh` seeded into the project root on
|
|
9
|
+
# every run. That seeding step was removed when install moved into
|
|
10
|
+
# `src/install.mjs`; install now writes only to the user's
|
|
11
|
+
# `$HOME/.claude` + `$HOME/.okstra`, never to the project root. The runtime/
|
|
12
|
+
# tree is the canonical staging area that `okstra install` rsyncs from, so we
|
|
13
|
+
# validate parity there instead.
|
|
3
14
|
validate_seeded_assets() {
|
|
4
15
|
local validation_mode="$1"
|
|
16
|
+
local runtime_root="$WORKSPACE_ROOT/runtime"
|
|
5
17
|
|
|
6
|
-
|
|
18
|
+
if [[ ! -d "$runtime_root" ]]; then
|
|
19
|
+
printf 'runtime/ build output is missing — run `npm run build` before validating.\n' >&2
|
|
20
|
+
return 1
|
|
21
|
+
fi
|
|
22
|
+
|
|
23
|
+
python3 - "$SOURCE_ASSET_ROOT" "$WORKSPACE_ROOT/skills" "$runtime_root" "$validation_mode" <<'PY'
|
|
7
24
|
from pathlib import Path
|
|
8
25
|
import sys
|
|
9
26
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
27
|
+
agents_source_root = Path(sys.argv[1]) # <repo>/agents
|
|
28
|
+
skills_source_root = Path(sys.argv[2]) # <repo>/skills
|
|
29
|
+
runtime_root = Path(sys.argv[3]) # <repo>/runtime
|
|
30
|
+
validation_mode = sys.argv[4]
|
|
13
31
|
errors = []
|
|
14
32
|
|
|
15
|
-
for source_path in sorted(source_root.rglob("*.md")):
|
|
16
|
-
relative_path = source_path.relative_to(source_root)
|
|
17
|
-
parts = relative_path.parts
|
|
18
|
-
|
|
19
|
-
if relative_path.as_posix() == "SKILL.md":
|
|
20
|
-
target_path = target_root / "skills" / "okstra" / "SKILL.md"
|
|
21
|
-
elif parts[0] == "skills":
|
|
22
|
-
target_path = target_root / "skills" / Path(*parts[1:])
|
|
23
|
-
elif parts[0] == "workers":
|
|
24
|
-
# `agents/workers/<name>.md` 는 `.claude/agents/<name>.md` 로 시드된다.
|
|
25
|
-
# seeding.sh 의 분기와 동일하게 유지해야 한다.
|
|
26
|
-
target_path = target_root / "agents" / Path(*parts[1:])
|
|
27
|
-
elif parts[0] == "agents":
|
|
28
|
-
target_path = target_root / "agents" / Path(*parts[1:])
|
|
29
|
-
else:
|
|
30
|
-
target_path = target_root / "skills" / "okstra" / relative_path
|
|
31
33
|
|
|
34
|
+
def check(source_path: Path, target_path: Path) -> None:
|
|
32
35
|
if not target_path.is_file():
|
|
33
|
-
errors.append(f"missing
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
errors.append(f"missing build-output asset: {target_path}")
|
|
37
|
+
return
|
|
36
38
|
if validation_mode == "match" and target_path.read_bytes() != source_path.read_bytes():
|
|
37
|
-
errors.append(f"
|
|
39
|
+
errors.append(f"build-output asset does not match source: {target_path}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# 1. Worker agent files: agents/workers/*-worker.md -> runtime/agents/workers/*-worker.md
|
|
43
|
+
workers_source = agents_source_root / "workers"
|
|
44
|
+
workers_target = runtime_root / "agents" / "workers"
|
|
45
|
+
if not workers_source.is_dir():
|
|
46
|
+
errors.append(f"missing agents/workers source directory: {workers_source}")
|
|
47
|
+
else:
|
|
48
|
+
for source_path in sorted(workers_source.glob("*.md")):
|
|
49
|
+
check(source_path, workers_target / source_path.name)
|
|
50
|
+
|
|
51
|
+
# 2. Lead agent SKILL.md: agents/SKILL.md -> runtime/agents/SKILL.md
|
|
52
|
+
lead_source = agents_source_root / "SKILL.md"
|
|
53
|
+
if lead_source.is_file():
|
|
54
|
+
check(lead_source, runtime_root / "agents" / "SKILL.md")
|
|
55
|
+
|
|
56
|
+
# 3. Skill packages: skills/<name>/SKILL.md -> runtime/skills/<name>/SKILL.md
|
|
57
|
+
if not skills_source_root.is_dir():
|
|
58
|
+
errors.append(f"missing skills source directory: {skills_source_root}")
|
|
59
|
+
else:
|
|
60
|
+
for skill_dir in sorted(p for p in skills_source_root.iterdir() if p.is_dir()):
|
|
61
|
+
for source_path in sorted(skill_dir.rglob("*.md")):
|
|
62
|
+
relative = source_path.relative_to(skills_source_root)
|
|
63
|
+
check(source_path, runtime_root / "skills" / relative)
|
|
38
64
|
|
|
39
65
|
if errors:
|
|
40
66
|
for error in errors:
|