@yemi33/minions 0.1.2070 → 0.1.2072
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/qa.js +358 -0
- package/dashboard/js/state.js +2 -1
- package/dashboard/pages/qa.html +72 -0
- package/dashboard/styles.css +102 -0
- package/dashboard.js +410 -6
- package/docs/qa-runbook-lifecycle.md +232 -0
- package/engine/cleanup.js +4 -1
- package/engine/comment-classifier.js +8 -1
- package/engine/cooldown.js +6 -2
- package/engine/gh-comment.js +74 -3
- package/engine/gh-token.js +7 -9
- package/engine/lifecycle.js +100 -0
- package/engine/pipeline.js +9 -1
- package/engine/playbook.js +39 -0
- package/engine/qa-runners/maestro.js +152 -0
- package/engine/qa-runners/playwright.js +149 -0
- package/engine/qa-runners.js +323 -0
- package/engine/qa-sessions.js +1008 -0
- package/engine/shared.js +71 -12
- package/engine.js +140 -0
- package/package.json +1 -1
- package/playbooks/qa-session-draft.md +158 -0
- package/playbooks/qa-session-execute.md +165 -0
- package/playbooks/qa-session-setup.md +154 -0
- package/prompts/cc-system.md +43 -0
- package/routing.md +3 -0
|
@@ -69,3 +69,235 @@ receives `target`, `steps`, `expectedArtifacts` as template vars; required
|
|
|
69
69
|
to write the sidecar above before exit. Routing line in `routing.md` maps
|
|
70
70
|
the synthetic `qa-validate` task-type to the playbook so manual dispatches
|
|
71
71
|
work too.
|
|
72
|
+
|
|
73
|
+
# QA Sessions (PL-qa-sessions)
|
|
74
|
+
|
|
75
|
+
Natural-language QA flows: the user describes what they want tested in
|
|
76
|
+
plain prose, the engine sets up a live target, an agent drafts a
|
|
77
|
+
runner-native test file, and (with user approval) a second agent executes
|
|
78
|
+
it. Sessions are a thin orchestration layer on top of the same
|
|
79
|
+
`managed-spawn` + `qa-run-result.json` infrastructure that powers
|
|
80
|
+
runbooks above — they reuse `qa-runs.json`, `engine/qa-artifacts/`, and
|
|
81
|
+
the existing `engine/lifecycle.js#runPostCompletionHooks` qa-run sidecar
|
|
82
|
+
hook. Surfaced on `/qa` (sessions card list above the runbooks/runs
|
|
83
|
+
tables) and proxied by the Command Center natural-language shortcut.
|
|
84
|
+
|
|
85
|
+
## Use case (vs. runbooks)
|
|
86
|
+
|
|
87
|
+
- **Runbook**: a stable, named, reusable test plan stored as JSON. Same
|
|
88
|
+
steps run repeatedly against the same target. Edit once, dispatch
|
|
89
|
+
many times.
|
|
90
|
+
- **Session**: an ad-hoc, single-shot, natural-language QA prompt. The
|
|
91
|
+
user describes intent ("smoke the homepage after my PR"), engine
|
|
92
|
+
resolves the target + spawns it, an agent drafts a *new* runner-native
|
|
93
|
+
test file just for this run. The drafted file lives at
|
|
94
|
+
`engine/qa-tests/<sessionId>/test.<ext>` and is the audit artifact; the
|
|
95
|
+
session record is the orchestration breadcrumb.
|
|
96
|
+
|
|
97
|
+
If the same flow is going to be re-run repeatedly, promote the drafted
|
|
98
|
+
test file into a saved runbook. Sessions are *not* meant to replace
|
|
99
|
+
runbooks for repeat traffic.
|
|
100
|
+
|
|
101
|
+
## State machine
|
|
102
|
+
|
|
103
|
+
Eight states, source of truth `engine/qa-sessions.js#QA_SESSION_STATE`:
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
pending ──▶ spawning ──▶ drafting ──▶ awaiting-approval ──▶ executing ──▶ done
|
|
107
|
+
│ │ │ │ │ ╲
|
|
108
|
+
╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ failed
|
|
109
|
+
╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ killed
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Transitions move forward except where noted below; `done` / `failed` / `killed` are terminal (zero
|
|
113
|
+
outgoing edges). Two non-linear edges:
|
|
114
|
+
|
|
115
|
+
- `awaiting-approval → drafting` — `POST /api/qa/sessions/<id>/edit` re-fires
|
|
116
|
+
DRAFT with the user's natural-language feedback as steering.
|
|
117
|
+
- `drafting → executing` — `auto` mode skips approval and chains EXECUTE
|
|
118
|
+
directly from DRAFT-done.
|
|
119
|
+
|
|
120
|
+
Cancellation paths (`pending → killed`, `spawning → killed`, etc.) cover
|
|
121
|
+
the `POST /api/qa/sessions/<id>/cancel` and `/kill` endpoints. Illegal
|
|
122
|
+
transitions throw; `dashboard.js` translates the thrown error to HTTP 409
|
|
123
|
+
via `_qaSessionsErrorToStatus` (see endpoint table).
|
|
124
|
+
|
|
125
|
+
## Three-phase work-item chain
|
|
126
|
+
|
|
127
|
+
A session orchestrates three sequential work items, each rendered with a
|
|
128
|
+
phase-specific playbook (`meta.playbook` override beats the work-type
|
|
129
|
+
routing default):
|
|
130
|
+
|
|
131
|
+
1. **SETUP** (`playbooks/qa-session-setup.md`, `meta.managed_spawn: true`)
|
|
132
|
+
resolves `target.kind ∈ {pr, branch, current, commit}` to a worktree,
|
|
133
|
+
inspects the codebase for the dev-up command, and writes
|
|
134
|
+
`agents/<agentId>/managed-spawn.json` with
|
|
135
|
+
`name=qa-session-<sessionId>`. The engine spawn-validates the sidecar
|
|
136
|
+
then awaits `waitForFirstHealth` per the existing managed-spawn
|
|
137
|
+
lifecycle (see `docs/managed-spawn.md`). Healthcheck-passing transitions
|
|
138
|
+
the session `spawning → drafting`. First-health failure → `failed` with
|
|
139
|
+
`failure_class: qa-session-setup-failed`.
|
|
140
|
+
|
|
141
|
+
2. **DRAFT** (`playbooks/qa-session-draft.md`) reads the live spawn
|
|
142
|
+
metadata via `/api/managed-processes/by-name/qa-session-<id>`, calls the
|
|
143
|
+
resolved runner's `generateBrief({target, flowsRaw, capture})` hook, and
|
|
144
|
+
writes the runner-native test file to
|
|
145
|
+
`engine/qa-tests/<sessionId>/test.<ext>`. The runner's
|
|
146
|
+
`validateOutputDir` hook gates DRAFT success — a missing or malformed
|
|
147
|
+
test file lands the session in `failed`. In `confirm` mode the session
|
|
148
|
+
parks at `awaiting-approval`; in `auto` mode it auto-chains to EXECUTE.
|
|
149
|
+
|
|
150
|
+
3. **EXECUTE** (`playbooks/qa-session-execute.md`) runs the drafted test
|
|
151
|
+
against the live spawn via the runner's `executeBrief` hook, captures
|
|
152
|
+
artifacts per `spec.capture: { video, screenshots, logs }`, and writes
|
|
153
|
+
`agents/<agentId>/qa-run-result.json` — the same sidecar the
|
|
154
|
+
`qa-validate` runbook flow uses. The existing
|
|
155
|
+
`engine/lifecycle.js#runPostCompletionHooks` `meta.qaRunId` hook ingests
|
|
156
|
+
the sidecar and marks the linked `qa-runs` record terminal; the
|
|
157
|
+
session-level `handleExecuteComplete` then reads the `qa-run` terminal
|
|
158
|
+
status and transitions `executing → done` (or `failed`).
|
|
159
|
+
|
|
160
|
+
Each phase WI carries `meta.sessionId`, `meta.sessionPhase`, `meta.qaSession`
|
|
161
|
+
(target + flowsRaw + mode + capture + runner), and `meta.playbook`. The
|
|
162
|
+
EXECUTE WI additionally carries `meta.qaRunId` so the existing qa-run
|
|
163
|
+
lifecycle hook fires.
|
|
164
|
+
|
|
165
|
+
## Endpoints
|
|
166
|
+
|
|
167
|
+
Documented in `dashboard.js`; routes are visible at `GET /api/routes`.
|
|
168
|
+
|
|
169
|
+
| Method | Path | Behavior |
|
|
170
|
+
|--------|--------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|
|
|
171
|
+
| POST | `/api/qa/session` | Create session; validates spec, calls `createSession` + `queueSetup` (`pending → spawning`). Returns `sessionId`, `setupWorkItemId`, `managedSpawnName`. |
|
|
172
|
+
| GET | `/api/qa/sessions` | List sessions newest-first. Optional `?limit=N` and `?state=pending\|spawning\|drafting\|awaiting-approval\|executing\|done\|failed\|killed`. |
|
|
173
|
+
| GET | `/api/qa/sessions/<id>` | Fetch a single session record by id. |
|
|
174
|
+
| POST | `/api/qa/sessions/<id>/approve` | `awaiting-approval → executing`. Server-side creates the linked `qa-runs` record (synthetic `runbookId='qa-session-<id>'`), queues EXECUTE WI, stamps `qaRunId` on the session. |
|
|
175
|
+
| POST | `/api/qa/sessions/<id>/edit` | `awaiting-approval → drafting`. Body: `{ feedback }`. Re-fires DRAFT with the reviewer feedback threaded into the prompt. |
|
|
176
|
+
| POST | `/api/qa/sessions/<id>/cancel` | Non-terminal → `killed`. Optional `{ reason }`. Does NOT touch the managed-spawn — use `/kill` for that. |
|
|
177
|
+
| POST | `/api/qa/sessions/<id>/kill` | Non-terminal → `killed` AND `removeManagedSpec(<name>)`. Best-effort on the spawn kill (no-op if no spawn). |
|
|
178
|
+
| POST | `/api/qa/sessions/<id>/dismiss` | Non-terminal → `done`. Accept the draft as final; leaves spawn alive. Optional `{ summary }`. |
|
|
179
|
+
| GET | `/api/qa/runners` | List registered runner adapters (built-ins + `qa-runners.d/` plugins). Metadata only — hooks (functions) are stripped. |
|
|
180
|
+
| POST | `/api/qa/runners/reload` | Clear in-process registry, re-register built-ins, re-scan `qa-runners.d/` for plugin edits. Returns the fresh runner list. |
|
|
181
|
+
|
|
182
|
+
The single-session POSTs share `_qaSessionAction` in `dashboard.js`; module
|
|
183
|
+
errors are mapped to HTTP via `_qaSessionsErrorToStatus`:
|
|
184
|
+
|
|
185
|
+
- `'session not found'` → 404
|
|
186
|
+
- `'unsafe sessionId'` / `'invalid spec'` / `'requires …'` (size cap) → 400
|
|
187
|
+
- `'illegal state transition'` / `'requires state …'` / `'requires non-terminal'` → 409
|
|
188
|
+
|
|
189
|
+
## File locations
|
|
190
|
+
|
|
191
|
+
- **Session state**: `engine/qa-sessions.json` (single file, all projects,
|
|
192
|
+
capped at `QA_SESSIONS_MAX_RECORDS = 500` via `createSession`-time
|
|
193
|
+
rotation by `createdAt`).
|
|
194
|
+
- **Drafted test files**: `engine/qa-tests/<sessionId>/test.<ext>`. Created
|
|
195
|
+
by `createSession` (idempotent `mkdirSync recursive`); written by the
|
|
196
|
+
DRAFT agent. Path is sandboxed by `_isSafeSessionId` (kebab-case ≤ 64
|
|
197
|
+
chars, no `..`, no separators) before every read/write that maps id →
|
|
198
|
+
path. Gitignored.
|
|
199
|
+
- **Managed-spawn**: `engine/managed-processes.json` entry with
|
|
200
|
+
`name='qa-session-<sessionId>'`. Cleaned up by `/kill` only; the
|
|
201
|
+
session's `cancelSession` is the cancel-without-spawn-kill path and
|
|
202
|
+
leaves the spawn entry in place (see `/cancel` in the endpoint table).
|
|
203
|
+
- **EXECUTE artifacts**: same as runbooks — `engine/qa-artifacts/<runId>/`
|
|
204
|
+
served via `GET /api/qa/artifacts/<runId>/<file>`.
|
|
205
|
+
|
|
206
|
+
## Runner adapters (P-c4a9e7f3 / P-b8e1d4a6)
|
|
207
|
+
|
|
208
|
+
Pluggable test-runner registry at `engine/qa-runners.js`. Built-in
|
|
209
|
+
adapters: `playwright` (priority 100, detects `playwright.config.*`),
|
|
210
|
+
`maestro` (priority 80, detects `.maestro/`). Each adapter exports five
|
|
211
|
+
members (four hook functions plus the `installHint` string):
|
|
212
|
+
|
|
213
|
+
- `detect(target, project)` → `boolean`. Auto-detect for blank-runner
|
|
214
|
+
sessions.
|
|
215
|
+
- `generateBrief({target, flowsRaw, capture, project})` → instructions
|
|
216
|
+
string. Handed to the DRAFT agent so it emits a runner-native test file.
|
|
217
|
+
- `executeBrief({sessionId, target, capture, project})` → instructions /
|
|
218
|
+
command for the EXECUTE agent.
|
|
219
|
+
- `validateOutputDir(dir)` → `{ ok, errors[] }`. Gates the DRAFT → EXECUTE
|
|
220
|
+
transition.
|
|
221
|
+
- `installHint` → string shown when `detect()` returns true but the
|
|
222
|
+
runner CLI is missing.
|
|
223
|
+
|
|
224
|
+
Resolution order in `detectRunner(target, project, explicitRunner)`:
|
|
225
|
+
explicit-name (no `detect` call, unknown names return null), then
|
|
226
|
+
priority-desc iteration. Plugin folder: `<MINIONS_DIR>/qa-runners.d/*.js`
|
|
227
|
+
(same trust level as `playbooks/` and `watches.d/`). Hot-reload via
|
|
228
|
+
`POST /api/qa/runners/reload` (clears registry → re-registers built-ins →
|
|
229
|
+
re-scans plugin dir) so plugin edits take effect without an engine
|
|
230
|
+
restart.
|
|
231
|
+
|
|
232
|
+
## Fast-state slice
|
|
233
|
+
|
|
234
|
+
`/api/status.qaSessions = { total, sig }` — produced by the unsorted summary helper
|
|
235
|
+
`engine/qa-sessions.js#summarizeSessionsForStatus()`. Mirrors `qaRuns` so
|
|
236
|
+
the sidebar activity-dot lights up on any new session or state
|
|
237
|
+
transition within one `/api/status` poll cycle (~4s). Do NOT call
|
|
238
|
+
`listSessions({limit:50})` from this hot path — it sorts O(N log N) on
|
|
239
|
+
every fast-state rebuild.
|
|
240
|
+
|
|
241
|
+
## Dashboard UI (P-h7e4f9b2)
|
|
242
|
+
|
|
243
|
+
`/qa` page (`dashboard/pages/qa.html`, `dashboard/js/qa.js`) gains a
|
|
244
|
+
**QA Sessions** section above the existing Targets/Runbooks/Runs trio:
|
|
245
|
+
|
|
246
|
+
- **Start QA Session form** — target kind dropdown with conditional
|
|
247
|
+
sub-fields (PR id, branch, commit SHA, worktree path), flows textarea,
|
|
248
|
+
mode toggle (`confirm`|`auto`), capture checkboxes, runner dropdown
|
|
249
|
+
auto-populated from `GET /api/qa/runners`, project input.
|
|
250
|
+
- **Sessions list** — composite cards with phase chips
|
|
251
|
+
(🔧 setup → 📝 draft → ▶ execute → ✅ done). Cards visually flip the
|
|
252
|
+
chip classes (`--done` / `--active` / `--pending` / `--failed` /
|
|
253
|
+
`--killed`) per `session.state`. State-driven left-border color
|
|
254
|
+
(red=failed, green=done, yellow=awaiting-approval, blue=active).
|
|
255
|
+
- **Action buttons** —
|
|
256
|
+
`awaiting-approval` cards show `[Approve & run]` `[Edit]` `[Cancel]`;
|
|
257
|
+
every non-terminal card shows `[Dismiss]` `[Kill spawn]` in the footer;
|
|
258
|
+
terminal cards show no actions.
|
|
259
|
+
- **Polling** — `_startQaSessionsPoll` runs a 3000 ms
|
|
260
|
+
`setInterval(loadQaSessions)`; `_qaAfterSessionsRender` auto-stops the
|
|
261
|
+
poll once every cached session is in
|
|
262
|
+
`QA_SESSION_TERMINAL_STATES = {done, failed, killed}`. Form-submit and
|
|
263
|
+
action handlers each call `_startQaSessionsPoll` to re-activate polling
|
|
264
|
+
when a new non-terminal session appears. Wired into the canonical page
|
|
265
|
+
lifecycle: `loadQaRunners` + `loadQaSessions` + `_startQaSessionsPoll`
|
|
266
|
+
live in `PAGE_LAZY_LOADERS.qa`; `_stopQaSessionsPoll` lives in
|
|
267
|
+
`PAGE_LEAVE_HOOKS`.
|
|
268
|
+
|
|
269
|
+
## Command Center shortcut (PR12: CC system prompt)
|
|
270
|
+
|
|
271
|
+
The CC system prompt teaches natural-language → `POST /api/qa/session`.
|
|
272
|
+
The user can type "smoke test the home page in PR #2887" and CC builds
|
|
273
|
+
the spec (`target.kind='pr'`, `target.prId='github:yemi33/minions#2887'`,
|
|
274
|
+
`flowsRaw='smoke test the home page'`, default `mode='confirm'`) and
|
|
275
|
+
fires the endpoint. CC must include the
|
|
276
|
+
`X-Minions-Agent: cc-<turn-id>` header so the session's `createdBy`
|
|
277
|
+
field records the correct caller for the audit trail.
|
|
278
|
+
|
|
279
|
+
## When something goes wrong
|
|
280
|
+
|
|
281
|
+
- **SETUP managed-spawn won't validate** → session lands in `failed` with
|
|
282
|
+
`failure_class: 'invalid-managed-spawn'` (from
|
|
283
|
+
`evaluateManagedSpawnAcceptance` in `onAgentClose`) or
|
|
284
|
+
`'managed-spawn-healthcheck-failed'`. The `error` field carries the
|
|
285
|
+
reason; check `engine/managed-logs/qa-session-<sessionId>.log` for the
|
|
286
|
+
failing healthcheck.
|
|
287
|
+
- **DRAFT validateOutputDir rejects** → `failure_class:
|
|
288
|
+
'qa-session-draft-failed'`. Inspect the drafted file at
|
|
289
|
+
`engine/qa-tests/<sessionId>/`; the runner's `validateOutputDir.errors`
|
|
290
|
+
array tells you what's missing.
|
|
291
|
+
- **EXECUTE qa-run terminal status is `failed`/`errored`** →
|
|
292
|
+
`failure_class: 'qa-session-execute-failed'` /
|
|
293
|
+
`'qa-session-execute-errored'`. The linked `qa-runs` record (joined via
|
|
294
|
+
`session.qaRunId`) carries the agent's `summary` and artifact list.
|
|
295
|
+
- **Want to start over after seeing a bad draft** → POST
|
|
296
|
+
`/api/qa/sessions/<id>/edit` with `{ feedback: "…" }`; do NOT
|
|
297
|
+
`/cancel` + create a new session unless the original spec was wrong
|
|
298
|
+
(the test file from the prior DRAFT round stays on disk under
|
|
299
|
+
`engine/qa-tests/<sessionId>/` as `test.<ext>.bak.<round>` so you can
|
|
300
|
+
diff iterations).
|
|
301
|
+
- **Spawn is wedged but session is still in `executing`** → POST
|
|
302
|
+
`/api/qa/sessions/<id>/kill` (NOT `/cancel` — the latter leaves the
|
|
303
|
+
spawn alive).
|
package/engine/cleanup.js
CHANGED
|
@@ -1238,7 +1238,10 @@ async function runCleanup(config, verbose = false) {
|
|
|
1238
1238
|
cleaned.pendingContextsTrimmed = 0;
|
|
1239
1239
|
try {
|
|
1240
1240
|
const cooldownPath = path.join(ENGINE_DIR, 'cooldowns.json');
|
|
1241
|
-
|
|
1241
|
+
// safeJsonNoRestore — same rationale as engine/cooldown.js loadCooldowns:
|
|
1242
|
+
// resurrecting a stale .backup at cleanup time could re-introduce expired
|
|
1243
|
+
// entries the active code already pruned (P-bfa1d-safejson-no-restore).
|
|
1244
|
+
const cooldowns = safeJsonNoRestore(cooldownPath);
|
|
1242
1245
|
if (cooldowns && typeof cooldowns === 'object') {
|
|
1243
1246
|
let dirty = false;
|
|
1244
1247
|
// Trim oversized pendingContexts arrays (one-time migration + ongoing cap)
|
|
@@ -94,8 +94,15 @@ function isPreviewStatusBody(body) {
|
|
|
94
94
|
// gh-comment.buildMinionsCommentBody produces (marker, \n\n, body) and
|
|
95
95
|
// prevents quoted/blockquoted markers in human replies from triggering the
|
|
96
96
|
// classifier.
|
|
97
|
+
//
|
|
98
|
+
// P-bfa-s1-bom-marker (settlement): strip an optional leading BOM (\uFEFF)
|
|
99
|
+
// followed by any ASCII/Unicode whitespace before applying the regex.
|
|
100
|
+
// Copy-paste flows from Word, VSCode-with-BOM, and editors that auto-insert
|
|
101
|
+
// leading spaces would otherwise be misclassified as un-marked. The
|
|
102
|
+
// blockquote rejection (lines beginning with `> <!--`) is preserved — `>`
|
|
103
|
+
// is not whitespace and is not stripped, so quoted markers remain rejected.
|
|
97
104
|
function hasMinionsMarker(body) {
|
|
98
|
-
const text = String(body || '');
|
|
105
|
+
const text = String(body || '').replace(/^\uFEFF?\s*/, '');
|
|
99
106
|
if (!text) return false;
|
|
100
107
|
const m = MINIONS_COMMENT_MARKER_RE.exec(text);
|
|
101
108
|
if (!m) return false;
|
package/engine/cooldown.js
CHANGED
|
@@ -7,7 +7,7 @@ const path = require('path');
|
|
|
7
7
|
const shared = require('./shared');
|
|
8
8
|
const queries = require('./queries');
|
|
9
9
|
|
|
10
|
-
const { safeJson, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
|
|
10
|
+
const { safeJson, safeJsonNoRestore, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
|
|
11
11
|
const { ENGINE_DIR } = queries;
|
|
12
12
|
|
|
13
13
|
/**
|
|
@@ -40,7 +40,11 @@ const dispatchCooldowns = new Map(); // key → { timestamp, failures }
|
|
|
40
40
|
let _lastDiskCooldownKeys = new Set();
|
|
41
41
|
|
|
42
42
|
function loadCooldowns() {
|
|
43
|
-
|
|
43
|
+
// safeJsonNoRestore — cooldowns are time-bounded ephemeral state (24h TTL).
|
|
44
|
+
// Restoring a stale `cooldowns.json.backup` could resurrect expired entries
|
|
45
|
+
// that should already have been pruned, suppressing legitimate dispatches
|
|
46
|
+
// (P-bfa1d-safejson-no-restore). Missing/corrupt primary == "no cooldowns".
|
|
47
|
+
const saved = safeJsonNoRestore(COOLDOWN_PATH);
|
|
44
48
|
if (!saved) return;
|
|
45
49
|
const now = Date.now();
|
|
46
50
|
for (const [k, v] of Object.entries(saved)) {
|
package/engine/gh-comment.js
CHANGED
|
@@ -25,12 +25,28 @@
|
|
|
25
25
|
* `gh` invocation: argv form with `--body-file <tmp>` (NOT `--body <inline>`).
|
|
26
26
|
* Avoids platform-specific shell-quoting bugs for bodies that contain quotes,
|
|
27
27
|
* backticks, or `$(…)`. Temp files are cleaned up in `finally`.
|
|
28
|
+
*
|
|
29
|
+
* Per-slug PAT routing (P-bfa2a): each public function resolves a per-slug
|
|
30
|
+
* GitHub token via `engine/gh-token.js#resolveTokenForSlug(repo)` and threads
|
|
31
|
+
* it into the spawned `gh` process via `env.GH_TOKEN`. When the slug has no
|
|
32
|
+
* mapping in `engine.ghAccounts`, we fall back to the ambient `gh` identity
|
|
33
|
+
* (inherited via `process.env`) and emit a one-shot `console.warn` per slug
|
|
34
|
+
* so the dual-account audit can spot stragglers. The pinned "Never run
|
|
35
|
+
* `gh auth switch`" policy means we MUST resolve per-call rather than
|
|
36
|
+
* piggy-backing on the global active account.
|
|
28
37
|
*/
|
|
29
38
|
|
|
30
39
|
const fs = require('fs');
|
|
31
40
|
const path = require('path');
|
|
32
41
|
const os = require('os');
|
|
33
42
|
const { execFileSync: _execFileSync } = require('child_process');
|
|
43
|
+
const { resolveTokenForSlug: _defaultResolveTokenForSlug } = require('./gh-token');
|
|
44
|
+
|
|
45
|
+
// Module-level dedupe set so the "no token mapping for <slug>" warning fires
|
|
46
|
+
// at most once per slug per process — keeps the audit signal visible without
|
|
47
|
+
// flooding the log on every comment post. Clearable via `_clearTokenWarnings`
|
|
48
|
+
// for tests.
|
|
49
|
+
const _warnedUnresolvedSlugs = new Set();
|
|
34
50
|
|
|
35
51
|
// ── Validation ───────────────────────────────────────────────────────────────
|
|
36
52
|
|
|
@@ -134,12 +150,56 @@ function _writeTempBodyFile(content) {
|
|
|
134
150
|
return file;
|
|
135
151
|
}
|
|
136
152
|
|
|
137
|
-
function _runGh(execFileSync, args, timeoutMs) {
|
|
138
|
-
|
|
153
|
+
function _runGh(execFileSync, args, timeoutMs, env) {
|
|
154
|
+
const opts = {
|
|
139
155
|
encoding: 'utf8',
|
|
140
156
|
timeout: timeoutMs,
|
|
141
157
|
windowsHide: true,
|
|
142
|
-
}
|
|
158
|
+
};
|
|
159
|
+
// Only set `env` when we have a token override to thread; leaving the option
|
|
160
|
+
// unset preserves the existing `process.env` inheritance for back-compat with
|
|
161
|
+
// unmapped slugs and tests that don't stub the resolver.
|
|
162
|
+
if (env) opts.env = env;
|
|
163
|
+
return execFileSync('gh', args, opts);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Resolve the per-slug PAT for `repo` and return an env override suitable for
|
|
168
|
+
* passing to `_runGh`. Returns `undefined` when no mapping exists (caller
|
|
169
|
+
* falls back to the ambient `gh` identity inherited from `process.env`).
|
|
170
|
+
*
|
|
171
|
+
* Emits a one-shot `console.warn` per (slug, reason) so the dual-account audit
|
|
172
|
+
* can spot stragglers without flooding the log on every comment post. Resolver
|
|
173
|
+
* exceptions are swallowed — comment posting must never fail because token
|
|
174
|
+
* resolution did. The pinned "Never run `gh auth switch`" policy survives even
|
|
175
|
+
* when the mapping is missing: we just inherit the ambient identity instead of
|
|
176
|
+
* forcibly mutating it.
|
|
177
|
+
*/
|
|
178
|
+
function _resolveTokenEnvForRepo(repo, resolveTokenForSlugFn) {
|
|
179
|
+
const resolve = resolveTokenForSlugFn || _defaultResolveTokenForSlug;
|
|
180
|
+
let token = null;
|
|
181
|
+
try {
|
|
182
|
+
token = resolve(repo);
|
|
183
|
+
} catch (e) {
|
|
184
|
+
const key = `error:${repo}`;
|
|
185
|
+
if (!_warnedUnresolvedSlugs.has(key)) {
|
|
186
|
+
_warnedUnresolvedSlugs.add(key);
|
|
187
|
+
console.warn(
|
|
188
|
+
`gh-comment: resolveTokenForSlug threw for "${repo}" (${e?.message || e}) — falling back to ambient gh identity`,
|
|
189
|
+
);
|
|
190
|
+
}
|
|
191
|
+
return undefined;
|
|
192
|
+
}
|
|
193
|
+
if (!token) {
|
|
194
|
+
if (!_warnedUnresolvedSlugs.has(repo)) {
|
|
195
|
+
_warnedUnresolvedSlugs.add(repo);
|
|
196
|
+
console.warn(
|
|
197
|
+
`gh-comment: no token mapping for repo "${repo}" — falling back to ambient gh identity (audit straggler)`,
|
|
198
|
+
);
|
|
199
|
+
}
|
|
200
|
+
return undefined;
|
|
201
|
+
}
|
|
202
|
+
return { ...process.env, GH_TOKEN: token };
|
|
143
203
|
}
|
|
144
204
|
|
|
145
205
|
function postPrComment({
|
|
@@ -151,16 +211,19 @@ function postPrComment({
|
|
|
151
211
|
workItemId,
|
|
152
212
|
timeoutMs = 30000,
|
|
153
213
|
execFileSync = _execFileSync,
|
|
214
|
+
resolveTokenForSlug,
|
|
154
215
|
} = {}) {
|
|
155
216
|
_validateRepo(repo);
|
|
156
217
|
_validatePrNumber(prNumber);
|
|
157
218
|
const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
|
|
158
219
|
const file = _writeTempBodyFile(finalBody);
|
|
220
|
+
const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
|
|
159
221
|
try {
|
|
160
222
|
const output = _runGh(
|
|
161
223
|
execFileSync,
|
|
162
224
|
['pr', 'comment', String(prNumber), '--repo', repo, '--body-file', file],
|
|
163
225
|
timeoutMs,
|
|
226
|
+
env,
|
|
164
227
|
);
|
|
165
228
|
return { output: String(output || '').trim(), bodyFile: file };
|
|
166
229
|
} finally {
|
|
@@ -177,16 +240,19 @@ function postPrReviewComment({
|
|
|
177
240
|
workItemId,
|
|
178
241
|
timeoutMs = 30000,
|
|
179
242
|
execFileSync = _execFileSync,
|
|
243
|
+
resolveTokenForSlug,
|
|
180
244
|
} = {}) {
|
|
181
245
|
_validateRepo(repo);
|
|
182
246
|
_validatePrNumber(prNumber);
|
|
183
247
|
const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
|
|
184
248
|
const file = _writeTempBodyFile(finalBody);
|
|
249
|
+
const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
|
|
185
250
|
try {
|
|
186
251
|
const output = _runGh(
|
|
187
252
|
execFileSync,
|
|
188
253
|
['pr', 'review', String(prNumber), '--comment', '--repo', repo, '--body-file', file],
|
|
189
254
|
timeoutMs,
|
|
255
|
+
env,
|
|
190
256
|
);
|
|
191
257
|
return { output: String(output || '').trim(), bodyFile: file };
|
|
192
258
|
} finally {
|
|
@@ -210,6 +276,7 @@ function postPrReview({
|
|
|
210
276
|
workItemId,
|
|
211
277
|
timeoutMs = 30000,
|
|
212
278
|
execFileSync = _execFileSync,
|
|
279
|
+
resolveTokenForSlug,
|
|
213
280
|
} = {}) {
|
|
214
281
|
const flag = _REVIEW_EVENT_FLAGS[event];
|
|
215
282
|
if (!flag) {
|
|
@@ -221,11 +288,13 @@ function postPrReview({
|
|
|
221
288
|
_validatePrNumber(prNumber);
|
|
222
289
|
const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
|
|
223
290
|
const file = _writeTempBodyFile(finalBody);
|
|
291
|
+
const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
|
|
224
292
|
try {
|
|
225
293
|
const output = _runGh(
|
|
226
294
|
execFileSync,
|
|
227
295
|
['pr', 'review', String(prNumber), flag, '--repo', repo, '--body-file', file],
|
|
228
296
|
timeoutMs,
|
|
297
|
+
env,
|
|
229
298
|
);
|
|
230
299
|
return { output: String(output || '').trim(), bodyFile: file };
|
|
231
300
|
} finally {
|
|
@@ -250,4 +319,6 @@ module.exports = {
|
|
|
250
319
|
// Internal helpers exported for tests / advanced callers
|
|
251
320
|
_buildMarker,
|
|
252
321
|
_writeTempBodyFile,
|
|
322
|
+
_resolveTokenEnvForRepo,
|
|
323
|
+
_clearTokenWarnings: () => _warnedUnresolvedSlugs.clear(),
|
|
253
324
|
};
|
package/engine/gh-token.js
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
* via `_setTokenForTest(slug, token)` and clear it via `_clearTokenCache()`.
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
|
-
const {
|
|
21
|
+
const { execFileSync } = require('child_process');
|
|
22
22
|
const path = require('path');
|
|
23
23
|
const shared = require('./shared');
|
|
24
24
|
const { safeJson, MINIONS_DIR, log } = shared;
|
|
@@ -70,14 +70,12 @@ function _fetchTokenForAccount(account, opts = {}) {
|
|
|
70
70
|
const cached = _accountTokens.get(account);
|
|
71
71
|
if (cached && cached.expiresAt > Date.now()) return cached.token;
|
|
72
72
|
|
|
73
|
-
const run = opts.
|
|
73
|
+
const run = opts.execFileSync || execFileSync;
|
|
74
74
|
try {
|
|
75
|
-
// Argv form
|
|
76
|
-
//
|
|
77
|
-
//
|
|
78
|
-
|
|
79
|
-
const cmd = `gh auth token --user ${account} --hostname github.com`;
|
|
80
|
-
const out = run(cmd, {
|
|
75
|
+
// Argv-array form: `account` is passed as a literal argument and never
|
|
76
|
+
// interpreted by a shell, so shell metacharacters in the configured
|
|
77
|
+
// account name (e.g. `;`, backticks, `$()`) cannot be executed.
|
|
78
|
+
const out = run('gh', ['auth', 'token', '--user', account, '--hostname', 'github.com'], {
|
|
81
79
|
timeout: FETCH_TIMEOUT_MS,
|
|
82
80
|
encoding: 'utf8',
|
|
83
81
|
windowsHide: true,
|
|
@@ -102,7 +100,7 @@ function _fetchTokenForAccount(account, opts = {}) {
|
|
|
102
100
|
* caller should fall back to the ambient `gh` identity.
|
|
103
101
|
*
|
|
104
102
|
* Test seam: `_setTokenForTest(slug, token)` short-circuits the entire chain
|
|
105
|
-
* so unit tests do not have to mock
|
|
103
|
+
* so unit tests do not have to mock execFileSync nor stand up a config file.
|
|
106
104
|
*/
|
|
107
105
|
function resolveTokenForSlug(slug, opts = {}) {
|
|
108
106
|
if (slug && _slugTokenOverrides.has(slug)) return _slugTokenOverrides.get(slug);
|
package/engine/lifecycle.js
CHANGED
|
@@ -4393,6 +4393,106 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
|
|
|
4393
4393
|
}
|
|
4394
4394
|
}
|
|
4395
4395
|
|
|
4396
|
+
// P-a3f7c1b2 — QA Session lifecycle hook. The SETUP / DRAFT / EXECUTE work
|
|
4397
|
+
// items carry `meta.sessionId` and `meta.sessionPhase` (set by the
|
|
4398
|
+
// qa-sessions chain helpers — see engine/qa-sessions.js). On each agent
|
|
4399
|
+
// exit we delegate to the matching handler, which validates the state
|
|
4400
|
+
// transition and queues the next phase WI when applicable. The qa-validate
|
|
4401
|
+
// qaRunId hook above (line 4340) already handles the qa-runs sidecar for
|
|
4402
|
+
// the EXECUTE phase; we just need to map the qa-runs terminal status onto
|
|
4403
|
+
// the session state.
|
|
4404
|
+
//
|
|
4405
|
+
// The same dispatchItem.meta vs dispatchItem.meta.item.meta dual-location
|
|
4406
|
+
// problem the qaRunId block documents applies here — central-spawned
|
|
4407
|
+
// sessions land their sidecar at the top level, project-routed ones nest
|
|
4408
|
+
// it under `meta.item.meta`. Accept both.
|
|
4409
|
+
const qaSessionId = meta?.sessionId || meta?.item?.meta?.sessionId;
|
|
4410
|
+
const qaSessionPhase = meta?.sessionPhase || meta?.item?.meta?.sessionPhase;
|
|
4411
|
+
if (qaSessionId && qaSessionPhase) {
|
|
4412
|
+
try {
|
|
4413
|
+
const qaSessions = require('./qa-sessions');
|
|
4414
|
+
const wiPath = resolveWorkItemPath(meta);
|
|
4415
|
+
const project = meta?.project?.name || meta?.item?.project || null;
|
|
4416
|
+
const failureReason = typeof resultSummary === 'string' && resultSummary
|
|
4417
|
+
? resultSummary
|
|
4418
|
+
: '';
|
|
4419
|
+
if (qaSessionPhase === 'setup') {
|
|
4420
|
+
qaSessions.handleSetupComplete(qaSessionId, {
|
|
4421
|
+
success: !!effectiveSuccess,
|
|
4422
|
+
wiPath,
|
|
4423
|
+
project,
|
|
4424
|
+
failureClass: (structuredCompletion && structuredCompletion.failure_class) || null,
|
|
4425
|
+
reason: failureReason,
|
|
4426
|
+
});
|
|
4427
|
+
} else if (qaSessionPhase === 'draft') {
|
|
4428
|
+
// The DRAFT agent reports the relative test file path in its
|
|
4429
|
+
// structured completion as `testFile` (or the playbook can write a
|
|
4430
|
+
// qa-session-draft-result.json sidecar in a future iteration). For
|
|
4431
|
+
// now read it from structuredCompletion when present; the session
|
|
4432
|
+
// record still works without it (EXECUTE prompt falls back to the
|
|
4433
|
+
// generic test.<ext> hint).
|
|
4434
|
+
const testFile = structuredCompletion && typeof structuredCompletion.testFile === 'string'
|
|
4435
|
+
? structuredCompletion.testFile
|
|
4436
|
+
: null;
|
|
4437
|
+
const session = qaSessions.getSession(qaSessionId);
|
|
4438
|
+
if (session && session.spec && session.spec.mode === 'auto' && effectiveSuccess) {
|
|
4439
|
+
// Auto-mode chains EXECUTE; we need a qa-runs record up-front. The
|
|
4440
|
+
// dashboard endpoint that created the session can't know in
|
|
4441
|
+
// advance whether EXECUTE will fire (mode could be flipped), so
|
|
4442
|
+
// the qa-runs record is created here on the auto path.
|
|
4443
|
+
let qaRunId = null;
|
|
4444
|
+
try {
|
|
4445
|
+
const qaRuns = require('./qa-runs');
|
|
4446
|
+
const run = qaRuns.createRun({
|
|
4447
|
+
runbookId: 'qa-session-' + qaSessionId,
|
|
4448
|
+
targetName: session.managedSpawnName,
|
|
4449
|
+
project: project || session.spec.project || null,
|
|
4450
|
+
});
|
|
4451
|
+
qaRunId = run.id;
|
|
4452
|
+
} catch (createErr) {
|
|
4453
|
+
log('warn', `qa-session auto-mode createRun failed for ${qaSessionId}: ${createErr.message}`);
|
|
4454
|
+
}
|
|
4455
|
+
qaSessions.handleDraftComplete(qaSessionId, {
|
|
4456
|
+
success: true,
|
|
4457
|
+
testFile,
|
|
4458
|
+
wiPath,
|
|
4459
|
+
project,
|
|
4460
|
+
qaRunId,
|
|
4461
|
+
reason: failureReason,
|
|
4462
|
+
});
|
|
4463
|
+
} else {
|
|
4464
|
+
qaSessions.handleDraftComplete(qaSessionId, {
|
|
4465
|
+
success: !!effectiveSuccess,
|
|
4466
|
+
testFile,
|
|
4467
|
+
reason: failureReason,
|
|
4468
|
+
});
|
|
4469
|
+
}
|
|
4470
|
+
} else if (qaSessionPhase === 'execute') {
|
|
4471
|
+
// The qaRunId block above already wrote the terminal
|
|
4472
|
+
// qa-runs record. Re-read it to drive the session state.
|
|
4473
|
+
let qaRunStatus = null;
|
|
4474
|
+
const linkedQaRunId = meta?.qaRunId || meta?.item?.meta?.qaRunId;
|
|
4475
|
+
if (linkedQaRunId) {
|
|
4476
|
+
try {
|
|
4477
|
+
const qaRuns = require('./qa-runs');
|
|
4478
|
+
const run = qaRuns.getRun(linkedQaRunId);
|
|
4479
|
+
if (run && run.status) qaRunStatus = run.status;
|
|
4480
|
+
} catch (readErr) {
|
|
4481
|
+
log('warn', `qa-session execute qa-run lookup failed for ${qaSessionId}: ${readErr.message}`);
|
|
4482
|
+
}
|
|
4483
|
+
}
|
|
4484
|
+
qaSessions.handleExecuteComplete(qaSessionId, {
|
|
4485
|
+
success: !!effectiveSuccess,
|
|
4486
|
+
qaRunStatus,
|
|
4487
|
+
summary: resultSummary || null,
|
|
4488
|
+
reason: failureReason,
|
|
4489
|
+
});
|
|
4490
|
+
}
|
|
4491
|
+
} catch (err) {
|
|
4492
|
+
log('warn', `qa-session completion hook for ${qaSessionId} (${qaSessionPhase}): ${err.message}`);
|
|
4493
|
+
}
|
|
4494
|
+
}
|
|
4495
|
+
|
|
4396
4496
|
// Plan chaining removed — user must explicitly execute plan-to-prd after reviewing the plan
|
|
4397
4497
|
if (effectiveSuccess && meta?.item?.sourcePlan) checkPlanCompletion(meta, config);
|
|
4398
4498
|
|
package/engine/pipeline.js
CHANGED
|
@@ -76,7 +76,9 @@ function getPipelineRuns() {
|
|
|
76
76
|
function getActiveRun(pipelineId) {
|
|
77
77
|
const runs = getPipelineRuns();
|
|
78
78
|
const pipelineRuns = runs[pipelineId] || [];
|
|
79
|
-
return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING
|
|
79
|
+
return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING
|
|
80
|
+
|| r.status === PIPELINE_STATUS.PAUSED
|
|
81
|
+
|| r.status === PIPELINE_STATUS.WAITING_HUMAN);
|
|
80
82
|
}
|
|
81
83
|
|
|
82
84
|
function startRun(pipelineId, pipeline) {
|
|
@@ -110,6 +112,12 @@ function startRun(pipelineId, pipeline) {
|
|
|
110
112
|
}
|
|
111
113
|
|
|
112
114
|
function updateRunStage(pipelineId, runId, stageId, updates) {
|
|
115
|
+
if (updates && Object.prototype.hasOwnProperty.call(updates, 'status')) {
|
|
116
|
+
const validStatuses = Object.values(PIPELINE_STATUS);
|
|
117
|
+
if (!validStatuses.includes(updates.status)) {
|
|
118
|
+
throw new Error(`updateRunStage: invalid status '${updates.status}' (expected one of: ${validStatuses.join('|')})`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
113
121
|
mutateJsonFileLocked(PIPELINE_RUNS_PATH, (data) => {
|
|
114
122
|
const runs = data[pipelineId] || [];
|
|
115
123
|
const run = runs.find(r => r.runId === runId);
|