@yemi33/minions 0.1.1996 → 0.1.1997

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ const os = require('os');
9
9
  const path = require('path');
10
10
  const shared = require('./shared');
11
11
  const queries = require('./queries');
12
+ const { wrapUntrusted, buildSource } = require('./untrusted-fence');
12
13
 
13
14
  const { safeJson, safeRead, getProjects, log, ts, dateStamp, truncateTextBytes, ENGINE_DEFAULTS, WI_STATUS, WORK_TYPE, PR_STATUS, DISPATCH_RESULT, getProjectOrg } = shared;
14
15
  const { getConfig, getDispatch, getNotes, getAgentCharter, getPrs, getKnowledgeBaseIndex, AGENTS_DIR } = queries;
@@ -184,7 +185,9 @@ function resolveTaskContext(item, config) {
184
185
  const planPath = path.join(MINIONS_DIR, 'plans', planFile);
185
186
  try {
186
187
  const content = safeRead(planPath);
187
- resolved.additionalContext += `\n\n## Referenced Plan: ${planFile} (created by ${agent.name})\n\n${truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan')}`;
188
+ const truncated = truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan');
189
+ const fenced = wrapUntrusted(truncated, buildSource('wi-reference', { path: `plans/${planFile}` }));
190
+ resolved.additionalContext += `\n\n## Referenced Plan: ${planFile} (created by ${agent.name})\n\n${fenced || truncated}`;
188
191
  resolved.referencedFiles.push(planPath);
189
192
  log('info', `Context resolution: found plan "${planFile}" by ${agent.name} for work item ${item.id}`);
190
193
  } catch (e) { log('warn', 'resolve plan context: ' + e.message); }
@@ -195,7 +198,9 @@ function resolveTaskContext(item, config) {
195
198
  const planPath = path.join(MINIONS_DIR, 'plans', match);
196
199
  try {
197
200
  const content = safeRead(planPath);
198
- resolved.additionalContext += `\n\n## Referenced Plan: ${match}\n\n${truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan')}`;
201
+ const truncated = truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan');
202
+ const fenced = wrapUntrusted(truncated, buildSource('wi-reference', { path: `plans/${match}` }));
203
+ resolved.additionalContext += `\n\n## Referenced Plan: ${match}\n\n${fenced || truncated}`;
199
204
  resolved.referencedFiles.push(planPath);
200
205
  log('info', `Context resolution: found plan "${match}" (name match) for work item ${item.id}`);
201
206
  } catch (e) { log('warn', 'resolve plan fallback context: ' + e.message); }
@@ -218,7 +223,9 @@ function resolveTaskContext(item, config) {
218
223
  .sort().reverse();
219
224
  if (files.length > 0) {
220
225
  const content = safeRead(path.join(inboxDir, files[0]));
221
- resolved.additionalContext += `\n\n## Referenced Notes by ${agent.name}: ${files[0]}\n\n${truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedNotesBytes, 'referenced notes')}`;
226
+ const truncated = truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedNotesBytes, 'referenced notes');
227
+ const fenced = wrapUntrusted(truncated, buildSource('inbox', { filename: files[0] }));
228
+ resolved.additionalContext += `\n\n## Referenced Notes by ${agent.name}: ${files[0]}\n\n${fenced || truncated}`;
222
229
  resolved.referencedFiles.push(path.join(inboxDir, files[0]));
223
230
  log('info', `Context resolution: found notes "${files[0]}" by ${agent.name} for work item ${item.id}`);
224
231
  }
@@ -237,7 +244,9 @@ function resolveTaskContext(item, config) {
237
244
  if (plans.length > 0) {
238
245
  const planPath = path.join(MINIONS_DIR, 'plans', plans[0]);
239
246
  const content = safeRead(planPath);
240
- resolved.additionalContext += `\n\n## Referenced Plan (latest): ${plans[0]}\n\n${truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan')}`;
247
+ const truncated = truncateReferencedContext(content, ENGINE_DEFAULTS.maxReferencedPlanBytes, 'referenced plan');
248
+ const fenced = wrapUntrusted(truncated, buildSource('wi-reference', { path: `plans/${plans[0]}` }));
249
+ resolved.additionalContext += `\n\n## Referenced Plan (latest): ${plans[0]}\n\n${fenced || truncated}`;
241
250
  resolved.referencedFiles.push(planPath);
242
251
  log('info', `Context resolution: using latest plan "${plans[0]}" for work item ${item.id}`);
243
252
  }
@@ -309,6 +318,7 @@ const PLAYBOOK_REQUIRED_VARS = {
309
318
  'test': ['item_name'],
310
319
  'docs': ['item_id', 'item_name'],
311
320
  'setup': ['item_id', 'item_name', 'project_path'],
321
+ 'qa-validate': ['item_id', 'item_name', 'qa_run_id'],
312
322
  'work-item': ['item_id', 'item_name'],
313
323
  'meeting-investigate': ['meeting_title', 'agenda'],
314
324
  'meeting-debate': ['meeting_title', 'agenda'],
@@ -391,6 +401,69 @@ function resolvePlaybookPath(projectName, playbookType) {
391
401
  return path.join(PLAYBOOKS_DIR, `${playbookTypeName}.md`);
392
402
  }
393
403
 
404
/**
 * W-mpeiwz6k0005bf34-c — Build the "QA Run Context" markdown block that
 * renderPlaybook injects when vars.qa_run_id is set.
 *
 * Pure formatter: takes the runbook + target snapshot the dispatcher captured
 * (and stored on the work item meta) and renders a compact, prompt-friendly
 * summary. Every field is guarded because dispatch callers may supply partial
 * snapshots when the managed-process state has rotated between schedule and
 * dispatch.
 *
 * NOTE(review): runbook/target strings are interpolated verbatim (no
 * untrusted-input fence, no backtick escaping) — confirm these values are
 * always operator-controlled.
 *
 * @param {object} [opts] - options bag; a missing, null, or non-object value
 *   is treated as empty instead of throwing.
 * @param {string} [opts.runId] - QA run id; when falsy the result is ''.
 * @param {object} [opts.runbook] - reads `id`, `name`, `steps[]`
 *   (`description`, `command`) and `expectedArtifacts[]` (`type`, `label`, `path`).
 * @param {object} [opts.target] - reads `name`, `owner_project`, `healthy`,
 *   `ports[]` and `attrs` (`base_url`/`baseUrl`, `framework`).
 * @param {string} [opts.artifactsDir] - artifacts directory shown next to the run id.
 * @returns {string} newline-joined markdown block, or '' when there is no run id.
 */
function buildQaValidateContextBlock(opts) {
  // Destructuring `undefined`/`null` throws, which would defeat the
  // "partial snapshot" guarantee — fall back to an empty bag instead.
  const { runId, runbook, target, artifactsDir } = opts && typeof opts === 'object' ? opts : {};
  if (!runId) return '';

  const lines = [];
  lines.push('## QA Run Context');
  lines.push('');
  lines.push(`- **runId:** \`${runId}\``);
  if (artifactsDir) lines.push(`- **artifactsDir:** \`${artifactsDir}\``);
  lines.push('');

  const rb = runbook && typeof runbook === 'object' ? runbook : null;
  if (rb) {
    lines.push('### Runbook');
    lines.push(`- **id:** \`${rb.id || ''}\``);
    if (rb.name) lines.push(`- **name:** ${rb.name}`);
    if (Array.isArray(rb.steps) && rb.steps.length > 0) {
      lines.push('- **steps:**');
      rb.steps.forEach((s, i) => {
        // Malformed entries are skipped; the printed number stays the step's
        // position in the original array, so gaps are possible.
        if (!s || typeof s !== 'object') return;
        const desc = String(s.description || '').trim();
        const cmd = s.command ? ` \`${String(s.command).trim()}\`` : '';
        lines.push(` ${i + 1}. ${desc}${cmd}`);
      });
    }
    if (Array.isArray(rb.expectedArtifacts) && rb.expectedArtifacts.length > 0) {
      lines.push('- **expectedArtifacts:**');
      for (const a of rb.expectedArtifacts) {
        if (!a || typeof a !== 'object') continue;
        const type = String(a.type || 'other');
        const label = String(a.label || '').trim();
        const hint = a.path ? ` (\`${a.path}\`)` : '';
        lines.push(` - \`${type}\` — ${label}${hint}`);
      }
    }
    lines.push('');
  }

  const t = target && typeof target === 'object' ? target : null;
  if (t) {
    lines.push('### Target (managed-process snapshot)');
    if (t.name) lines.push(`- **name:** \`${t.name}\``);
    if (t.owner_project) lines.push(`- **project:** \`${t.owner_project}\``);
    if (typeof t.healthy === 'boolean') lines.push(`- **healthy:** ${t.healthy}`);
    if (Array.isArray(t.ports) && t.ports.length > 0) lines.push(`- **ports:** ${t.ports.join(', ')}`);
    if (t.attrs && typeof t.attrs === 'object') {
      // Both snake_case and camelCase spellings of the base URL are accepted.
      const base = t.attrs.base_url || t.attrs.baseUrl;
      const framework = t.attrs.framework;
      if (base) lines.push(`- **base_url:** ${base}`);
      if (framework) lines.push(`- **framework:** ${framework}`);
    }
    lines.push('');
  }

  lines.push('Use this context to execute the runbook against the live target. Write the result sidecar to `agents/<your-id>/qa-run-result.json` before exit — the engine consumes it in `engine/lifecycle.js` and calls `qaRuns.completeRun(runId, ...)`.');

  return lines.join('\n');
}
466
+
394
467
 
395
468
  // ─── Playbook Renderer ──────────────────────────────────────────────────────
396
469
 
@@ -411,15 +484,20 @@ function renderPlaybook(type, vars) {
411
484
 
412
485
  const inertAppendices = [];
413
486
 
414
- // Inject pinned context (always visible to agents) — capped at 4KB
487
+ // Inject pinned context (always visible to agents) — capped at 4KB.
488
+ // F5 (W-mpeklod3000we69c): wrap in <UNTRUSTED-INPUT> fence — human-edited
489
+ // file that ends up in every agent prompt.
415
490
  let pinnedContent = '';
416
491
  try { pinnedContent = fs.readFileSync(path.join(MINIONS_DIR, 'pinned.md'), 'utf8'); } catch { /* optional */ }
417
492
  if (pinnedContent) {
418
493
  if (pinnedContent.length > 4096) pinnedContent = pinnedContent.slice(0, 4096) + '\n\n_...pinned.md truncated (read full file if needed)_';
419
- inertAppendices.push('\n\n---\n\n## Pinned Context (CRITICAL READ FIRST)\n\n' + pinnedContent);
494
+ const fenced = wrapUntrusted(pinnedContent, buildSource('pinned-note', { path: 'pinned.md' }));
495
+ inertAppendices.push('\n\n---\n\n## Pinned Context (CRITICAL — READ FIRST)\n\n' + (fenced || pinnedContent));
420
496
  }
421
497
 
422
- // Inject team notes (single injection point — not in buildAgentContext) — capped via ENGINE_DEFAULTS
498
+ // Inject team notes (single injection point — not in buildAgentContext) — capped via ENGINE_DEFAULTS.
499
+ // F5: wrap in <UNTRUSTED-INPUT> fence — notes.md is an LLM-consolidated mix
500
+ // of agent inbox notes (semi-trusted) and human edits.
423
501
  let notes = getNotes();
424
502
  if (notes) {
425
503
  if (Buffer.byteLength(notes, 'utf8') > ENGINE_DEFAULTS.maxNotesPromptBytes) {
@@ -430,15 +508,19 @@ function renderPlaybook(type, vars) {
430
508
  const budget = Math.max(0, ENGINE_DEFAULTS.maxNotesPromptBytes - Buffer.byteLength(footer, 'utf8'));
431
509
  notes = truncateTextBytes(recent, budget, '\n\n_...notes truncated_') + footer;
432
510
  }
433
- inertAppendices.push('\n\n---\n\n## Team Notes (MUST READ)\n\n' + notes);
511
+ const fenced = wrapUntrusted(notes, buildSource('team-notes', { path: 'notes.md' }));
512
+ inertAppendices.push('\n\n---\n\n## Team Notes (MUST READ)\n\n' + (fenced || notes));
434
513
  }
435
514
 
436
515
  // Inject per-agent memory file (knowledge/agents/<agentId>.md) — personal
437
516
  // notebook curated by the consolidation pipeline. Capped at the same
438
517
  // notes budget; missing file degrades gracefully (silent skip).
518
+ // F5: fence — agent-authored inbox notes routed into this file; any agent
519
+ // could include attacker-controlled quoted material.
439
520
  const agentIdForMemory = vars.agent_id;
440
521
  if (agentIdForMemory && /^[a-z][a-z0-9-]{0,40}$/i.test(agentIdForMemory) && !String(agentIdForMemory).toLowerCase().startsWith('temp-')) {
441
- const agentMemPath = path.join(MINIONS_DIR, 'knowledge', 'agents', `${String(agentIdForMemory).toLowerCase()}.md`);
522
+ const agentMemRel = `knowledge/agents/${String(agentIdForMemory).toLowerCase()}.md`;
523
+ const agentMemPath = path.join(MINIONS_DIR, agentMemRel);
442
524
  let agentMem = '';
443
525
  try { agentMem = fs.readFileSync(agentMemPath, 'utf8'); } catch { /* optional — file may not exist */ }
444
526
  if (agentMem && agentMem.trim()) {
@@ -448,7 +530,8 @@ function renderPlaybook(type, vars) {
448
530
  const budget = Math.max(0, ENGINE_DEFAULTS.maxNotesPromptBytes);
449
531
  agentMem = truncateTextBytes(recent, budget, '\n\n_...agent memory truncated_');
450
532
  }
451
- inertAppendices.push('\n\n---\n\n## Personal Memory (your past learnings MUST READ)\n\n' + agentMem);
533
+ const fenced = wrapUntrusted(agentMem, buildSource('agent-memory', { path: agentMemRel }));
534
+ inertAppendices.push('\n\n---\n\n## Personal Memory (your past learnings — MUST READ)\n\n' + (fenced || agentMem));
452
535
  }
453
536
  }
454
537
 
@@ -503,6 +586,23 @@ function renderPlaybook(type, vars) {
503
586
  } catch (e) { log('warn', `managed-spawn live-processes inject failed: ${e.message}`); }
504
587
  }
505
588
 
589
+ // W-mpeiwz6k0005bf34-c — opt-in qa-validate context block. Injected only
590
+ // when the dispatcher set vars.qa_run_id (truthy) from the work item's
591
+ // `meta.qaRunId`. Mirrors the managed_spawn hint pattern: the playbook is
592
+ // pure markdown; this block surfaces the live runbook + target snapshot so
593
+ // the agent doesn't need to re-resolve them from disk.
594
+ if (vars.qa_run_id) {
595
+ try {
596
+ const block = buildQaValidateContextBlock({
597
+ runId: vars.qa_run_id,
598
+ runbook: vars.qa_runbook,
599
+ target: vars.qa_target,
600
+ artifactsDir: vars.qa_artifacts_dir,
601
+ });
602
+ if (block) inertAppendices.push(block);
603
+ } catch (e) { log('warn', `qa-validate context render failed: ${e.message}`); }
604
+ }
605
+
506
606
  // Inject KB guardrail
507
607
  content += `\n\n---\n\n## Knowledge Base Rules\n\n`;
508
608
  content += `**Never delete, move, or overwrite files in \`knowledge/\`.** The sweep (consolidation engine) is the only process that writes to \`knowledge/\`. If you think a KB file is wrong, note it in your learnings file — do not touch \`knowledge/\` directly.\n`;
@@ -846,6 +946,15 @@ function buildBaseVars(agentId, config, project) {
846
946
  }
847
947
 
848
948
  function selectPlaybook(workType, item) {
949
+ // W-mpeiwz6k0005bf34-c — explicit playbook override via item.meta.playbook.
950
+ // Used by /api/qa/runbooks/run to route a `test`-type work item to the
951
+ // qa-validate playbook without minting a new work-type. Validated against
952
+ // PLAYBOOK_REQUIRED_VARS so a typo'd override falls through to work-item
953
+ // rather than mis-rendering.
954
+ const playbookOverride = (item?.meta?.playbook || item?.playbook || '').toString().trim();
955
+ if (playbookOverride && PLAYBOOK_REQUIRED_VARS[playbookOverride]) {
956
+ return playbookOverride;
957
+ }
849
958
  if (item?.branchStrategy === 'shared-branch' && (workType === WORK_TYPE.IMPLEMENT || workType === WORK_TYPE.IMPLEMENT_LARGE)) {
850
959
  return 'implement-shared';
851
960
  }
@@ -893,6 +1002,7 @@ module.exports = {
893
1002
  selectPlaybook,
894
1003
  buildBaseVars,
895
1004
  buildPrDispatch,
1005
+ buildQaValidateContextBlock,
896
1006
  resolveTaskContext,
897
1007
  // Repo host helpers (used by engine.js for buildProjectContext)
898
1008
  getRepoHost,
package/engine/qa-runs.js CHANGED
@@ -43,8 +43,23 @@ const TERMINAL_STATUSES = new Set([
43
43
  ]);
44
44
 
45
45
  // Allowed forward transitions. Anything not enumerated here is rejected.
46
+ //
47
+ // PR #2697 review fix (W-mpeiwz6k0005bf34-c — Ripley): the lifecycle hook in
48
+ // engine/lifecycle.js parses the agent's qa-run-result.json sidecar and calls
49
+ // completeRun({status: 'passed'|'failed'|'errored'}) directly. It never calls
50
+ // markRunning, because the agent may crash before writing the sidecar (in
51
+ // which case the hook still needs to mark the run errored from `pending`).
52
+ // Allowing pending → {passed,failed,errored} keeps the production path from
53
+ // throwing "illegal transition" inside the hook's try/catch and leaving the
54
+ // run perma-pending. The state machine still rejects double-completion
55
+ // (terminal → terminal) so race-y double-writes can't silently overwrite.
46
56
  const ALLOWED_TRANSITIONS = {
47
- [QA_RUN_STATUS.PENDING]: new Set([QA_RUN_STATUS.RUNNING]),
57
+ [QA_RUN_STATUS.PENDING]: new Set([
58
+ QA_RUN_STATUS.RUNNING,
59
+ QA_RUN_STATUS.PASSED,
60
+ QA_RUN_STATUS.FAILED,
61
+ QA_RUN_STATUS.ERRORED,
62
+ ]),
48
63
  [QA_RUN_STATUS.RUNNING]: new Set([
49
64
  QA_RUN_STATUS.PASSED,
50
65
  QA_RUN_STATUS.FAILED,
@@ -259,6 +274,31 @@ function getRunsForWorkItem(wi) {
259
274
  });
260
275
  }
261
276
 
277
/**
 * Back-fill `workItemId` on an already-stored QA run record.
 *
 * Used by the qa-validate dispatch endpoint (dashboard.js handleQaRunbookRun)
 * when the work item is created after the run record, so the dashboard can
 * join the two. The read-modify-write is delegated to `mutateJsonFileLocked`
 * against `qaRunsPath()`.
 *
 * @param {string} id - run id; a falsy id short-circuits to null without touching the file
 * @param {string|null} workItemId - work-item id (any falsy value clears the link to null)
 * @returns {object|null} the updated run record, or null when no run has that id
 */
function setRunWorkItemId(id, workItemId) {
  if (!id) {
    return null;
  }

  let updatedRun = null;

  // Mutator handed to the locked writer: tolerate a non-array payload, patch
  // the first record whose id matches, and hand the list back for persisting.
  const applyBackfill = (stored) => {
    const records = Array.isArray(stored) ? stored : [];
    for (const record of records) {
      if (record && record.id === id) {
        record.workItemId = workItemId || null;
        updatedRun = record;
        break;
      }
    }
    return records;
  };

  mutateJsonFileLocked(qaRunsPath(), applyBackfill, { defaultValue: [] });
  return updatedRun;
}
301
+
262
302
  module.exports = {
263
303
  QA_RUN_STATUS,
264
304
  TERMINAL_STATUSES,
@@ -269,6 +309,7 @@ module.exports = {
269
309
  createRun,
270
310
  markRunning,
271
311
  completeRun,
312
+ setRunWorkItemId,
272
313
  getRun,
273
314
  listRuns,
274
315
  getRunsForWorkItem,
package/engine/queries.js CHANGED
@@ -1401,18 +1401,31 @@ function getPrdInfo(config) {
1401
1401
  const items = allPrdItems;
1402
1402
  const total = items.length;
1403
1403
 
1404
- // Build work item lookup — work item ID = PRD item ID
1404
+ // Build work item lookups:
1405
+ // wiById — PRD-item-keyed (sourcePlan only) — used by status sync + plan timings below
1406
+ // allWiById — every WI, used solely by countDistinctPrdItems() to resolve sibling
1407
+ // sub-WIs (e.g. review-followup WIs) back to their owning PRD item
1408
+ // so they don't masquerade as a 2nd PRD item in the aggregate guard (W-mpem52qn).
1405
1409
  const wiById = {};
1410
+ const allWiById = {};
1406
1411
  for (const project of projects) {
1407
1412
  try {
1408
1413
  const workItems = readJsonNoRestore(projectWorkItemsPath(project)) || [];
1409
- for (const wi of workItems) { if (!wi?.id) { console.warn(`[queries] Skipping work item without id in ${project.name}:`, JSON.stringify(wi).slice(0, 120)); continue; } if (wi.sourcePlan) wiById[wi.id] = wi; }
1414
+ for (const wi of workItems) {
1415
+ if (!wi?.id) { console.warn(`[queries] Skipping work item without id in ${project.name}:`, JSON.stringify(wi).slice(0, 120)); continue; }
1416
+ if (!allWiById[wi.id]) allWiById[wi.id] = wi;
1417
+ if (wi.sourcePlan) wiById[wi.id] = wi;
1418
+ }
1410
1419
  } catch { /* optional */ }
1411
1420
  }
1412
1421
  // Also check central work-items.json
1413
1422
  try {
1414
1423
  const centralWi = readJsonNoRestore(path.join(MINIONS_DIR, 'work-items.json')) || [];
1415
- for (const wi of centralWi) { if (!wi?.id) { console.warn('[queries] Skipping central work item without id:', JSON.stringify(wi).slice(0, 120)); continue; } if (wi.sourcePlan && !wiById[wi.id]) wiById[wi.id] = wi; }
1424
+ for (const wi of centralWi) {
1425
+ if (!wi?.id) { console.warn('[queries] Skipping central work item without id:', JSON.stringify(wi).slice(0, 120)); continue; }
1426
+ if (!allWiById[wi.id]) allWiById[wi.id] = wi;
1427
+ if (wi.sourcePlan && !wiById[wi.id]) wiById[wi.id] = wi;
1428
+ }
1416
1429
  } catch { /* optional */ }
1417
1430
 
1418
1431
  // PR-to-PRD linking — derived from PR.prdItems (single source of truth).
@@ -1422,14 +1435,43 @@ function getPrdInfo(config) {
1422
1435
  const prById = {};
1423
1436
  for (const pr of allPrs) prById[pr.id] = pr;
1424
1437
 
1438
+ // Set of every known PRD item ID across all scanned PRD JSON files. Used to
1439
+ // distinguish "this itemId is a PRD item" from "this itemId is a sub-WI" when
1440
+ // counting how many distinct PRD items a PR truly spans.
1441
+ const prdItemIdSet = new Set();
1442
+ for (const it of allPrdItems) { if (it && typeof it.id === 'string' && it.id) prdItemIdSet.add(it.id); }
1443
+
1444
+ // Resolve a PR's prdItems list to the Set of distinct PRD items it actually
1445
+ // belongs to. A PRD item + N sibling sub-WIs (review-followups, decomposition
1446
+ // children) all resolve to size 1 — they're one PRD item's PR. Only PRs that
1447
+ // genuinely span 2+ distinct PRD items return size ≥ 2. (W-mpem52qn)
1448
/**
 * Resolve a PR's linked item ids to the set of distinct PRD items they belong to.
 *
 * An id that is itself a known PRD item counts as that item. Any other id is
 * looked up in `allWiById`; if that work item's `parent_id` is a known PRD
 * item, the parent is counted instead. Ids that resolve to neither are ignored.
 *
 * @param {string[]|null|undefined} itemIds - raw ids linked to one PR
 * @returns {Set<string>} distinct PRD item ids (callers read `.size`)
 */
function countDistinctPrdItems(itemIds) {
  const owningPrdItems = new Set();
  const candidates = itemIds || [];

  for (const candidate of candidates) {
    const isUsableId = typeof candidate === 'string' && candidate !== '';
    if (!isUsableId) continue;

    if (prdItemIdSet.has(candidate)) {
      owningPrdItems.add(candidate);
    } else {
      // Sub-WIs (review follow-ups, decomposition children) point back at
      // their PRD item through parent_id.
      const linkedWi = allWiById[candidate];
      const parentId = linkedWi ? linkedWi.parent_id : undefined;
      if (typeof parentId === 'string' && prdItemIdSet.has(parentId)) {
        owningPrdItems.add(parentId);
      }
    }
  }

  return owningPrdItems;
}
1462
+
1425
1463
  const prdToPr = {};
1426
1464
  const prLinks = shared.getPrLinks(); // { "PR-xxxx": ["P-xxxx", "P-yyyy"] }
1427
1465
  for (const [prId, itemIds] of Object.entries(prLinks)) {
1428
1466
  const pr = prById[prId];
1429
- // Skip aggregate / E2E PRs from per-item mapping — they link to multiple items
1430
- // (or are typed as verify) and would bleed through as duplicate entries on every
1431
- // constituent item. They are surfaced via renderE2eSection instead. (#1220)
1432
- if ((itemIds || []).length > 1 || pr?.itemType === 'verify' || pr?.title?.startsWith('[E2E]')) continue;
1467
+ // Skip aggregate / E2E PRs from per-item mapping — they link to multiple
1468
+ // PRD items (or are typed as verify) and would bleed through as duplicate
1469
+ // entries on every constituent item. They are surfaced via renderE2eSection
1470
+ // instead. (#1220) The aggregate check counts DISTINCT PRD items the PR
1471
+ // resolves to, not raw itemIds.length: a PRD item + sibling review-followup
1472
+ // sub-WIs all resolve to one PRD item and must still render. (W-mpem52qn)
1473
+ const distinctPrdCount = countDistinctPrdItems(itemIds).size;
1474
+ if (distinctPrdCount > 1 || pr?.itemType === 'verify' || pr?.title?.startsWith('[E2E]')) continue;
1433
1475
  const url = buildPrUrlFromId(prId, pr, projects);
1434
1476
  for (const itemId of (itemIds || [])) {
1435
1477
  if (!prdToPr[itemId]) prdToPr[itemId] = [];
package/engine/shared.js CHANGED
@@ -1784,6 +1784,7 @@ const ENGINE_DEFAULTS = {
1784
1784
  maxReferencedNotesBytes: 5 * 1024, // cap referenced inbox note excerpts injected via task context resolution
1785
1785
  maxResolvedTaskContextBytes: 20 * 1024, // bound the total implicit context injected from referenced plans/notes
1786
1786
  maxNotesPromptBytes: 8 * 1024, // cap Team Notes injected into every playbook prompt
1787
+ untrustedFenceMaxBytes: 64 * 1024, // F5 (W-mpeklod3000we69c): per-block cap for `<UNTRUSTED-INPUT>` fences in engine/untrusted-fence.js. 64KB is long enough for realistic PR comments / pinned notes / agent memory sections, short enough that a megabyte-bomb comment cannot blow up the prompt. Content above the cap is truncated INSIDE the fence with a `[truncated N more bytes]` marker so the agent still sees the provenance attribute.
1787
1788
  maxMeetingPromptBytes: 16 * 1024, // cap meeting findings/debate context injected into prompts
1788
1789
  maxMeetingHumanNotesBytes: 2 * 1024, // cap human note bullet lists injected into meeting prompts
1789
1790
  maxPipelineMeetingContextBytes: 16 * 1024, // cap aggregated meeting/dependency context for pipeline plan generation
@@ -2597,6 +2598,7 @@ const FAILURE_CLASS = {
2597
2598
  INVALID_KEEP_PROCESSES_SCHEMA: 'invalid-keep-processes-schema', // W-mp7i902u000l991f: keep-pids.json failed validation for a reason other than workdir (pids-missing, ttl-too-long, expires_at-missing, pids-too-many, port-invalid, etc.) — agent wrote the wrong shape; never retryable until they fix the file
2598
2599
  INVALID_MANAGED_SPAWN: 'invalid-managed-spawn', // P-7a3b1c92: agents/<id>/managed-spawn.json failed validator (bad schema, broken workdir, executable/env not on allowlist, healthcheck shape wrong). Engine refuses to spawn any spec — agent must fix file; never retryable as-is.
2599
2600
  MANAGED_SPAWN_HEALTHCHECK_FAILED: 'managed-spawn-healthcheck-failed', // P-7a3b1c92: at least one managed-spawn spec was spawned but failed its healthcheck within timeout_s. Engine killed the failing PIDs; siblings stay alive. Dispatch ERROR with the failing spec name + log tail surfaced in the inbox alert.
2601
+ INJECTION_FLAGGED: 'injection-flagged', // F5 (W-mpeklod3000we69c): the agent set `securityFlags.injectionAttempt:true` in its completion report after spotting a prompt-injection attempt inside an <UNTRUSTED-INPUT> fence. Engine writes a security inbox note + stamps `_securityFlag` on the WI and treats the dispatch as non-retryable so a human can review the source before the agent re-runs.
2600
2602
  UNKNOWN: 'unknown', // Unclassified failure
2601
2603
  };
2602
2604
  const ESCALATION_POLICY = {
@@ -2608,7 +2610,7 @@ const ESCALATION_POLICY = {
2608
2610
  };
2609
2611
 
2610
2612
  // Structured completion protocol — fields agents must produce in ```completion blocks
2611
- const COMPLETION_FIELDS = ['status', 'summary', 'files_changed', 'tests', 'pr', 'not_changed', 'failure_class', 'retryable', 'needs_rerun', 'verdict', 'artifacts', 'nonce'];
2613
+ const COMPLETION_FIELDS = ['status', 'summary', 'files_changed', 'tests', 'pr', 'not_changed', 'failure_class', 'retryable', 'needs_rerun', 'verdict', 'artifacts', 'nonce', 'securityFlags'];
2612
2614
 
2613
2615
  const DEFAULT_AGENT_METRICS = {
2614
2616
  tasksCompleted: 0, tasksErrored: 0,
@@ -0,0 +1,184 @@
1
+ /**
2
+ * engine/untrusted-fence.js — F5 (W-mpeklod3000we69c).
3
+ *
4
+ * Wraps human-authored / external content in
5
+ * <UNTRUSTED-INPUT source="…">…</UNTRUSTED-INPUT>
6
+ * fences before splicing it into agent prompts. Pairs with the directive in
7
+ * `playbooks/shared-rules.md` and `prompts/cc-system.md` that teaches agents
8
+ * to treat fenced content as data, not instructions.
9
+ *
10
+ * Zero dependencies beyond `engine/shared` (for the ENGINE_DEFAULTS byte cap).
11
+ * Pure helpers — safe to call from poll-time, render-time, and consolidation
12
+ * paths. Source attributes are sanitized so attacker-influenced parts
13
+ * (PR comment author, file paths) cannot break out of the fence header.
14
+ *
15
+ * Contributors adding a new splice site that includes human-authored,
16
+ * external, or otherwise-untrusted content into a prompt MUST wrap it with
17
+ * `wrapUntrusted(content, source)` (or `wrapUntrustedBlock`) — see
18
+ * `docs/security.md` §5 and `CLAUDE.md` "F5" for the policy.
19
+ */
20
+
21
+ const FENCE_OPEN_PREFIX = '<UNTRUSTED-INPUT';
22
+ const FENCE_CLOSE = '</UNTRUSTED-INPUT>';
23
+ const FENCE_CLOSE_ESCAPED = '</UNTRUSTED-INPUT-ESCAPED>';
24
+
25
+ // Match any flavor of the closing tag that an attacker might try to inject:
26
+ // </UNTRUSTED-INPUT> — bare closer
27
+ // </untrusted-input> — lowercase
28
+ // </UNTRUSTED-INPUT > — trailing space before '>'
29
+ // </UNTRUSTED-INPUT attr="x"> — attributes before '>'
30
+ // The attribute part is an optional non-capturing group; every match is rewritten
31
+ // to the canonical escaped marker, dropping any pretend-attribute.
32
+ const INNER_CLOSE_RE = /<\/UNTRUSTED-INPUT(?:\s[^>]*)?>/gi;
33
+
34
/**
 * Resolve `engine/shared` at call time rather than at module load.
 *
 * The late require keeps this module loadable in isolated test contexts that
 * evict `engine/shared` from require.cache between runs.
 *
 * @returns {object} the exports of `./shared`
 */
function _shared() {
  const sharedModule = require('./shared');
  return sharedModule;
}
39
+
40
/**
 * Per-fence byte cap.
 *
 * Reads `ENGINE_DEFAULTS.untrustedFenceMaxBytes` from the shared module and
 * uses it when it is a positive number; any other value — or any error while
 * loading/reading the shared module — falls back to 64 KiB.
 *
 * @returns {number} maximum number of bytes allowed inside one fence
 */
function _maxBytes() {
  const FALLBACK_CAP_BYTES = 64 * 1024;

  let configured;
  try {
    const defaults = _shared().ENGINE_DEFAULTS;
    configured = defaults && defaults.untrustedFenceMaxBytes;
  } catch {
    // Shared module unavailable — use the built-in cap.
    return FALLBACK_CAP_BYTES;
  }

  const isUsable = typeof configured === 'number' && configured > 0;
  return isUsable ? configured : FALLBACK_CAP_BYTES;
}
48
+
49
/**
 * Truncate a value's UTF-8 encoding to at most `maxBytes` bytes without
 * splitting a multi-byte code point.
 *
 * @param {*} str - value to truncate; coerced with String()
 * @param {number} maxBytes - byte budget. Negative, fractional, or NaN budgets
 *   are clamped to a whole, non-negative byte count when truncation is needed.
 * @returns {{text: string, truncatedBytes: number}} the kept text and how many
 *   bytes of the original encoding were dropped (0 when nothing was cut)
 */
function _truncateUtf8(str, maxBytes) {
  const original = String(str);
  const buf = Buffer.from(original, 'utf8');
  if (buf.length <= maxBytes) return { text: original, truncatedBytes: 0 };

  // Normalise the budget so the reported truncatedBytes is always a whole
  // number between 0 and buf.length.
  let cut = Math.max(0, Math.floor(maxBytes) || 0);

  // Step back while the byte at the cut is a UTF-8 continuation byte
  // (10xxxxxx); cutting there would leave a partial sequence that decodes to
  // a replacement character.
  while (cut > 0 && (buf[cut] & 0xC0) === 0x80) cut--;

  // subarray() is the non-deprecated equivalent of Buffer#slice().
  const head = buf.subarray(0, cut).toString('utf8');
  return { text: head, truncatedBytes: buf.length - cut };
}
59
+
60
/**
 * Neutralise fence-closing tags embedded in untrusted content.
 *
 * Every match of INNER_CLOSE_RE is replaced with the FENCE_CLOSE_ESCAPED
 * marker so embedded text cannot contain a working closing tag.
 *
 * @param {*} content - value to scan; coerced with String()
 * @returns {string} the content with every inner closer rewritten
 */
function _escapeInnerClosers(content) {
  const text = String(content);
  return text.replace(INNER_CLOSE_RE, () => FENCE_CLOSE_ESCAPED);
}
63
+
64
/**
 * Make a value safe to embed in the fence header's source="…" attribute.
 *
 * This is a denylist, not an allowlist: the characters that could terminate
 * or escape the attribute (`<`, `>`, `"`, `'`, `&`, backtick) are removed,
 * each run of whitespace (including CR/LF/TAB) collapses to a single `_`, and
 * the result is capped at 200 UTF-16 code units. All other characters,
 * including non-ASCII ones, pass through unchanged.
 *
 * @param {*} value - raw token; null/undefined become ''
 * @returns {string} sanitized token, at most 200 UTF-16 code units long
 */
function _sanitizeSourceToken(value) {
  const MAX_UNITS = 200;
  const cleaned = String(value == null ? '' : value)
    .replace(/[<>"'&`]/g, '')
    .replace(/\s+/g, '_');
  if (cleaned.length <= MAX_UNITS) return cleaned;

  const clipped = cleaned.slice(0, MAX_UNITS);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop the orphaned high surrogate rather than
  // emit an ill-formed string.
  const last = clipped.charCodeAt(clipped.length - 1);
  const endsOnHighSurrogate = last >= 0xD800 && last <= 0xDBFF;
  return endsOnHighSurrogate ? clipped.slice(0, -1) : clipped;
}
74
+
75
/**
 * Build the canonical string used as a fence's `source` attribute.
 *
 * Every piece is passed through `_sanitizeSourceToken`; segments are joined
 * with `:` in a fixed, kind-specific order so tests can assert literal output.
 *
 * Shapes by kind:
 *   'pr-comment' { host, slug, number, author } →
 *       'pr-comment:<host>:<slug>#<number>:author=<author>'
 *   'pr-comment' { host: 'ado', org, project, repo, number, author } →
 *       'pr-comment:ado:<org>/<project>/<repo>!<number>:author=<author>'
 *   'pinned-note' | 'team-notes' | 'agent-memory' | 'wi-reference'
 *     | 'doc-content' | 'doc-selection' { path } → '<kind>:<path>'
 *   'inbox' { filename }           → 'inbox:<filename>'
 *   'wi-description' { wi }        → 'wi-description:<wi>'
 *   'human-feedback' { wi, author } → 'human-feedback:<wi>:author=<author>'
 *   'ci-log' { host, job, run }    → 'ci-log:<host>:<job>:<run>'
 *
 * Any other kind falls back to `<kind>:k=v:k=v`, with pairs emitted in the
 * insertion order of `parts` and empty values skipped.
 *
 * @param {string} kind - source category; falsy values become 'untrusted'
 * @param {object} [parts] - kind-specific fields; when missing or not an
 *   object only the sanitized kind is returned
 * @returns {string} canonical source string
 */
function buildSource(kind, parts) {
  const label = _sanitizeSourceToken(kind || 'untrusted');
  if (!parts || typeof parts !== 'object') {
    return label;
  }

  // Sanitized field lookup; null/undefined fields read as ''.
  const clean = (field) => {
    const raw = parts[field];
    return raw == null ? '' : _sanitizeSourceToken(raw);
  };
  // Join the non-empty segments with ':'.
  const joinSegments = (...segments) => segments.filter(Boolean).join(':');
  // 'author=<name>' when an author is present, otherwise ''.
  const authorSegment = () => {
    const who = clean('author');
    return who && `author=${who}`;
  };
  // '<kind>:<field>' when the raw field is truthy, otherwise just the kind.
  const withSingleField = (field) => (parts[field] ? `${label}:${clean(field)}` : label);

  switch (label) {
    case 'pr-comment': {
      const host = clean('host');
      let location;
      if (host === 'ado') {
        const ref = ['org', 'project', 'repo']
          .map((field) => clean(field))
          .filter(Boolean)
          .join('/');
        const number = clean('number');
        location = number ? `${ref}!${number}` : ref;
      } else {
        const slug = clean('slug');
        const number = clean('number');
        location = number ? `${slug}#${number}` : slug;
      }
      return joinSegments(label, host, location, authorSegment());
    }

    case 'pinned-note':
    case 'team-notes':
    case 'agent-memory':
    case 'wi-reference':
    case 'doc-content':
    case 'doc-selection':
      return withSingleField('path');

    case 'inbox':
      return withSingleField('filename');

    case 'wi-description':
      return withSingleField('wi');

    case 'human-feedback':
      return joinSegments(label, clean('wi'), authorSegment());

    case 'ci-log':
      return joinSegments(label, clean('host'), clean('job'), clean('run'));

    default: {
      // Generic fallback: one key=value pair per non-empty field, in the
      // insertion order reported by Object.keys.
      const pairs = [];
      for (const key of Object.keys(parts)) {
        const value = clean(key);
        if (value) pairs.push(`${_sanitizeSourceToken(key)}=${value}`);
      }
      return [label, ...pairs].join(':');
    }
  }
}
146
+
147
/**
 * Wrap `content` in an `<UNTRUSTED-INPUT source="…">…</UNTRUSTED-INPUT>` fence.
 *
 * Embedded closing tags are escaped first, then the escaped text is capped at
 * `_maxBytes()` bytes; when anything is cut, a `[truncated N more bytes]`
 * marker is appended inside the fence. The source attribute is sanitized
 * before being written into the header.
 *
 * @param {*} content - text to fence; null/undefined are treated as ''
 * @param {string} [source] - provenance label; falsy values become 'untrusted'
 * @returns {string} the fenced block, or '' when the content is empty or
 *   whitespace-only (so callers never render an empty fence)
 */
function wrapUntrusted(content, source) {
  const text = content == null ? '' : String(content);
  if (text.trim() === '') {
    return '';
  }

  const clipped = _truncateUtf8(_escapeInnerClosers(text), _maxBytes());

  let body = clipped.text;
  if (clipped.truncatedBytes > 0) {
    body += `\n\n[truncated ${clipped.truncatedBytes} more bytes]`;
  }

  const header = `${FENCE_OPEN_PREFIX} source="${_sanitizeSourceToken(source || 'untrusted')}">`;
  return header + body + FENCE_CLOSE;
}
166
+
167
/**
 * Same as `wrapUntrusted`, but the fence is preceded by a blank line (`\n\n`)
 * so callers can append it to existing text without handling adjacency.
 *
 * @param {*} content - text to fence
 * @param {string} [source] - provenance label
 * @returns {string} '\n\n' + fence, or '' when `wrapUntrusted` yields nothing
 */
function wrapUntrustedBlock(content, source) {
  const fence = wrapUntrusted(content, source);
  if (!fence) {
    return '';
  }
  return `\n\n${fence}`;
}
175
+
176
+ module.exports = {
177
+ wrapUntrusted,
178
+ wrapUntrustedBlock,
179
+ buildSource,
180
+ // Constants exported for source-inspection tests.
181
+ FENCE_OPEN_PREFIX,
182
+ FENCE_CLOSE,
183
+ FENCE_CLOSE_ESCAPED,
184
+ };
package/engine.js CHANGED
@@ -4535,6 +4535,17 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
4535
4535
  managed_spawn_ttl_minutes: item.meta && Number.isFinite(Number(item.meta.managed_spawn_ttl_minutes))
4536
4536
  ? Math.floor(Number(item.meta.managed_spawn_ttl_minutes))
4537
4537
  : '',
4538
+ // W-mpeiwz6k0005bf34-c — opt-in qa-validate context. The dispatch handler
4539
+ // POST /api/qa/runbooks/run stamps meta.qaRunId + meta.qaRunbook (full
4540
+ // spec) + meta.qaTarget (managed-process snapshot) on the work item;
4541
+ // renderPlaybook injects them as a QA Run Context block + the
4542
+ // qa-validate playbook references these vars by template literal.
4543
+ qa_run_id: (item.meta && item.meta.qaRunId) || '',
4544
+ qa_runbook: (item.meta && item.meta.qaRunbook) || null,
4545
+ qa_target: (item.meta && item.meta.qaTarget) || null,
4546
+ qa_artifacts_dir: item.meta && item.meta.qaRunId
4547
+ ? path.posix.join('engine', 'qa-artifacts', String(item.meta.qaRunId))
4548
+ : '',
4538
4549
  };
4539
4550
  const cpResult = buildWorkItemDispatchVars(item, vars, config, {
4540
4551
  worktreePath: vars.worktree_path || root,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1996",
3
+ "version": "0.1.1997",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"