@xn-intenton-z2a/agentic-lib 7.2.4 → 7.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/agentic-lib-init.yml +56 -0
- package/.github/workflows/agentic-lib-test.yml +7 -2
- package/.github/workflows/agentic-lib-workflow.yml +55 -8
- package/agentic-lib.toml +7 -0
- package/package.json +1 -1
- package/src/actions/agentic-step/config-loader.js +9 -0
- package/src/actions/agentic-step/index.js +104 -7
- package/src/actions/agentic-step/tasks/direct.js +428 -0
- package/src/actions/agentic-step/tasks/supervise.js +100 -180
- package/src/agents/agent-director.md +58 -0
- package/src/agents/agent-supervisor.md +22 -50
- package/src/seeds/zero-behaviour.test.js +4 -4
- package/src/seeds/zero-package.json +1 -1
- package/src/seeds/zero-playwright.config.js +1 -0
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// asks the Copilot SDK to choose multiple concurrent actions, then dispatches them.
|
|
7
7
|
|
|
8
8
|
import * as core from "@actions/core";
|
|
9
|
-
import { existsSync, readFileSync,
|
|
9
|
+
import { existsSync, readFileSync, readdirSync, statSync } from "fs";
|
|
10
10
|
import { runCopilotTask, readOptionalFile, scanDirectory, filterIssues } from "../copilot.js";
|
|
11
11
|
|
|
12
12
|
/**
|
|
@@ -305,6 +305,61 @@ async function gatherContext(octokit, repo, config, t) {
|
|
|
305
305
|
}
|
|
306
306
|
} catch { /* ignore */ }
|
|
307
307
|
|
|
308
|
+
// Check for dedicated test files (not just seed tests)
|
|
309
|
+
// A dedicated test imports from the source directory (src/lib/) rather than being a seed test
|
|
310
|
+
let hasDedicatedTests = false;
|
|
311
|
+
let dedicatedTestFiles = [];
|
|
312
|
+
try {
|
|
313
|
+
const testDirs = ["tests", "__tests__"];
|
|
314
|
+
for (const dir of testDirs) {
|
|
315
|
+
if (existsSync(dir)) {
|
|
316
|
+
const testFiles = scanDirectory(dir, [".js", ".ts", ".mjs"], { limit: 20 });
|
|
317
|
+
for (const tf of testFiles) {
|
|
318
|
+
// Skip seed test files (main.test.js, web.test.js, behaviour.test.js)
|
|
319
|
+
if (/^(main|web|behaviour)\.test\.[jt]s$/.test(tf.name)) continue;
|
|
320
|
+
const content = readFileSync(tf.path, "utf8");
|
|
321
|
+
// Check if it imports from src/lib/ (mission-specific code)
|
|
322
|
+
if (/from\s+['"].*src\/lib\//.test(content) || /require\s*\(\s*['"].*src\/lib\//.test(content)) {
|
|
323
|
+
hasDedicatedTests = true;
|
|
324
|
+
dedicatedTestFiles.push(tf.name);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
} catch { /* ignore */ }
|
|
330
|
+
|
|
331
|
+
// W9: Count TODO comments in source directory
|
|
332
|
+
let sourceTodoCount = 0;
|
|
333
|
+
try {
|
|
334
|
+
const sourcePath = config.paths.source?.path || "src/lib/";
|
|
335
|
+
const sourceDir = sourcePath.endsWith("/") ? sourcePath.slice(0, -1) : sourcePath;
|
|
336
|
+
const srcRoot = sourceDir.includes("/") ? sourceDir.split("/").slice(0, -1).join("/") || "src" : "src";
|
|
337
|
+
// Inline recursive TODO counter (avoids circular import with index.js)
|
|
338
|
+
const countTodos = (dir) => {
|
|
339
|
+
let n = 0;
|
|
340
|
+
if (!existsSync(dir)) return 0;
|
|
341
|
+
try {
|
|
342
|
+
const entries = readdirSync(dir);
|
|
343
|
+
for (const entry of entries) {
|
|
344
|
+
if (entry === "node_modules" || entry.startsWith(".")) continue;
|
|
345
|
+
const fp = `${dir}/${entry}`;
|
|
346
|
+
try {
|
|
347
|
+
const stat = statSync(fp);
|
|
348
|
+
if (stat.isDirectory()) {
|
|
349
|
+
n += countTodos(fp);
|
|
350
|
+
} else if (/\.(js|ts|mjs)$/.test(entry)) {
|
|
351
|
+
const content = readFileSync(fp, "utf8");
|
|
352
|
+
const m = content.match(/\bTODO\b/gi);
|
|
353
|
+
if (m) n += m.length;
|
|
354
|
+
}
|
|
355
|
+
} catch { /* skip */ }
|
|
356
|
+
}
|
|
357
|
+
} catch { /* skip */ }
|
|
358
|
+
return n;
|
|
359
|
+
};
|
|
360
|
+
sourceTodoCount = countTodos(srcRoot);
|
|
361
|
+
} catch { /* ignore */ }
|
|
362
|
+
|
|
308
363
|
return {
|
|
309
364
|
mission,
|
|
310
365
|
recentActivity,
|
|
@@ -333,10 +388,13 @@ async function gatherContext(octokit, repo, config, t) {
|
|
|
333
388
|
cumulativeTransformationCost,
|
|
334
389
|
recentlyClosedSummary,
|
|
335
390
|
sourceExports,
|
|
391
|
+
hasDedicatedTests,
|
|
392
|
+
dedicatedTestFiles,
|
|
393
|
+
sourceTodoCount,
|
|
336
394
|
};
|
|
337
395
|
}
|
|
338
396
|
|
|
339
|
-
function buildPrompt(ctx, agentInstructions) {
|
|
397
|
+
function buildPrompt(ctx, agentInstructions, config) {
|
|
340
398
|
return [
|
|
341
399
|
"## Instructions",
|
|
342
400
|
agentInstructions,
|
|
@@ -368,6 +426,39 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
368
426
|
"",
|
|
369
427
|
]
|
|
370
428
|
: []),
|
|
429
|
+
`### Test Coverage`,
|
|
430
|
+
ctx.hasDedicatedTests
|
|
431
|
+
? `Dedicated test files: ${ctx.dedicatedTestFiles.join(", ")}`
|
|
432
|
+
: "**No dedicated test files found.** Only seed tests (main.test.js, web.test.js) exist. Mission-complete requires dedicated tests that import from src/lib/.",
|
|
433
|
+
"",
|
|
434
|
+
`### Source TODO Count: ${ctx.sourceTodoCount}`,
|
|
435
|
+
ctx.sourceTodoCount > 0
|
|
436
|
+
? `**${ctx.sourceTodoCount} TODO(s) found in source.** All TODOs must be resolved before mission-complete can be declared.`
|
|
437
|
+
: "No TODOs found in source — this criterion is met.",
|
|
438
|
+
"",
|
|
439
|
+
...(() => {
|
|
440
|
+
// W10: Build mission-complete metrics inline for the LLM
|
|
441
|
+
const thresholds = config?.missionCompleteThresholds || {};
|
|
442
|
+
const minResolved = thresholds.minResolvedIssues ?? 3;
|
|
443
|
+
const requireTests = thresholds.requireDedicatedTests ?? true;
|
|
444
|
+
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
445
|
+
const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
|
|
446
|
+
const rows = [
|
|
447
|
+
`### Mission-Complete Metrics`,
|
|
448
|
+
"| Metric | Value | Target | Status |",
|
|
449
|
+
"|--------|-------|--------|--------|",
|
|
450
|
+
`| Open issues | ${ctx.issuesSummary.length} | 0 | ${ctx.issuesSummary.length === 0 ? "MET" : "NOT MET"} |`,
|
|
451
|
+
`| Open PRs | ${ctx.prsSummary.length} | 0 | ${ctx.prsSummary.length === 0 ? "MET" : "NOT MET"} |`,
|
|
452
|
+
`| Issues resolved (RESOLVED) | ${resolvedCount} | >= ${minResolved} | ${resolvedCount >= minResolved ? "MET" : "NOT MET"} |`,
|
|
453
|
+
`| Dedicated test files | ${ctx.hasDedicatedTests ? "YES" : "NO"} | ${requireTests ? "YES" : "—"} | ${!requireTests || ctx.hasDedicatedTests ? "MET" : "NOT MET"} |`,
|
|
454
|
+
`| Source TODO count | ${ctx.sourceTodoCount} | <= ${maxTodos} | ${ctx.sourceTodoCount <= maxTodos ? "MET" : "NOT MET"} |`,
|
|
455
|
+
`| Budget used | ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} | < ${ctx.transformationBudget || "unlimited"} | ${ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget ? "EXHAUSTED" : "OK"} |`,
|
|
456
|
+
"",
|
|
457
|
+
"**All metrics must show MET/OK for mission-complete to be declared.**",
|
|
458
|
+
"",
|
|
459
|
+
];
|
|
460
|
+
return rows;
|
|
461
|
+
})(),
|
|
371
462
|
`### Recent Workflow Runs`,
|
|
372
463
|
ctx.workflowsSummary.join("\n") || "none",
|
|
373
464
|
"",
|
|
@@ -419,7 +510,7 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
419
510
|
]
|
|
420
511
|
: []),
|
|
421
512
|
...(ctx.transformationBudget > 0
|
|
422
|
-
? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, ""]
|
|
513
|
+
? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, "Note: instability transforms (infrastructure fixes) do not count against this budget.", ""]
|
|
423
514
|
: []),
|
|
424
515
|
`### Issue Limits`,
|
|
425
516
|
`Feature development WIP limit: ${ctx.featureIssuesWipLimit}`,
|
|
@@ -444,10 +535,6 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
444
535
|
"### Communication",
|
|
445
536
|
"- `respond:discussions | message: <text> | discussion-url: <url>` — Reply via discussions bot",
|
|
446
537
|
"",
|
|
447
|
-
"### Mission Lifecycle",
|
|
448
|
-
"- `mission-complete | reason: <text>` — Declare mission accomplished. Writes MISSION_COMPLETE.md and sets schedule to off. Use when: all acceptance criteria in MISSION.md are satisfied, tests pass, and recently-closed issues confirm resolution.",
|
|
449
|
-
"- `mission-failed | reason: <text>` — Declare mission failed. Writes MISSION_FAILED.md and sets schedule to off. Use when: transformation budget is exhausted with no progress, pipeline is stuck in a loop, or the mission is unachievable.",
|
|
450
|
-
"",
|
|
451
538
|
"### Schedule Control",
|
|
452
539
|
"- `set-schedule:<frequency>` — Change supervisor schedule (off, weekly, daily, hourly, continuous). Use `set-schedule:weekly` when mission is substantially complete, `set-schedule:continuous` to ramp up.",
|
|
453
540
|
"",
|
|
@@ -605,152 +692,11 @@ async function executeRespondDiscussions(octokit, repo, params, ctx) {
|
|
|
605
692
|
return "skipped:respond-no-message";
|
|
606
693
|
}
|
|
607
694
|
|
|
608
|
-
async function executeMissionComplete(octokit, repo, params, ctx) {
|
|
609
|
-
const reason = params.reason || "All acceptance criteria satisfied";
|
|
610
|
-
const signal = [
|
|
611
|
-
"# Mission Complete",
|
|
612
|
-
"",
|
|
613
|
-
`- **Timestamp:** ${new Date().toISOString()}`,
|
|
614
|
-
`- **Detected by:** supervisor`,
|
|
615
|
-
`- **Reason:** ${reason}`,
|
|
616
|
-
"",
|
|
617
|
-
"This file was created automatically. To restart transformations, delete this file or run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
|
|
618
|
-
].join("\n");
|
|
619
|
-
writeFileSync("MISSION_COMPLETE.md", signal);
|
|
620
|
-
core.info(`Mission complete signal written: ${reason}`);
|
|
621
|
-
|
|
622
|
-
// Persist MISSION_COMPLETE.md to the repository via Contents API so it survives across runs
|
|
623
|
-
try {
|
|
624
|
-
const contentBase64 = Buffer.from(signal).toString("base64");
|
|
625
|
-
// Check if file already exists (to get its SHA for updates)
|
|
626
|
-
let existingSha;
|
|
627
|
-
try {
|
|
628
|
-
const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_COMPLETE.md", ref: "main" });
|
|
629
|
-
existingSha = data.sha;
|
|
630
|
-
} catch {
|
|
631
|
-
// File doesn't exist yet — that's fine
|
|
632
|
-
}
|
|
633
|
-
await octokit.rest.repos.createOrUpdateFileContents({
|
|
634
|
-
...repo,
|
|
635
|
-
path: "MISSION_COMPLETE.md",
|
|
636
|
-
message: "mission-complete: " + reason.substring(0, 72),
|
|
637
|
-
content: contentBase64,
|
|
638
|
-
branch: "main",
|
|
639
|
-
...(existingSha ? { sha: existingSha } : {}),
|
|
640
|
-
});
|
|
641
|
-
core.info("MISSION_COMPLETE.md committed to main via Contents API");
|
|
642
|
-
} catch (err) {
|
|
643
|
-
core.warning(`Could not commit MISSION_COMPLETE.md to repo: ${err.message}`);
|
|
644
|
-
}
|
|
645
|
-
|
|
646
|
-
if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
|
|
647
|
-
// Only turn off schedule if it's not already off or in maintenance mode
|
|
648
|
-
let currentSupervisor = "";
|
|
649
|
-
try {
|
|
650
|
-
const tomlContent = readFileSync("agentic-lib.toml", "utf8");
|
|
651
|
-
const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
|
|
652
|
-
if (match) currentSupervisor = match[1];
|
|
653
|
-
} catch { /* ignore */ }
|
|
654
|
-
|
|
655
|
-
if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
|
|
656
|
-
core.info(`Schedule already "${currentSupervisor}" — not changing on mission-complete`);
|
|
657
|
-
} else {
|
|
658
|
-
try {
|
|
659
|
-
await octokit.rest.actions.createWorkflowDispatch({
|
|
660
|
-
...repo,
|
|
661
|
-
workflow_id: "agentic-lib-schedule.yml",
|
|
662
|
-
ref: "main",
|
|
663
|
-
inputs: { frequency: "off" },
|
|
664
|
-
});
|
|
665
|
-
} catch (err) {
|
|
666
|
-
core.warning(`Could not set schedule to off: ${err.message}`);
|
|
667
|
-
}
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
// Announce mission complete via bot
|
|
671
|
-
const websiteUrl = getWebsiteUrl(repo);
|
|
672
|
-
const discussionUrl = ctx?.activeDiscussionUrl || "";
|
|
673
|
-
await dispatchBot(octokit, repo, `Mission complete! ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
|
|
674
|
-
}
|
|
675
|
-
return `mission-complete:${reason.substring(0, 100)}`;
|
|
676
|
-
}
|
|
677
|
-
|
|
678
|
-
async function executeMissionFailed(octokit, repo, params, ctx) {
|
|
679
|
-
const reason = params.reason || "Mission could not be completed";
|
|
680
|
-
const signal = [
|
|
681
|
-
"# Mission Failed",
|
|
682
|
-
"",
|
|
683
|
-
`- **Timestamp:** ${new Date().toISOString()}`,
|
|
684
|
-
`- **Detected by:** supervisor`,
|
|
685
|
-
`- **Reason:** ${reason}`,
|
|
686
|
-
"",
|
|
687
|
-
"This file was created automatically. To restart, delete this file and run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
|
|
688
|
-
].join("\n");
|
|
689
|
-
writeFileSync("MISSION_FAILED.md", signal);
|
|
690
|
-
core.info(`Mission failed signal written: ${reason}`);
|
|
691
|
-
|
|
692
|
-
// Persist MISSION_FAILED.md to the repository via Contents API so it survives across runs
|
|
693
|
-
try {
|
|
694
|
-
const contentBase64 = Buffer.from(signal).toString("base64");
|
|
695
|
-
let existingSha;
|
|
696
|
-
try {
|
|
697
|
-
const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_FAILED.md", ref: "main" });
|
|
698
|
-
existingSha = data.sha;
|
|
699
|
-
} catch {
|
|
700
|
-
// File doesn't exist yet — that's fine
|
|
701
|
-
}
|
|
702
|
-
await octokit.rest.repos.createOrUpdateFileContents({
|
|
703
|
-
...repo,
|
|
704
|
-
path: "MISSION_FAILED.md",
|
|
705
|
-
message: "mission-failed: " + reason.substring(0, 72),
|
|
706
|
-
content: contentBase64,
|
|
707
|
-
branch: "main",
|
|
708
|
-
...(existingSha ? { sha: existingSha } : {}),
|
|
709
|
-
});
|
|
710
|
-
core.info("MISSION_FAILED.md committed to main via Contents API");
|
|
711
|
-
} catch (err) {
|
|
712
|
-
core.warning(`Could not commit MISSION_FAILED.md to repo: ${err.message}`);
|
|
713
|
-
}
|
|
714
|
-
|
|
715
|
-
if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
|
|
716
|
-
// Only turn off schedule if it's not already off or in maintenance mode
|
|
717
|
-
let currentSupervisor = "";
|
|
718
|
-
try {
|
|
719
|
-
const tomlContent = readFileSync("agentic-lib.toml", "utf8");
|
|
720
|
-
const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
|
|
721
|
-
if (match) currentSupervisor = match[1];
|
|
722
|
-
} catch { /* ignore */ }
|
|
723
|
-
|
|
724
|
-
if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
|
|
725
|
-
core.info(`Schedule already "${currentSupervisor}" — not changing on mission-failed`);
|
|
726
|
-
} else {
|
|
727
|
-
try {
|
|
728
|
-
await octokit.rest.actions.createWorkflowDispatch({
|
|
729
|
-
...repo,
|
|
730
|
-
workflow_id: "agentic-lib-schedule.yml",
|
|
731
|
-
ref: "main",
|
|
732
|
-
inputs: { frequency: "off" },
|
|
733
|
-
});
|
|
734
|
-
} catch (err) {
|
|
735
|
-
core.warning(`Could not set schedule to off: ${err.message}`);
|
|
736
|
-
}
|
|
737
|
-
}
|
|
738
|
-
|
|
739
|
-
// Announce mission failed via bot
|
|
740
|
-
const websiteUrl = getWebsiteUrl(repo);
|
|
741
|
-
const discussionUrl = ctx?.activeDiscussionUrl || "";
|
|
742
|
-
await dispatchBot(octokit, repo, `Mission failed. ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
|
|
743
|
-
}
|
|
744
|
-
return `mission-failed:${reason.substring(0, 100)}`;
|
|
745
|
-
}
|
|
746
|
-
|
|
747
695
|
const ACTION_HANDLERS = {
|
|
748
696
|
"github:create-issue": executeCreateIssue,
|
|
749
697
|
"github:label-issue": executeLabelIssue,
|
|
750
698
|
"github:close-issue": executeCloseIssue,
|
|
751
699
|
"respond:discussions": executeRespondDiscussions,
|
|
752
|
-
"mission-complete": executeMissionComplete,
|
|
753
|
-
"mission-failed": executeMissionFailed,
|
|
754
700
|
};
|
|
755
701
|
|
|
756
702
|
async function executeSetSchedule(octokit, repo, frequency) {
|
|
@@ -778,7 +724,7 @@ async function executeAction(octokit, repo, action, params, ctx) {
|
|
|
778
724
|
if (action === "nop") return "nop";
|
|
779
725
|
const handler = ACTION_HANDLERS[action];
|
|
780
726
|
if (handler) return handler(octokit, repo, params, ctx);
|
|
781
|
-
core.
|
|
727
|
+
core.debug(`Ignoring unrecognised action: ${action}`);
|
|
782
728
|
return `unknown:${action}`;
|
|
783
729
|
}
|
|
784
730
|
|
|
@@ -814,7 +760,7 @@ export async function supervise(context) {
|
|
|
814
760
|
|
|
815
761
|
// --- LLM decision ---
|
|
816
762
|
const agentInstructions = instructions || "You are the supervisor. Decide what actions to take.";
|
|
817
|
-
const prompt = buildPrompt(ctx, agentInstructions);
|
|
763
|
+
const prompt = buildPrompt(ctx, agentInstructions, config);
|
|
818
764
|
|
|
819
765
|
const { content, tokensUsed, inputTokens, outputTokens, cost } = await runCopilotTask({
|
|
820
766
|
model,
|
|
@@ -845,28 +791,8 @@ export async function supervise(context) {
|
|
|
845
791
|
|
|
846
792
|
// --- Deterministic lifecycle posts (after LLM) ---
|
|
847
793
|
|
|
848
|
-
//
|
|
849
|
-
//
|
|
850
|
-
// Skip in maintenance mode — maintenance keeps running regardless of mission status.
|
|
851
|
-
if (!ctx.missionComplete && !ctx.missionFailed && config.supervisor !== "maintenance") {
|
|
852
|
-
const llmChoseMissionComplete = results.some((r) => r.startsWith("mission-complete:"));
|
|
853
|
-
if (!llmChoseMissionComplete) {
|
|
854
|
-
const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
|
|
855
|
-
const hasNoOpenIssues = ctx.issuesSummary.length === 0;
|
|
856
|
-
const hasNoOpenPRs = ctx.prsSummary.length === 0;
|
|
857
|
-
if (hasNoOpenIssues && hasNoOpenPRs && resolvedCount >= 1) {
|
|
858
|
-
core.info(`Deterministic mission-complete: 0 open issues, 0 open PRs, ${resolvedCount} recently resolved — LLM did not detect completion`);
|
|
859
|
-
try {
|
|
860
|
-
const autoResult = await executeMissionComplete(octokit, repo,
|
|
861
|
-
{ reason: `All acceptance criteria satisfied (${resolvedCount} issues resolved, 0 open issues, 0 open PRs)` },
|
|
862
|
-
ctx);
|
|
863
|
-
results.push(autoResult);
|
|
864
|
-
} catch (err) {
|
|
865
|
-
core.warning(`Deterministic mission-complete failed: ${err.message}`);
|
|
866
|
-
}
|
|
867
|
-
}
|
|
868
|
-
}
|
|
869
|
-
}
|
|
794
|
+
// W12: Mission-complete/failed evaluation moved to the director task.
|
|
795
|
+
// The supervisor no longer declares mission-complete or mission-failed.
|
|
870
796
|
|
|
871
797
|
// Step 3: Auto-respond when a message referral is present
|
|
872
798
|
// If the workflow was triggered with a message (from bot's request-supervisor),
|
|
@@ -888,14 +814,8 @@ export async function supervise(context) {
|
|
|
888
814
|
|
|
889
815
|
// Build changes list from executed actions
|
|
890
816
|
const changes = results
|
|
891
|
-
.filter((r) => r.startsWith("created-issue:")
|
|
892
|
-
.map((r) => {
|
|
893
|
-
if (r.startsWith("created-issue:")) return { action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" };
|
|
894
|
-
if (r.startsWith("mission-complete:")) return { action: "mission-complete", file: "MISSION_COMPLETE.md", sizeInfo: r.replace("mission-complete:", "") };
|
|
895
|
-
if (r.startsWith("mission-failed:")) return { action: "mission-failed", file: "MISSION_FAILED.md", sizeInfo: r.replace("mission-failed:", "") };
|
|
896
|
-
return null;
|
|
897
|
-
})
|
|
898
|
-
.filter(Boolean);
|
|
817
|
+
.filter((r) => r.startsWith("created-issue:"))
|
|
818
|
+
.map((r) => ({ action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" }));
|
|
899
819
|
|
|
900
820
|
return {
|
|
901
821
|
outcome: actions.length === 0 ? "nop" : `supervised:${actions.length}-actions`,
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
You are the director of an autonomous coding repository. Your sole responsibility is evaluating whether the mission is complete, failed, or in progress.
|
|
2
|
+
|
|
3
|
+
## Your Role
|
|
4
|
+
|
|
5
|
+
You do NOT dispatch workflows, create issues, or manage the schedule. That is the supervisor's job. You ONLY assess mission status and produce a structured evaluation.
|
|
6
|
+
|
|
7
|
+
## Input
|
|
8
|
+
|
|
9
|
+
You receive:
|
|
10
|
+
1. **MISSION.md** — the acceptance criteria
|
|
11
|
+
2. **Mission-Complete Metrics** — a table of mechanical checks (open issues, PRs, resolved count, test coverage, TODO count, budget)
|
|
12
|
+
3. **Metric based mission complete assessment** — a pre-computed advisory from the mechanical check
|
|
13
|
+
4. **Source Exports** — functions exported from source files
|
|
14
|
+
5. **Recently Closed Issues** — issues resolved since init
|
|
15
|
+
6. **Recent Activity** — the latest entries from the activity log
|
|
16
|
+
|
|
17
|
+
## Decision Framework
|
|
18
|
+
|
|
19
|
+
### Mission Complete
|
|
20
|
+
Declare `mission-complete` when ALL of the following are true:
|
|
21
|
+
1. Every row in the Mission-Complete Metrics table shows **MET** or **OK**
|
|
22
|
+
2. The Source Exports demonstrate that all functions required by MISSION.md are implemented
|
|
23
|
+
3. The Recently Closed Issues confirm that acceptance criteria have been addressed
|
|
24
|
+
4. No TODOs remain in source code
|
|
25
|
+
5. Dedicated test files exist (not just seed tests)
|
|
26
|
+
|
|
27
|
+
### Mission Failed
|
|
28
|
+
Declare `mission-failed` when ANY of the following are true:
|
|
29
|
+
1. Transformation budget is EXHAUSTED and acceptance criteria are still unmet
|
|
30
|
+
2. The last 3+ transforms produced no meaningful code changes
|
|
31
|
+
3. The pipeline is stuck in a loop (same issues created and closed repeatedly)
|
|
32
|
+
|
|
33
|
+
### Gap Analysis (most common output)
|
|
34
|
+
When the mission is neither complete nor failed, produce a detailed gap analysis:
|
|
35
|
+
- What has been achieved so far
|
|
36
|
+
- What specific gaps remain between the current state and mission-complete
|
|
37
|
+
- Which metrics are NOT MET and what needs to happen to satisfy them
|
|
38
|
+
- Prioritised list of what should be done next
|
|
39
|
+
|
|
40
|
+
## Output Format
|
|
41
|
+
|
|
42
|
+
Respond with EXACTLY this structure:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
[DECISION]
|
|
46
|
+
mission-complete | mission-failed | in-progress
|
|
47
|
+
[/DECISION]
|
|
48
|
+
[REASON]
|
|
49
|
+
One-line summary of the decision.
|
|
50
|
+
[/REASON]
|
|
51
|
+
[ANALYSIS]
|
|
52
|
+
Detailed gap analysis or completion summary. Include:
|
|
53
|
+
- Metrics status (which are MET, which are NOT MET)
|
|
54
|
+
- What has been achieved
|
|
55
|
+
- What remains (if in-progress)
|
|
56
|
+
- Recommended next actions (if in-progress)
|
|
57
|
+
[/ANALYSIS]
|
|
58
|
+
```
|
|
@@ -1,18 +1,24 @@
|
|
|
1
1
|
You are the supervisor of an autonomous coding repository. Your job is to advance the mission by strategically choosing which workflows to dispatch and which GitHub actions to take.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Important:** You do NOT evaluate mission-complete or mission-failed. That is the director's exclusive responsibility. Focus on advancing the mission through strategic action.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## MANDATORY FIRST CHECK: What Needs to Happen Next?
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
2. Were 2+ recently-closed issues "closed by review as RESOLVED"?
|
|
9
|
-
3. Do the Source Exports show the functions required by MISSION.md?
|
|
7
|
+
**Before choosing ANY action, check the Mission-Complete Metrics table in the prompt.**
|
|
10
8
|
|
|
11
|
-
|
|
9
|
+
Look at which metrics are NOT MET — these tell you what gaps remain:
|
|
10
|
+
1. Open issues > 0 → close resolved issues or wait for review
|
|
11
|
+
2. Open PRs > 0 → merge or close stale PRs
|
|
12
|
+
3. Issues resolved < threshold → create and resolve more issues
|
|
13
|
+
4. Dedicated test files = NO → create an issue requesting dedicated tests
|
|
14
|
+
5. Source TODO count > 0 → create an issue to resolve TODOs
|
|
15
|
+
6. Budget near exhaustion → be strategic with remaining transforms
|
|
16
|
+
|
|
17
|
+
If all metrics show MET/OK, use `nop` — the director will handle the rest.
|
|
12
18
|
|
|
13
19
|
## Priority Order
|
|
14
20
|
|
|
15
|
-
1. **Always strive
|
|
21
|
+
1. **Always strive to close gaps** — every action you take should aim to satisfy the remaining NOT MET metrics. If the code is already complete (see Source Exports and Recently Closed Issues), use `nop` and let the director evaluate. Otherwise, create one comprehensive issue that targets the entire mission (all acceptance criteria, tests, website, docs, README). Only create a second issue if the first transform couldn't complete everything, and scope it to the remaining work. Do not create issues just to fill a quota.
|
|
16
22
|
2. **Dispatch transform when ready issues exist** — transform is where code gets written. Always prefer it over maintain when there are open issues with the `ready` label.
|
|
17
23
|
3. **Dispatch review after transform** — when recent workflow runs show a transform completion, dispatch review to close resolved issues and add `ready` labels to new issues. This keeps the pipeline flowing.
|
|
18
24
|
4. **Fix failing PRs** — dispatch fix-code for any PR with failing checks (include pr-number).
|
|
@@ -36,10 +42,8 @@ If ALL three are true → the mission is done. Choose `mission-complete | reason
|
|
|
36
42
|
- **github:label-issue** — When an issue needs better categorisation for prioritisation.
|
|
37
43
|
- **github:close-issue** — When an issue is clearly resolved or no longer relevant.
|
|
38
44
|
- **respond:discussions** — When replying to a user request that came through the discussions bot. Include the discussion URL and a clear message.
|
|
39
|
-
- **set-schedule:\<frequency\>** — Change the workflow schedule. Use `weekly` when
|
|
40
|
-
- **
|
|
41
|
-
- **mission-failed** — When the mission cannot be completed. Use when: transformation budget is exhausted with acceptance criteria still unmet, the pipeline is stuck in a create-close loop with no code changes, or 3+ consecutive transforms failed to produce working code. This writes MISSION_FAILED.md and sets the schedule to off. Always include a reason explaining what went wrong.
|
|
42
|
-
- **nop** — When everything is running optimally: transform is active, issues are flowing, no failures.
|
|
45
|
+
- **set-schedule:\<frequency\>** — Change the workflow schedule. Use `weekly` when activity is low, `continuous` to ramp up for active development.
|
|
46
|
+
- **nop** — When everything is running optimally: transform is active, issues are flowing, no failures. Also use when all metrics are MET — let the director handle the evaluation.
|
|
43
47
|
|
|
44
48
|
## Stale Issue Detection
|
|
45
49
|
|
|
@@ -52,43 +56,11 @@ When recent workflow runs show an init completion, the repository has a fresh or
|
|
|
52
56
|
Dispatch the discussions bot to announce the new mission to the community.
|
|
53
57
|
Include the website URL in the announcement — the site is at `https://<owner>.github.io/<repo>/` and runs the library.
|
|
54
58
|
|
|
55
|
-
### Mission Accomplished (bounded missions)
|
|
56
|
-
When ALL of the following conditions are met, the mission is accomplished:
|
|
57
|
-
1. All open issues are closed (check Recently Closed Issues — if the last 2+ were closed by review as RESOLVED, this is strong evidence)
|
|
58
|
-
2. Tests pass (CI gates commits, so this is usually the case)
|
|
59
|
-
3. The MISSION.md acceptance criteria are all satisfied (verify each criterion against the Recently Closed Issues and Recent Activity)
|
|
60
|
-
4. Do not create an issue if a similar issue was recently closed as resolved — check the Recently Closed Issues section
|
|
61
|
-
|
|
62
|
-
When all conditions are met, use the `mission-complete` action:
|
|
63
|
-
1. `mission-complete | reason: <summary of what was achieved>` — this writes MISSION_COMPLETE.md and sets the schedule to off
|
|
64
|
-
2. `dispatch:agentic-lib-bot` — announce mission accomplished in the discussions thread. Include the website URL (`https://<owner>.github.io/<repo>/`) where users can see the finished product.
|
|
65
|
-
|
|
66
|
-
Do NOT create another issue when the mission is already accomplished. If the Recently Closed Issues show 2+ issues closed by review as RESOLVED and 0 open issues remain, the mission is done.
|
|
67
|
-
|
|
68
59
|
### Ongoing Missions
|
|
69
60
|
If MISSION.md explicitly says "do not set schedule to off" or "ongoing mission", the mission never completes.
|
|
70
61
|
Instead, when activity is healthy, use `set-schedule:weekly` or `set-schedule:daily` to keep the pipeline running.
|
|
71
62
|
Never use `set-schedule:off` for ongoing missions.
|
|
72
63
|
|
|
73
|
-
### Mission Substantially Complete (bounded, but minor gaps)
|
|
74
|
-
When the transform agent has implemented all major features but minor polish remains
|
|
75
|
-
(e.g. missing README examples, incomplete edge case coverage):
|
|
76
|
-
1. `dispatch:agentic-lib-bot` — announce near-completion in the discussions thread
|
|
77
|
-
2. `set-schedule:weekly` — reduce to weekly maintenance check-ins
|
|
78
|
-
3. Check that `docs/` contains evidence of the library working before declaring done
|
|
79
|
-
|
|
80
|
-
### Mission Failed
|
|
81
|
-
When the mission cannot be completed, use the `mission-failed` action. Indicators of failure:
|
|
82
|
-
1. **Budget exhausted** — Transformation Budget shows usage at or near capacity with acceptance criteria still unmet
|
|
83
|
-
2. **Pipeline stuck** — 3+ consecutive supervisor cycles created issues that were immediately closed by review as RESOLVED, but the acceptance criteria are NOT actually met (false positives in review)
|
|
84
|
-
3. **No progress** — the last 3+ transforms produced no code changes (all nop outcomes) and acceptance criteria remain unmet
|
|
85
|
-
4. **Repeated failures** — transforms keep producing code that fails tests, and fix-code cannot resolve the failures
|
|
86
|
-
5. **Consuming budget without results** — transformation budget is being spent but the codebase is not converging toward the acceptance criteria
|
|
87
|
-
|
|
88
|
-
When declaring mission failed:
|
|
89
|
-
1. `mission-failed | reason: <what went wrong and what was achieved>` — this writes MISSION_FAILED.md and sets the schedule to off
|
|
90
|
-
2. `dispatch:agentic-lib-bot` — announce the failure in the discussions thread with details of what was accomplished and what remains
|
|
91
|
-
|
|
92
64
|
## Prerequisites
|
|
93
65
|
|
|
94
66
|
- The `set-schedule` action requires a `WORKFLOW_TOKEN` secret (classic PAT with `workflow` scope) to push workflow file changes to main.
|
|
@@ -97,13 +69,13 @@ When declaring mission failed:
|
|
|
97
69
|
|
|
98
70
|
Check the Recent Activity log and Recently Closed Issues for patterns:
|
|
99
71
|
|
|
100
|
-
**
|
|
101
|
-
- If
|
|
102
|
-
- If the last 2+ workflow runs produced no transform commits (only maintain-only or nop outcomes), AND all open issues are closed,
|
|
72
|
+
**All metrics MET signals:**
|
|
73
|
+
- If all rows in the Mission-Complete Metrics table show MET/OK, use `nop` — the director will evaluate mission-complete.
|
|
74
|
+
- If the last 2+ workflow runs produced no transform commits (only maintain-only or nop outcomes), AND all open issues are closed, use `nop`.
|
|
103
75
|
|
|
104
|
-
**
|
|
105
|
-
- If the Transformation Budget shows usage near capacity (e.g. 28/32) and acceptance criteria are still unmet,
|
|
106
|
-
- If the last 3+ cycles show the pattern: create issue → review closes as resolved → no transform → create identical issue, the pipeline is stuck. Check if acceptance criteria are truly met (
|
|
76
|
+
**Budget exhaustion signals:**
|
|
77
|
+
- If the Transformation Budget shows usage near capacity (e.g. 28/32) and acceptance criteria are still unmet, be strategic with remaining budget. Create highly-targeted issues that address the most critical gaps.
|
|
78
|
+
- If the last 3+ cycles show the pattern: create issue → review closes as resolved → no transform → create identical issue, the pipeline is stuck. Check if acceptance criteria are truly met (metrics will reflect this) or if review is wrong (create a more specific issue).
|
|
107
79
|
- Look for `transform: nop` or `transform: transformed` patterns in the activity log to distinguish productive iterations from idle ones.
|
|
108
80
|
|
|
109
81
|
**Dedup deadlock recovery:**
|
|
@@ -115,7 +87,7 @@ Check the Recent Activity log for discussion bot referrals (lines containing `di
|
|
|
115
87
|
|
|
116
88
|
Also check for notable progress worth reporting:
|
|
117
89
|
- Mission milestones achieved (all core functions implemented, all tests passing)
|
|
118
|
-
- Schedule changes (
|
|
90
|
+
- Schedule changes (throttling down)
|
|
119
91
|
- Significant code changes (large PRs merged, new features completed)
|
|
120
92
|
- Website first deployed or significantly updated (include the URL: `https://<owner>.github.io/<repo>/`)
|
|
121
93
|
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
import { test, expect } from "@playwright/test";
|
|
4
4
|
|
|
5
5
|
test("homepage returns 200 and renders", async ({ page }) => {
|
|
6
|
-
const response = await page.goto("/");
|
|
6
|
+
const response = await page.goto("/", { waitUntil: "networkidle" });
|
|
7
7
|
expect(response.status()).toBe(200);
|
|
8
8
|
|
|
9
|
-
await expect(page.locator("#lib-name")).toBeVisible();
|
|
10
|
-
await expect(page.locator("#lib-version")).toBeVisible();
|
|
11
|
-
await expect(page.locator("#demo-output")).toBeVisible();
|
|
9
|
+
await expect(page.locator("#lib-name")).toBeVisible({ timeout: 10000 });
|
|
10
|
+
await expect(page.locator("#lib-version")).toBeVisible({ timeout: 10000 });
|
|
11
|
+
await expect(page.locator("#demo-output")).toBeVisible({ timeout: 10000 });
|
|
12
12
|
|
|
13
13
|
await page.screenshot({ path: "SCREENSHOT_INDEX.png", fullPage: true });
|
|
14
14
|
});
|