@xn-intenton-z2a/agentic-lib 7.2.5 → 7.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/agentic-lib-init.yml +56 -0
- package/.github/workflows/agentic-lib-test.yml +7 -2
- package/.github/workflows/agentic-lib-workflow.yml +50 -3
- package/README.md +88 -17
- package/agentic-lib.toml +7 -0
- package/bin/agentic-lib.js +260 -496
- package/package.json +2 -1
- package/src/actions/agentic-step/config-loader.js +9 -0
- package/src/actions/agentic-step/index.js +104 -7
- package/src/actions/agentic-step/tasks/direct.js +435 -0
- package/src/actions/agentic-step/tasks/supervise.js +107 -180
- package/src/agents/agent-apply-fix.md +5 -2
- package/src/agents/agent-director.md +58 -0
- package/src/agents/agent-discovery.md +52 -0
- package/src/agents/agent-issue-resolution.md +18 -0
- package/src/agents/agent-iterate.md +45 -0
- package/src/agents/agent-supervisor.md +22 -50
- package/src/copilot/agents.js +39 -0
- package/src/copilot/config.js +308 -0
- package/src/copilot/context.js +318 -0
- package/src/copilot/hybrid-session.js +330 -0
- package/src/copilot/logger.js +43 -0
- package/src/copilot/sdk.js +36 -0
- package/src/copilot/session.js +372 -0
- package/src/copilot/tasks/fix-code.js +73 -0
- package/src/copilot/tasks/maintain-features.js +61 -0
- package/src/copilot/tasks/maintain-library.js +66 -0
- package/src/copilot/tasks/transform.js +120 -0
- package/src/copilot/tools.js +141 -0
- package/src/mcp/server.js +43 -25
- package/src/seeds/zero-README.md +31 -0
- package/src/seeds/zero-behaviour.test.js +12 -4
- package/src/seeds/zero-package.json +1 -1
- package/src/seeds/zero-playwright.config.js +1 -0
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// asks the Copilot SDK to choose multiple concurrent actions, then dispatches them.
|
|
7
7
|
|
|
8
8
|
import * as core from "@actions/core";
|
|
9
|
-
import { existsSync, readFileSync,
|
|
9
|
+
import { existsSync, readFileSync, readdirSync, statSync } from "fs";
|
|
10
10
|
import { runCopilotTask, readOptionalFile, scanDirectory, filterIssues } from "../copilot.js";
|
|
11
11
|
|
|
12
12
|
/**
|
|
@@ -218,6 +218,13 @@ async function gatherContext(octokit, repo, config, t) {
|
|
|
218
218
|
closeReason = "RESOLVED";
|
|
219
219
|
}
|
|
220
220
|
}
|
|
221
|
+
// Check for automerge closure (issue has "merged" label — set by ci-automerge)
|
|
222
|
+
if (closeReason !== "RESOLVED") {
|
|
223
|
+
const issueLabels = ci.labels.map((l) => (typeof l === "string" ? l : l.name));
|
|
224
|
+
if (issueLabels.includes("merged")) {
|
|
225
|
+
closeReason = "RESOLVED";
|
|
226
|
+
}
|
|
227
|
+
}
|
|
221
228
|
} catch (_) { /* ignore */ }
|
|
222
229
|
recentlyClosedSummary.push(`#${ci.number}: ${ci.title} — ${closeReason}`);
|
|
223
230
|
}
|
|
@@ -305,6 +312,61 @@ async function gatherContext(octokit, repo, config, t) {
|
|
|
305
312
|
}
|
|
306
313
|
} catch { /* ignore */ }
|
|
307
314
|
|
|
315
|
+
// Check for dedicated test files (not just seed tests)
|
|
316
|
+
// A dedicated test imports from the source directory (src/lib/) rather than being a seed test
|
|
317
|
+
let hasDedicatedTests = false;
|
|
318
|
+
let dedicatedTestFiles = [];
|
|
319
|
+
try {
|
|
320
|
+
const testDirs = ["tests", "__tests__"];
|
|
321
|
+
for (const dir of testDirs) {
|
|
322
|
+
if (existsSync(dir)) {
|
|
323
|
+
const testFiles = scanDirectory(dir, [".js", ".ts", ".mjs"], { limit: 20 });
|
|
324
|
+
for (const tf of testFiles) {
|
|
325
|
+
// Skip seed test files (main.test.js, web.test.js, behaviour.test.js)
|
|
326
|
+
if (/^(main|web|behaviour)\.test\.[jt]s$/.test(tf.name)) continue;
|
|
327
|
+
const content = readFileSync(tf.path, "utf8");
|
|
328
|
+
// Check if it imports from src/lib/ (mission-specific code)
|
|
329
|
+
if (/from\s+['"].*src\/lib\//.test(content) || /require\s*\(\s*['"].*src\/lib\//.test(content)) {
|
|
330
|
+
hasDedicatedTests = true;
|
|
331
|
+
dedicatedTestFiles.push(tf.name);
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
} catch { /* ignore */ }
|
|
337
|
+
|
|
338
|
+
// W9: Count TODO comments in source directory
|
|
339
|
+
let sourceTodoCount = 0;
|
|
340
|
+
try {
|
|
341
|
+
const sourcePath = config.paths.source?.path || "src/lib/";
|
|
342
|
+
const sourceDir = sourcePath.endsWith("/") ? sourcePath.slice(0, -1) : sourcePath;
|
|
343
|
+
const srcRoot = sourceDir.includes("/") ? sourceDir.split("/").slice(0, -1).join("/") || "src" : "src";
|
|
344
|
+
// Inline recursive TODO counter (avoids circular import with index.js)
|
|
345
|
+
const countTodos = (dir) => {
|
|
346
|
+
let n = 0;
|
|
347
|
+
if (!existsSync(dir)) return 0;
|
|
348
|
+
try {
|
|
349
|
+
const entries = readdirSync(dir);
|
|
350
|
+
for (const entry of entries) {
|
|
351
|
+
if (entry === "node_modules" || entry.startsWith(".")) continue;
|
|
352
|
+
const fp = `${dir}/${entry}`;
|
|
353
|
+
try {
|
|
354
|
+
const stat = statSync(fp);
|
|
355
|
+
if (stat.isDirectory()) {
|
|
356
|
+
n += countTodos(fp);
|
|
357
|
+
} else if (/\.(js|ts|mjs)$/.test(entry)) {
|
|
358
|
+
const content = readFileSync(fp, "utf8");
|
|
359
|
+
const m = content.match(/\bTODO\b/gi);
|
|
360
|
+
if (m) n += m.length;
|
|
361
|
+
}
|
|
362
|
+
} catch { /* skip */ }
|
|
363
|
+
}
|
|
364
|
+
} catch { /* skip */ }
|
|
365
|
+
return n;
|
|
366
|
+
};
|
|
367
|
+
sourceTodoCount = countTodos(srcRoot);
|
|
368
|
+
} catch { /* ignore */ }
|
|
369
|
+
|
|
308
370
|
return {
|
|
309
371
|
mission,
|
|
310
372
|
recentActivity,
|
|
@@ -333,10 +395,13 @@ async function gatherContext(octokit, repo, config, t) {
|
|
|
333
395
|
cumulativeTransformationCost,
|
|
334
396
|
recentlyClosedSummary,
|
|
335
397
|
sourceExports,
|
|
398
|
+
hasDedicatedTests,
|
|
399
|
+
dedicatedTestFiles,
|
|
400
|
+
sourceTodoCount,
|
|
336
401
|
};
|
|
337
402
|
}
|
|
338
403
|
|
|
339
|
-
function buildPrompt(ctx, agentInstructions) {
|
|
404
|
+
function buildPrompt(ctx, agentInstructions, config) {
|
|
340
405
|
return [
|
|
341
406
|
"## Instructions",
|
|
342
407
|
agentInstructions,
|
|
@@ -368,6 +433,39 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
368
433
|
"",
|
|
369
434
|
]
|
|
370
435
|
: []),
|
|
436
|
+
`### Test Coverage`,
|
|
437
|
+
ctx.hasDedicatedTests
|
|
438
|
+
? `Dedicated test files: ${ctx.dedicatedTestFiles.join(", ")}`
|
|
439
|
+
: "**No dedicated test files found.** Only seed tests (main.test.js, web.test.js) exist. Mission-complete requires dedicated tests that import from src/lib/.",
|
|
440
|
+
"",
|
|
441
|
+
`### Source TODO Count: ${ctx.sourceTodoCount}`,
|
|
442
|
+
ctx.sourceTodoCount > 0
|
|
443
|
+
? `**${ctx.sourceTodoCount} TODO(s) found in source.** All TODOs must be resolved before mission-complete can be declared.`
|
|
444
|
+
: "No TODOs found in source — this criterion is met.",
|
|
445
|
+
"",
|
|
446
|
+
...(() => {
|
|
447
|
+
// W10: Build mission-complete metrics inline for the LLM
|
|
448
|
+
const thresholds = config?.missionCompleteThresholds || {};
|
|
449
|
+
const minResolved = thresholds.minResolvedIssues ?? 3;
|
|
450
|
+
const requireTests = thresholds.requireDedicatedTests ?? true;
|
|
451
|
+
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
452
|
+
const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
|
|
453
|
+
const rows = [
|
|
454
|
+
`### Mission-Complete Metrics`,
|
|
455
|
+
"| Metric | Value | Target | Status |",
|
|
456
|
+
"|--------|-------|--------|--------|",
|
|
457
|
+
`| Open issues | ${ctx.issuesSummary.length} | 0 | ${ctx.issuesSummary.length === 0 ? "MET" : "NOT MET"} |`,
|
|
458
|
+
`| Open PRs | ${ctx.prsSummary.length} | 0 | ${ctx.prsSummary.length === 0 ? "MET" : "NOT MET"} |`,
|
|
459
|
+
`| Issues resolved (RESOLVED) | ${resolvedCount} | >= ${minResolved} | ${resolvedCount >= minResolved ? "MET" : "NOT MET"} |`,
|
|
460
|
+
`| Dedicated test files | ${ctx.hasDedicatedTests ? "YES" : "NO"} | ${requireTests ? "YES" : "—"} | ${!requireTests || ctx.hasDedicatedTests ? "MET" : "NOT MET"} |`,
|
|
461
|
+
`| Source TODO count | ${ctx.sourceTodoCount} | <= ${maxTodos} | ${ctx.sourceTodoCount <= maxTodos ? "MET" : "NOT MET"} |`,
|
|
462
|
+
`| Budget used | ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} | < ${ctx.transformationBudget || "unlimited"} | ${ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget ? "EXHAUSTED" : "OK"} |`,
|
|
463
|
+
"",
|
|
464
|
+
"**All metrics must show MET/OK for mission-complete to be declared.**",
|
|
465
|
+
"",
|
|
466
|
+
];
|
|
467
|
+
return rows;
|
|
468
|
+
})(),
|
|
371
469
|
`### Recent Workflow Runs`,
|
|
372
470
|
ctx.workflowsSummary.join("\n") || "none",
|
|
373
471
|
"",
|
|
@@ -419,7 +517,7 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
419
517
|
]
|
|
420
518
|
: []),
|
|
421
519
|
...(ctx.transformationBudget > 0
|
|
422
|
-
? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, ""]
|
|
520
|
+
? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, "Note: instability transforms (infrastructure fixes) do not count against this budget.", ""]
|
|
423
521
|
: []),
|
|
424
522
|
`### Issue Limits`,
|
|
425
523
|
`Feature development WIP limit: ${ctx.featureIssuesWipLimit}`,
|
|
@@ -444,10 +542,6 @@ function buildPrompt(ctx, agentInstructions) {
|
|
|
444
542
|
"### Communication",
|
|
445
543
|
"- `respond:discussions | message: <text> | discussion-url: <url>` — Reply via discussions bot",
|
|
446
544
|
"",
|
|
447
|
-
"### Mission Lifecycle",
|
|
448
|
-
"- `mission-complete | reason: <text>` — Declare mission accomplished. Writes MISSION_COMPLETE.md and sets schedule to off. Use when: all acceptance criteria in MISSION.md are satisfied, tests pass, and recently-closed issues confirm resolution.",
|
|
449
|
-
"- `mission-failed | reason: <text>` — Declare mission failed. Writes MISSION_FAILED.md and sets schedule to off. Use when: transformation budget is exhausted with no progress, pipeline is stuck in a loop, or the mission is unachievable.",
|
|
450
|
-
"",
|
|
451
545
|
"### Schedule Control",
|
|
452
546
|
"- `set-schedule:<frequency>` — Change supervisor schedule (off, weekly, daily, hourly, continuous). Use `set-schedule:weekly` when mission is substantially complete, `set-schedule:continuous` to ramp up.",
|
|
453
547
|
"",
|
|
@@ -605,152 +699,11 @@ async function executeRespondDiscussions(octokit, repo, params, ctx) {
|
|
|
605
699
|
return "skipped:respond-no-message";
|
|
606
700
|
}
|
|
607
701
|
|
|
608
|
-
async function executeMissionComplete(octokit, repo, params, ctx) {
|
|
609
|
-
const reason = params.reason || "All acceptance criteria satisfied";
|
|
610
|
-
const signal = [
|
|
611
|
-
"# Mission Complete",
|
|
612
|
-
"",
|
|
613
|
-
`- **Timestamp:** ${new Date().toISOString()}`,
|
|
614
|
-
`- **Detected by:** supervisor`,
|
|
615
|
-
`- **Reason:** ${reason}`,
|
|
616
|
-
"",
|
|
617
|
-
"This file was created automatically. To restart transformations, delete this file or run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
|
|
618
|
-
].join("\n");
|
|
619
|
-
writeFileSync("MISSION_COMPLETE.md", signal);
|
|
620
|
-
core.info(`Mission complete signal written: ${reason}`);
|
|
621
|
-
|
|
622
|
-
// Persist MISSION_COMPLETE.md to the repository via Contents API so it survives across runs
|
|
623
|
-
try {
|
|
624
|
-
const contentBase64 = Buffer.from(signal).toString("base64");
|
|
625
|
-
// Check if file already exists (to get its SHA for updates)
|
|
626
|
-
let existingSha;
|
|
627
|
-
try {
|
|
628
|
-
const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_COMPLETE.md", ref: "main" });
|
|
629
|
-
existingSha = data.sha;
|
|
630
|
-
} catch {
|
|
631
|
-
// File doesn't exist yet — that's fine
|
|
632
|
-
}
|
|
633
|
-
await octokit.rest.repos.createOrUpdateFileContents({
|
|
634
|
-
...repo,
|
|
635
|
-
path: "MISSION_COMPLETE.md",
|
|
636
|
-
message: "mission-complete: " + reason.substring(0, 72),
|
|
637
|
-
content: contentBase64,
|
|
638
|
-
branch: "main",
|
|
639
|
-
...(existingSha ? { sha: existingSha } : {}),
|
|
640
|
-
});
|
|
641
|
-
core.info("MISSION_COMPLETE.md committed to main via Contents API");
|
|
642
|
-
} catch (err) {
|
|
643
|
-
core.warning(`Could not commit MISSION_COMPLETE.md to repo: ${err.message}`);
|
|
644
|
-
}
|
|
645
|
-
|
|
646
|
-
if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
|
|
647
|
-
// Only turn off schedule if it's not already off or in maintenance mode
|
|
648
|
-
let currentSupervisor = "";
|
|
649
|
-
try {
|
|
650
|
-
const tomlContent = readFileSync("agentic-lib.toml", "utf8");
|
|
651
|
-
const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
|
|
652
|
-
if (match) currentSupervisor = match[1];
|
|
653
|
-
} catch { /* ignore */ }
|
|
654
|
-
|
|
655
|
-
if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
|
|
656
|
-
core.info(`Schedule already "${currentSupervisor}" — not changing on mission-complete`);
|
|
657
|
-
} else {
|
|
658
|
-
try {
|
|
659
|
-
await octokit.rest.actions.createWorkflowDispatch({
|
|
660
|
-
...repo,
|
|
661
|
-
workflow_id: "agentic-lib-schedule.yml",
|
|
662
|
-
ref: "main",
|
|
663
|
-
inputs: { frequency: "off" },
|
|
664
|
-
});
|
|
665
|
-
} catch (err) {
|
|
666
|
-
core.warning(`Could not set schedule to off: ${err.message}`);
|
|
667
|
-
}
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
// Announce mission complete via bot
|
|
671
|
-
const websiteUrl = getWebsiteUrl(repo);
|
|
672
|
-
const discussionUrl = ctx?.activeDiscussionUrl || "";
|
|
673
|
-
await dispatchBot(octokit, repo, `Mission complete! ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
|
|
674
|
-
}
|
|
675
|
-
return `mission-complete:${reason.substring(0, 100)}`;
|
|
676
|
-
}
|
|
677
|
-
|
|
678
|
-
async function executeMissionFailed(octokit, repo, params, ctx) {
|
|
679
|
-
const reason = params.reason || "Mission could not be completed";
|
|
680
|
-
const signal = [
|
|
681
|
-
"# Mission Failed",
|
|
682
|
-
"",
|
|
683
|
-
`- **Timestamp:** ${new Date().toISOString()}`,
|
|
684
|
-
`- **Detected by:** supervisor`,
|
|
685
|
-
`- **Reason:** ${reason}`,
|
|
686
|
-
"",
|
|
687
|
-
"This file was created automatically. To restart, delete this file and run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
|
|
688
|
-
].join("\n");
|
|
689
|
-
writeFileSync("MISSION_FAILED.md", signal);
|
|
690
|
-
core.info(`Mission failed signal written: ${reason}`);
|
|
691
|
-
|
|
692
|
-
// Persist MISSION_FAILED.md to the repository via Contents API so it survives across runs
|
|
693
|
-
try {
|
|
694
|
-
const contentBase64 = Buffer.from(signal).toString("base64");
|
|
695
|
-
let existingSha;
|
|
696
|
-
try {
|
|
697
|
-
const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_FAILED.md", ref: "main" });
|
|
698
|
-
existingSha = data.sha;
|
|
699
|
-
} catch {
|
|
700
|
-
// File doesn't exist yet — that's fine
|
|
701
|
-
}
|
|
702
|
-
await octokit.rest.repos.createOrUpdateFileContents({
|
|
703
|
-
...repo,
|
|
704
|
-
path: "MISSION_FAILED.md",
|
|
705
|
-
message: "mission-failed: " + reason.substring(0, 72),
|
|
706
|
-
content: contentBase64,
|
|
707
|
-
branch: "main",
|
|
708
|
-
...(existingSha ? { sha: existingSha } : {}),
|
|
709
|
-
});
|
|
710
|
-
core.info("MISSION_FAILED.md committed to main via Contents API");
|
|
711
|
-
} catch (err) {
|
|
712
|
-
core.warning(`Could not commit MISSION_FAILED.md to repo: ${err.message}`);
|
|
713
|
-
}
|
|
714
|
-
|
|
715
|
-
if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
|
|
716
|
-
// Only turn off schedule if it's not already off or in maintenance mode
|
|
717
|
-
let currentSupervisor = "";
|
|
718
|
-
try {
|
|
719
|
-
const tomlContent = readFileSync("agentic-lib.toml", "utf8");
|
|
720
|
-
const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
|
|
721
|
-
if (match) currentSupervisor = match[1];
|
|
722
|
-
} catch { /* ignore */ }
|
|
723
|
-
|
|
724
|
-
if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
|
|
725
|
-
core.info(`Schedule already "${currentSupervisor}" — not changing on mission-failed`);
|
|
726
|
-
} else {
|
|
727
|
-
try {
|
|
728
|
-
await octokit.rest.actions.createWorkflowDispatch({
|
|
729
|
-
...repo,
|
|
730
|
-
workflow_id: "agentic-lib-schedule.yml",
|
|
731
|
-
ref: "main",
|
|
732
|
-
inputs: { frequency: "off" },
|
|
733
|
-
});
|
|
734
|
-
} catch (err) {
|
|
735
|
-
core.warning(`Could not set schedule to off: ${err.message}`);
|
|
736
|
-
}
|
|
737
|
-
}
|
|
738
|
-
|
|
739
|
-
// Announce mission failed via bot
|
|
740
|
-
const websiteUrl = getWebsiteUrl(repo);
|
|
741
|
-
const discussionUrl = ctx?.activeDiscussionUrl || "";
|
|
742
|
-
await dispatchBot(octokit, repo, `Mission failed. ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
|
|
743
|
-
}
|
|
744
|
-
return `mission-failed:${reason.substring(0, 100)}`;
|
|
745
|
-
}
|
|
746
|
-
|
|
747
702
|
const ACTION_HANDLERS = {
|
|
748
703
|
"github:create-issue": executeCreateIssue,
|
|
749
704
|
"github:label-issue": executeLabelIssue,
|
|
750
705
|
"github:close-issue": executeCloseIssue,
|
|
751
706
|
"respond:discussions": executeRespondDiscussions,
|
|
752
|
-
"mission-complete": executeMissionComplete,
|
|
753
|
-
"mission-failed": executeMissionFailed,
|
|
754
707
|
};
|
|
755
708
|
|
|
756
709
|
async function executeSetSchedule(octokit, repo, frequency) {
|
|
@@ -778,7 +731,7 @@ async function executeAction(octokit, repo, action, params, ctx) {
|
|
|
778
731
|
if (action === "nop") return "nop";
|
|
779
732
|
const handler = ACTION_HANDLERS[action];
|
|
780
733
|
if (handler) return handler(octokit, repo, params, ctx);
|
|
781
|
-
core.
|
|
734
|
+
core.debug(`Ignoring unrecognised action: ${action}`);
|
|
782
735
|
return `unknown:${action}`;
|
|
783
736
|
}
|
|
784
737
|
|
|
@@ -814,7 +767,7 @@ export async function supervise(context) {
|
|
|
814
767
|
|
|
815
768
|
// --- LLM decision ---
|
|
816
769
|
const agentInstructions = instructions || "You are the supervisor. Decide what actions to take.";
|
|
817
|
-
const prompt = buildPrompt(ctx, agentInstructions);
|
|
770
|
+
const prompt = buildPrompt(ctx, agentInstructions, config);
|
|
818
771
|
|
|
819
772
|
const { content, tokensUsed, inputTokens, outputTokens, cost } = await runCopilotTask({
|
|
820
773
|
model,
|
|
@@ -845,28 +798,8 @@ export async function supervise(context) {
|
|
|
845
798
|
|
|
846
799
|
// --- Deterministic lifecycle posts (after LLM) ---
|
|
847
800
|
|
|
848
|
-
//
|
|
849
|
-
//
|
|
850
|
-
// Skip in maintenance mode — maintenance keeps running regardless of mission status.
|
|
851
|
-
if (!ctx.missionComplete && !ctx.missionFailed && config.supervisor !== "maintenance") {
|
|
852
|
-
const llmChoseMissionComplete = results.some((r) => r.startsWith("mission-complete:"));
|
|
853
|
-
if (!llmChoseMissionComplete) {
|
|
854
|
-
const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
|
|
855
|
-
const hasNoOpenIssues = ctx.issuesSummary.length === 0;
|
|
856
|
-
const hasNoOpenPRs = ctx.prsSummary.length === 0;
|
|
857
|
-
if (hasNoOpenIssues && hasNoOpenPRs && resolvedCount >= 1) {
|
|
858
|
-
core.info(`Deterministic mission-complete: 0 open issues, 0 open PRs, ${resolvedCount} recently resolved — LLM did not detect completion`);
|
|
859
|
-
try {
|
|
860
|
-
const autoResult = await executeMissionComplete(octokit, repo,
|
|
861
|
-
{ reason: `All acceptance criteria satisfied (${resolvedCount} issues resolved, 0 open issues, 0 open PRs)` },
|
|
862
|
-
ctx);
|
|
863
|
-
results.push(autoResult);
|
|
864
|
-
} catch (err) {
|
|
865
|
-
core.warning(`Deterministic mission-complete failed: ${err.message}`);
|
|
866
|
-
}
|
|
867
|
-
}
|
|
868
|
-
}
|
|
869
|
-
}
|
|
801
|
+
// W12: Mission-complete/failed evaluation moved to the director task.
|
|
802
|
+
// The supervisor no longer declares mission-complete or mission-failed.
|
|
870
803
|
|
|
871
804
|
// Step 3: Auto-respond when a message referral is present
|
|
872
805
|
// If the workflow was triggered with a message (from bot's request-supervisor),
|
|
@@ -888,14 +821,8 @@ export async function supervise(context) {
|
|
|
888
821
|
|
|
889
822
|
// Build changes list from executed actions
|
|
890
823
|
const changes = results
|
|
891
|
-
.filter((r) => r.startsWith("created-issue:")
|
|
892
|
-
.map((r) => {
|
|
893
|
-
if (r.startsWith("created-issue:")) return { action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" };
|
|
894
|
-
if (r.startsWith("mission-complete:")) return { action: "mission-complete", file: "MISSION_COMPLETE.md", sizeInfo: r.replace("mission-complete:", "") };
|
|
895
|
-
if (r.startsWith("mission-failed:")) return { action: "mission-failed", file: "MISSION_FAILED.md", sizeInfo: r.replace("mission-failed:", "") };
|
|
896
|
-
return null;
|
|
897
|
-
})
|
|
898
|
-
.filter(Boolean);
|
|
824
|
+
.filter((r) => r.startsWith("created-issue:"))
|
|
825
|
+
.map((r) => ({ action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" }));
|
|
899
826
|
|
|
900
827
|
return {
|
|
901
828
|
outcome: actions.length === 0 ? "nop" : `supervised:${actions.length}-actions`,
|
|
@@ -18,10 +18,13 @@ A fix is never just one file. These layers form a single unit — if you change
|
|
|
18
18
|
|
|
19
19
|
- **Library source** (`src/lib/main.js`) — the core implementation
|
|
20
20
|
- **Unit tests** (`tests/unit/`) — test every function at the API level with exact values and edge cases
|
|
21
|
-
- **Website** (`src/web/index.html` and related files) — imports and calls the library to demonstrate features
|
|
21
|
+
- **Website** (`src/web/index.html` and related files) — imports and calls the library to demonstrate features.
|
|
22
|
+
**NEVER duplicate library functions inline in the web page** — use the build pipeline (`lib-meta.js`, or
|
|
23
|
+
a generated browser module) to share code. Inline copies cause behaviour tests to test a simulation, not the real library.
|
|
22
24
|
- **Website unit tests** (`tests/unit/web.test.js`) — verify HTML structure and library wiring
|
|
23
25
|
- **Behaviour tests** (`tests/behaviour/`) — Playwright tests that load the website in a real browser
|
|
24
|
-
and verify features work at a high navigational level (demo output visible, interactive elements work)
|
|
26
|
+
and verify features work at a high navigational level (demo output visible, interactive elements work).
|
|
27
|
+
These include a coupling test that imports `getIdentity()` from `src/lib/main.js` and asserts that the version displayed on the page matches the library's version.
|
|
25
28
|
|
|
26
29
|
If the failure is in one layer, the fix often requires coordinating changes across multiple layers.
|
|
27
30
|
For example, if a unit test fails because a function signature changed, the website and behaviour tests
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
You are the director of an autonomous coding repository. Your sole responsibility is evaluating whether the mission is complete, failed, or in progress.
|
|
2
|
+
|
|
3
|
+
## Your Role
|
|
4
|
+
|
|
5
|
+
You do NOT dispatch workflows, create issues, or manage the schedule. That is the supervisor's job. You ONLY assess mission status and produce a structured evaluation.
|
|
6
|
+
|
|
7
|
+
## Input
|
|
8
|
+
|
|
9
|
+
You receive:
|
|
10
|
+
1. **MISSION.md** — the acceptance criteria
|
|
11
|
+
2. **Mission-Complete Metrics** — a table of mechanical checks (open issues, PRs, resolved count, test coverage, TODO count, budget)
|
|
12
|
+
3. **Metric based mission complete assessment** — a pre-computed advisory from the mechanical check
|
|
13
|
+
4. **Source Exports** — functions exported from source files
|
|
14
|
+
5. **Recently Closed Issues** — issues resolved since init
|
|
15
|
+
6. **Recent Activity** — the latest entries from the activity log
|
|
16
|
+
|
|
17
|
+
## Decision Framework
|
|
18
|
+
|
|
19
|
+
### Mission Complete
|
|
20
|
+
Declare `mission-complete` when ALL of the following are true:
|
|
21
|
+
1. Every row in the Mission-Complete Metrics table shows **MET** or **OK**
|
|
22
|
+
2. The Source Exports demonstrate that all functions required by MISSION.md are implemented
|
|
23
|
+
3. The Recently Closed Issues confirm that acceptance criteria have been addressed
|
|
24
|
+
4. No TODOs remain in source code
|
|
25
|
+
5. Dedicated test files exist (not just seed tests)
|
|
26
|
+
|
|
27
|
+
### Mission Failed
|
|
28
|
+
Declare `mission-failed` when ANY of the following are true:
|
|
29
|
+
1. Transformation budget is EXHAUSTED and acceptance criteria are still unmet
|
|
30
|
+
2. The last 3+ transforms produced no meaningful code changes
|
|
31
|
+
3. The pipeline is stuck in a loop (same issues created and closed repeatedly)
|
|
32
|
+
|
|
33
|
+
### Gap Analysis (most common output)
|
|
34
|
+
When the mission is neither complete nor failed, produce a detailed gap analysis:
|
|
35
|
+
- What has been achieved so far
|
|
36
|
+
- What specific gaps remain between the current state and mission-complete
|
|
37
|
+
- Which metrics are NOT MET and what needs to happen to satisfy them
|
|
38
|
+
- Prioritised list of what should be done next
|
|
39
|
+
|
|
40
|
+
## Output Format
|
|
41
|
+
|
|
42
|
+
Respond with EXACTLY this structure:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
[DECISION]
|
|
46
|
+
mission-complete | mission-failed | in-progress
|
|
47
|
+
[/DECISION]
|
|
48
|
+
[REASON]
|
|
49
|
+
One-line summary of the decision.
|
|
50
|
+
[/REASON]
|
|
51
|
+
[ANALYSIS]
|
|
52
|
+
Detailed gap analysis or completion summary. Include:
|
|
53
|
+
- Metrics status (which are MET, which are NOT MET)
|
|
54
|
+
- What has been achieved
|
|
55
|
+
- What remains (if in-progress)
|
|
56
|
+
- Recommended next actions (if in-progress)
|
|
57
|
+
[/ANALYSIS]
|
|
58
|
+
```
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
You are a project discovery agent running locally via the intentïon CLI.
|
|
2
|
+
|
|
3
|
+
Your job is to explore the current working directory and generate a MISSION.md file that describes what this project is and what improvements could be made.
|
|
4
|
+
|
|
5
|
+
## Your Goal
|
|
6
|
+
|
|
7
|
+
Analyse the project in the current directory and produce a MISSION.md that a code transformation agent can act on. The mission should be achievable, specific, and grounded in what you actually find.
|
|
8
|
+
|
|
9
|
+
## Discovery Strategy
|
|
10
|
+
|
|
11
|
+
1. **Read the project structure** — list files, examine package.json (or equivalent), read README.md if present
|
|
12
|
+
2. **Understand the tech stack** — identify the language, framework, build system, test framework
|
|
13
|
+
3. **Read existing source code** — understand what the project does, its architecture, and its current state
|
|
14
|
+
4. **Read existing tests** — understand test coverage and what's already verified
|
|
15
|
+
5. **Run tests** — use `run_tests` to see the current pass/fail state
|
|
16
|
+
6. **Identify opportunities** — look for:
|
|
17
|
+
- Missing or incomplete functionality based on README/docs promises
|
|
18
|
+
- Missing test coverage for existing functions
|
|
19
|
+
- Code quality improvements (error handling, input validation, edge cases)
|
|
20
|
+
- Documentation gaps
|
|
21
|
+
- Obvious bugs or issues
|
|
22
|
+
|
|
23
|
+
## Output Format
|
|
24
|
+
|
|
25
|
+
Write a MISSION.md file with the following structure:
|
|
26
|
+
|
|
27
|
+
```markdown
|
|
28
|
+
# Mission: [Descriptive Title]
|
|
29
|
+
|
|
30
|
+
[1-2 sentence summary of what this project is and what the mission aims to achieve]
|
|
31
|
+
|
|
32
|
+
## Current State
|
|
33
|
+
|
|
34
|
+
[Brief description of what exists — key files, main functionality, test coverage]
|
|
35
|
+
|
|
36
|
+
## Objectives
|
|
37
|
+
|
|
38
|
+
[Numbered list of specific, achievable improvements]
|
|
39
|
+
|
|
40
|
+
## Acceptance Criteria
|
|
41
|
+
|
|
42
|
+
[Specific, testable criteria that define "done" — these become the test assertions]
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Important Rules
|
|
46
|
+
|
|
47
|
+
- **Be specific** — don't write vague goals like "improve code quality". Write "add input validation to the `parse()` function for empty strings and null values"
|
|
48
|
+
- **Be achievable** — scope the mission to what can be done in a single transformation session (30-60 minutes of agent work)
|
|
49
|
+
- **Be grounded** — every objective must reference actual code you found in the project
|
|
50
|
+
- **Prioritise test coverage** — if the project has functions without tests, adding tests for them is always a good mission
|
|
51
|
+
- **Don't hallucinate** — only reference files, functions, and patterns you actually observed
|
|
52
|
+
- **Write the MISSION.md file** — use the file writing tool to create or overwrite MISSION.md in the workspace root
|
|
@@ -92,6 +92,24 @@ automatically (from `docs/` via `npm run build:web`).
|
|
|
92
92
|
- **Demonstrate features**: Each significant library feature should be visible and usable on the website.
|
|
93
93
|
Behaviour tests will verify these demonstrations work, so make them testable (use IDs, structured output).
|
|
94
94
|
|
|
95
|
+
### CRITICAL: Never duplicate library code in the web page
|
|
96
|
+
|
|
97
|
+
**Do NOT copy or recreate library functions inline in `src/web/index.html`.** The web page must consume
|
|
98
|
+
the library through the build pipeline — not by reimplementing functions in a `<script>` block.
|
|
99
|
+
|
|
100
|
+
Why this matters:
|
|
101
|
+
- Unit tests test `src/lib/main.js` directly
|
|
102
|
+
- Behaviour tests test the web page via Playwright
|
|
103
|
+
- If the web page has its own copy of the functions, behaviour tests test a **simulation** — not the real library
|
|
104
|
+
- The two copies can diverge silently, and tests pass even when the real library is broken
|
|
105
|
+
|
|
106
|
+
How to share code between library and web:
|
|
107
|
+
- Identity (name, version, description) flows via `lib-meta.js` (generated by `build:web` from `package.json`)
|
|
108
|
+
- If the web page needs to call mission-specific functions, make them available through the build pipeline
|
|
109
|
+
(e.g. generate a browser-compatible module in `docs/` or add a build step that packages the pure functions)
|
|
110
|
+
- The behaviour test imports `getIdentity()` from `src/lib/main.js` and asserts the page displays the same
|
|
111
|
+
version — this coupling test proves the pipeline is wired up correctly
|
|
112
|
+
|
|
95
113
|
### Guidelines
|
|
96
114
|
|
|
97
115
|
- `src/web/index.html` is the main page — update it as the library grows
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
You are an autonomous code transformation agent running locally via the intentïon CLI.
|
|
2
|
+
|
|
3
|
+
Your workspace is the current working directory. You have been given a MISSION to implement.
|
|
4
|
+
|
|
5
|
+
## Your Goal
|
|
6
|
+
|
|
7
|
+
Implement the MISSION described in the user prompt. This means:
|
|
8
|
+
|
|
9
|
+
1. Read and understand the mission requirements
|
|
10
|
+
2. Read the existing source code and tests to understand the current state
|
|
11
|
+
3. Write the implementation code — keep existing exports, add new ones
|
|
12
|
+
4. Write comprehensive tests covering all acceptance criteria
|
|
13
|
+
5. Run `run_tests` to verify everything passes
|
|
14
|
+
6. If tests fail, read the error output carefully, fix the code, and iterate
|
|
15
|
+
|
|
16
|
+
## Strategy
|
|
17
|
+
|
|
18
|
+
1. Read MISSION.md to understand what needs to be built
|
|
19
|
+
2. Examine the project structure — look at package.json, existing source, and test files
|
|
20
|
+
3. Implement the required functionality in the source files
|
|
21
|
+
4. Write dedicated test files covering ALL acceptance criteria from the mission
|
|
22
|
+
5. Run `run_tests` to verify everything passes
|
|
23
|
+
6. If tests fail, read the error output carefully, fix the code, and repeat
|
|
24
|
+
|
|
25
|
+
## Important Rules
|
|
26
|
+
|
|
27
|
+
- Keep existing exports and functionality — add to them, don't replace
|
|
28
|
+
- Write tests that import from the library's main entry point
|
|
29
|
+
- Do NOT modify existing test files unless the mission specifically requires it — create new test files for mission-specific tests
|
|
30
|
+
- Keep going until all tests pass or you've exhausted your options
|
|
31
|
+
- Prefer simple, clean implementations over clever ones
|
|
32
|
+
- Follow the project's existing code style and conventions
|
|
33
|
+
|
|
34
|
+
## All Code Must Change Together
|
|
35
|
+
|
|
36
|
+
When you change a function signature, return value, or error type, update ALL consumers:
|
|
37
|
+
- Source code
|
|
38
|
+
- Unit tests
|
|
39
|
+
- Any documentation or examples
|
|
40
|
+
|
|
41
|
+
A partial change that updates the source but not the tests will fail.
|
|
42
|
+
|
|
43
|
+
## Tests Must Pass
|
|
44
|
+
|
|
45
|
+
Your changes MUST leave all existing tests passing. The mission's acceptance criteria are the source of truth — if an existing test disagrees with the acceptance criteria, first fix that test to match the criteria, then fix the code so that the corrected test passes.
|