@xn-intenton-z2a/agentic-lib 7.4.32 → 7.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/agentic-lib-init.yml +12 -10
- package/.github/workflows/agentic-lib-schedule.yml +122 -56
- package/.github/workflows/agentic-lib-test.yml +1 -1
- package/.github/workflows/agentic-lib-workflow.yml +30 -2
- package/agentic-lib.toml +22 -2
- package/bin/agentic-lib.js +166 -19
- package/package.json +1 -1
- package/src/actions/agentic-step/tasks/direct.js +17 -9
- package/src/actions/agentic-step/tasks/implementation-review.js +66 -15
- package/src/actions/agentic-step/tasks/maintain-features.js +5 -0
- package/src/actions/agentic-step/tasks/review-issue.js +13 -2
- package/src/actions/agentic-step/tasks/transform.js +18 -0
- package/src/copilot/config.js +17 -3
- package/src/copilot/telemetry.js +18 -1
- package/src/seeds/zero-MISSION.md +14 -14
- package/src/seeds/zero-README.md +106 -65
- package/src/seeds/zero-package.json +1 -1
package/bin/agentic-lib.js
CHANGED
|
@@ -110,7 +110,7 @@ const target = resolve(targetPath);
|
|
|
110
110
|
const modelIdx = flags.indexOf("--model");
|
|
111
111
|
const model = modelIdx >= 0 ? flags[modelIdx + 1] : "claude-sonnet-4";
|
|
112
112
|
const missionIdx = flags.indexOf("--mission");
|
|
113
|
-
const mission = missionIdx >= 0 ? flags[missionIdx + 1] : "
|
|
113
|
+
const mission = missionIdx >= 0 ? flags[missionIdx + 1] : "7-kyu-understand-fizz-buzz";
|
|
114
114
|
const cyclesIdx = flags.indexOf("--cycles");
|
|
115
115
|
const cycles = cyclesIdx >= 0 ? parseInt(flags[cyclesIdx + 1], 10) : 0;
|
|
116
116
|
const stepsIdx = flags.indexOf("--steps");
|
|
@@ -734,7 +734,7 @@ function clearAndRecreateDir(dirPath, label) {
|
|
|
734
734
|
if (!dryRun) mkdirSync(fullPath, { recursive: true });
|
|
735
735
|
}
|
|
736
736
|
|
|
737
|
-
function initPurge(seedsDir, missionName, initTimestamp) {
|
|
737
|
+
async function initPurge(seedsDir, missionName, initTimestamp) {
|
|
738
738
|
console.log("\n--- Purge: Reset Source Files to Seed State ---");
|
|
739
739
|
|
|
740
740
|
const { sourcePath, testsPath, behaviourPath, examplesPath, webPath } = readTomlPaths();
|
|
@@ -779,12 +779,52 @@ function initPurge(seedsDir, missionName, initTimestamp) {
|
|
|
779
779
|
console.log(" CREATE: docs/.nojekyll");
|
|
780
780
|
}
|
|
781
781
|
|
|
782
|
+
// W10: Preserve TOML values through purge
|
|
783
|
+
const tomlTarget = resolve(target, "agentic-lib.toml");
|
|
784
|
+
let preservedTomlValues = {};
|
|
785
|
+
if (existsSync(tomlTarget)) {
|
|
786
|
+
const existingToml = readFileSync(tomlTarget, "utf8");
|
|
787
|
+
const readTomlValue = (key) => {
|
|
788
|
+
const m = existingToml.match(new RegExp(`^\\s*${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=\\s*"([^"]*)"`, "m"));
|
|
789
|
+
return m ? m[1] : null;
|
|
790
|
+
};
|
|
791
|
+
const readTomlNum = (key) => {
|
|
792
|
+
const m = existingToml.match(new RegExp(`^\\s*${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=\\s*(\\d+)`, "m"));
|
|
793
|
+
return m ? parseInt(m[1], 10) : null;
|
|
794
|
+
};
|
|
795
|
+
preservedTomlValues = {
|
|
796
|
+
supervisor: readTomlValue("supervisor"),
|
|
797
|
+
focus: readTomlValue("focus"),
|
|
798
|
+
model: readTomlValue("model"),
|
|
799
|
+
profile: readTomlValue("profile"),
|
|
800
|
+
"acceptance-criteria-threshold": readTomlNum("acceptance-criteria-threshold"),
|
|
801
|
+
"min-resolved-issues": readTomlNum("min-resolved-issues"),
|
|
802
|
+
"mission-type": readTomlValue("mission-type"),
|
|
803
|
+
};
|
|
804
|
+
console.log(" PRESERVE: saved TOML values for restoration after purge");
|
|
805
|
+
}
|
|
806
|
+
|
|
782
807
|
// Force-overwrite agentic-lib.toml during purge (transformed from root)
|
|
783
808
|
const tomlSource = resolve(pkgRoot, "agentic-lib.toml");
|
|
784
809
|
if (existsSync(tomlSource)) {
|
|
785
810
|
initTransformFile(tomlSource, resolve(target, "agentic-lib.toml"), "SEED: agentic-lib.toml (transformed)");
|
|
786
811
|
}
|
|
787
812
|
|
|
813
|
+
// Restore preserved values into the new TOML
|
|
814
|
+
if (existsSync(tomlTarget) && Object.values(preservedTomlValues).some(v => v !== null)) {
|
|
815
|
+
let toml = readFileSync(tomlTarget, "utf8");
|
|
816
|
+
for (const [key, value] of Object.entries(preservedTomlValues)) {
|
|
817
|
+
if (value === null) continue;
|
|
818
|
+
const isNum = typeof value === "number";
|
|
819
|
+
const regex = new RegExp(`^(\\s*${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\s*=\\s*)${isNum ? "\\d+" : '"[^"]*"'}`, "m");
|
|
820
|
+
if (regex.test(toml)) {
|
|
821
|
+
toml = toml.replace(regex, `$1${isNum ? value : `"${value}"`}`);
|
|
822
|
+
console.log(` RESTORE: ${key} = ${value}`);
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
if (!dryRun) writeFileSync(tomlTarget, toml);
|
|
826
|
+
}
|
|
827
|
+
|
|
788
828
|
// Clear agent log files (written by implementation-review and other tasks)
|
|
789
829
|
try {
|
|
790
830
|
const agentLogs = readdirSync(target).filter((f) => f.startsWith("agent-log-") && f.endsWith(".md"));
|
|
@@ -798,27 +838,133 @@ function initPurge(seedsDir, missionName, initTimestamp) {
|
|
|
798
838
|
if (agentLogs.length > 0) console.log(` Cleared ${agentLogs.length} agent log file(s)`);
|
|
799
839
|
} catch { /* ignore — directory may not have agent logs */ }
|
|
800
840
|
|
|
801
|
-
// Copy mission seed file as MISSION.md
|
|
841
|
+
// Copy mission seed file as MISSION.md (with random/generate support)
|
|
802
842
|
const missionsDir = resolve(seedsDir, "missions");
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
//
|
|
843
|
+
let resolvedMission = missionName;
|
|
844
|
+
let missionType = missionName; // "random", "generate", or the specific seed name
|
|
845
|
+
|
|
846
|
+
if (missionName === "random") {
|
|
847
|
+
// W11: Pick a random mission from available seeds
|
|
808
848
|
const available = existsSync(missionsDir)
|
|
809
|
-
? readdirSync(missionsDir)
|
|
810
|
-
.filter((f) => f.endsWith(".md"))
|
|
811
|
-
.map((f) => f.replace(/\.md$/, ""))
|
|
849
|
+
? readdirSync(missionsDir).filter((f) => f.endsWith(".md")).map((f) => f.replace(/\.md$/, ""))
|
|
812
850
|
: [];
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
851
|
+
if (available.length === 0) {
|
|
852
|
+
console.error("\nERROR: No missions available for random selection.");
|
|
853
|
+
process.exit(1);
|
|
854
|
+
}
|
|
855
|
+
resolvedMission = available[Math.floor(Math.random() * available.length)];
|
|
856
|
+
console.log(` RANDOM: selected mission "${resolvedMission}" from ${available.length} available`);
|
|
857
|
+
} else if (missionName === "generate") {
|
|
858
|
+
// W12: Generate a mission using LLM
|
|
859
|
+
console.log(" GENERATE: Creating LLM-generated mission...");
|
|
860
|
+
try {
|
|
861
|
+
const { runCopilotSession } = await import("../src/copilot/copilot-session.js");
|
|
862
|
+
const available = existsSync(missionsDir)
|
|
863
|
+
? readdirSync(missionsDir).filter((f) => f.endsWith(".md")).map((f) => f.replace(/\.md$/, ""))
|
|
864
|
+
: [];
|
|
865
|
+
const sampleMission = existsSync(resolve(missionsDir, "7-kyu-understand-fizz-buzz.md"))
|
|
866
|
+
? readFileSync(resolve(missionsDir, "7-kyu-understand-fizz-buzz.md"), "utf8")
|
|
867
|
+
: "";
|
|
868
|
+
const prompt = [
|
|
869
|
+
"Generate a novel JavaScript library mission for an autonomous coding pipeline.",
|
|
870
|
+
"The mission should follow this exact structure (use the example as a template):",
|
|
871
|
+
"",
|
|
872
|
+
sampleMission,
|
|
873
|
+
"",
|
|
874
|
+
"Requirements:",
|
|
875
|
+
"- Be distinct from all existing missions: " + available.join(", "),
|
|
876
|
+
"- Difficulty should be between 8-kyu (trivial) and 2-kyu (expert)",
|
|
877
|
+
"- Include 5-10 acceptance criteria as markdown checkboxes (- [ ] ...)",
|
|
878
|
+
"- The library must be implementable in a single src/lib/main.js file",
|
|
879
|
+
"- Include edge cases and error handling in the requirements",
|
|
880
|
+
"",
|
|
881
|
+
"Write the mission to MISSION.md using the write_file tool.",
|
|
882
|
+
].join("\n");
|
|
883
|
+
await runCopilotSession({
|
|
884
|
+
task: "generate-mission",
|
|
885
|
+
model,
|
|
886
|
+
target,
|
|
887
|
+
prompt,
|
|
888
|
+
timeoutMs: 120000,
|
|
889
|
+
dryRun,
|
|
890
|
+
});
|
|
891
|
+
resolvedMission = "generated";
|
|
892
|
+
console.log(" GENERATE: Mission written to MISSION.md");
|
|
893
|
+
} catch (err) {
|
|
894
|
+
console.error(` GENERATE: LLM generation failed (${err.message}), falling back to fizz-buzz`);
|
|
895
|
+
resolvedMission = "7-kyu-understand-fizz-buzz";
|
|
896
|
+
missionType = "generate-fallback";
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
if (missionName !== "generate" || resolvedMission !== "generated") {
|
|
901
|
+
const selectedMissionFile = resolve(missionsDir, `${resolvedMission}.md`);
|
|
902
|
+
if (existsSync(selectedMissionFile)) {
|
|
903
|
+
initCopyFile(selectedMissionFile, resolve(target, "MISSION.md"), `MISSION: missions/${resolvedMission}.md → MISSION.md`);
|
|
904
|
+
} else {
|
|
905
|
+
const available = existsSync(missionsDir)
|
|
906
|
+
? readdirSync(missionsDir).filter((f) => f.endsWith(".md")).map((f) => f.replace(/\.md$/, ""))
|
|
907
|
+
: [];
|
|
908
|
+
console.error(`\nERROR: Unknown mission "${resolvedMission}".`);
|
|
909
|
+
if (available.length > 0) {
|
|
910
|
+
console.error(`Available missions: ${available.join(", ")}`);
|
|
911
|
+
}
|
|
912
|
+
process.exit(1);
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
// W17: Generate structured acceptance criteria in TOML
|
|
917
|
+
const missionTargetPath = resolve(target, "MISSION.md");
|
|
918
|
+
if (existsSync(missionTargetPath)) {
|
|
919
|
+
const missionContent = readFileSync(missionTargetPath, "utf8");
|
|
920
|
+
const checkboxes = missionContent.match(/- \[ \] (.+)/g) || [];
|
|
921
|
+
if (checkboxes.length > 0) {
|
|
922
|
+
const criteriaEntries = checkboxes.map((line, i) => {
|
|
923
|
+
const text = line.replace(/^- \[ \] /, "").trim();
|
|
924
|
+
return `${i + 1} = { text = ${JSON.stringify(text)}, met = false }`;
|
|
925
|
+
});
|
|
926
|
+
const criteriaSection = [
|
|
927
|
+
"",
|
|
928
|
+
"[acceptance-criteria]",
|
|
929
|
+
`# Auto-generated from MISSION.md on init. Updated by implementation-review.`,
|
|
930
|
+
`total = ${checkboxes.length}`,
|
|
931
|
+
...criteriaEntries,
|
|
932
|
+
].join("\n");
|
|
933
|
+
const tomlFile = resolve(target, "agentic-lib.toml");
|
|
934
|
+
if (existsSync(tomlFile)) {
|
|
935
|
+
let toml = readFileSync(tomlFile, "utf8");
|
|
936
|
+
if (/^\[acceptance-criteria\]/m.test(toml)) {
|
|
937
|
+
toml = toml.replace(/\n?\[acceptance-criteria\][^\[]*/, criteriaSection);
|
|
938
|
+
} else {
|
|
939
|
+
toml = toml.trimEnd() + "\n" + criteriaSection + "\n";
|
|
940
|
+
}
|
|
941
|
+
if (!dryRun) writeFileSync(tomlFile, toml);
|
|
942
|
+
console.log(` WRITE: [acceptance-criteria] section (${checkboxes.length} criteria)`);
|
|
943
|
+
initChanges++;
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
// Set acceptance criteria threshold based on mission difficulty
|
|
949
|
+
const difficultyMatch = resolvedMission.match(/^(\d+)-(?:kyu|dan)/);
|
|
950
|
+
if (difficultyMatch) {
|
|
951
|
+
const level = parseInt(difficultyMatch[1], 10);
|
|
952
|
+
const isDan = resolvedMission.includes("-dan-");
|
|
953
|
+
const THRESHOLD_MAP = { 8: 100, 7: 75, 6: 60, 5: 50, 4: 50, 3: 40, 2: isDan ? 30 : 35, 1: 30 };
|
|
954
|
+
const threshold = THRESHOLD_MAP[level] || 50;
|
|
955
|
+
const tomlFile = resolve(target, "agentic-lib.toml");
|
|
956
|
+
if (existsSync(tomlFile)) {
|
|
957
|
+
let toml = readFileSync(tomlFile, "utf8");
|
|
958
|
+
const regex = /^(\s*acceptance-criteria-threshold\s*=\s*)\d+/m;
|
|
959
|
+
if (regex.test(toml)) {
|
|
960
|
+
toml = toml.replace(regex, `$1${threshold}`);
|
|
961
|
+
if (!dryRun) writeFileSync(tomlFile, toml);
|
|
962
|
+
console.log(` SET: acceptance-criteria-threshold = ${threshold} (${resolvedMission})`);
|
|
963
|
+
}
|
|
816
964
|
}
|
|
817
|
-
process.exit(1);
|
|
818
965
|
}
|
|
819
966
|
|
|
820
967
|
// Write init metadata to agentic-lib.toml
|
|
821
|
-
const tomlTarget = resolve(target, "agentic-lib.toml");
|
|
822
968
|
if (existsSync(tomlTarget)) {
|
|
823
969
|
let toml = readFileSync(tomlTarget, "utf8");
|
|
824
970
|
const pkg = JSON.parse(readFileSync(resolve(pkgRoot, "package.json"), "utf8"));
|
|
@@ -827,7 +973,8 @@ function initPurge(seedsDir, missionName, initTimestamp) {
|
|
|
827
973
|
"[init]",
|
|
828
974
|
`timestamp = "${initTimestamp}"`,
|
|
829
975
|
`mode = "purge"`,
|
|
830
|
-
`mission = "${
|
|
976
|
+
`mission = "${resolvedMission}"`,
|
|
977
|
+
`mission-type = "${missionType}"`,
|
|
831
978
|
`version = "${pkg.version}"`,
|
|
832
979
|
].join("\n");
|
|
833
980
|
// Replace existing [init] section or append
|
|
@@ -1260,7 +1407,7 @@ function initPurgeGitHub() {
|
|
|
1260
1407
|
}
|
|
1261
1408
|
}
|
|
1262
1409
|
|
|
1263
|
-
function runInit() {
|
|
1410
|
+
async function runInit() {
|
|
1264
1411
|
if (!existsSync(target)) {
|
|
1265
1412
|
console.error(`Target directory does not exist: ${target}`);
|
|
1266
1413
|
process.exit(1);
|
|
@@ -1316,7 +1463,7 @@ function runInit() {
|
|
|
1316
1463
|
initScripts(agenticDir);
|
|
1317
1464
|
initConfig(seedsDir);
|
|
1318
1465
|
if (reseed) initReseed(initTimestamp);
|
|
1319
|
-
if (purge) initPurge(seedsDir, mission, initTimestamp);
|
|
1466
|
+
if (purge) await initPurge(seedsDir, mission, initTimestamp);
|
|
1320
1467
|
if (purge) initPurgeGitHub();
|
|
1321
1468
|
|
|
1322
1469
|
console.log(`\n${initChanges} change(s)${dryRun ? " (dry run)" : ""}`);
|
package/package.json
CHANGED
package/src/actions/agentic-step/tasks/direct.js
CHANGED
|
@@ -67,8 +67,13 @@ function detectDedicatedTests() {
|
|
|
67
67
|
*/
|
|
68
68
|
async function buildMetricAssessment(ctx, config) {
|
|
69
69
|
const thresholds = config.missionCompleteThresholds || {};
|
|
70
|
-
const minResolved = thresholds.minResolvedIssues ??
|
|
70
|
+
const minResolved = thresholds.minResolvedIssues ?? 1;
|
|
71
71
|
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
72
|
+
const minCumulativeTransforms = thresholds.minCumulativeTransforms ?? 1;
|
|
73
|
+
const acceptanceThreshold = thresholds.acceptanceCriteriaThreshold ?? 50;
|
|
74
|
+
const requireNoOpenIssues = thresholds.requireNoOpenIssues ?? true;
|
|
75
|
+
const requireNoOpenPrs = thresholds.requireNoOpenPrs ?? true;
|
|
76
|
+
const requireNoCriticalGaps = thresholds.requireNoCriticalGaps ?? true;
|
|
72
77
|
|
|
73
78
|
// Implementation review gaps (passed from workflow via env)
|
|
74
79
|
let reviewGaps = [];
|
|
@@ -78,22 +83,23 @@ async function buildMetricAssessment(ctx, config) {
|
|
|
78
83
|
} catch { /* ignore parse errors */ }
|
|
79
84
|
const criticalGaps = reviewGaps.filter((g) => g.severity === "critical");
|
|
80
85
|
|
|
81
|
-
// Acceptance criteria from MISSION.md checkboxes
|
|
86
|
+
// Acceptance criteria from MISSION.md checkboxes (or structured TOML if available)
|
|
82
87
|
const { countAcceptanceCriteria } = await import("../../../copilot/telemetry.js");
|
|
83
88
|
const missionPath = config.paths?.mission?.path || "MISSION.md";
|
|
84
89
|
const acceptance = countAcceptanceCriteria(missionPath);
|
|
85
|
-
const
|
|
90
|
+
const acceptancePct = acceptance.total > 0 ? (acceptance.met / acceptance.total) * 100 : 0;
|
|
91
|
+
const acceptanceMet = acceptance.total > 0 && acceptancePct >= acceptanceThreshold;
|
|
86
92
|
|
|
87
93
|
// C6: Removed "Dedicated tests" metric; using cumulative transforms instead
|
|
88
94
|
const metrics = [
|
|
89
|
-
{ metric: "Open issues", value: ctx.issuesSummary.length, target: 0, met: ctx.issuesSummary.length === 0 },
|
|
90
|
-
{ metric: "Open PRs", value: ctx.prsSummary.length, target: 0, met: ctx.prsSummary.length === 0 },
|
|
95
|
+
{ metric: "Open issues", value: ctx.issuesSummary.length, target: 0, met: requireNoOpenIssues ? ctx.issuesSummary.length === 0 : true },
|
|
96
|
+
{ metric: "Open PRs", value: ctx.prsSummary.length, target: 0, met: requireNoOpenPrs ? ctx.prsSummary.length === 0 : true },
|
|
91
97
|
{ metric: "Issues resolved", value: ctx.resolvedCount, target: minResolved, met: ctx.resolvedCount >= minResolved },
|
|
92
98
|
{ metric: "Source TODOs", value: ctx.sourceTodoCount, target: maxTodos, met: ctx.sourceTodoCount <= maxTodos },
|
|
93
|
-
{ metric: "Cumulative transforms", value: ctx.cumulativeTransformationCost, target:
|
|
99
|
+
{ metric: "Cumulative transforms", value: ctx.cumulativeTransformationCost, target: minCumulativeTransforms, met: ctx.cumulativeTransformationCost >= minCumulativeTransforms },
|
|
94
100
|
{ metric: "Budget", value: ctx.cumulativeTransformationCost, target: ctx.transformationBudget || "unlimited", met: !(ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget) },
|
|
95
|
-
{ metric: "Implementation review", value: criticalGaps.length === 0 ? "No critical gaps" : `${criticalGaps.length} critical gap(s)`, target: "No critical gaps", met: criticalGaps.length === 0 },
|
|
96
|
-
{ metric: "Acceptance criteria", value: acceptance.total > 0 ? `${acceptance.met}/${acceptance.total}` : "N/A", target:
|
|
101
|
+
{ metric: "Implementation review", value: criticalGaps.length === 0 ? "No critical gaps" : `${criticalGaps.length} critical gap(s)`, target: "No critical gaps", met: requireNoCriticalGaps ? criticalGaps.length === 0 : true },
|
|
102
|
+
{ metric: "Acceptance criteria", value: acceptance.total > 0 ? `${acceptance.met}/${acceptance.total} (${Math.round(acceptancePct)}%)` : "N/A", target: `>= ${acceptanceThreshold}%`, met: acceptanceMet },
|
|
97
103
|
];
|
|
98
104
|
|
|
99
105
|
const allMet = metrics.every((m) => m.met);
|
|
@@ -166,7 +172,9 @@ function buildPrompt(ctx, agentInstructions, metricAssessment) {
|
|
|
166
172
|
"Check the acceptance criteria in the Mission section above. If all criteria are clearly satisfied by the current source code and tests (verified via read_file), you SHOULD declare mission-complete even if not all mechanical metrics are MET.",
|
|
167
173
|
"For simple missions (few functions, clear acceptance criteria), do not require elaborate test coverage or documentation beyond what the acceptance criteria specify.",
|
|
168
174
|
"",
|
|
169
|
-
|
|
175
|
+
`**Focus mode:** ${config.focus === "maintenance" ? "MAINTENANCE — The mission is substantially complete. Focus on adding value: improve test coverage, refactor for clarity, improve documentation, optimise performance. Do NOT declare mission-complete or mission-failed. Dispatch maintenance work instead." : "MISSION — Work toward mission completion. Declare mission-complete when criteria are met."}`,
|
|
176
|
+
"",
|
|
177
|
+
`**Post-merge evaluation context:** This director runs AFTER a dev transformation has been merged. The source code, tests, README, and website you see are the result of that merge. The acceptance criteria checkboxes in MISSION.md reflect the implementation review's findings. If the metrics show all conditions MET and the acceptance criteria meet the ${metricAssessment.metrics.find(m => m.metric === "Acceptance criteria")?.target || ">= 50%"} threshold, you should declare mission-complete unless you find a critical implementation gap via read_file. Do not defer to a future run — the pipeline has a structural 2-run minimum, and this is your chance to complete in 1 run.`,
|
|
170
178
|
"",
|
|
171
179
|
"Then call report_director_decision with your determination.",
|
|
172
180
|
"",
|
|
package/src/actions/agentic-step/tasks/implementation-review.js
CHANGED
|
@@ -48,9 +48,11 @@ function buildReviewPrompt(mission, config, agentInstructions, agentLogsSummary)
|
|
|
48
48
|
" - Tests that don't assert anything meaningful (empty/trivial)",
|
|
49
49
|
" - Features listed as done in docs but missing from code",
|
|
50
50
|
" - PRs merged without test coverage for the claimed feature",
|
|
51
|
-
"4. Check the MISSION.md Acceptance Criteria
|
|
52
|
-
"
|
|
53
|
-
" `
|
|
51
|
+
"4. Check the MISSION.md Acceptance Criteria. For each criterion that you verified is",
|
|
52
|
+
" implemented AND unit-tested, include its **index number** (1-based) in the",
|
|
53
|
+
" `acceptanceCriteriaMetIndices` array. Also include the text in `acceptanceCriteriaMet`",
|
|
54
|
+
" for backwards compatibility. The indexed criteria are listed in agentic-lib.toml",
|
|
55
|
+
" under [acceptance-criteria] if available.",
|
|
54
56
|
"5. Call report_implementation_review with your findings.",
|
|
55
57
|
"",
|
|
56
58
|
"**You MUST call report_implementation_review exactly once.**",
|
|
@@ -172,34 +174,83 @@ export async function implementationReview(context) {
|
|
|
172
174
|
acceptanceCriteriaMet: {
|
|
173
175
|
type: "array",
|
|
174
176
|
items: { type: "string" },
|
|
175
|
-
description: "
|
|
177
|
+
description: "Text of each acceptance criterion verified as implemented AND unit-tested (for backwards compatibility).",
|
|
178
|
+
},
|
|
179
|
+
acceptanceCriteriaMetIndices: {
|
|
180
|
+
type: "array",
|
|
181
|
+
items: { type: "integer" },
|
|
182
|
+
description: "1-based indices of acceptance criteria verified as met (preferred over text matching). See [acceptance-criteria] in agentic-lib.toml.",
|
|
176
183
|
},
|
|
177
184
|
},
|
|
178
185
|
required: ["elements", "gaps", "advice"],
|
|
179
186
|
},
|
|
180
|
-
handler: async ({ elements, gaps, advice, misleadingMetrics, acceptanceCriteriaMet }) => {
|
|
187
|
+
handler: async ({ elements, gaps, advice, misleadingMetrics, acceptanceCriteriaMet, acceptanceCriteriaMetIndices }) => {
|
|
181
188
|
reviewResult.elements = elements || [];
|
|
182
189
|
reviewResult.gaps = gaps || [];
|
|
183
190
|
reviewResult.advice = advice || "";
|
|
184
191
|
reviewResult.misleadingMetrics = misleadingMetrics || [];
|
|
185
192
|
|
|
186
|
-
|
|
193
|
+
const metIndices = acceptanceCriteriaMetIndices || [];
|
|
187
194
|
const metCriteria = acceptanceCriteriaMet || [];
|
|
188
|
-
|
|
195
|
+
const totalUpdated = metIndices.length || metCriteria.length;
|
|
196
|
+
|
|
197
|
+
// W17: Update structured TOML acceptance criteria by index (primary)
|
|
198
|
+
if (metIndices.length > 0) {
|
|
199
|
+
try {
|
|
200
|
+
const { readFileSync, writeFileSync } = await import("fs");
|
|
201
|
+
const tomlPath = config.configToml ? "agentic-lib.toml" : null;
|
|
202
|
+
if (tomlPath && readFileSync(tomlPath, "utf8").includes("[acceptance-criteria]")) {
|
|
203
|
+
let toml = readFileSync(tomlPath, "utf8");
|
|
204
|
+
for (const idx of metIndices) {
|
|
205
|
+
const regex = new RegExp(`^(${idx}\\s*=\\s*\\{[^}]*met\\s*=\\s*)false`, "m");
|
|
206
|
+
if (regex.test(toml)) {
|
|
207
|
+
toml = toml.replace(regex, "$1true");
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
writeFileSync(tomlPath, toml, "utf8");
|
|
211
|
+
core.info(`Updated ${metIndices.length} acceptance criteria by index in TOML`);
|
|
212
|
+
}
|
|
213
|
+
} catch (err) {
|
|
214
|
+
core.warning(`Could not update TOML acceptance criteria: ${err.message}`);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// Also update MISSION.md checkboxes (best-effort, not critical)
|
|
219
|
+
if (metCriteria.length > 0 || metIndices.length > 0) {
|
|
189
220
|
try {
|
|
190
221
|
const missionPath = config.paths?.mission?.path || "MISSION.md";
|
|
191
222
|
const { readFileSync, writeFileSync } = await import("fs");
|
|
192
223
|
let missionContent = readFileSync(missionPath, "utf8");
|
|
193
224
|
let checkedCount = 0;
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
const
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
225
|
+
|
|
226
|
+
// Index-based update: find the Nth checkbox and check it
|
|
227
|
+
if (metIndices.length > 0) {
|
|
228
|
+
const lines = missionContent.split("\n");
|
|
229
|
+
let checkboxIdx = 0;
|
|
230
|
+
for (let i = 0; i < lines.length; i++) {
|
|
231
|
+
if (/^- \[ \] /.test(lines[i])) {
|
|
232
|
+
checkboxIdx++;
|
|
233
|
+
if (metIndices.includes(checkboxIdx)) {
|
|
234
|
+
lines[i] = lines[i].replace(/^- \[ \] /, "- [x] ");
|
|
235
|
+
checkedCount++;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
201
238
|
}
|
|
239
|
+
missionContent = lines.join("\n");
|
|
202
240
|
}
|
|
241
|
+
|
|
242
|
+
// Text-based update (fallback for backwards compatibility)
|
|
243
|
+
if (checkedCount === 0 && metCriteria.length > 0) {
|
|
244
|
+
for (const criterionText of metCriteria) {
|
|
245
|
+
const escaped = criterionText.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").trim();
|
|
246
|
+
const re = new RegExp(`- \\[ \\] ${escaped}`);
|
|
247
|
+
if (re.test(missionContent)) {
|
|
248
|
+
missionContent = missionContent.replace(re, `- [x] ${criterionText.trim()}`);
|
|
249
|
+
checkedCount++;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
203
254
|
if (checkedCount > 0) {
|
|
204
255
|
writeFileSync(missionPath, missionContent, "utf8");
|
|
205
256
|
core.info(`Updated ${checkedCount} acceptance criteria checkboxes in ${missionPath}`);
|
|
@@ -209,7 +260,7 @@ export async function implementationReview(context) {
|
|
|
209
260
|
}
|
|
210
261
|
}
|
|
211
262
|
|
|
212
|
-
return { textResultForLlm: `Review recorded: ${elements?.length || 0} elements traced, ${gaps?.length || 0} gaps found, ${
|
|
263
|
+
return { textResultForLlm: `Review recorded: ${elements?.length || 0} elements traced, ${gaps?.length || 0} gaps found, ${totalUpdated} criteria checked` };
|
|
213
264
|
},
|
|
214
265
|
});
|
|
215
266
|
|
|
package/src/actions/agentic-step/tasks/maintain-features.js
CHANGED
|
@@ -87,6 +87,11 @@ export async function maintainFeatures(context) {
|
|
|
87
87
|
`2. If there are fewer than ${featureLimit} features, create new features aligned with the mission.`,
|
|
88
88
|
"3. Ensure each feature has clear, testable acceptance criteria.",
|
|
89
89
|
"",
|
|
90
|
+
`## Focus Mode: ${config.focus === "maintenance" ? "MAINTENANCE" : "MISSION"}`,
|
|
91
|
+
config.focus === "maintenance"
|
|
92
|
+
? "The mission is substantially complete. Generate maintenance-oriented features: refactoring, test coverage improvement, documentation, performance optimisation. Do not create mission-gap features."
|
|
93
|
+
: "Create features that advance the mission toward completion. Focus on unimplemented capabilities and gaps.",
|
|
94
|
+
"",
|
|
90
95
|
formatPathsSection(writablePaths, config.readOnlyPaths, config),
|
|
91
96
|
"",
|
|
92
97
|
"## Constraints",
|
|
package/src/actions/agentic-step/tasks/review-issue.js
CHANGED
|
@@ -237,8 +237,9 @@ export async function reviewIssue(context) {
|
|
|
237
237
|
return reviewSingleIssue({ octokit, repo, config, targetIssueNumber: issueNumber, instructions, model, tuning: t, logFilePath, screenshotFilePath });
|
|
238
238
|
}
|
|
239
239
|
|
|
240
|
-
// Batch mode: find
|
|
241
|
-
const
|
|
240
|
+
// Batch mode: find unreviewed issues (cap from config, default 3)
|
|
241
|
+
const reviewCap = config.reviewIssuesCap ?? 3;
|
|
242
|
+
const issueNumbers = await findUnreviewedIssues(octokit, repo, reviewCap);
|
|
242
243
|
if (issueNumbers.length === 0) {
|
|
243
244
|
return { outcome: "nop", details: "No open automated issues to review" };
|
|
244
245
|
}
|
|
@@ -248,7 +249,17 @@ export async function reviewIssue(context) {
|
|
|
248
249
|
let totalInputTokens = 0;
|
|
249
250
|
let totalOutputTokens = 0;
|
|
250
251
|
|
|
252
|
+
// W19: Remaining-time guard — work within the 10-minute step timeout
|
|
253
|
+
const STEP_TIMEOUT_MS = 10 * 60 * 1000;
|
|
254
|
+
const MIN_REMAINING_MS = 4 * 60 * 1000; // need at least 4 min for a review
|
|
255
|
+
const batchStart = Date.now();
|
|
256
|
+
|
|
251
257
|
for (const num of issueNumbers) {
|
|
258
|
+
const elapsed = Date.now() - batchStart;
|
|
259
|
+
if (elapsed + MIN_REMAINING_MS > STEP_TIMEOUT_MS) {
|
|
260
|
+
core.warning(`Skipping issue #${num} — only ${Math.round((STEP_TIMEOUT_MS - elapsed) / 1000)}s remaining (need ${MIN_REMAINING_MS / 1000}s). Reviewed ${results.length}/${issueNumbers.length} issues.`);
|
|
261
|
+
break;
|
|
262
|
+
}
|
|
252
263
|
core.info(`Batch reviewing issue #${num} (${results.length + 1}/${issueNumbers.length})`);
|
|
253
264
|
const result = await reviewSingleIssue({
|
|
254
265
|
octokit, repo, config, targetIssueNumber: num, instructions, model, tuning: t, logFilePath, screenshotFilePath,
|
|
package/src/actions/agentic-step/tasks/transform.js
CHANGED
|
@@ -215,6 +215,11 @@ export async function transform(context) {
|
|
|
215
215
|
})()),
|
|
216
216
|
] : []),
|
|
217
217
|
"",
|
|
218
|
+
`## Focus Mode: ${config.focus === "maintenance" ? "MAINTENANCE" : "MISSION"}`,
|
|
219
|
+
config.focus === "maintenance"
|
|
220
|
+
? "The mission is substantially complete. Focus on adding value to the existing codebase: improve test coverage, refactor for clarity, improve documentation, optimise performance. Do not create new feature issues or push for mission-complete."
|
|
221
|
+
: "Work toward completing the mission. Implement missing capabilities, resolve gaps, and advance toward mission completion.",
|
|
222
|
+
"",
|
|
218
223
|
"## Your Task",
|
|
219
224
|
"Analyze the mission and open issues (use list_issues tool).",
|
|
220
225
|
"Read the source files you need (use read_file tool).",
|
|
@@ -332,6 +337,19 @@ export async function transform(context) {
|
|
|
332
337
|
const sessionDurationMs = Date.now() - sessionStartTime;
|
|
333
338
|
core.info(`Transform session completed in ${Math.round(sessionDurationMs / 1000)}s (${result.tokensIn + result.tokensOut} tokens, maxToolCalls=${maxToolCalls})`);
|
|
334
339
|
|
|
340
|
+
// W15: Post-transform lockfile sync — if package.json was modified, regenerate lockfile
|
|
341
|
+
try {
|
|
342
|
+
const { execSync } = await import("child_process");
|
|
343
|
+
const gitDiff = execSync("git diff --name-only HEAD", { encoding: "utf8", timeout: 10000 }).trim();
|
|
344
|
+
if (gitDiff.split("\n").some(f => f.endsWith("package.json"))) {
|
|
345
|
+
core.info("package.json changed during transform — syncing lockfile");
|
|
346
|
+
execSync("npm install --package-lock-only", { encoding: "utf8", timeout: 60000, stdio: "pipe" });
|
|
347
|
+
core.info("Lockfile synced successfully");
|
|
348
|
+
}
|
|
349
|
+
} catch (err) {
|
|
350
|
+
core.warning(`Post-transform lockfile sync failed: ${err.message}`);
|
|
351
|
+
}
|
|
352
|
+
|
|
335
353
|
// Detect mission-complete hint
|
|
336
354
|
const lowerResult = (result.agentMessage || "").toLowerCase();
|
|
337
355
|
if (lowerResult.includes("mission is satisfied") || lowerResult.includes("mission is complete") || lowerResult.includes("no changes needed")) {
|
package/src/copilot/config.js
CHANGED
|
@@ -260,16 +260,29 @@ export function loadConfig(configPath) {
|
|
|
260
260
|
minBranchCoverage: goals["min-branch-coverage"] ?? 30,
|
|
261
261
|
};
|
|
262
262
|
|
|
263
|
-
// Mission-complete thresholds (with safe defaults)
|
|
263
|
+
// Mission-complete thresholds (with safe defaults from profile)
|
|
264
264
|
// C6: Removed minDedicatedTests and requireDedicatedTests
|
|
265
265
|
const mc = toml["mission-complete"] || {};
|
|
266
|
+
const activeProfile = profilesSection[tuning.profileName] || {};
|
|
266
267
|
const missionCompleteThresholds = {
|
|
267
|
-
minResolvedIssues: mc["min-resolved-issues"] ??
|
|
268
|
-
maxSourceTodos: mc["max-source-todos"] ?? 0,
|
|
268
|
+
minResolvedIssues: mc["min-resolved-issues"] ?? activeProfile["min-resolved-issues"] ?? 1,
|
|
269
|
+
maxSourceTodos: mc["max-source-todos"] ?? activeProfile["max-source-todos"] ?? 0,
|
|
270
|
+
acceptanceCriteriaThreshold: mc["acceptance-criteria-threshold"] ?? activeProfile["acceptance-criteria-threshold"] ?? 50,
|
|
271
|
+
minCumulativeTransforms: mc["min-cumulative-transforms"] ?? activeProfile["min-cumulative-transforms"] ?? 1,
|
|
272
|
+
requireNoOpenIssues: mc["require-no-open-issues"] ?? true,
|
|
273
|
+
requireNoOpenPrs: mc["require-no-open-prs"] ?? true,
|
|
274
|
+
requireNoCriticalGaps: mc["require-no-critical-gaps"] ?? true,
|
|
269
275
|
};
|
|
270
276
|
|
|
277
|
+
// Review issues cap (from limits, with profile fallback)
|
|
278
|
+
const reviewIssuesCap = limitsSection["review-issues-cap"] ?? activeProfile["review-issues-cap"] ?? 3;
|
|
279
|
+
|
|
280
|
+
// Schedule focus
|
|
281
|
+
const focus = toml.schedule?.focus || "mission";
|
|
282
|
+
|
|
271
283
|
return {
|
|
272
284
|
supervisor: toml.schedule?.supervisor || "daily",
|
|
285
|
+
focus,
|
|
273
286
|
model: toml.tuning?.model || toml.schedule?.model || "gpt-5-mini",
|
|
274
287
|
tuning,
|
|
275
288
|
paths,
|
|
@@ -288,6 +301,7 @@ export function loadConfig(configPath) {
|
|
|
288
301
|
init: toml.init || null,
|
|
289
302
|
tdd: toml.tdd === true,
|
|
290
303
|
missionCompleteThresholds,
|
|
304
|
+
reviewIssuesCap,
|
|
291
305
|
coverageGoals,
|
|
292
306
|
maxTokensPerMaintain: resolvedLimits.maxTokensPerMaintain || 200000,
|
|
293
307
|
writablePaths,
|
package/src/copilot/telemetry.js
CHANGED
|
@@ -70,6 +70,23 @@ export function countSourceLines(dir) {
|
|
|
70
70
|
* @returns {{ met: number, total: number }}
|
|
71
71
|
*/
|
|
72
72
|
export function countAcceptanceCriteria(missionPath) {
|
|
73
|
+
// W17: Try structured TOML first (primary source)
|
|
74
|
+
try {
|
|
75
|
+
const tomlPath = "agentic-lib.toml";
|
|
76
|
+
if (existsSync(tomlPath)) {
|
|
77
|
+
const toml = readFileSync(tomlPath, "utf8");
|
|
78
|
+
if (toml.includes("[acceptance-criteria]")) {
|
|
79
|
+
const totalMatch = toml.match(/^\s*total\s*=\s*(\d+)/m);
|
|
80
|
+
if (totalMatch) {
|
|
81
|
+
const total = parseInt(totalMatch[1], 10);
|
|
82
|
+
const metMatches = toml.match(/met\s*=\s*true/g) || [];
|
|
83
|
+
return { met: metMatches.length, total };
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
} catch { /* fall through to MISSION.md */ }
|
|
88
|
+
|
|
89
|
+
// Fallback: count checkboxes in MISSION.md
|
|
73
90
|
if (!missionPath || !existsSync(missionPath)) return { met: 0, total: 0 };
|
|
74
91
|
try {
|
|
75
92
|
const content = readFileSync(missionPath, "utf8");
|
|
@@ -112,7 +129,7 @@ export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCos
|
|
|
112
129
|
const todoCount = countSourceTodos(srcRoot);
|
|
113
130
|
|
|
114
131
|
const thresholds = config.missionCompleteThresholds || {};
|
|
115
|
-
const minResolved = thresholds.minResolvedIssues ??
|
|
132
|
+
const minResolved = thresholds.minResolvedIssues ?? 1;
|
|
116
133
|
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
117
134
|
|
|
118
135
|
// C6: Dynamic metrics
|
package/src/seeds/zero-MISSION.md
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
# Mission
|
|
2
2
|
|
|
3
|
-
A JavaScript library
|
|
3
|
+
A JavaScript library exporting FizzBuzz functions. This is the simplest possible mission — if the pipeline can't complete this and stop, something is fundamentally broken.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Core Functions
|
|
6
6
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
- Handle Unicode strings correctly (compare code points, not UTF-16 code units).
|
|
10
|
-
- Validate inputs: throw `TypeError` for non-string/non-integer arguments, `RangeError` for unequal-length strings or negative integers.
|
|
7
|
+
- `fizzBuzz(n)` — return an array of strings from 1 to n, replacing multiples of 3 with "Fizz", multiples of 5 with "Buzz", and multiples of both with "FizzBuzz".
|
|
8
|
+
- `fizzBuzzSingle(n)` — return the FizzBuzz string for a single positive integer.
|
|
11
9
|
|
|
12
10
|
## Requirements
|
|
13
11
|
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
12
|
+
- Handle edge cases: `n = 0` returns an empty array, negative numbers throw `RangeError`, non-integers throw `TypeError`.
|
|
13
|
+
- Export both functions as named exports from `src/lib/main.js`.
|
|
14
|
+
- Comprehensive unit tests covering normal operation and all edge cases.
|
|
15
|
+
- README with usage examples.
|
|
17
16
|
|
|
18
17
|
## Acceptance Criteria
|
|
19
18
|
|
|
20
|
-
- [ ]
|
|
21
|
-
- [ ]
|
|
22
|
-
- [ ]
|
|
23
|
-
- [ ]
|
|
24
|
-
- [ ]
|
|
19
|
+
- [ ] `fizzBuzz(15)` returns the correct 15-element array ending with "FizzBuzz"
|
|
20
|
+
- [ ] `fizzBuzzSingle(3)` returns "Fizz"
|
|
21
|
+
- [ ] `fizzBuzzSingle(5)` returns "Buzz"
|
|
22
|
+
- [ ] `fizzBuzzSingle(15)` returns "FizzBuzz"
|
|
23
|
+
- [ ] `fizzBuzzSingle(7)` returns "7"
|
|
24
|
+
- [ ] `fizzBuzz(0)` returns `[]`
|
|
25
25
|
- [ ] All unit tests pass
|
|
26
26
|
- [ ] README documents usage with examples
|