@xn-intenton-z2a/agentic-lib 7.4.13 → 7.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/agent-apply-fix.md +30 -1
- package/.github/agents/agent-director.md +28 -7
- package/.github/agents/agent-discussion-bot.md +28 -0
- package/.github/agents/agent-implementation-review.md +21 -0
- package/.github/agents/agent-issue-resolution.md +32 -0
- package/.github/agents/agent-iterate.md +33 -0
- package/.github/agents/agent-maintain-features.md +34 -0
- package/.github/agents/agent-maintain-library.md +39 -0
- package/.github/agents/agent-ready-issue.md +21 -0
- package/.github/agents/agent-review-issue.md +16 -0
- package/.github/agents/agent-supervisor.md +60 -0
- package/.github/workflows/agentic-lib-init.yml +76 -11
- package/.github/workflows/agentic-lib-schedule.yml +58 -6
- package/.github/workflows/agentic-lib-test.yml +31 -3
- package/.github/workflows/agentic-lib-update.yml +20 -0
- package/.github/workflows/agentic-lib-workflow.yml +63 -52
- package/README.md +23 -12
- package/agentic-lib.toml +3 -3
- package/bin/agentic-lib.js +34 -4
- package/package.json +1 -1
- package/src/actions/agentic-step/index.js +51 -34
- package/src/actions/agentic-step/logging.js +7 -14
- package/src/actions/agentic-step/tasks/direct.js +52 -11
- package/src/actions/agentic-step/tasks/maintain-features.js +7 -0
- package/src/actions/agentic-step/tasks/maintain-library.js +10 -0
- package/src/actions/agentic-step/tasks/supervise.js +14 -6
- package/src/actions/agentic-step/tasks/transform.js +37 -1
- package/src/actions/commit-if-changed/action.yml +2 -1
- package/src/copilot/config.js +3 -3
- package/src/copilot/guards.js +5 -5
- package/src/copilot/state.js +211 -0
- package/src/copilot/telemetry.js +88 -10
- package/src/seeds/missions/1-dan-create-c64-emulator.md +13 -13
- package/src/seeds/missions/1-dan-create-planning-engine.md +82 -0
- package/src/seeds/missions/1-kyu-create-ray-tracer.md +31 -8
- package/src/seeds/missions/2-dan-create-self-hosted.md +67 -0
- package/src/seeds/missions/2-kyu-create-markdown-compiler.md +48 -0
- package/src/seeds/missions/2-kyu-create-plot-code-lib.md +35 -16
- package/src/seeds/missions/3-kyu-analyze-lunar-lander.md +13 -14
- package/src/seeds/missions/3-kyu-evaluate-time-series-lab.md +22 -28
- package/src/seeds/missions/4-kyu-analyze-json-schema-diff.md +46 -2
- package/src/seeds/missions/4-kyu-apply-cron-engine.md +16 -18
- package/src/seeds/missions/4-kyu-apply-dense-encoding.md +14 -11
- package/src/seeds/missions/4-kyu-apply-owl-ontology.md +47 -0
- package/src/seeds/missions/5-kyu-apply-ascii-face.md +40 -0
- package/src/seeds/missions/5-kyu-apply-string-utils.md +17 -17
- package/src/seeds/missions/6-kyu-understand-hamming-distance.md +12 -12
- package/src/seeds/missions/6-kyu-understand-roman-numerals.md +12 -12
- package/src/seeds/missions/8-kyu-remember-hello-world.md +10 -0
- package/src/seeds/zero-MISSION.md +12 -12
- package/src/seeds/zero-package.json +1 -1
- package/src/seeds/missions/2-dan-create-agi.md +0 -22
- package/src/seeds/missions/2-kyu-evaluate-markdown-compiler.md +0 -33
- package/src/seeds/missions/3-kyu-evaluate-owl-ontology.md +0 -34
- package/src/seeds/missions/5-kyu-create-ascii-face.md +0 -4
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// SPDX-License-Identifier: GPL-3.0-only
|
|
2
|
+
// Copyright (C) 2025-2026 Polycode Limited
|
|
3
|
+
// state.js — Persistent state across workflow runs via agentic-lib-state.toml
|
|
4
|
+
//
|
|
5
|
+
// Lives on the agentic-lib-logs branch. Read at the start of each
|
|
6
|
+
// agentic-step invocation, written at the end.
|
|
7
|
+
|
|
8
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
9
|
+
import { join } from "path";
|
|
10
|
+
|
|
11
|
+
/**
 * Build a fresh state object with every section at its initial value.
 * Used when no state file exists yet (first run after init).
 *
 * A new object graph is created on every call, so callers may mutate the
 * result freely.
 *
 * @returns {Object} State with `counters`, `budget`, `status` and `schedule` sections
 */
export function defaultState() {
  const counters = {
    "log-sequence": 0,
    "cumulative-transforms": 0,
    "cumulative-maintain-features": 0,
    "cumulative-maintain-library": 0,
    "cumulative-nop-cycles": 0,
    "total-tokens": 0,
  };

  const budget = {
    "transformation-budget-used": 0,
    "transformation-budget-cap": 0,
  };

  const status = {
    "mission-complete": false,
    "mission-failed": false,
    "mission-failed-reason": "",
    "last-transform-at": "",
    "last-non-nop-at": "",
  };

  const schedule = {
    current: "",
    "auto-disabled": false,
    "auto-disabled-reason": "",
  };

  return { counters, budget, status, schedule };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Serialize a state object to TOML format.
 * Uses a simple serializer — no external TOML library needed for writing.
 *
 * Each top-level property of `state` becomes a `[section]` table and each
 * property of that section becomes one `key = value` line. Booleans and
 * numbers are written bare; every other value is stringified and written as
 * a TOML basic string.
 *
 * String values are fully escaped (backslash, double quote and all control
 * characters), so a value containing a newline can never spill onto a second
 * line and break the line-oriented file format. Top-level entries that are
 * not objects are skipped because they cannot be expressed as a table.
 *
 * @param {Object} state - State object (section name → key/value map)
 * @returns {string} TOML document text
 */
export function serializeState(state) {
  const lines = [
    "# agentic-lib-state.toml — Persistent state across workflow runs",
    "# Written to the agentic-lib-logs branch by each agentic-step invocation",
    "",
  ];

  for (const [section, values] of Object.entries(state)) {
    // A table needs key/value pairs; null or scalar sections have none.
    if (values === null || typeof values !== "object") continue;

    lines.push(`[${section}]`);
    for (const [key, val] of Object.entries(values)) {
      if (typeof val === "boolean" || typeof val === "number") {
        lines.push(`${key} = ${val}`);
      } else {
        lines.push(`${key} = "${escapeTomlBasicString(String(val))}"`);
      }
    }
    lines.push("");
  }

  return lines.join("\n");
}

/**
 * Escape text for use inside a double-quoted TOML basic string.
 *
 * @param {string} text - Raw string value
 * @returns {string} Text with `\`, `"` and control characters escaped
 */
function escapeTomlBasicString(text) {
  const shortEscapes = {
    "\\": "\\\\",
    '"': '\\"',
    "\b": "\\b",
    "\t": "\\t",
    "\n": "\\n",
    "\f": "\\f",
    "\r": "\\r",
  };
  return text.replace(
    /[\u0000-\u001f\u007f"\\]/g,
    (ch) => shortEscapes[ch] || `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`,
  );
}
|
|
70
|
+
|
|
71
|
+
/**
 * Parse a TOML state file into a state object.
 * Simple parser that handles the known state structure.
 *
 * Parsing starts from `defaultState()`, so any key missing from the file
 * keeps its default. Supported syntax: `#` comment lines, `[section]`
 * headers and single-line `key = value` pairs whose value is `true`/`false`,
 * an integer, a decimal, or a double-quoted string. Anything else is kept as
 * the raw text after the `=`. Sections not present in the defaults are
 * created on demand.
 *
 * Section and key names that would reach the object prototype
 * (`__proto__`, `constructor`, `prototype`) are ignored, so a crafted or
 * corrupted state file cannot modify `Object.prototype`.
 *
 * @param {string} content - Text of agentic-lib-state.toml
 * @returns {Object} Parsed state merged over the defaults
 */
export function parseState(content) {
  const state = defaultState();
  const text = content == null ? "" : String(content);
  let currentSection = null;

  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const sectionMatch = trimmed.match(/^\[(\w[\w-]*)\]$/);
    if (sectionMatch) {
      const name = sectionMatch[1];
      if (isUnsafeStateKey(name)) {
        // Drop the whole table: its keys must not land on a prototype object.
        currentSection = null;
        continue;
      }
      if (!Object.prototype.hasOwnProperty.call(state, name)) state[name] = {};
      currentSection = name;
      continue;
    }

    if (!currentSection) continue;

    const kvMatch = trimmed.match(/^([\w-]+)\s*=\s*(.+)$/);
    if (!kvMatch) continue;

    const [, key, rawVal] = kvMatch;
    if (isUnsafeStateKey(key)) continue;

    state[currentSection][key] = parseStateValue(rawVal);
  }

  return state;
}

/**
 * Report whether a section or key name would address the prototype chain.
 *
 * @param {string} name - Section or key name from the file
 * @returns {boolean} True when the name must not be used as a property key
 */
function isUnsafeStateKey(name) {
  return name === "__proto__" || name === "constructor" || name === "prototype";
}

/**
 * Convert the raw text right of `=` into a boolean, number or string.
 *
 * @param {string} rawVal - Trimmed value text
 * @returns {boolean|number|string} Decoded value
 */
function parseStateValue(rawVal) {
  if (rawVal === "true") return true;
  if (rawVal === "false") return false;
  if (/^-?\d+$/.test(rawVal)) return parseInt(rawVal, 10);
  if (/^-?\d+\.\d+$/.test(rawVal)) return parseFloat(rawVal);
  if (rawVal.startsWith('"') && rawVal.endsWith('"')) {
    return unescapeTomlBasicString(rawVal.slice(1, -1));
  }
  return rawVal;
}

/**
 * Decode the escape sequences of a TOML basic string in a single pass, so an
 * escaped backslash is never re-read as the start of another escape.
 *
 * @param {string} body - String contents without the surrounding quotes
 * @returns {string} Decoded text; unknown escapes are left untouched
 */
function unescapeTomlBasicString(body) {
  return body.replace(/\\(u[0-9a-fA-F]{4}|.)/g, (match, esc) => {
    if (esc.length === 5) return String.fromCharCode(parseInt(esc.slice(1), 16));
    switch (esc) {
      case "\\": return "\\";
      case '"': return '"';
      case "n": return "\n";
      case "t": return "\t";
      case "r": return "\r";
      case "b": return "\b";
      case "f": return "\f";
      default: return match;
    }
  });
}
|
|
114
|
+
|
|
115
|
+
/**
 * Load persisted state from agentic-lib-state.toml inside a directory.
 *
 * Never throws for a missing or unreadable file: if the file does not exist,
 * or reading/parsing it fails, the default state is returned instead.
 *
 * @param {string} logsDir - Directory containing agentic-lib-state.toml; falsy values mean the current directory
 * @returns {Object} Parsed state, or defaults when the file is missing or unusable
 */
export function readState(logsDir) {
  const statePath = join(logsDir || ".", "agentic-lib-state.toml");

  if (existsSync(statePath)) {
    try {
      return parseState(readFileSync(statePath, "utf8"));
    } catch {
      // Best effort: an unreadable state file is treated like a missing one.
    }
  }

  return defaultState();
}
|
|
131
|
+
|
|
132
|
+
/**
 * Persist a state object as agentic-lib-state.toml inside a directory.
 * Any existing file at that path is overwritten.
 *
 * @param {string} logsDir - Directory to write to; falsy values mean the current directory
 * @param {Object} state - State object to serialize
 */
export function writeState(logsDir, state) {
  const targetDir = logsDir || ".";
  const statePath = join(targetDir, "agentic-lib-state.toml");
  const tomlText = serializeState(state);
  writeFileSync(statePath, tomlText);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Increment a counter in the state object. Returns the updated state.
 *
 * Only counters that already exist as own properties of `state.counters`
 * are incremented; unknown keys are ignored. The own-property check matters:
 * a key such as "toString" exists on every object through the prototype
 * chain and must not be treated as a counter.
 *
 * @param {Object} state - State object
 * @param {string} key - Counter key (e.g. "cumulative-transforms")
 * @returns {Object} The same state object (mutated)
 */
export function incrementCounter(state, key) {
  const counters = state.counters;
  if (counters && Object.prototype.hasOwnProperty.call(counters, key)) {
    counters[key] = (counters[key] || 0) + 1;
  }
  return state;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Zero the nop-cycle counter (intended for any non-nop outcome).
 *
 * The counter is stored under the key "cumulative-nop-cycles". A state
 * without a `counters` section is returned untouched.
 *
 * @param {Object} state - State object
 * @returns {Object} The same state object (mutated)
 */
export function resetConsecutiveNops(state) {
  const { counters } = state;
  if (!counters) return state;

  counters["cumulative-nop-cycles"] = 0;
  return state;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Update state after a task completes.
 *
 * Effects, in order:
 *  - "log-sequence" is incremented and `tokensUsed` is added to "total-tokens".
 *  - A positive `transformationCost` is added to "cumulative-transforms" and
 *    "transformation-budget-used", and "last-transform-at" is stamped.
 *  - "maintain-features" / "maintain-library" tasks bump their own counter
 *    unless the outcome is "nop" or "error".
 *  - A "nop" outcome increments "cumulative-nop-cycles"; every other outcome
 *    (including "error") resets it to 0 and stamps "last-non-nop-at".
 *
 * Missing `counters`, `budget` or `status` sections are created, matching
 * the tolerance of the other helpers in this module for partial state
 * objects. Numeric inputs are coerced, so a numeric string adds rather than
 * concatenates; non-numeric values count as 0.
 *
 * @param {Object} state - State object
 * @param {Object} params
 * @param {string} params.task - Task name
 * @param {string} params.outcome - Task outcome
 * @param {number} params.transformationCost - 0 or 1
 * @param {number} params.tokensUsed - Tokens consumed by this task
 * @returns {Object} The same state object (mutated)
 */
export function updateStateAfterTask(state, { task, outcome, transformationCost, tokensUsed } = {}) {
  const now = new Date().toISOString();

  if (!state.counters) state.counters = {};
  if (!state.budget) state.budget = {};
  if (!state.status) state.status = {};
  const { counters, budget, status } = state;

  const cost = toFiniteNumber(transformationCost);
  const tokens = toFiniteNumber(tokensUsed);

  // Update counters
  counters["log-sequence"] = toFiniteNumber(counters["log-sequence"]) + 1;
  counters["total-tokens"] = toFiniteNumber(counters["total-tokens"]) + tokens;

  if (cost > 0) {
    counters["cumulative-transforms"] = toFiniteNumber(counters["cumulative-transforms"]) + cost;
    budget["transformation-budget-used"] = toFiniteNumber(budget["transformation-budget-used"]) + cost;
    status["last-transform-at"] = now;
  }

  // Track task-specific counters
  const didWork = outcome !== "nop" && outcome !== "error";
  if (didWork && (task === "maintain-features" || task === "maintain-library")) {
    const counterKey = `cumulative-${task}`;
    counters[counterKey] = toFiniteNumber(counters[counterKey]) + 1;
  }

  // Consecutive nop tracking
  if (outcome === "nop") {
    counters["cumulative-nop-cycles"] = toFiniteNumber(counters["cumulative-nop-cycles"]) + 1;
  } else {
    counters["cumulative-nop-cycles"] = 0;
    status["last-non-nop-at"] = now;
  }

  return state;
}

/**
 * Coerce a value to a finite number, falling back to 0.
 *
 * @param {*} value - Candidate numeric value
 * @returns {number} Finite number, or 0 when the value is not numeric
 */
function toFiniteNumber(value) {
  const n = Number(value);
  return Number.isFinite(n) ? n : 0;
}
|
package/src/copilot/telemetry.js
CHANGED
|
@@ -37,18 +37,68 @@ export function countSourceTodos(dir, extensions = [".js", ".ts", ".mjs"]) {
|
|
|
37
37
|
return count;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
/**
 * Count source lines in a directory (recursive).
 *
 * Entries named "node_modules" and entries starting with "." are skipped.
 * Unreadable files or directories are skipped rather than failing the whole
 * count — this is a best-effort dashboard metric.
 *
 * A trailing newline terminates the last line instead of starting a new one,
 * so "a\nb\n" is 2 lines and an empty file is 0 lines.
 *
 * @param {string} dir - Directory to scan
 * @param {string[]} [extensions] - File name suffixes to include (defaults to .js/.ts/.mjs)
 * @returns {number} Total line count of matching files, 0 if `dir` is missing
 */
export function countSourceLines(dir, extensions = [".js", ".ts", ".mjs"]) {
  if (!dir || !existsSync(dir)) return 0;

  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return 0; // directory vanished or is not readable
  }

  let count = 0;
  for (const entry of entries) {
    if (entry === "node_modules" || entry.startsWith(".")) continue;
    const fullPath = join(dir, entry);
    try {
      if (statSync(fullPath).isDirectory()) {
        count += countSourceLines(fullPath, extensions);
      } else if (extensions.some((ext) => entry.endsWith(ext))) {
        count += countTextLines(readFileSync(fullPath, "utf8"));
      }
    } catch { /* skip entries that cannot be inspected or read */ }
  }
  return count;
}

/**
 * Count the lines in a block of text.
 *
 * @param {string} text - File contents
 * @returns {number} Number of lines; a final newline does not add a line
 */
function countTextLines(text) {
  if (text === "") return 0;
  const segments = text.split("\n").length;
  return text.endsWith("\n") ? segments - 1 : segments;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Count acceptance criteria checkboxes in MISSION.md.
 *
 * A checkbox is a markdown task-list item: optional indentation, a list
 * marker (`-`, `*`, `+`, or an ordered marker such as `1.`), then `[ ]`
 * (open) or `[x]` / `[X]` (met). Matching is anchored to the start of a
 * line, so checkbox-like text quoted in the middle of a sentence is not
 * counted. The whole file is scanned, not only an "Acceptance Criteria"
 * section.
 *
 * @param {string} missionPath - Path to the mission markdown file
 * @returns {{ met: number, total: number }} Checked count and overall count; zeros if the file is missing or unreadable
 */
export function countAcceptanceCriteria(missionPath) {
  if (!missionPath || !existsSync(missionPath)) return { met: 0, total: 0 };
  try {
    const content = readFileSync(missionPath, "utf8");
    const taskItem = /^[ \t]*(?:[-*+]|\d+[.)])[ \t]+\[([ xX])\]/gm;
    let met = 0;
    let total = 0;
    for (const [, mark] of content.matchAll(taskItem)) {
      total += 1;
      if (mark !== " ") met += 1;
    }
    return { met, total };
  } catch { return { met: 0, total: 0 }; }
}
|
|
81
|
+
|
|
40
82
|
/**
|
|
41
83
|
* Build mission-complete metrics array for the intentïon.md dashboard.
|
|
42
84
|
*
|
|
85
|
+
* C2: Uses cumulativeCost from persistent state (not per-run).
|
|
86
|
+
* C5: Includes both per-task and cumulative values.
|
|
87
|
+
* C6: Replaces "Dedicated test files" with dynamic metrics.
|
|
88
|
+
*
|
|
43
89
|
* @param {Object} config - Parsed agentic-lib config
|
|
44
90
|
* @param {Object} result - Task result object
|
|
45
91
|
* @param {Array} _limitsStatus - Limits status array (unused but kept for signature compatibility)
|
|
46
|
-
* @param {number} cumulativeCost - Cumulative transformation cost
|
|
92
|
+
* @param {number} cumulativeCost - Cumulative transformation cost (from state.toml)
|
|
47
93
|
* @param {number} featureIssueCount - Number of open feature issues
|
|
48
94
|
* @param {number} maintenanceIssueCount - Number of open maintenance issues
|
|
95
|
+
* @param {Object} [taskCosts] - Per-task costs for split display
|
|
96
|
+
* @param {number} [taskCosts.transformationCost] - This task's transformation cost (0 or 1)
|
|
97
|
+
* @param {number} [taskCosts.tokensUsed] - This task's token usage
|
|
98
|
+
* @param {number} [taskCosts.cumulativeTokens] - Cumulative tokens from state
|
|
49
99
|
* @returns {Array} Mission metrics entries
|
|
50
100
|
*/
|
|
51
|
-
export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCost, featureIssueCount, maintenanceIssueCount) {
|
|
101
|
+
export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCost, featureIssueCount, maintenanceIssueCount, taskCosts) {
|
|
52
102
|
const openIssues = featureIssueCount + maintenanceIssueCount;
|
|
53
103
|
const budgetCap = config.transformationBudget || 0;
|
|
54
104
|
const resolvedCount = result.resolvedCount || 0;
|
|
@@ -61,26 +111,54 @@ export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCos
|
|
|
61
111
|
const srcRoot = sourceDir.includes("/") ? sourceDir.split("/").slice(0, -1).join("/") || "src" : "src";
|
|
62
112
|
const todoCount = countSourceTodos(srcRoot);
|
|
63
113
|
|
|
64
|
-
const dedicatedTestCount = result.dedicatedTestCount ?? 0;
|
|
65
|
-
|
|
66
114
|
const thresholds = config.missionCompleteThresholds || {};
|
|
67
115
|
const minResolved = thresholds.minResolvedIssues ?? 3;
|
|
68
|
-
const minTests = thresholds.minDedicatedTests ?? 1;
|
|
69
116
|
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
70
117
|
|
|
118
|
+
// C6: Dynamic metrics
|
|
119
|
+
const sourceLines = countSourceLines(sourceDir);
|
|
120
|
+
const featuresPath = config.paths?.features?.path || "features/";
|
|
121
|
+
const featureSpecCount = countMdFilesInDir(featuresPath);
|
|
122
|
+
const missionPath = config.paths?.mission?.path || "MISSION.md";
|
|
123
|
+
const acceptance = countAcceptanceCriteria(missionPath);
|
|
124
|
+
|
|
125
|
+
// C5: Per-task costs (optional)
|
|
126
|
+
const tc = taskCosts || {};
|
|
127
|
+
const thisTaskCost = tc.transformationCost ?? 0;
|
|
128
|
+
const thisTaskTokens = tc.tokensUsed ?? 0;
|
|
129
|
+
const cumulativeTokens = tc.cumulativeTokens ?? 0;
|
|
130
|
+
|
|
71
131
|
return [
|
|
72
132
|
{ metric: "Open issues", value: String(openIssues), target: "0", status: openIssues === 0 ? "MET" : "NOT MET" },
|
|
73
133
|
{ metric: "Open PRs", value: String(openPrs), target: "0", status: openPrs === 0 ? "MET" : "NOT MET" },
|
|
74
134
|
{ metric: "Issues resolved (review or PR merge)", value: String(resolvedCount), target: `>= ${minResolved}`, status: resolvedCount >= minResolved ? "MET" : "NOT MET" },
|
|
75
|
-
{ metric: "Dedicated test files", value: String(dedicatedTestCount), target: `>= ${minTests}`, status: dedicatedTestCount >= minTests ? "MET" : "NOT MET" },
|
|
76
135
|
{ metric: "Source TODO count", value: String(todoCount), target: `<= ${maxTodos}`, status: todoCount <= maxTodos ? "MET" : "NOT MET" },
|
|
77
|
-
{ metric: "
|
|
78
|
-
{ metric: "
|
|
136
|
+
{ metric: "Source lines", value: String(sourceLines), target: "—", status: "—" },
|
|
137
|
+
{ metric: "Feature specs", value: String(featureSpecCount), target: "—", status: "—" },
|
|
138
|
+
{ metric: "Acceptance criteria", value: acceptance.total > 0 ? `${acceptance.met}/${acceptance.total}` : "—", target: "—", status: "—" },
|
|
139
|
+
{ metric: "Transforms (this task)", value: String(thisTaskCost), target: "—", status: "—" },
|
|
140
|
+
{ metric: "Transforms (cumulative)", value: String(cumulativeCost), target: ">= 1", status: cumulativeCost >= 1 ? "MET" : "NOT MET" },
|
|
141
|
+
{ metric: "Budget (this task)", value: String(thisTaskCost), target: "—", status: "—" },
|
|
142
|
+
{ metric: "Budget (cumulative)", value: `${cumulativeCost}/${budgetCap}`, target: budgetCap > 0 ? `< ${budgetCap}` : "unlimited", status: budgetCap > 0 && cumulativeCost >= budgetCap ? "EXHAUSTED" : "OK" },
|
|
143
|
+
{ metric: "Tokens (this task)", value: String(thisTaskTokens), target: "—", status: "—" },
|
|
144
|
+
{ metric: "Tokens (cumulative)", value: String(cumulativeTokens), target: "—", status: "—" },
|
|
79
145
|
{ metric: "Mission complete declared", value: missionComplete ? "YES" : "NO", target: "—", status: "—" },
|
|
80
146
|
{ metric: "Mission failed declared", value: missionFailed ? "YES" : "NO", target: "—", status: "—" },
|
|
81
147
|
];
|
|
82
148
|
}
|
|
83
149
|
|
|
150
|
+
/**
 * Tally the directory entries whose name ends in ".md" (top level only,
 * no recursion into subdirectories).
 *
 * @param {string} dir - Directory to inspect
 * @returns {number} Number of matching entries; 0 when the directory is missing or cannot be listed
 */
function countMdFilesInDir(dir) {
  if (dir && existsSync(dir)) {
    try {
      let total = 0;
      for (const name of readdirSync(dir)) {
        if (name.endsWith(".md")) total += 1;
      }
      return total;
    } catch {
      return 0;
    }
  }
  return 0;
}
|
|
161
|
+
|
|
84
162
|
/**
|
|
85
163
|
* Build mission-complete readiness narrative from metrics.
|
|
86
164
|
*
|
|
@@ -91,8 +169,8 @@ export function buildMissionReadiness(metrics) {
|
|
|
91
169
|
const openIssues = parseInt(metrics.find((m) => m.metric === "Open issues")?.value || "0", 10);
|
|
92
170
|
const openPrs = parseInt(metrics.find((m) => m.metric === "Open PRs")?.value || "0", 10);
|
|
93
171
|
const resolved = parseInt(metrics.find((m) => m.metric === "Issues resolved (review or PR merge)")?.value || "0", 10);
|
|
94
|
-
const dedicatedTests = parseInt(metrics.find((m) => m.metric === "Dedicated test files")?.value || "0", 10);
|
|
95
172
|
const todoCount = parseInt(metrics.find((m) => m.metric === "Source TODO count")?.value || "0", 10);
|
|
173
|
+
const sourceLines = parseInt(metrics.find((m) => m.metric === "Source lines")?.value || "0", 10);
|
|
96
174
|
const missionComplete = metrics.find((m) => m.metric === "Mission complete declared")?.value === "YES";
|
|
97
175
|
const missionFailed = metrics.find((m) => m.metric === "Mission failed declared")?.value === "YES";
|
|
98
176
|
|
|
@@ -105,7 +183,7 @@ export function buildMissionReadiness(metrics) {
|
|
|
105
183
|
|
|
106
184
|
if (allMet) {
|
|
107
185
|
parts.push("Mission complete conditions ARE met.");
|
|
108
|
-
parts.push(`0 open issues, 0 open PRs, ${resolved} issue(s) resolved, ${
|
|
186
|
+
parts.push(`0 open issues, 0 open PRs, ${resolved} issue(s) resolved, ${sourceLines} source lines, TODOs: ${todoCount}.`);
|
|
109
187
|
} else {
|
|
110
188
|
parts.push("Mission complete conditions are NOT met.");
|
|
111
189
|
if (openIssues > 0) parts.push(`${openIssues} open issue(s) remain.`);
|
|
@@ -55,19 +55,19 @@ During web-search and document-gathering workflow phases, the agent should look
|
|
|
55
55
|
|
|
56
56
|
The opcode table in particular should be assembled from reference data during the research phase and stored as `src/lib/opcodes.js` — a data-driven 256-entry array — rather than hand-coded instruction by instruction. This avoids the agent losing track of which opcodes are implemented and reduces the chance of transcription errors.
|
|
57
57
|
|
|
58
|
-
##
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
-
|
|
65
|
-
-
|
|
66
|
-
-
|
|
67
|
-
-
|
|
68
|
-
-
|
|
69
|
-
-
|
|
70
|
-
-
|
|
58
|
+
## Required Capabilities
|
|
59
|
+
|
|
60
|
+
The emulator must provide a public API (exported from `src/lib/main.js`, re-exporting from submodules) that supports:
|
|
61
|
+
|
|
62
|
+
- Creating an emulator instance with 64KB RAM and all subsystem objects (CPU, memory, VIC-II, SID, CIAs).
|
|
63
|
+
- Loading ROM images (KERNAL, BASIC, character generator) as Uint8Arrays. Must be called before running.
|
|
64
|
+
- Loading `.prg` files into memory at the address from their two-byte header.
|
|
65
|
+
- Single-stepping one CPU instruction with cycle-accurate timing and timer updates.
|
|
66
|
+
- Running a full PAL video frame (~19656 cycles) with raster interrupt handling, returning an RGBA framebuffer.
|
|
67
|
+
- Reading the current screen as a Uint8Array RGBA pixel buffer (320x200).
|
|
68
|
+
- Simulating keyboard input via the CIA1 keyboard matrix (press and release).
|
|
69
|
+
- Setting joystick state (up/down/left/right/fire) on port 1 or 2.
|
|
70
|
+
- Hardware reset (CPU to reset vector, clear subsystem state).
|
|
71
71
|
|
|
72
72
|
## CPU (src/lib/cpu.js, src/lib/opcodes.js)
|
|
73
73
|
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
A JavaScript planning engine that implements partial-order planning with constraint satisfaction and belief revision. The engine reads a committed plan file, finds proceedable actions, assembles agents from capabilities, executes them, witnesses the results, and iterates — all within a budget of compute.
|
|
4
|
+
|
|
5
|
+
## Background
|
|
6
|
+
|
|
7
|
+
The engine draws on three interconnected disciplines:
|
|
8
|
+
|
|
9
|
+
- **Knowledge representation** — event calculus for tracking what conditions are initiated and terminated over time, plus truth maintenance for assumption management
|
|
10
|
+
- **Constraint satisfaction** — matching agents to actions based on capabilities and resource requirements, finding non-conflicting sets of actions to execute in parallel
|
|
11
|
+
- **Planning** — partial-order planning (POP) where actions have preconditions and effects, linked by causal chains that can be threatened by other actions
|
|
12
|
+
|
|
13
|
+
## Required Capabilities
|
|
14
|
+
|
|
15
|
+
### The Plan File
|
|
16
|
+
|
|
17
|
+
A committed markdown file with YAML front matter that persists across engine cycles:
|
|
18
|
+
|
|
19
|
+
- **Front matter**: cycle count, realization score (0.0–1.0), iteration and token budgets
|
|
20
|
+
- **Actions table**: each action has an ID, description, preconditions, effects, assigned agent, status (`open`/`ready`/`in-progress`/`achieved`/`failed`), and resource paths
|
|
21
|
+
- **Causal links**: action A provides condition C that action B needs — forming a dependency chain
|
|
22
|
+
- **Threats**: action X might undo condition C that a causal link protects, with a resolution strategy
|
|
23
|
+
- **Assumptions**: beliefs held by the system with justification, strength, and what depends on them
|
|
24
|
+
- **Open conditions**: conditions needed but not yet provided by any action (explicit gaps)
|
|
25
|
+
- **Observations**: event calculus entries recording what happened, what conditions were initiated/terminated
|
|
26
|
+
- **Witness log**: per-cycle realization score with evidence
|
|
27
|
+
|
|
28
|
+
The engine must parse this plan, serialize it back losslessly (round-trip fidelity), and update it after each engine step.
|
|
29
|
+
|
|
30
|
+
### The Engine Loop (7 steps)
|
|
31
|
+
|
|
32
|
+
1. **Assess** — Read current state: plan + source files + logs + agent definitions + capabilities
|
|
33
|
+
2. **Plan** — Refine the planning artifact (add actions, resolve threats, close open conditions)
|
|
34
|
+
3. **Solve** — Find proceedable actions via constraint satisfaction (met preconditions, no unresolved threats, no resource conflicts)
|
|
35
|
+
4. **Assemble** — Match or compose agents from capabilities for each proceedable action
|
|
36
|
+
5. **Execute** — Run agents in parallel (within concurrency limit), each producing changes
|
|
37
|
+
6. **Witness** — Assess realization (0.0–1.0), record observations
|
|
38
|
+
7. **Iterate** — If budget remains and realization is below threshold, loop back to Assess
|
|
39
|
+
|
|
40
|
+
### Constraint Solver
|
|
41
|
+
|
|
42
|
+
An action is **proceedable** when:
|
|
43
|
+
- All preconditions are satisfied (conditions initiated by achieved actions or initial state)
|
|
44
|
+
- No unresolved threats exist against causal links providing those preconditions
|
|
45
|
+
- Its resource paths don't conflict with other actions in the same batch
|
|
46
|
+
|
|
47
|
+
### Belief Revision
|
|
48
|
+
|
|
49
|
+
When an observation contradicts an assumption:
|
|
50
|
+
1. Find the weakest-justified contradicted assumption
|
|
51
|
+
2. Retract it
|
|
52
|
+
3. Cascade: re-evaluate all dependents — any action whose sole support was the retracted assumption reverts to `open`
|
|
53
|
+
4. When an action is achieved, propagate its effects as available preconditions for blocked actions
|
|
54
|
+
|
|
55
|
+
### Agent Assembly
|
|
56
|
+
|
|
57
|
+
Given an action's requirements, find an agent definition whose capabilities cover the needs. If no existing agent matches, compose one from the minimum set of capabilities that provides all needed tools (constraint satisfaction over the capability set).
|
|
58
|
+
|
|
59
|
+
## Requirements
|
|
60
|
+
|
|
61
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
62
|
+
- The plan file format must survive parse → serialize → parse round-trips losslessly.
|
|
63
|
+
- The constraint solver must handle preconditions, threats, and resource conflicts correctly.
|
|
64
|
+
- Belief revision must cascade retractions to dependent actions.
|
|
65
|
+
- No external runtime dependencies.
|
|
66
|
+
- Comprehensive unit tests for plan parsing/serialization, constraint solving, belief revision, and agent assembly.
|
|
67
|
+
- README documenting the planning model, engine loop, and plan file format.
|
|
68
|
+
|
|
69
|
+
## Acceptance Criteria
|
|
70
|
+
|
|
71
|
+
- [ ] Plan file parses from markdown with YAML front matter into a structured object
|
|
72
|
+
- [ ] Plan file serializes back to markdown losslessly (round-trip)
|
|
73
|
+
- [ ] Constraint solver identifies proceedable actions (all preconditions met, no unresolved threats)
|
|
74
|
+
- [ ] Constraint solver excludes actions with unmet preconditions
|
|
75
|
+
- [ ] Constraint solver excludes actions with resource conflicts against the current batch
|
|
76
|
+
- [ ] Belief revision retracts the weakest-justified contradicted assumption
|
|
77
|
+
- [ ] Belief revision cascades: actions depending solely on a retracted assumption revert to `open`
|
|
78
|
+
- [ ] Agent assembly matches an agent definition to an action based on capabilities
|
|
79
|
+
- [ ] Agent assembly composes a novel agent when no existing definition matches
|
|
80
|
+
- [ ] Engine loop iterates through all 7 steps and terminates on budget exhaustion or realization threshold
|
|
81
|
+
- [ ] All unit tests pass
|
|
82
|
+
- [ ] README documents the planning model and engine loop
|
|
@@ -18,14 +18,37 @@ The library should progressively implement:
|
|
|
18
18
|
- Output PPM (P3) format — simple text-based image format
|
|
19
19
|
- Vector3 class for all geometric operations
|
|
20
20
|
- Configurable resolution and ray depth
|
|
21
|
-
- Deterministic output
|
|
21
|
+
- Deterministic output: all random sampling must use a seeded PRNG. Given the same scene JSON, output must be byte-identical across runs.
|
|
22
|
+
|
|
23
|
+
## Scene JSON Structure
|
|
24
|
+
|
|
25
|
+
The scene description format must support at minimum:
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"camera": { "position": [0,2,-5], "lookAt": [0,0,0], "fov": 60 },
|
|
30
|
+
"lights": [{ "position": [5,10,-5], "color": [1,1,1] }],
|
|
31
|
+
"objects": [
|
|
32
|
+
{ "type": "sphere", "center": [0,1,0], "radius": 1, "material": { "color": [1,0,0], "reflective": 0.3 } },
|
|
33
|
+
{ "type": "plane", "normal": [0,1,0], "d": 0, "material": { "color": [0.5,0.5,0.5] } }
|
|
34
|
+
]
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
41
|
+
- No external runtime dependencies.
|
|
42
|
+
- Comprehensive unit tests verifying ray-sphere intersection, reflection vectors, and Snell's law.
|
|
43
|
+
- A sample scene JSON file included in `docs/examples/`.
|
|
44
|
+
- README with rendering examples and scene format documentation.
|
|
22
45
|
|
|
23
46
|
## Acceptance Criteria
|
|
24
47
|
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
- Renders a scene with 3+ spheres, a plane, and a point light in under
|
|
28
|
-
- At least one sphere is reflective and one is refractive
|
|
29
|
-
- Unit tests verify ray-sphere intersection, reflection vectors, and Snell's law
|
|
30
|
-
- A sample scene JSON file is included in `docs/examples/`
|
|
31
|
-
- Output PPM can be viewed in any image viewer (validated by checking header format)
|
|
48
|
+
- [ ] Rendering a scene from JSON returns a PPM string
|
|
49
|
+
- [ ] Parsing a scene JSON string returns a usable scene object
|
|
50
|
+
- [ ] Renders a scene with 3+ spheres, a plane, and a point light in under 30 seconds (640x480)
|
|
51
|
+
- [ ] At least one sphere is reflective and one is refractive
|
|
52
|
+
- [ ] Unit tests verify ray-sphere intersection, reflection vectors, and Snell's law
|
|
53
|
+
- [ ] A sample scene JSON file is included in `docs/examples/`
|
|
54
|
+
- [ ] Output PPM can be viewed in any image viewer (validated by checking header format)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
A JavaScript test framework that proves a code transformation system can manage its own source code — the software engineering equivalent of a compiler that compiles itself.
|
|
4
|
+
|
|
5
|
+
## Background
|
|
6
|
+
|
|
7
|
+
Self-hosting is the strongest proof of capability: if a system can maintain and recreate itself, it can maintain anything. This mission builds a test harness that validates self-hosting through four scenarios of increasing ambition.
|
|
8
|
+
|
|
9
|
+
## Required Capabilities
|
|
10
|
+
|
|
11
|
+
### Scenario 1: Clone Self
|
|
12
|
+
|
|
13
|
+
Copy the system's own source tree into a temporary workspace, write a narrowly-scoped improvement goal (e.g. "Add JSDoc to exported functions in safety.js"), run a transform cycle, and verify the system made a substantive change to its own code.
|
|
14
|
+
|
|
15
|
+
- Workspace: copy of source tree (excluding `.git/`, `node_modules/`, `models/`)
|
|
16
|
+
- Assertions: target file modified, still valid JavaScript, diff is substantive (not just whitespace)
|
|
17
|
+
|
|
18
|
+
### Scenario 2: Empty Bootstrap
|
|
19
|
+
|
|
20
|
+
Start from an empty repository, run an init/purge to create the seed state, write a goal describing the delta between version N and version N+1 (which already exists as a known target), run a transform, and verify convergence toward the known target.
|
|
21
|
+
|
|
22
|
+
- Workspace: empty, then init creates seed state
|
|
23
|
+
- Key insight: because the target already exists, convergence is objectively measurable
|
|
24
|
+
- Assertions: seed files created, features generated, source modified, valid JavaScript
|
|
25
|
+
- Soft assertion: convergence score — keywords from the N+1 delta found in generated code
|
|
26
|
+
|
|
27
|
+
### Scenario 3: Version Increment
|
|
28
|
+
|
|
29
|
+
Copy the source tree, write a goal to update the package version and synchronise seeds, run a transform, and verify the version was updated correctly.
|
|
30
|
+
|
|
31
|
+
- Assertions: `package.json` modified, still valid JSON
|
|
32
|
+
- Soft assertion: version field matches target, seeds updated
|
|
33
|
+
|
|
34
|
+
### Scenario 4: Seed Sync
|
|
35
|
+
|
|
36
|
+
Copy the source tree, tamper with a seed file to introduce an outdated function, write a goal to review and fix seeds, run a transform, and verify the tampered file was corrected.
|
|
37
|
+
|
|
38
|
+
- Assertions: tampered file modified, still valid JavaScript
|
|
39
|
+
- Soft assertion: modification moves toward correctness
|
|
40
|
+
|
|
41
|
+
## Infrastructure Required
|
|
42
|
+
|
|
43
|
+
- A source tree copy function that excludes `.git/`, `node_modules/`, and `models/` directories
|
|
44
|
+
- A diff quality checker that distinguishes substantive changes from whitespace-only edits
|
|
45
|
+
- A JSON validity checker for `package.json` verification
|
|
46
|
+
- A convergence scoring function (0.0–1.0) that measures the fraction of target keywords that appear in generated code
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
51
|
+
- Each scenario must be independently runnable and independently pass/fail.
|
|
52
|
+
- Scenarios must work with a local LLM (no external API dependency required for mechanical validation).
|
|
53
|
+
- No external runtime dependencies beyond what the host system already provides.
|
|
54
|
+
- Comprehensive unit tests for each helper function and integration tests for each scenario.
|
|
55
|
+
- README documenting what self-hosting means, how to run each scenario, and how to interpret results.
|
|
56
|
+
|
|
57
|
+
## Acceptance Criteria
|
|
58
|
+
|
|
59
|
+
- [ ] Clone-self scenario: modifies a file in the source tree, output is valid JavaScript, diff is substantive
|
|
60
|
+
- [ ] Empty-bootstrap scenario: creates seed files, generates features, modifies source, output is valid JavaScript
|
|
61
|
+
- [ ] Version-increment scenario: modifies `package.json`, output is valid JSON
|
|
62
|
+
- [ ] Seed-sync scenario: corrects a tampered seed file, output is valid JavaScript
|
|
63
|
+
- [ ] Convergence score function returns 0.0–1.0 based on target keyword matching
|
|
64
|
+
- [ ] Source tree copy excludes `.git/`, `node_modules/`, and `models/`
|
|
65
|
+
- [ ] Each scenario is independently runnable
|
|
66
|
+
- [ ] All unit tests pass
|
|
67
|
+
- [ ] README documents self-hosting concept and scenario execution
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
Build a Markdown-to-HTML compiler library that converts GitHub Flavored Markdown (GFM) to semantic HTML.
|
|
4
|
+
|
|
5
|
+
## Required Capabilities
|
|
6
|
+
|
|
7
|
+
The library must parse and render these 10 GFM feature areas:
|
|
8
|
+
|
|
9
|
+
1. Headings (h1-h6 via `#` markers) and paragraphs
|
|
10
|
+
2. Inline formatting: bold (`**`), italic (`*`), code (`` ` ``), strikethrough (`~~`)
|
|
11
|
+
3. Links `[text](url)` and images `![alt](url)`
|
|
12
|
+
4. Ordered and unordered lists (including nested lists)
|
|
13
|
+
5. Code blocks (fenced with `` ``` `` and language annotation)
|
|
14
|
+
6. Blockquotes (nested `>`)
|
|
15
|
+
7. Tables (GFM pipe syntax with alignment)
|
|
16
|
+
8. Horizontal rules (`---`, `***`, `___`)
|
|
17
|
+
9. Task lists (`- [ ]`, `- [x]`)
|
|
18
|
+
10. Auto-linked URLs and HTML entity escaping
|
|
19
|
+
|
|
20
|
+
It must also provide a tokenization/inspection mode for testing intermediate representations.
|
|
21
|
+
|
|
22
|
+
## Technical Requirements
|
|
23
|
+
|
|
24
|
+
- Pure JavaScript, no external Markdown parsing libraries
|
|
25
|
+
- XSS-safe: all user content must be HTML-escaped before insertion. Specifically, compiling `<script>alert('xss')</script>` must produce escaped output containing `&lt;script&gt;`, never executable script tags.
|
|
26
|
+
- Well-formed HTML output: every opening tag must have a matching closing tag. Self-closing tags (`<br/>`, `<img/>`) use XHTML syntax.
|
|
27
|
+
- Exported as both CommonJS and ESM
|
|
28
|
+
|
|
29
|
+
## Suggested Approach
|
|
30
|
+
|
|
31
|
+
A two-pass architecture (tokeniser/lexer pass, then renderer pass) works well for this problem, but any architecture that passes the acceptance criteria is acceptable.
|
|
32
|
+
|
|
33
|
+
## Requirements
|
|
34
|
+
|
|
35
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
36
|
+
- Comprehensive test suite covering: 1 test per feature area (10 minimum), nesting combinations (bold in links, links in lists, code in blockquotes — 5 minimum), edge cases (empty input, single character, whitespace only, deeply nested lists — 5 minimum).
|
|
37
|
+
- README with usage examples.
|
|
38
|
+
|
|
39
|
+
## Acceptance Criteria
|
|
40
|
+
|
|
41
|
+
- [ ] Compiling markdown returns an HTML string
|
|
42
|
+
- [ ] Tokenizing markdown returns an array of token objects for inspection
|
|
43
|
+
- [ ] Handles all 10 feature areas listed above
|
|
44
|
+
- [ ] Nested constructs work: bold inside links, links inside lists, code inside blockquotes
|
|
45
|
+
- [ ] Compiling `<script>alert('xss')</script>` produces `&lt;script&gt;` (XSS-safe)
|
|
46
|
+
- [ ] A sample document is compiled and saved to `docs/examples/sample.html`
|
|
47
|
+
- [ ] Output is well-formed HTML (every opening tag has a matching closing tag)
|
|
48
|
+
- [ ] All unit tests pass
|