@xn-intenton-z2a/agentic-lib 7.4.14 → 7.4.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/agent-apply-fix.md +30 -1
- package/.github/agents/agent-director.md +28 -7
- package/.github/agents/agent-discussion-bot.md +28 -0
- package/.github/agents/agent-implementation-review.md +21 -0
- package/.github/agents/agent-issue-resolution.md +32 -0
- package/.github/agents/agent-iterate.md +33 -0
- package/.github/agents/agent-maintain-features.md +34 -0
- package/.github/agents/agent-maintain-library.md +39 -0
- package/.github/agents/agent-ready-issue.md +21 -0
- package/.github/agents/agent-review-issue.md +16 -0
- package/.github/agents/agent-supervisor.md +60 -0
- package/.github/workflows/agentic-lib-init.yml +76 -11
- package/.github/workflows/agentic-lib-schedule.yml +58 -6
- package/.github/workflows/agentic-lib-test.yml +31 -3
- package/.github/workflows/agentic-lib-update.yml +20 -0
- package/.github/workflows/agentic-lib-workflow.yml +42 -7
- package/README.md +23 -12
- package/agentic-lib.toml +2 -2
- package/bin/agentic-lib.js +34 -4
- package/package.json +1 -1
- package/src/actions/agentic-step/index.js +35 -10
- package/src/actions/agentic-step/logging.js +5 -2
- package/src/actions/agentic-step/tasks/direct.js +50 -16
- package/src/actions/agentic-step/tasks/maintain-features.js +7 -0
- package/src/actions/agentic-step/tasks/maintain-library.js +10 -0
- package/src/actions/agentic-step/tasks/transform.js +37 -1
- package/src/actions/commit-if-changed/action.yml +2 -1
- package/src/copilot/config.js +2 -2
- package/src/copilot/github-tools.js +8 -2
- package/src/copilot/guards.js +4 -10
- package/src/copilot/state.js +214 -0
- package/src/copilot/telemetry.js +92 -10
- package/src/seeds/missions/1-dan-create-c64-emulator.md +13 -13
- package/src/seeds/missions/1-dan-create-planning-engine.md +82 -0
- package/src/seeds/missions/1-kyu-create-ray-tracer.md +31 -8
- package/src/seeds/missions/2-dan-create-self-hosted.md +67 -0
- package/src/seeds/missions/2-kyu-create-markdown-compiler.md +48 -0
- package/src/seeds/missions/2-kyu-create-plot-code-lib.md +35 -16
- package/src/seeds/missions/3-kyu-analyze-lunar-lander.md +13 -14
- package/src/seeds/missions/3-kyu-evaluate-time-series-lab.md +22 -28
- package/src/seeds/missions/4-kyu-analyze-json-schema-diff.md +46 -2
- package/src/seeds/missions/4-kyu-apply-cron-engine.md +16 -18
- package/src/seeds/missions/4-kyu-apply-dense-encoding.md +14 -11
- package/src/seeds/missions/4-kyu-apply-owl-ontology.md +47 -0
- package/src/seeds/missions/5-kyu-apply-ascii-face.md +40 -0
- package/src/seeds/missions/5-kyu-apply-string-utils.md +17 -17
- package/src/seeds/missions/6-kyu-understand-hamming-distance.md +12 -12
- package/src/seeds/missions/6-kyu-understand-roman-numerals.md +12 -12
- package/src/seeds/missions/8-kyu-remember-hello-world.md +10 -0
- package/src/seeds/zero-MISSION.md +12 -12
- package/src/seeds/zero-package.json +1 -1
- package/src/seeds/missions/2-dan-create-agi.md +0 -22
- package/src/seeds/missions/2-kyu-evaluate-markdown-compiler.md +0 -33
- package/src/seeds/missions/3-kyu-evaluate-owl-ontology.md +0 -34
- package/src/seeds/missions/5-kyu-create-ascii-face.md +0 -4
package/src/copilot/telemetry.js
CHANGED
|
@@ -37,18 +37,68 @@ export function countSourceTodos(dir, extensions = [".js", ".ts", ".mjs"]) {
|
|
|
37
37
|
return count;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
/**
 * Count source lines in a directory (recursive, .js/.ts/.mjs files).
 *
 * Entries named "node_modules" and entries whose name starts with "." are
 * skipped. Unreadable entries or directories are ignored rather than
 * aborting the count, because this value only feeds an informational metric.
 *
 * A file's line count is the number of lines it actually contains: an empty
 * file contributes 0, and a trailing newline does not add a phantom line.
 *
 * @param {string} dir - Directory to scan
 * @returns {number} Total lines across matching files (0 if dir is missing)
 */
export function countSourceLines(dir) {
  if (!dir || !existsSync(dir)) return 0;
  let count = 0;
  try {
    for (const entry of readdirSync(dir)) {
      if (entry === "node_modules" || entry.startsWith(".")) continue;
      const fullPath = join(dir, entry);
      try {
        if (statSync(fullPath).isDirectory()) {
          count += countSourceLines(fullPath);
        } else if (/\.(js|ts|mjs)$/.test(entry)) {
          const content = readFileSync(fullPath, "utf8");
          // split("\n").length would report 1 for an empty file and N+1 for a
          // file of N newline-terminated lines, so count terminators instead
          // and add one only for a final unterminated line.
          if (content.length > 0) {
            const newlineCount = content.split("\n").length - 1;
            count += content.endsWith("\n") ? newlineCount : newlineCount + 1;
          }
        }
      } catch { /* skip unreadable entry */ }
    }
  } catch { /* skip unreadable directory */ }
  return count;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Count acceptance criteria checkboxes in MISSION.md.
 *
 * Only task-list items are counted: a line that begins (after optional
 * indentation) with "- [x]" / "- [X]" (met) or "- [ ]" (not yet met).
 * Checkbox syntax that merely appears inside a sentence or an inline code
 * span — e.g. a mission that describes task-list syntax — is not a criterion
 * and is ignored.
 *
 * @param {string} missionPath - Path to the mission markdown file
 * @returns {{ met: number, total: number }} Checked count and overall count;
 *   both 0 when the file is missing or unreadable
 */
export function countAcceptanceCriteria(missionPath) {
  if (!missionPath || !existsSync(missionPath)) return { met: 0, total: 0 };
  try {
    const content = readFileSync(missionPath, "utf8");
    // The "m" flag anchors ^ at each line start so only real list items match.
    const checked = (content.match(/^[ \t]*- \[x\]/gim) || []).length;
    const unchecked = (content.match(/^[ \t]*- \[ \]/gm) || []).length;
    return { met: checked, total: checked + unchecked };
  } catch {
    // Best-effort metric: an unreadable file is reported as "no criteria".
    return { met: 0, total: 0 };
  }
}
|
|
81
|
+
|
|
40
82
|
/**
|
|
41
83
|
* Build mission-complete metrics array for the intentïon.md dashboard.
|
|
42
84
|
*
|
|
85
|
+
* C2: Uses cumulativeCost from persistent state (not per-run).
|
|
86
|
+
* C5: Includes both per-task and cumulative values.
|
|
87
|
+
* C6: Replaces "Dedicated test files" with dynamic metrics.
|
|
88
|
+
*
|
|
43
89
|
* @param {Object} config - Parsed agentic-lib config
|
|
44
90
|
* @param {Object} result - Task result object
|
|
45
91
|
* @param {Array} _limitsStatus - Limits status array (unused but kept for signature compatibility)
|
|
46
|
-
* @param {number} cumulativeCost - Cumulative transformation cost
|
|
92
|
+
* @param {number} cumulativeCost - Cumulative transformation cost (from state.toml)
|
|
47
93
|
* @param {number} featureIssueCount - Number of open feature issues
|
|
48
94
|
* @param {number} maintenanceIssueCount - Number of open maintenance issues
|
|
95
|
+
* @param {Object} [taskCosts] - Per-task costs for split display
|
|
96
|
+
* @param {number} [taskCosts.transformationCost] - This task's transformation cost (0 or 1)
|
|
97
|
+
* @param {number} [taskCosts.tokensUsed] - This task's token usage
|
|
98
|
+
* @param {number} [taskCosts.cumulativeTokens] - Cumulative tokens from state
|
|
49
99
|
* @returns {Array} Mission metrics entries
|
|
50
100
|
*/
|
|
51
|
-
export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCost, featureIssueCount, maintenanceIssueCount) {
|
|
101
|
+
export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCost, featureIssueCount, maintenanceIssueCount, taskCosts) {
|
|
52
102
|
const openIssues = featureIssueCount + maintenanceIssueCount;
|
|
53
103
|
const budgetCap = config.transformationBudget || 0;
|
|
54
104
|
const resolvedCount = result.resolvedCount || 0;
|
|
@@ -61,26 +111,58 @@ export function buildMissionMetrics(config, result, _limitsStatus, cumulativeCos
|
|
|
61
111
|
const srcRoot = sourceDir.includes("/") ? sourceDir.split("/").slice(0, -1).join("/") || "src" : "src";
|
|
62
112
|
const todoCount = countSourceTodos(srcRoot);
|
|
63
113
|
|
|
64
|
-
const dedicatedTestCount = result.dedicatedTestCount ?? 0;
|
|
65
|
-
|
|
66
114
|
const thresholds = config.missionCompleteThresholds || {};
|
|
67
115
|
const minResolved = thresholds.minResolvedIssues ?? 3;
|
|
68
|
-
const minTests = thresholds.minDedicatedTests ?? 1;
|
|
69
116
|
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
70
117
|
|
|
118
|
+
// C6: Dynamic metrics
|
|
119
|
+
const sourceLines = countSourceLines(sourceDir);
|
|
120
|
+
const featuresPath = config.paths?.features?.path || "features/";
|
|
121
|
+
const featureSpecCount = countMdFilesInDir(featuresPath);
|
|
122
|
+
const missionPath = config.paths?.mission?.path || "MISSION.md";
|
|
123
|
+
const acceptance = countAcceptanceCriteria(missionPath);
|
|
124
|
+
|
|
125
|
+
// C5: Per-task costs (optional)
|
|
126
|
+
const tc = taskCosts || {};
|
|
127
|
+
const thisTaskCost = tc.transformationCost ?? 0;
|
|
128
|
+
const thisTaskTokens = tc.tokensUsed ?? 0;
|
|
129
|
+
const cumulativeTokens = tc.cumulativeTokens ?? 0;
|
|
130
|
+
const thisTaskDurationMs = tc.durationMs ?? 0;
|
|
131
|
+
const cumulativeDurationMs = tc.cumulativeDurationMs ?? 0;
|
|
132
|
+
|
|
71
133
|
return [
|
|
72
134
|
{ metric: "Open issues", value: String(openIssues), target: "0", status: openIssues === 0 ? "MET" : "NOT MET" },
|
|
73
135
|
{ metric: "Open PRs", value: String(openPrs), target: "0", status: openPrs === 0 ? "MET" : "NOT MET" },
|
|
74
136
|
{ metric: "Issues resolved (review or PR merge)", value: String(resolvedCount), target: `>= ${minResolved}`, status: resolvedCount >= minResolved ? "MET" : "NOT MET" },
|
|
75
|
-
{ metric: "Dedicated test files", value: String(dedicatedTestCount), target: `>= ${minTests}`, status: dedicatedTestCount >= minTests ? "MET" : "NOT MET" },
|
|
76
137
|
{ metric: "Source TODO count", value: String(todoCount), target: `<= ${maxTodos}`, status: todoCount <= maxTodos ? "MET" : "NOT MET" },
|
|
77
|
-
{ metric: "
|
|
78
|
-
{ metric: "
|
|
138
|
+
{ metric: "Source lines", value: String(sourceLines), target: "—", status: "—" },
|
|
139
|
+
{ metric: "Feature specs", value: String(featureSpecCount), target: "—", status: "—" },
|
|
140
|
+
{ metric: "Acceptance criteria", value: acceptance.total > 0 ? `${acceptance.met}/${acceptance.total}` : "—", target: "—", status: "—" },
|
|
141
|
+
{ metric: "Transforms (this task)", value: String(thisTaskCost), target: "—", status: "—" },
|
|
142
|
+
{ metric: "Transforms (cumulative)", value: String(cumulativeCost), target: ">= 1", status: cumulativeCost >= 1 ? "MET" : "NOT MET" },
|
|
143
|
+
{ metric: "Budget (this task)", value: String(thisTaskCost), target: "—", status: "—" },
|
|
144
|
+
{ metric: "Budget (cumulative)", value: `${cumulativeCost}/${budgetCap}`, target: budgetCap > 0 ? `< ${budgetCap}` : "unlimited", status: budgetCap > 0 && cumulativeCost >= budgetCap ? "EXHAUSTED" : "OK" },
|
|
145
|
+
{ metric: "Tokens (this task)", value: String(thisTaskTokens), target: "—", status: "—" },
|
|
146
|
+
{ metric: "Tokens (cumulative)", value: String(cumulativeTokens), target: "—", status: "—" },
|
|
147
|
+
{ metric: "Duration (this task)", value: thisTaskDurationMs > 0 ? `${Math.round(thisTaskDurationMs / 1000)}s` : "—", target: "—", status: "—" },
|
|
148
|
+
{ metric: "Duration (cumulative)", value: cumulativeDurationMs > 0 ? `${Math.round(cumulativeDurationMs / 1000)}s` : "—", target: "—", status: "—" },
|
|
79
149
|
{ metric: "Mission complete declared", value: missionComplete ? "YES" : "NO", target: "—", status: "—" },
|
|
80
150
|
{ metric: "Mission failed declared", value: missionFailed ? "YES" : "NO", target: "—", status: "—" },
|
|
81
151
|
];
|
|
82
152
|
}
|
|
83
153
|
|
|
154
|
+
/**
 * Count .md files in a directory (non-recursive).
 *
 * Sub-directories are never counted, even when their name ends in ".md".
 *
 * @param {string} dir - Directory to inspect
 * @returns {number} Number of matching entries; 0 when the directory is
 *   missing or unreadable
 */
function countMdFilesInDir(dir) {
  if (!dir || !existsSync(dir)) return 0;
  try {
    return readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.name.endsWith(".md") && !entry.isDirectory())
      .length;
  } catch {
    // Best-effort metric: treat an unreadable directory as empty.
    return 0;
  }
}
|
|
165
|
+
|
|
84
166
|
/**
|
|
85
167
|
* Build mission-complete readiness narrative from metrics.
|
|
86
168
|
*
|
|
@@ -91,8 +173,8 @@ export function buildMissionReadiness(metrics) {
|
|
|
91
173
|
const openIssues = parseInt(metrics.find((m) => m.metric === "Open issues")?.value || "0", 10);
|
|
92
174
|
const openPrs = parseInt(metrics.find((m) => m.metric === "Open PRs")?.value || "0", 10);
|
|
93
175
|
const resolved = parseInt(metrics.find((m) => m.metric === "Issues resolved (review or PR merge)")?.value || "0", 10);
|
|
94
|
-
const dedicatedTests = parseInt(metrics.find((m) => m.metric === "Dedicated test files")?.value || "0", 10);
|
|
95
176
|
const todoCount = parseInt(metrics.find((m) => m.metric === "Source TODO count")?.value || "0", 10);
|
|
177
|
+
const sourceLines = parseInt(metrics.find((m) => m.metric === "Source lines")?.value || "0", 10);
|
|
96
178
|
const missionComplete = metrics.find((m) => m.metric === "Mission complete declared")?.value === "YES";
|
|
97
179
|
const missionFailed = metrics.find((m) => m.metric === "Mission failed declared")?.value === "YES";
|
|
98
180
|
|
|
@@ -105,7 +187,7 @@ export function buildMissionReadiness(metrics) {
|
|
|
105
187
|
|
|
106
188
|
if (allMet) {
|
|
107
189
|
parts.push("Mission complete conditions ARE met.");
|
|
108
|
-
parts.push(`0 open issues, 0 open PRs, ${resolved} issue(s) resolved, ${
|
|
190
|
+
parts.push(`0 open issues, 0 open PRs, ${resolved} issue(s) resolved, ${sourceLines} source lines, TODOs: ${todoCount}.`);
|
|
109
191
|
} else {
|
|
110
192
|
parts.push("Mission complete conditions are NOT met.");
|
|
111
193
|
if (openIssues > 0) parts.push(`${openIssues} open issue(s) remain.`);
|
|
@@ -55,19 +55,19 @@ During web-search and document-gathering workflow phases, the agent should look
|
|
|
55
55
|
|
|
56
56
|
The opcode table in particular should be assembled from reference data during the research phase and stored as `src/lib/opcodes.js` — a data-driven 256-entry array — rather than hand-coded instruction by instruction. This avoids the agent losing track of which opcodes are implemented and reduces the chance of transcription errors.
|
|
57
57
|
|
|
58
|
-
##
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
-
|
|
65
|
-
-
|
|
66
|
-
-
|
|
67
|
-
-
|
|
68
|
-
-
|
|
69
|
-
-
|
|
70
|
-
-
|
|
58
|
+
## Required Capabilities
|
|
59
|
+
|
|
60
|
+
The emulator must provide a public API (exported from `src/lib/main.js`, re-exporting from submodules) that supports:
|
|
61
|
+
|
|
62
|
+
- Creating an emulator instance with 64KB RAM and all subsystem objects (CPU, memory, VIC-II, SID, CIAs).
|
|
63
|
+
- Loading ROM images (KERNAL, BASIC, character generator) as Uint8Arrays. Must be called before running.
|
|
64
|
+
- Loading `.prg` files into memory at the address from their two-byte header.
|
|
65
|
+
- Single-stepping one CPU instruction with cycle-accurate timing and timer updates.
|
|
66
|
+
- Running a full PAL video frame (~19656 cycles) with raster interrupt handling, returning an RGBA framebuffer.
|
|
67
|
+
- Reading the current screen as a Uint8Array RGBA pixel buffer (320x200).
|
|
68
|
+
- Simulating keyboard input via the CIA1 keyboard matrix (press and release).
|
|
69
|
+
- Setting joystick state (up/down/left/right/fire) on port 1 or 2.
|
|
70
|
+
- Hardware reset (CPU to reset vector, clear subsystem state).
|
|
71
71
|
|
|
72
72
|
## CPU (src/lib/cpu.js, src/lib/opcodes.js)
|
|
73
73
|
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
A JavaScript planning engine that implements partial-order planning with constraint satisfaction and belief revision. The engine reads a committed plan file, finds proceedable actions, assembles agents from capabilities, executes them, witnesses the results, and iterates — all within a budget of compute.
|
|
4
|
+
|
|
5
|
+
## Background
|
|
6
|
+
|
|
7
|
+
The engine draws on three interconnected disciplines:
|
|
8
|
+
|
|
9
|
+
- **Knowledge representation** — event calculus for tracking what conditions are initiated and terminated over time, plus truth maintenance for assumption management
|
|
10
|
+
- **Constraint satisfaction** — matching agents to actions based on capabilities and resource requirements, finding non-conflicting sets of actions to execute in parallel
|
|
11
|
+
- **Planning** — partial-order planning (POP) where actions have preconditions and effects, linked by causal chains that can be threatened by other actions
|
|
12
|
+
|
|
13
|
+
## Required Capabilities
|
|
14
|
+
|
|
15
|
+
### The Plan File
|
|
16
|
+
|
|
17
|
+
A committed markdown file with YAML front matter that persists across engine cycles:
|
|
18
|
+
|
|
19
|
+
- **Front matter**: cycle count, realization score (0.0–1.0), iteration and token budgets
|
|
20
|
+
- **Actions table**: each action has an ID, description, preconditions, effects, assigned agent, status (`open`/`ready`/`in-progress`/`achieved`/`failed`), and resource paths
|
|
21
|
+
- **Causal links**: action A provides condition C that action B needs — forming a dependency chain
|
|
22
|
+
- **Threats**: action X might undo condition C that a causal link protects, with a resolution strategy
|
|
23
|
+
- **Assumptions**: beliefs held by the system with justification, strength, and what depends on them
|
|
24
|
+
- **Open conditions**: conditions needed but not yet provided by any action (explicit gaps)
|
|
25
|
+
- **Observations**: event calculus entries recording what happened, what conditions were initiated/terminated
|
|
26
|
+
- **Witness log**: per-cycle realization score with evidence
|
|
27
|
+
|
|
28
|
+
The engine must parse this plan, serialize it back losslessly (round-trip fidelity), and update it after each engine step.
|
|
29
|
+
|
|
30
|
+
### The Engine Loop (7 steps)
|
|
31
|
+
|
|
32
|
+
1. **Assess** — Read current state: plan + source files + logs + agent definitions + capabilities
|
|
33
|
+
2. **Plan** — Refine the planning artifact (add actions, resolve threats, close open conditions)
|
|
34
|
+
3. **Solve** — Find proceedable actions via constraint satisfaction (met preconditions, no unresolved threats, no resource conflicts)
|
|
35
|
+
4. **Assemble** — Match or compose agents from capabilities for each proceedable action
|
|
36
|
+
5. **Execute** — Run agents in parallel (within concurrency limit), each producing changes
|
|
37
|
+
6. **Witness** — Assess realization (0.0–1.0), record observations
|
|
38
|
+
7. **Iterate** — If budget remains and realization is below threshold, loop back to Assess
|
|
39
|
+
|
|
40
|
+
### Constraint Solver
|
|
41
|
+
|
|
42
|
+
An action is **proceedable** when:
|
|
43
|
+
- All preconditions are satisfied (conditions initiated by achieved actions or initial state)
|
|
44
|
+
- No unresolved threats exist against causal links providing those preconditions
|
|
45
|
+
- Its resource paths don't conflict with other actions in the same batch
|
|
46
|
+
|
|
47
|
+
### Belief Revision
|
|
48
|
+
|
|
49
|
+
When an observation contradicts an assumption:
|
|
50
|
+
1. Find the weakest-justified contradicted assumption
|
|
51
|
+
2. Retract it
|
|
52
|
+
3. Cascade: re-evaluate all dependents — any action whose sole support was the retracted assumption reverts to `open`
|
|
53
|
+
4. When an action is achieved, propagate its effects as available preconditions for blocked actions
|
|
54
|
+
|
|
55
|
+
### Agent Assembly
|
|
56
|
+
|
|
57
|
+
Given an action's requirements, find an agent definition whose capabilities cover the needs. If no existing agent matches, compose one from the minimum set of capabilities that provides all needed tools (constraint satisfaction over the capability set).
|
|
58
|
+
|
|
59
|
+
## Requirements
|
|
60
|
+
|
|
61
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
62
|
+
- The plan file format must survive parse → serialize → parse round-trips losslessly.
|
|
63
|
+
- The constraint solver must handle preconditions, threats, and resource conflicts correctly.
|
|
64
|
+
- Belief revision must cascade retractions to dependent actions.
|
|
65
|
+
- No external runtime dependencies.
|
|
66
|
+
- Comprehensive unit tests for plan parsing/serialization, constraint solving, belief revision, and agent assembly.
|
|
67
|
+
- README documenting the planning model, engine loop, and plan file format.
|
|
68
|
+
|
|
69
|
+
## Acceptance Criteria
|
|
70
|
+
|
|
71
|
+
- [ ] Plan file parses from markdown with YAML front matter into a structured object
|
|
72
|
+
- [ ] Plan file serializes back to markdown losslessly (round-trip)
|
|
73
|
+
- [ ] Constraint solver identifies proceedable actions (all preconditions met, no threats)
|
|
74
|
+
- [ ] Constraint solver excludes actions with unmet preconditions
|
|
75
|
+
- [ ] Constraint solver excludes actions with resource conflicts against the current batch
|
|
76
|
+
- [ ] Belief revision retracts the weakest-justified contradicted assumption
|
|
77
|
+
- [ ] Belief revision cascades: actions depending solely on a retracted assumption revert to `open`
|
|
78
|
+
- [ ] Agent assembly matches an agent definition to an action based on capabilities
|
|
79
|
+
- [ ] Agent assembly composes a novel agent when no existing definition matches
|
|
80
|
+
- [ ] Engine loop iterates through all 7 steps and terminates on budget exhaustion or realization threshold
|
|
81
|
+
- [ ] All unit tests pass
|
|
82
|
+
- [ ] README documents the planning model and engine loop
|
|
@@ -18,14 +18,37 @@ The library should progressively implement:
|
|
|
18
18
|
- Output PPM (P3) format — simple text-based image format
|
|
19
19
|
- Vector3 class for all geometric operations
|
|
20
20
|
- Configurable resolution and ray depth
|
|
21
|
-
- Deterministic output
|
|
21
|
+
- Deterministic output: all random sampling must use a seeded PRNG. Given the same scene JSON, output must be byte-identical across runs.
|
|
22
|
+
|
|
23
|
+
## Scene JSON Structure
|
|
24
|
+
|
|
25
|
+
The scene description format must support at minimum:
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"camera": { "position": [0,2,-5], "lookAt": [0,0,0], "fov": 60 },
|
|
30
|
+
"lights": [{ "position": [5,10,-5], "color": [1,1,1] }],
|
|
31
|
+
"objects": [
|
|
32
|
+
{ "type": "sphere", "center": [0,1,0], "radius": 1, "material": { "color": [1,0,0], "reflective": 0.3 } },
|
|
33
|
+
{ "type": "plane", "normal": [0,1,0], "d": 0, "material": { "color": [0.5,0.5,0.5] } }
|
|
34
|
+
]
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
41
|
+
- No external runtime dependencies.
|
|
42
|
+
- Comprehensive unit tests verifying ray-sphere intersection, reflection vectors, and Snell's law.
|
|
43
|
+
- A sample scene JSON file included in `docs/examples/`.
|
|
44
|
+
- README with rendering examples and scene format documentation.
|
|
22
45
|
|
|
23
46
|
## Acceptance Criteria
|
|
24
47
|
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
- Renders a scene with 3+ spheres, a plane, and a point light in under
|
|
28
|
-
- At least one sphere is reflective and one is refractive
|
|
29
|
-
- Unit tests verify ray-sphere intersection, reflection vectors, and Snell's law
|
|
30
|
-
- A sample scene JSON file is included in `docs/examples/`
|
|
31
|
-
- Output PPM can be viewed in any image viewer (validated by checking header format)
|
|
48
|
+
- [ ] Rendering a scene from JSON returns a PPM string
|
|
49
|
+
- [ ] Parsing a scene JSON string returns a usable scene object
|
|
50
|
+
- [ ] Renders a scene with 3+ spheres, a plane, and a point light in under 30 seconds (640x480)
|
|
51
|
+
- [ ] At least one sphere is reflective and one is refractive
|
|
52
|
+
- [ ] Unit tests verify ray-sphere intersection, reflection vectors, and Snell's law
|
|
53
|
+
- [ ] A sample scene JSON file is included in `docs/examples/`
|
|
54
|
+
- [ ] Output PPM can be viewed in any image viewer (validated by checking header format)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
A JavaScript test framework that proves a code transformation system can manage its own source code — the software engineering equivalent of a compiler that compiles itself.
|
|
4
|
+
|
|
5
|
+
## Background
|
|
6
|
+
|
|
7
|
+
Self-hosting is the strongest proof of capability: if a system can maintain and recreate itself, it can maintain anything. This mission builds a test harness that validates self-hosting through four scenarios of increasing ambition.
|
|
8
|
+
|
|
9
|
+
## Required Capabilities
|
|
10
|
+
|
|
11
|
+
### Scenario 1: Clone Self
|
|
12
|
+
|
|
13
|
+
Copy the system's own source tree into a temporary workspace, write a narrowly-scoped improvement goal (e.g. "Add JSDoc to exported functions in safety.js"), run a transform cycle, and verify the system made a substantive change to its own code.
|
|
14
|
+
|
|
15
|
+
- Workspace: copy of source tree (excluding `.git/`, `node_modules/`, `models/`)
|
|
16
|
+
- Assertions: target file modified, still valid JavaScript, diff is substantive (not just whitespace)
|
|
17
|
+
|
|
18
|
+
### Scenario 2: Empty Bootstrap
|
|
19
|
+
|
|
20
|
+
Start from an empty repository, run an init/purge to create the seed state, write a goal describing the delta between version N and version N+1 (which already exists as a known target), run a transform, and verify convergence toward the known target.
|
|
21
|
+
|
|
22
|
+
- Workspace: empty, then init creates seed state
|
|
23
|
+
- Key insight: because the target already exists, convergence is objectively measurable
|
|
24
|
+
- Assertions: seed files created, features generated, source modified, valid JavaScript
|
|
25
|
+
- Soft assertion: convergence score — keywords from the N+1 delta found in generated code
|
|
26
|
+
|
|
27
|
+
### Scenario 3: Version Increment
|
|
28
|
+
|
|
29
|
+
Copy the source tree, write a goal to update the package version and synchronise seeds, run a transform, and verify the version was updated correctly.
|
|
30
|
+
|
|
31
|
+
- Assertions: `package.json` modified, still valid JSON
|
|
32
|
+
- Soft: version field matches target, seeds updated
|
|
33
|
+
|
|
34
|
+
### Scenario 4: Seed Sync
|
|
35
|
+
|
|
36
|
+
Copy the source tree, tamper with a seed file to introduce an outdated function, write a goal to review and fix seeds, run a transform, and verify the tampered file was corrected.
|
|
37
|
+
|
|
38
|
+
- Assertions: tampered file modified, still valid JavaScript
|
|
39
|
+
- Soft: modification moves toward correctness
|
|
40
|
+
|
|
41
|
+
## Infrastructure Required
|
|
42
|
+
|
|
43
|
+
- A source tree copy function that excludes `.git/`, `node_modules/`, and `models/` directories
|
|
44
|
+
- A diff quality checker that distinguishes substantive changes from whitespace-only edits
|
|
45
|
+
- A JSON validity checker for `package.json` verification
|
|
46
|
+
- A convergence scoring function (0.0–1.0) that measures how many target keywords appear in generated code
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
51
|
+
- Each scenario must be independently runnable and independently pass/fail.
|
|
52
|
+
- Scenarios must work with a local LLM (no external API dependency required for mechanical validation).
|
|
53
|
+
- No external runtime dependencies beyond what the host system already provides.
|
|
54
|
+
- Comprehensive unit tests for each helper function and integration tests for each scenario.
|
|
55
|
+
- README documenting what self-hosting means, how to run each scenario, and how to interpret results.
|
|
56
|
+
|
|
57
|
+
## Acceptance Criteria
|
|
58
|
+
|
|
59
|
+
- [ ] Clone-self scenario: modifies a file in the source tree, output is valid JavaScript, diff is substantive
|
|
60
|
+
- [ ] Empty-bootstrap scenario: creates seed files, generates features, modifies source, output is valid JavaScript
|
|
61
|
+
- [ ] Version-increment scenario: modifies `package.json`, output is valid JSON
|
|
62
|
+
- [ ] Seed-sync scenario: corrects a tampered seed file, output is valid JavaScript
|
|
63
|
+
- [ ] Convergence score function returns 0.0–1.0 based on target keyword matching
|
|
64
|
+
- [ ] Source tree copy excludes `.git/`, `node_modules/`, and `models/`
|
|
65
|
+
- [ ] Each scenario is independently runnable
|
|
66
|
+
- [ ] All unit tests pass
|
|
67
|
+
- [ ] README documents self-hosting concept and scenario execution
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Mission
|
|
2
|
+
|
|
3
|
+
Build a Markdown-to-HTML compiler library that converts GitHub Flavored Markdown (GFM) to semantic HTML.
|
|
4
|
+
|
|
5
|
+
## Required Capabilities
|
|
6
|
+
|
|
7
|
+
The library must parse and render these 10 GFM feature areas:
|
|
8
|
+
|
|
9
|
+
1. Headings (h1-h6 via `#` markers) and paragraphs
|
|
10
|
+
2. Inline formatting: bold (`**`), italic (`*`), code (`` ` ``), strikethrough (`~~`)
|
|
11
|
+
3. Links `[text](url)` and images `![alt](url)`
|
|
12
|
+
4. Ordered and unordered lists (including nested lists)
|
|
13
|
+
5. Code blocks (fenced with ``` and language annotation)
|
|
14
|
+
6. Blockquotes (nested `>`)
|
|
15
|
+
7. Tables (GFM pipe syntax with alignment)
|
|
16
|
+
8. Horizontal rules (`---`, `***`, `___`)
|
|
17
|
+
9. Task lists (`- [ ]`, `- [x]`)
|
|
18
|
+
10. Auto-linked URLs and HTML entity escaping
|
|
19
|
+
|
|
20
|
+
It must also provide a tokenization/inspection mode for testing intermediate representations.
|
|
21
|
+
|
|
22
|
+
## Technical Requirements
|
|
23
|
+
|
|
24
|
+
- Pure JavaScript, no external Markdown parsing libraries
|
|
25
|
+
- XSS-safe: all user content must be HTML-escaped before insertion. Specifically, compiling `<script>alert('xss')</script>` must produce escaped output with `&lt;script&gt;`, never executable script tags.
|
|
26
|
+
- Well-formed HTML output: every opening tag must have a matching closing tag. Self-closing tags (`<br/>`, `<img/>`) use XHTML syntax.
|
|
27
|
+
- Exported as both CommonJS and ESM
|
|
28
|
+
|
|
29
|
+
## Suggested Approach
|
|
30
|
+
|
|
31
|
+
A two-pass architecture (tokeniser/lexer pass, then renderer pass) works well for this problem, but any architecture that passes the acceptance criteria is acceptable.
|
|
32
|
+
|
|
33
|
+
## Requirements
|
|
34
|
+
|
|
35
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
36
|
+
- Comprehensive test suite covering: 1 test per feature area (10 minimum), nesting combinations (bold in links, links in lists, code in blockquotes — 5 minimum), edge cases (empty input, single character, whitespace only, deeply nested lists — 5 minimum).
|
|
37
|
+
- README with usage examples.
|
|
38
|
+
|
|
39
|
+
## Acceptance Criteria
|
|
40
|
+
|
|
41
|
+
- [ ] Compiling markdown returns an HTML string
|
|
42
|
+
- [ ] Tokenizing markdown returns an array of token objects for inspection
|
|
43
|
+
- [ ] Handles all 10 feature areas listed above
|
|
44
|
+
- [ ] Nested constructs work: bold inside links, links inside lists, code inside blockquotes
|
|
45
|
+
- [ ] Compiling `<script>alert('xss')</script>` produces `&lt;script&gt;` (XSS-safe)
|
|
46
|
+
- [ ] A sample document is compiled and saved to `docs/examples/sample.html`
|
|
47
|
+
- [ ] Output is well-formed HTML (every opening tag has a matching closing tag)
|
|
48
|
+
- [ ] All unit tests pass
|
|
@@ -1,24 +1,43 @@
|
|
|
1
1
|
# Mission
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
A JavaScript library and CLI tool for generating plots from mathematical expressions and time series data. Produces SVG and PNG output files.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
- Transform and given range and a simple expression syntax for (pick an existing open standard) to time series data.
|
|
7
|
-
- Read and write the time series data in a standard format (pick an existing open standard).
|
|
8
|
-
- Make use of libraries for formula parsing, time series generation, plotting, and persistence in image formats.
|
|
9
|
-
- Generate SVG and PNG plots from the time series data and save these as files.
|
|
10
|
-
- Variations on this example: `node run start -- --expression "y=sin(x)" --range "x=-1:-1,y=-1:-1" --file output.svg` .
|
|
11
|
-
- Showcase all the features of the library via a CLI by dry running tp generate example commands and output in the README.md file.
|
|
5
|
+
## Required Capabilities
|
|
12
6
|
|
|
13
|
-
|
|
14
|
-
|
|
7
|
+
- Parse a mathematical expression string using JavaScript `Math` functions (e.g. `"y=Math.sin(x)"`, `"y=x*x+2*x-1"`) into an evaluatable function.
|
|
8
|
+
- Evaluate an expression over a numeric range (`start:step:end`) and return an array of data points.
|
|
9
|
+
- Load time series data from a CSV file with columns `time,value`.
|
|
10
|
+
- Render a data series to SVG 1.1 using `<polyline>` elements with a `viewBox` attribute.
|
|
11
|
+
- Render a data series to PNG (canvas-based or via SVG conversion — document the approach in the README).
|
|
12
|
+
- Save a plot to a file, inferring format from extension (`.svg` or `.png`).
|
|
13
|
+
|
|
14
|
+
## CLI
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
node src/lib/main.js --expression "y=Math.sin(x)" --range "-3.14:0.01:3.14" --file output.svg
|
|
18
|
+
node src/lib/main.js --csv data.csv --file output.png
|
|
19
|
+
node src/lib/main.js --help
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Range format: `start:step:end` (e.g. `-3.14:0.01:3.14`).
|
|
23
|
+
|
|
24
|
+
The `--help` flag prints usage examples and exits.
|
|
25
|
+
|
|
26
|
+
## Requirements
|
|
27
|
+
|
|
28
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
29
|
+
- SVG output must be valid SVG 1.1 with a `viewBox` attribute.
|
|
30
|
+
- External dependencies allowed only for PNG rendering (e.g. `canvas`, `sharp`). Expression parsing must use built-in JavaScript `Math` — no external math libraries.
|
|
31
|
+
- Comprehensive unit tests covering expression parsing, series generation, SVG structure, and CLI flags.
|
|
32
|
+
- README with example commands and sample output descriptions.
|
|
15
33
|
|
|
16
34
|
## Acceptance Criteria
|
|
17
35
|
|
|
18
|
-
- [ ]
|
|
19
|
-
- [ ]
|
|
20
|
-
- [ ]
|
|
21
|
-
- [ ]
|
|
22
|
-
- [ ] CLI
|
|
23
|
-
- [ ]
|
|
36
|
+
- [ ] Parsing `"y=Math.sin(x)"` returns a callable function
|
|
37
|
+
- [ ] Evaluating over range `-3.14:0.01:3.14` returns ~628 data points
|
|
38
|
+
- [ ] SVG output contains a `<polyline>` element and a `viewBox` attribute
|
|
39
|
+
- [ ] PNG output starts with the PNG magic bytes
|
|
40
|
+
- [ ] CLI `--expression "y=Math.sin(x)" --range "-3.14:0.01:3.14" --file output.svg` produces a file
|
|
41
|
+
- [ ] CLI `--help` prints usage information
|
|
24
42
|
- [ ] All unit tests pass
|
|
43
|
+
- [ ] README documents CLI usage with examples
|
|
@@ -9,28 +9,27 @@ A JavaScript library that simulates a lunar lander descent and provides an autop
|
|
|
9
9
|
- Thrust: each fuel unit burned reduces velocity by 4 m/s
|
|
10
10
|
- Landing: altitude reaches 0. Safe if velocity ≤ 4 m/s, crash if > 4 m/s
|
|
11
11
|
|
|
12
|
-
##
|
|
12
|
+
## Required Capabilities
|
|
13
13
|
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
14
|
+
- Create a lander state with configurable initial conditions (altitude, velocity, fuel). Defaults to the values above.
|
|
15
|
+
- Advance one tick: burn thrust fuel (clamped to available fuel), apply gravity and thrust, return a new immutable state. State objects are plain objects: `{ altitude, velocity, fuel, tick, landed, crashed }`.
|
|
16
|
+
- Simulate to completion using a controller function `(state) => thrustUnits` and return the full trace (array of states).
|
|
17
|
+
- Provide a built-in autopilot controller that lands safely. This is the algorithmically interesting part.
|
|
18
|
+
- Score a landing: `0` for crash, otherwise `(initialFuel - fuelUsed) * 10 + Math.max(0, (4 - landingVelocity) * 25)`. Higher is better.
|
|
19
19
|
|
|
20
20
|
## Requirements
|
|
21
21
|
|
|
22
|
-
- The autopilot must land safely across a range of initial conditions: altitude 500–2000m, velocity 20–80 m/s, fuel 10–50 units.
|
|
23
|
-
-
|
|
24
|
-
- Export all functions as named exports from `src/lib/main.js`.
|
|
22
|
+
- The autopilot must land safely across a range of initial conditions: altitude 500–2000m, velocity 20–80 m/s, fuel 10–50 units. Some combinations are physically impossible to survive (e.g. velocity 80 m/s with fuel 10) — the autopilot should return a crash trace, not throw.
|
|
23
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
25
24
|
- Comprehensive unit tests including physics correctness, autopilot safety across parameter ranges, and edge cases (zero fuel, already landed).
|
|
26
25
|
- README with example simulation output showing a successful landing trace.
|
|
27
26
|
|
|
28
27
|
## Acceptance Criteria
|
|
29
28
|
|
|
30
|
-
- [ ]
|
|
31
|
-
- [ ]
|
|
32
|
-
- [ ]
|
|
33
|
-
- [ ]
|
|
34
|
-
- [ ]
|
|
29
|
+
- [ ] Stepping correctly applies gravity and thrust physics
|
|
30
|
+
- [ ] Autopilot lands safely with default initial conditions
|
|
31
|
+
- [ ] Autopilot lands safely across at least 10 different (altitude, velocity, fuel) combinations
|
|
32
|
+
- [ ] Scoring returns 0 for crashes, positive for safe landings using the formula `(initialFuel - fuelUsed) * 10 + Math.max(0, (4 - landingVelocity) * 25)`
|
|
33
|
+
- [ ] Simulation returns a complete trace from start to landing
|
|
35
34
|
- [ ] All unit tests pass
|
|
36
35
|
- [ ] README shows example simulation output
|
|
@@ -1,41 +1,35 @@
|
|
|
1
1
|
# Mission
|
|
2
2
|
|
|
3
|
-
A JavaScript library
|
|
3
|
+
A JavaScript library for generating, normalising, forecasting, and correlating time series data. Uses deterministic data generators rather than external APIs, making results reproducible.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## Required Capabilities
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
|
|
16
|
-
## Core Functions
|
|
17
|
-
|
|
18
|
-
- `discover(sources?)` — search for and download time series data into `data/`.
|
|
19
|
-
- `load(file)` — load a CSV or JSON dataset, auto-detect date format.
|
|
20
|
-
- `normalise(dataset, interval)` — resample to uniform intervals, interpolate missing values.
|
|
21
|
-
- `refresh(file)` — update an existing dataset with newer data from its source.
|
|
22
|
-
- `forecast(dataset, method, horizon)` — predict future values using the specified method.
|
|
23
|
-
- `correlate(datasetA, datasetB)` — compute cross-correlation between two time series.
|
|
24
|
-
- `report(datasets)` — generate a markdown summary report.
|
|
7
|
+
- Generate a sine wave dataset with configurable periods, noise level, and sample rate. Returns an array of `{ time, value }` objects.
|
|
8
|
+
- Generate a seeded random walk for a given number of steps. Returns an array of `{ time, value }` objects.
|
|
9
|
+
- Load time series from a CSV file with columns `time,value`. Auto-detect ISO 8601 and Unix timestamp date formats.
|
|
10
|
+
- Normalise a dataset to uniform intervals using linear interpolation for missing values.
|
|
11
|
+
- Forecast future values using:
|
|
12
|
+
- Simple moving average (window size N, horizon M).
|
|
13
|
+
- Exponential smoothing (alpha 0.0–1.0, horizon M).
|
|
14
|
+
- Compute Pearson cross-correlation between two datasets for lags from -maxLag to +maxLag (default 20). Return an array of `{ lag, r }` objects.
|
|
15
|
+
- Generate a markdown report summarising datasets (row count, min, max, mean, trend direction).
|
|
25
16
|
|
|
26
17
|
## Requirements
|
|
27
18
|
|
|
28
|
-
- Export all
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
19
|
+
- Export all public API as named exports from `src/lib/main.js`.
|
|
20
|
+
- No external runtime dependencies.
|
|
21
|
+
- All random generators must accept a seed for deterministic output.
|
|
22
|
+
- Comprehensive unit tests covering generation, normalisation, forecasting accuracy, and correlation.
|
|
32
23
|
- README with usage examples.
|
|
33
24
|
|
|
34
25
|
## Acceptance Criteria
|
|
35
26
|
|
|
36
|
-
- [ ]
|
|
37
|
-
- [ ]
|
|
38
|
-
- [ ]
|
|
39
|
-
- [ ]
|
|
27
|
+
- [ ] Generating a sine wave with 2 periods, 0 noise, and 100 samples per period produces 200 data points tracing a clean sine wave
|
|
28
|
+
- [ ] Generating a random walk with seed 42 produces identical output on repeated calls (deterministic)
|
|
29
|
+
- [ ] Normalising fills gaps with linearly interpolated values
|
|
30
|
+
- [ ] Moving average forecast with window 10, horizon 20 returns 20 predicted values
|
|
31
|
+
- [ ] Forecast of a known sine wave has RMSE < 0.5 for a 10-point horizon
|
|
32
|
+
- [ ] Cross-correlation of two offset sine waves shows peak correlation at the correct lag
|
|
33
|
+
- [ ] Report produces a markdown string with dataset summaries
|
|
40
34
|
- [ ] All unit tests pass
|
|
41
35
|
- [ ] README documents the API with examples
|