@pencil-agent/nano-pencil 1.13.7 → 1.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-meta.json +3 -3
- package/dist/core/sub-agent/sub-agent-backend.js +39 -1
- package/dist/core/sub-agent/sub-agent-types.d.ts +4 -0
- package/dist/extensions/defaults/AGENT.md +2 -2
- package/dist/extensions/defaults/sal/README.md +5 -2
- package/dist/extensions/defaults/sal/eval/insforge-sink.d.ts +4 -1
- package/dist/extensions/defaults/sal/eval/insforge-sink.js +93 -23
- package/dist/extensions/defaults/sal/index.d.ts +4 -3
- package/dist/extensions/defaults/sal/index.js +45 -8
- package/dist/extensions/defaults/team/CLAUDE.md +14 -5
- package/dist/extensions/defaults/team/index.d.ts +7 -3
- package/dist/extensions/defaults/team/index.js +135 -4
- package/dist/extensions/defaults/team/team-dashboard.d.ts +9 -0
- package/dist/extensions/defaults/team/team-dashboard.js +103 -0
- package/dist/extensions/defaults/team/team-harness.d.ts +35 -0
- package/dist/extensions/defaults/team/team-harness.js +351 -0
- package/dist/extensions/defaults/team/team-parser.d.ts +14 -4
- package/dist/extensions/defaults/team/team-parser.js +57 -8
- package/dist/extensions/defaults/team/team-presets.d.ts +33 -0
- package/dist/extensions/defaults/team/team-presets.js +83 -0
- package/dist/extensions/defaults/team/team-psyche.d.ts +14 -0
- package/dist/extensions/defaults/team/team-psyche.js +130 -0
- package/dist/extensions/defaults/team/team-runtime.d.ts +5 -0
- package/dist/extensions/defaults/team/team-runtime.js +70 -2
- package/dist/extensions/defaults/team/team-types.d.ts +53 -2
- package/dist/extensions/defaults/team/team-types.js +1 -1
- package/dist/modes/interactive/interactive-mode.js +4 -0
- package/dist/node_modules/@pencil-agent/ai/cli.js +0 -0
- package/dist/node_modules/@pencil-agent/ai/models.generated.d.ts +23 -0
- package/dist/node_modules/@pencil-agent/ai/models.generated.js +25 -7
- package/docs/SAL/345/256/236/351/252/214/350/257/204/344/274/260/346/226/271/345/274/217/357/274/210/344/273/243/347/240/201/345/257/271/346/257/224/344/270/216/345/244/232worktree/357/274/211.md +2 -2
- package/docs/SAL/346/200/273/344/275/223/350/267/257/347/272/277/344/270/216/345/256/236/351/252/214/345/244/247/347/272/262.md +2 -2
- package/docs/loop /351/207/215/346/236/204/345/256/214/346/210/220/346/200/273/347/273/223.md" +251 -0
- package/docs/loop /351/207/215/346/236/204/345/256/214/346/210/220/346/212/245/345/221/212.md" +123 -0
- package/docs/loop /351/207/215/346/236/204/346/226/271/346/241/210.md" +1222 -0
- package/docs/loop /351/207/215/346/236/204/346/226/271/346/241/210/345/256/236/347/216/260/346/212/245/345/221/212.md" +158 -0
- package/docs/loop /351/207/215/346/236/204/346/226/271/346/241/210/345/257/271/346/257/224/345/210/206/346/236/220.md" +128 -0
- package/docs/loop /351/207/215/346/236/204/350/256/241/345/210/222.md" +321 -0
- package/docs/loop-usage-examples.md +215 -0
- package/docs/planmode.md +1987 -0
- package/package.json +1 -1
package/dist/build-meta.json
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
* [HERE]: core/sub-agent/sub-agent-backend.ts - in-process SubAgent implementation
|
|
6
6
|
*/
|
|
7
7
|
import { createAgentSession } from "../runtime/sdk.js";
|
|
8
|
+
import { readFile } from "node:fs/promises";
|
|
9
|
+
import { isAbsolute, resolve } from "node:path";
|
|
8
10
|
/**
|
|
9
11
|
* In-process SubAgent backend.
|
|
10
12
|
* Wraps createAgentSession() to run SubAgent in the same process.
|
|
@@ -12,6 +14,7 @@ import { createAgentSession } from "../runtime/sdk.js";
|
|
|
12
14
|
export class InProcessSubAgentBackend {
|
|
13
15
|
async spawn(spec) {
|
|
14
16
|
const id = crypto.randomUUID();
|
|
17
|
+
const prompt = await buildPromptWithContextFiles(spec);
|
|
15
18
|
// Create an internal AbortController that can be triggered by external signal or timeout
|
|
16
19
|
const internalAbortController = new AbortController();
|
|
17
20
|
// Forward external signal abort to internal controller
|
|
@@ -56,7 +59,7 @@ export class InProcessSubAgentBackend {
|
|
|
56
59
|
// Start the prompt
|
|
57
60
|
const promptPromise = (async () => {
|
|
58
61
|
try {
|
|
59
|
-
await session.prompt(
|
|
62
|
+
await session.prompt(prompt, {
|
|
60
63
|
images: spec.images,
|
|
61
64
|
});
|
|
62
65
|
status = "done";
|
|
@@ -87,6 +90,18 @@ export class InProcessSubAgentBackend {
|
|
|
87
90
|
}
|
|
88
91
|
}
|
|
89
92
|
finally {
|
|
93
|
+
if (spec.exitHook && result) {
|
|
94
|
+
try {
|
|
95
|
+
await spec.exitHook(result);
|
|
96
|
+
}
|
|
97
|
+
catch (error) {
|
|
98
|
+
status = "error";
|
|
99
|
+
result = {
|
|
100
|
+
success: false,
|
|
101
|
+
error: `exitHook failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
}
|
|
90
105
|
if (timeoutId !== undefined) {
|
|
91
106
|
clearTimeout(timeoutId);
|
|
92
107
|
}
|
|
@@ -117,3 +132,26 @@ export class InProcessSubAgentBackend {
|
|
|
117
132
|
};
|
|
118
133
|
}
|
|
119
134
|
}
|
|
135
|
+
/**
 * Pick a Markdown code fence that cannot be closed early by `content`.
 *
 * A fenced block ends at the first run of backticks at least as long as the
 * opening fence, so the fence must be longer than the longest backtick run in
 * the content. Content without backticks keeps the usual three-backtick fence.
 *
 * @param {string} content - File text that will be placed inside the fence.
 * @returns {string} A run of at least three backticks.
 */
function contextFenceFor(content) {
    const backtickRuns = content.match(/`+/g) ?? [];
    const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
    return "`".repeat(Math.max(3, longestRun + 1));
}
/**
 * Build the initial sub-agent prompt, prepending the contents of `spec.contextFiles`.
 *
 * When no context files are listed, `spec.prompt` is returned untouched.
 * Otherwise each file becomes a `### <path>` heading followed by its content in
 * a fenced block, the sections are preceded by a read-only notice, and
 * `spec.prompt` is appended last. Relative paths are resolved against `spec.cwd`.
 *
 * A file that cannot be resolved or read never fails the build: its section
 * carries an `(unavailable: <reason>)` note instead of content.
 *
 * @param {{ prompt: string, cwd?: string, contextFiles?: string[] }} spec - Sub-agent spec.
 * @returns {Promise<string>} The prompt text to send to the session.
 */
async function buildPromptWithContextFiles(spec) {
    if (!spec.contextFiles?.length) {
        return spec.prompt;
    }
    const chunks = [];
    // Files are read one at a time so sections keep the order of spec.contextFiles.
    for (const filePath of spec.contextFiles) {
        try {
            // Resolve inside the try: node:path throws on a non-string entry or a
            // missing cwd, and that should degrade to an "unavailable" note too.
            const absolutePath = isAbsolute(filePath) ? filePath : resolve(spec.cwd, filePath);
            const content = await readFile(absolutePath, "utf8");
            const fence = contextFenceFor(content);
            chunks.push(`### ${filePath}\n${fence}\n${content}\n${fence}`);
        }
        catch (error) {
            chunks.push(`### ${filePath}\n(unavailable: ${error instanceof Error ? error.message : String(error)})`);
        }
    }
    return [
        "The following files are injected as current task context. Treat them as read-only context unless the task instructions explicitly allow updates.",
        "",
        ...chunks,
        "",
        spec.prompt,
    ].join("\n");
}
|
|
@@ -25,6 +25,10 @@ export interface SubAgentSpec {
|
|
|
25
25
|
images?: ImageContent[];
|
|
26
26
|
/** Model to use (reuses main session's model and auth) */
|
|
27
27
|
model?: Model<any>;
|
|
28
|
+
/** Files to inject into the initial prompt as read-only context */
|
|
29
|
+
contextFiles?: string[];
|
|
30
|
+
/** Optional callback invoked after the run result is available */
|
|
31
|
+
exitHook?: (result: SubAgentResult) => Promise<void> | void;
|
|
28
32
|
}
|
|
29
33
|
/**
|
|
30
34
|
* Result from a completed SubAgent run.
|
|
@@ -37,14 +37,14 @@ loop/scheduler-controller.ts: SchedulerController - in-memory recurring task sto
|
|
|
37
37
|
loop/scheduler-parser.ts: Loop command parsing with flags/subcommands, parseSchedulerCommand/parseDurationSpec/buildSchedulerHelp, --name/--max/--quiet
|
|
38
38
|
loop/scheduler-types.ts: Scheduled loop types, LoopPayloadKind/ScheduledLoopTask/LoopStartSpec/ParsedSchedulerCommand
|
|
39
39
|
loop/README.md: Loop extension documentation - recurring scheduler usage and flags
|
|
40
|
-
sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/run_end eval events through pluggable EvalSink; runtime no-op when --nosal is set
|
|
40
|
+
sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-ab/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/run_end eval events through pluggable EvalSink with best-effort shutdown flushing; writes local .memory-experiments sidecar anchors only when --sal-ab or NANOPENCIL_SAL_AB=1 is enabled; runtime no-op when --nosal is set
|
|
41
41
|
sal/terrain.ts: TerrainSnapshot/TerrainNode/TerrainEdge model, buildTerrainIndex(), checkDipCoverage(), isSnapshotStale(), moduleIdForPath(), parses P2 AGENT.md and P3 file headers
|
|
42
42
|
sal/anchors.ts: StructuralAnchor/AnchorResolution model, locateTask(), locateAction(), evidence-driven scoring with tunable SalWeights, CJK bigram tokenization
|
|
43
43
|
sal/weights.ts: SalWeights interface, SAL_DEFAULT_WEIGHTS, loadSalWeights() reads sal-config.json from workspace or .memory-experiments/sal/
|
|
44
44
|
sal/eval/index.ts: createEvalSink() factory + barrel re-exports; adapter selection via options.adapter or endpoint scheme inference (http(s)→insforge, file://|/|./|../→jsonl, missing→noop); ONLY entry point SAL imports from
|
|
45
45
|
sal/eval/types.ts: EvalSink interface, EvalEventEnvelope/EvalEventType (run_start/run_end/turn_anchor), EvalAdapterId ("insforge"|"jsonl"|"noop"), CreateEvalSinkOptions, createEvalEvent factory; zero-dependency type surface
|
|
46
46
|
sal/eval/noop-sink.ts: noopSink — silent EvalSink used when eval disabled or no adapter configured
|
|
47
|
-
sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates), turn_anchor
|
|
47
|
+
sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates) with legacy-schema fallback, writes turn_anchor/tool_trace/memory_recalls/run_end only after parent run confirmation, tool_trace→eval_tool_traces with PGRST204 fallback, memory_recalls→eval_memory_recalls batch INSERT, run_end→eval_runs PATCH; allowSelfSigned TLS option logs only in development runtime, batching with default 2000ms interval
|
|
48
48
|
sal/eval/jsonl-sink.ts: JsonlEvalSink — append-only filesystem adapter, one JSON object per line, accepts file:// URLs or plain paths, auto-creates parent dir, batched writes
|
|
49
49
|
sal/README.md: SAL extension usage, sidecar output layout, weights override, pluggability contract
|
|
50
50
|
team/index.ts: AgentTeam extension entry, /team:/team:spawn/:send/:status/:stop/:terminate/:approve/:mode commands, TEAM_MESSAGE_TYPE renderer
|
|
@@ -14,11 +14,14 @@ SAL is **enabled by default** on every nanoPencil session.
|
|
|
14
14
|
# SAL active (default)
|
|
15
15
|
pencil -p "your prompt"
|
|
16
16
|
|
|
17
|
+
# SAL active with local A/B sidecar artifacts
|
|
18
|
+
pencil --sal-ab -p "your prompt"
|
|
19
|
+
|
|
17
20
|
# SAL disabled — baseline memory mode
|
|
18
21
|
pencil --nosal -p "your prompt"
|
|
19
22
|
```
|
|
20
23
|
|
|
21
|
-
When `--nosal` is set, all hooks return early and zero work is performed.
|
|
24
|
+
When `--nosal` is set, all hooks return early and zero work is performed. When SAL is active without `--sal-ab`, it can still emit eval events to the configured eval sink (for example InsForge), but it does not create local `.memory-experiments` sidecar files.
|
|
22
25
|
|
|
23
26
|
## Terminal compatibility (Warp, block UIs)
|
|
24
27
|
|
|
@@ -33,7 +36,7 @@ SAL builds a **terrain snapshot** of the workspace (walk + read DIP headers). Th
|
|
|
33
36
|
|
|
34
37
|
## Sidecar output
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
Local sidecar output is disabled by default. Enable it only for explicit SAL A/B experiments with `--sal-ab` or `NANOPENCIL_SAL_AB=1`. In that mode, every grounded turn writes a JSON record to:
|
|
37
40
|
|
|
38
41
|
```
|
|
39
42
|
<workspace>/.memory-experiments/sal/anchors/turn-<timestamp>.json
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
|
|
3
3
|
* [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
|
|
4
4
|
* [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
|
|
5
|
-
* [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
|
|
5
|
+
* [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version with legacy fallback), turn_anchor→eval_turns + eval_sal_anchors×2 only after parent run confirmation, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
|
|
6
6
|
*
|
|
7
7
|
* Pluggable: nothing in this file may be imported from outside the eval/ directory.
|
|
8
8
|
* To add a new backend, write a sibling file with the same EvalSink interface.
|
|
@@ -18,6 +18,8 @@ export declare class InsForgeEvalSink implements EvalSink {
|
|
|
18
18
|
private flushTimer;
|
|
19
19
|
private flushInFlight;
|
|
20
20
|
private closed;
|
|
21
|
+
private confirmedRuns;
|
|
22
|
+
private failedRuns;
|
|
21
23
|
constructor(options: CreateEvalSinkOptions);
|
|
22
24
|
sendEvent(event: EvalEventEnvelope): Promise<void>;
|
|
23
25
|
flush(): Promise<void>;
|
|
@@ -26,6 +28,7 @@ export declare class InsForgeEvalSink implements EvalSink {
|
|
|
26
28
|
private scheduleFlush;
|
|
27
29
|
private routeEvent;
|
|
28
30
|
private handleRunStart;
|
|
31
|
+
private ensureRunExists;
|
|
29
32
|
private handleTurnAnchor;
|
|
30
33
|
private handleRunEnd;
|
|
31
34
|
private handleMemoryRecalls;
|
|
@@ -2,14 +2,15 @@
|
|
|
2
2
|
* [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
|
|
3
3
|
* [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
|
|
4
4
|
* [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
|
|
5
|
-
* [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
|
|
5
|
+
* [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version with legacy fallback), turn_anchor→eval_turns + eval_sal_anchors×2 only after parent run confirmation, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
|
|
6
6
|
*
|
|
7
7
|
* Pluggable: nothing in this file may be imported from outside the eval/ directory.
|
|
8
8
|
* To add a new backend, write a sibling file with the same EvalSink interface.
|
|
9
9
|
*/
|
|
10
|
-
import { request } from "node:https";
|
|
11
10
|
import { request as httpRequest } from "node:http";
|
|
11
|
+
import { request } from "node:https";
|
|
12
12
|
import { URL } from "node:url";
|
|
13
|
+
import { fileURLToPath } from "node:url";
|
|
13
14
|
export class InsForgeEvalSink {
|
|
14
15
|
enabled = true;
|
|
15
16
|
base;
|
|
@@ -20,11 +21,13 @@ export class InsForgeEvalSink {
|
|
|
20
21
|
flushTimer;
|
|
21
22
|
flushInFlight;
|
|
22
23
|
closed = false;
|
|
24
|
+
confirmedRuns = new Set();
|
|
25
|
+
failedRuns = new Set();
|
|
23
26
|
constructor(options) {
|
|
24
27
|
this.base = options.endpoint.replace(/\/+$/, "");
|
|
25
28
|
this.batchIntervalMs = options.batchIntervalMs ?? 2000;
|
|
26
29
|
this.allowSelfSigned = options.allowSelfSigned ?? false;
|
|
27
|
-
if (this.allowSelfSigned) {
|
|
30
|
+
if (this.allowSelfSigned && isDevelopmentRuntime()) {
|
|
28
31
|
console.warn("[sal][eval] TLS certificate verification disabled (allowSelfSigned=true)");
|
|
29
32
|
}
|
|
30
33
|
const h = {
|
|
@@ -51,13 +54,16 @@ export class InsForgeEvalSink {
|
|
|
51
54
|
}
|
|
52
55
|
async flush() {
|
|
53
56
|
if (this.flushInFlight) {
|
|
54
|
-
await this.flushInFlight;
|
|
57
|
+
await this.flushInFlight.catch(() => { });
|
|
55
58
|
return;
|
|
56
59
|
}
|
|
57
60
|
this.flushInFlight = this.doFlush();
|
|
58
61
|
try {
|
|
59
62
|
await this.flushInFlight;
|
|
60
63
|
}
|
|
64
|
+
catch (err) {
|
|
65
|
+
console.error("[sal][eval] flush failed:", err.message);
|
|
66
|
+
}
|
|
61
67
|
finally {
|
|
62
68
|
this.flushInFlight = undefined;
|
|
63
69
|
}
|
|
@@ -78,7 +84,9 @@ export class InsForgeEvalSink {
|
|
|
78
84
|
}
|
|
79
85
|
async close() {
|
|
80
86
|
this.closed = true;
|
|
81
|
-
await this.flush()
|
|
87
|
+
await this.flush().catch((err) => {
|
|
88
|
+
console.error("[sal][eval] close flush failed:", err.message);
|
|
89
|
+
});
|
|
82
90
|
}
|
|
83
91
|
scheduleFlush() {
|
|
84
92
|
if (this.flushTimer)
|
|
@@ -98,16 +106,20 @@ export class InsForgeEvalSink {
|
|
|
98
106
|
await this.handleRunStart(event);
|
|
99
107
|
break;
|
|
100
108
|
case "turn_anchor":
|
|
101
|
-
await this.
|
|
109
|
+
if (await this.ensureRunExists(event))
|
|
110
|
+
await this.handleTurnAnchor(event);
|
|
102
111
|
break;
|
|
103
112
|
case "memory_recalls":
|
|
104
|
-
await this.
|
|
113
|
+
if (await this.ensureRunExists(event))
|
|
114
|
+
await this.handleMemoryRecalls(event);
|
|
105
115
|
break;
|
|
106
116
|
case "tool_trace":
|
|
107
|
-
await this.
|
|
117
|
+
if (await this.ensureRunExists(event))
|
|
118
|
+
await this.handleToolTrace(event);
|
|
108
119
|
break;
|
|
109
120
|
case "run_end":
|
|
110
|
-
await this.
|
|
121
|
+
if (await this.ensureRunExists(event))
|
|
122
|
+
await this.handleRunEnd(event);
|
|
111
123
|
break;
|
|
112
124
|
}
|
|
113
125
|
}
|
|
@@ -118,20 +130,61 @@ export class InsForgeEvalSink {
|
|
|
118
130
|
// INSERT into eval_runs (merge-duplicates so a later run_start can update model)
|
|
119
131
|
async handleRunStart(ev) {
|
|
120
132
|
const p = ev.payload;
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
133
|
+
const row = {
|
|
134
|
+
run_id: ev.run_id,
|
|
135
|
+
variant: ev.variant,
|
|
136
|
+
status: "running",
|
|
137
|
+
task_description: strOrNull(p.task_description),
|
|
138
|
+
task_file: strOrNull(p.task_file),
|
|
139
|
+
model: strOrNull(p.model),
|
|
140
|
+
thinking: p.thinking === true,
|
|
141
|
+
pencil_version: strOrNull(p.pencil_version),
|
|
142
|
+
commit_hash: strOrNull(p.commit, "unknown"),
|
|
143
|
+
branch_name: strOrNull(p.branch, "unknown"),
|
|
144
|
+
workspace_root: strOrNull(p.workspace_root),
|
|
145
|
+
started_at: ev.ts,
|
|
146
|
+
};
|
|
147
|
+
const url = `${this.base}/api/database/records/eval_runs`;
|
|
148
|
+
const result = await this.postJson(url, [row], {
|
|
149
|
+
prefer: "resolution=merge-duplicates",
|
|
150
|
+
quietErrorCodes: ["PGRST204"],
|
|
151
|
+
});
|
|
152
|
+
if (result.ok) {
|
|
153
|
+
this.confirmedRuns.add(ev.run_id);
|
|
154
|
+
this.failedRuns.delete(ev.run_id);
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
const fallback = await this.postJson(url, [toLegacyRunStartRow(row)], {
|
|
158
|
+
prefer: "resolution=merge-duplicates",
|
|
159
|
+
});
|
|
160
|
+
if (fallback.ok) {
|
|
161
|
+
this.confirmedRuns.add(ev.run_id);
|
|
162
|
+
this.failedRuns.delete(ev.run_id);
|
|
163
|
+
return;
|
|
164
|
+
}
|
|
165
|
+
this.failedRuns.add(ev.run_id);
|
|
166
|
+
}
|
|
167
|
+
async ensureRunExists(ev) {
|
|
168
|
+
if (this.confirmedRuns.has(ev.run_id))
|
|
169
|
+
return true;
|
|
170
|
+
if (!this.failedRuns.has(ev.run_id)) {
|
|
171
|
+
await this.handleRunStart({
|
|
172
|
+
...ev,
|
|
173
|
+
event_type: "run_start",
|
|
174
|
+
payload: {
|
|
175
|
+
task_description: strOrNull(ev.payload.prompt_summary),
|
|
176
|
+
model: strOrNull(ev.metadata?.model) ?? "unknown",
|
|
177
|
+
thinking: false,
|
|
178
|
+
commit: "unknown",
|
|
179
|
+
branch: "unknown",
|
|
180
|
+
workspace_root: strOrNull(ev.metadata?.workspace_root),
|
|
181
|
+
},
|
|
182
|
+
});
|
|
183
|
+
if (this.confirmedRuns.has(ev.run_id))
|
|
184
|
+
return true;
|
|
185
|
+
}
|
|
186
|
+
console.error(`[sal][eval] skipping ${ev.event_type}: eval_runs row is not available for run_id=${ev.run_id}`);
|
|
187
|
+
return false;
|
|
135
188
|
}
|
|
136
189
|
// INSERT into eval_turns + eval_sal_anchors (task + action)
|
|
137
190
|
async handleTurnAnchor(ev) {
|
|
@@ -339,6 +392,23 @@ function parsePostgrestErrorCode(rawBody) {
|
|
|
339
392
|
return undefined;
|
|
340
393
|
}
|
|
341
394
|
}
|
|
395
|
+
/**
 * Decide whether this module is running in a development runtime.
 *
 * NODE_ENV wins when it is exactly "development" (true) or "production"
 * (false). For any other value the module's own file path is inspected: a path
 * containing a "/dist/" segment counts as non-development. If the path cannot
 * be derived, the answer is false.
 *
 * @returns {boolean} True when the runtime is treated as development.
 */
function isDevelopmentRuntime() {
    switch (process.env.NODE_ENV) {
        case "development":
            return true;
        case "production":
            return false;
        default:
            break;
    }
    try {
        // Normalize Windows separators so the "/dist/" check works on any platform.
        const modulePath = fileURLToPath(import.meta.url).split("\\").join("/");
        const isBuiltOutput = modulePath.includes("/dist/");
        return !isBuiltOutput;
    }
    catch {
        return false;
    }
}
|
|
408
|
+
/**
 * Copy an eval_runs row without the pencil_version, commit_hash, branch_name
 * and workspace_root columns. The input row is not modified.
 *
 * @param {object} row - Full run_start row.
 * @returns {object} New object holding the remaining properties in their original order.
 */
function toLegacyRunStartRow(row) {
    const {
        pencil_version: _version,
        commit_hash: _commit,
        branch_name: _branch,
        workspace_root: _workspace,
        ...remainingColumns
    } = row;
    return remainingColumns;
}
|
|
342
412
|
function toLegacyToolTraceRow(row) {
|
|
343
413
|
const { has_tool_usage: _hasToolUsage, completed_tool_calls: _completedToolCalls, truncated_tool_calls: _truncatedToolCalls, truncated_tool_summary: _truncatedToolSummary, ...legacyRow } = row;
|
|
344
414
|
return legacyRow;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
|
|
2
|
+
* [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-ab/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
|
|
3
3
|
* [FROM]: Depends on core/extensions/types.ts (ToolExecutionStartEvent, ToolExecutionEndEvent), core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
|
|
4
4
|
* [TO]: Loaded by builtin-extensions.ts as a default extension entry point
|
|
5
|
-
* [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
|
|
5
|
+
* [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events with best-effort flush/close isolation; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
|
|
6
6
|
*/
|
|
7
7
|
import { type EvalAdapterId } from "./eval/index.js";
|
|
8
8
|
import type { ExtensionAPI } from "../../../core/extensions/types.js";
|
|
@@ -42,5 +42,6 @@ declare function resolveSalSidecarDir(workspaceRoot: string, experimentId?: stri
|
|
|
42
42
|
type TaskIntent = "fix" | "feat" | "refactor" | "explain" | "explore" | "unknown";
|
|
43
43
|
declare function inferIntent(prompt: string): TaskIntent;
|
|
44
44
|
declare function buildToolTracePayload(turn: TurnState, turnDuration: number): Record<string, unknown>;
|
|
45
|
+
declare function resolveSalAbEnabled(flagValue: unknown): boolean;
|
|
45
46
|
export default function salExtension(api: ExtensionAPI): Promise<void>;
|
|
46
|
-
export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveStaleCleanupEnabled, };
|
|
47
|
+
export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveSalAbEnabled, resolveStaleCleanupEnabled, };
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
|
|
2
|
+
* [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-ab/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
|
|
3
3
|
* [FROM]: Depends on core/extensions/types.ts (ToolExecutionStartEvent, ToolExecutionEndEvent), core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
|
|
4
4
|
* [TO]: Loaded by builtin-extensions.ts as a default extension entry point
|
|
5
|
-
* [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
|
|
5
|
+
* [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events with best-effort flush/close isolation; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
8
8
|
import { homedir } from "node:os";
|
|
@@ -49,7 +49,9 @@ function loadBuildMeta() {
|
|
|
49
49
|
}
|
|
50
50
|
const BUILD_META = loadBuildMeta();
|
|
51
51
|
const NOSAL_FLAG = "nosal";
|
|
52
|
+
const SAL_AB_FLAG = "sal-ab";
|
|
52
53
|
const SAL_REBUILD_FLAG = "sal-rebuild-terrain";
|
|
54
|
+
const SAL_AB_ENV = "NANOPENCIL_SAL_AB";
|
|
53
55
|
const SAL_CONTEXT_BUDGET_TOKENS = 800;
|
|
54
56
|
const APPROX_TOKENS_PER_CHAR = 0.25;
|
|
55
57
|
const EVAL_ENABLED_ENV = "NANOPENCIL_EVAL_ENABLED";
|
|
@@ -70,6 +72,10 @@ function resolveStaleCleanupEnabled(envValue, credentials) {
|
|
|
70
72
|
return credentials?.cleanup_stale_runs === true;
|
|
71
73
|
}
|
|
72
74
|
function isTruthy(value) {
|
|
75
|
+
if (value === true)
|
|
76
|
+
return true;
|
|
77
|
+
if (typeof value !== "string")
|
|
78
|
+
return false;
|
|
73
79
|
if (!value)
|
|
74
80
|
return false;
|
|
75
81
|
return ["1", "true", "yes", "on"].includes(value.toLowerCase());
|
|
@@ -411,6 +417,9 @@ function resolveEvalVariant(runtime, salEnabled) {
|
|
|
411
417
|
return runtime.evalVariantOverride;
|
|
412
418
|
return salEnabled ? "sal" : "control";
|
|
413
419
|
}
|
|
420
|
+
/**
 * Decide whether SAL A/B sidecar output is enabled.
 *
 * The CLI flag value is checked first; the environment variable named by
 * SAL_AB_ENV is only consulted when the flag is not truthy.
 *
 * @param {unknown} flagValue - Value of the --sal-ab flag.
 * @returns {boolean} True when either source is truthy per isTruthy.
 */
function resolveSalAbEnabled(flagValue) {
    if (isTruthy(flagValue)) {
        return true;
    }
    return isTruthy(process.env[SAL_AB_ENV]);
}
|
|
414
423
|
async function emitEval(runtime, eventType, salEnabled, payload) {
|
|
415
424
|
if (!runtime.evalEnabled)
|
|
416
425
|
return;
|
|
@@ -422,6 +431,27 @@ async function emitEval(runtime, eventType, salEnabled, payload) {
|
|
|
422
431
|
console.error("[sal][eval] failed to emit event:", err.message);
|
|
423
432
|
}
|
|
424
433
|
}
|
|
434
|
+
/**
 * Await an eval-sink operation without letting it block or break shutdown.
 *
 * Races `work` against a timer. A rejection is logged as "<label> failed" and a
 * timeout as "<label> timed out"; in both cases this function still resolves.
 * `work` is not cancelled on timeout — it is merely no longer awaited.
 *
 * @param {string} label - Name of the operation, used in log messages.
 * @param {Promise<unknown>} work - The already-started operation.
 * @param {number} [timeoutMs=6000] - How long to wait before giving up.
 * @returns {Promise<void>} Always resolves.
 */
async function evalBestEffort(label, work, timeoutMs = 6000) {
    let timer;
    const deadline = new Promise((resolve) => {
        timer = setTimeout(() => {
            console.error(`[sal][eval] ${label} timed out; continuing session shutdown`);
            resolve();
        }, timeoutMs);
    });
    try {
        await Promise.race([work, deadline]);
    }
    catch (err) {
        // A rejection value is not guaranteed to be an Error. Reading `.message`
        // off null/undefined would throw here and turn a best-effort wait into a
        // rejection, so format the reason defensively.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[sal][eval] ${label} failed:`, reason);
    }
    finally {
        // Always release the timer so it cannot keep the process alive.
        clearTimeout(timer);
    }
}
|
|
425
455
|
/**
|
|
426
456
|
* Fire-and-forget PATCH to mark stale "running" eval runs as "abandoned".
|
|
427
457
|
* Uses raw HTTP so it stays independent of the EvalSink batching pipeline.
|
|
@@ -485,6 +515,11 @@ export default async function salExtension(api) {
|
|
|
485
515
|
description: "Disable Structural Anchor Localization (SAL) - fall back to baseline memory mode",
|
|
486
516
|
default: false,
|
|
487
517
|
});
|
|
518
|
+
api.registerFlag(SAL_AB_FLAG, {
|
|
519
|
+
type: "boolean",
|
|
520
|
+
description: "Enable SAL A/B experiment sidecar files under .memory-experiments",
|
|
521
|
+
default: false,
|
|
522
|
+
});
|
|
488
523
|
api.registerFlag(SAL_REBUILD_FLAG, {
|
|
489
524
|
type: "boolean",
|
|
490
525
|
description: "Force SAL terrain index rebuild on next localization pass",
|
|
@@ -568,6 +603,7 @@ export default async function salExtension(api) {
|
|
|
568
603
|
pendingRebuild: false,
|
|
569
604
|
};
|
|
570
605
|
const isEnabled = () => !api.getFlag(NOSAL_FLAG);
|
|
606
|
+
const isSalAbEnabled = () => resolveSalAbEnabled(api.getFlag(SAL_AB_FLAG));
|
|
571
607
|
api.registerCommand("sal:coverage", {
|
|
572
608
|
description: "Report DIP P3 coverage for SAL prerequisite gating. Usage: /sal:coverage [module1 module2 ...]",
|
|
573
609
|
handler: async (args, ctx) => {
|
|
@@ -679,6 +715,7 @@ export default async function salExtension(api) {
|
|
|
679
715
|
const lines = [
|
|
680
716
|
"[SAL Status]",
|
|
681
717
|
` SAL: ${flagOn ? "ON (default)" : "OFF (--nosal)"}`,
|
|
718
|
+
` SAL A/B sidecar: ${isSalAbEnabled() ? "ON (--sal-ab)" : "OFF"}`,
|
|
682
719
|
` eval: ${runtime.evalEnabled ? "ON" : "OFF"}`,
|
|
683
720
|
` adapter: ${runtime.evalAdapter ?? "(inferred at sink creation)"}`,
|
|
684
721
|
` endpoint: ${endpointDisplay}`,
|
|
@@ -687,7 +724,7 @@ export default async function salExtension(api) {
|
|
|
687
724
|
` weightsSource: ${runtime.weightsSource}`,
|
|
688
725
|
` snapshotGeneratedAt: ${snapshot ? new Date(snapshot.generatedAt).toISOString() : "(not built)"}`,
|
|
689
726
|
` nodes: ${snapshot?.nodes.length ?? 0}`,
|
|
690
|
-
` sidecarDir: ${runtime.sidecarDir}`,
|
|
727
|
+
` sidecarDir: ${isSalAbEnabled() ? runtime.sidecarDir : "(disabled; use --sal-ab)"}`,
|
|
691
728
|
];
|
|
692
729
|
ctx.ui.notify(lines.join("\n"), "info");
|
|
693
730
|
},
|
|
@@ -837,7 +874,7 @@ export default async function salExtension(api) {
|
|
|
837
874
|
// Emit tool usage trace for self-awareness analytics.
|
|
838
875
|
// Always emit a bounded summary, including no-tool turns.
|
|
839
876
|
await emitEval(runtime, "tool_trace", isEnabled(), buildToolTracePayload(runtime.turn, turnDuration));
|
|
840
|
-
if (actionRes) {
|
|
877
|
+
if (isSalAbEnabled() && actionRes) {
|
|
841
878
|
persistTurnRecord(runtime, taskRes, actionRes);
|
|
842
879
|
}
|
|
843
880
|
runtime.turn = {
|
|
@@ -879,8 +916,8 @@ export default async function salExtension(api) {
|
|
|
879
916
|
turn_count: runtime.turnCounter,
|
|
880
917
|
total_duration_ms: Math.max(0, Date.now() - runtime.evalStartedAtMs),
|
|
881
918
|
});
|
|
882
|
-
await runtime.evalSink.flush();
|
|
883
|
-
await runtime.evalSink.close();
|
|
919
|
+
await evalBestEffort("flush", runtime.evalSink.flush());
|
|
920
|
+
await evalBestEffort("close", runtime.evalSink.close());
|
|
884
921
|
});
|
|
885
922
|
// ------------------------------------------------------------------
|
|
886
923
|
// Strategy A: Emergency flush on abnormal exit.
|
|
@@ -898,7 +935,7 @@ export default async function salExtension(api) {
|
|
|
898
935
|
turn_count: runtime.turnCounter,
|
|
899
936
|
total_duration_ms: Math.max(0, Date.now() - runtime.evalStartedAtMs),
|
|
900
937
|
})
|
|
901
|
-
.then(() => runtime.evalSink.flush())
|
|
938
|
+
.then(() => evalBestEffort("emergency flush", runtime.evalSink.flush()))
|
|
902
939
|
.catch(() => { });
|
|
903
940
|
};
|
|
904
941
|
process.on("beforeExit", emergencyFlush);
|
|
@@ -946,4 +983,4 @@ export default async function salExtension(api) {
|
|
|
946
983
|
});
|
|
947
984
|
});
|
|
948
985
|
}
|
|
949
|
-
export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveStaleCleanupEnabled, };
|
|
986
|
+
export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveSalAbEnabled, resolveStaleCleanupEnabled, };
|
|
@@ -3,11 +3,15 @@
|
|
|
3
3
|
> P2 | Parent: ../CLAUDE.md
|
|
4
4
|
|
|
5
5
|
Member List
|
|
6
|
-
- index.ts: AgentTeam extension entry, /team:/team:spawn/:send/:status/:stop/:terminate/:approve/:mode commands, TEAM_MESSAGE_TYPE renderer
|
|
7
|
-
- team-types.ts: TeammateRole/TeammateMode/TeammateStatus/TeammateIdentity/TeammateMessage/PersistedTeammate/TeamSpawnSpec/TeamSendResult types
|
|
6
|
+
- index.ts: AgentTeam extension entry, /team:/team:spawn/:preset/:send/:status/:progress/:psyche/:dashboard/:stop/:terminate/:approve/:mode commands, TEAM_MESSAGE_TYPE renderer, footer/widget updates
|
|
7
|
+
- team-types.ts: TeammateRole/TeammateMode/TeammateStatus/HarnessState/PsycheWeights/TeammateIdentity/TeammateMessage/PersistedTeammate/TeamSpawnSpec/TeamSendResult types
|
|
8
8
|
- team-state-store.ts: TeamStateStore class - durable teammate persistence via JSON files in <agentDir>/teams/
|
|
9
|
-
- team-parser.ts: Team command parser - parseTeamCommand/buildTeamHelp for /team:* subcommands
|
|
10
|
-
- team-runtime.ts: TeamRuntime class - teammate registry, lifecycle, mailbox + permission + transcript wiring; uses SubAgentRuntime for agent spawning
|
|
9
|
+
- team-parser.ts: Team command parser - parseTeamCommand/buildTeamHelp for /team:* subcommands, preset/progress/psyche/dashboard parsing, --harness spawn flag
|
|
10
|
+
- team-runtime.ts: TeamRuntime class - teammate registry, lifecycle, harness/psyche prompt injection, harness implementer execute default, mailbox + permission + transcript wiring; uses SubAgentRuntime for agent spawning
|
|
11
|
+
- team-psyche.ts: Psyche prompt layer - phase/role/soul weighted Id/Ego/Superego prompt construction
|
|
12
|
+
- team-harness.ts: Harness protocol helpers - harness file defaults, phase instructions, context file selection, feature-list validation, git checkpoint/revert, phase progression
|
|
13
|
+
- team-presets.ts: Preset definitions and executor - solo/duo/squad teammate spawning and optional solo autostart
|
|
14
|
+
- team-dashboard.ts: Text dashboard/status rendering - card layout, psyche/progress bars, footer status summary
|
|
11
15
|
- team-permissions.ts: PermissionStore - pending permission request queue, approve/deny, path allowlists (B.4)
|
|
12
16
|
- team-mailbox.ts: TeamMailbox - typed in-memory append-only message log for leader↔teammate (B.3)
|
|
13
17
|
- team-transcript.ts: TeamTranscriptWriter - per-teammate JSONL transcripts under <storageDir>/transcripts/ (B.7)
|
|
@@ -32,9 +36,13 @@ This extension implements the Phase B "true AgentTeam" per the refactor plan:
|
|
|
32
36
|
| Command | Description |
|
|
33
37
|
|---------|-------------|
|
|
34
38
|
| `/team` | List all teammates |
|
|
35
|
-
| `/team:spawn <role> [--name <id>]` | Create a persistent teammate |
|
|
39
|
+
| `/team:spawn <role> [--name <id>] [--harness]` | Create a persistent teammate |
|
|
40
|
+
| `/team:preset <solo\|duo\|squad> <task>` | Create teammates from a preset |
|
|
36
41
|
| `/team:send <name> <message>` | Send message to a teammate |
|
|
37
42
|
| `/team:status [<name>]` | Show team or teammate status |
|
|
43
|
+
| `/team:progress [<name>]` | Show harness progress |
|
|
44
|
+
| `/team:psyche [<name>]` | Show psyche weights |
|
|
45
|
+
| `/team:dashboard` | Toggle the text dashboard widget |
|
|
38
46
|
| `/team:stop <name>` | Stop teammate's current turn |
|
|
39
47
|
| `/team:terminate <name>` | Destroy a teammate |
|
|
40
48
|
| `/team:approve <request-id>` | Approve a permission request (TODO) |
|
|
@@ -46,6 +54,7 @@ This extension implements the Phase B "true AgentTeam" per the refactor plan:
|
|
|
46
54
|
- `reviewer`: Read-only review/audit
|
|
47
55
|
- `implementer`: Sandboxed write in isolated worktree
|
|
48
56
|
- `planner`: Read-only plan production
|
|
57
|
+
- `verifier`: Read-only strict verification/review
|
|
49
58
|
- `generic`: Read-only by default
|
|
50
59
|
|
|
51
60
|
## Modes
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* [WHO]: AgentTeam extension, registers /team commands for persistent teammates
|
|
3
|
-
* [FROM]: Depends on @pencil-agent/tui, core/extensions/types, ./team-runtime, ./team-parser, ./team-types
|
|
2
|
+
* [WHO]: AgentTeam extension, registers /team commands for persistent teammates, harness status, psyche status, dashboard widget
|
|
3
|
+
* [FROM]: Depends on @pencil-agent/tui, core/extensions/types, ./team-runtime, ./team-parser, ./team-types, ./team-harness, ./team-presets, ./team-dashboard
|
|
4
4
|
* [TO]: Consumed by builtin-extensions.ts as default extension
|
|
5
5
|
* [HERE]: extensions/defaults/team/index.ts - AgentTeam extension entry point
|
|
6
6
|
*
|
|
7
7
|
* Commands:
|
|
8
8
|
* /team - List teammates
|
|
9
|
-
* /team:spawn <role> [--name <id>] - Create teammate
|
|
9
|
+
* /team:spawn <role> [--name <id>] [--harness] - Create teammate
|
|
10
|
+
* /team:preset <solo|duo|squad> <task> - Create preset team
|
|
10
11
|
* /team:send <name> <message> - Send message to teammate
|
|
11
12
|
* /team:status [<name>] - Show status
|
|
13
|
+
* /team:progress [<name>] - Show harness progress
|
|
14
|
+
* /team:psyche [<name>] - Show psyche weights
|
|
15
|
+
* /team:dashboard - Toggle dashboard widget
|
|
12
16
|
* /team:stop <name> - Stop teammate turn
|
|
13
17
|
* /team:terminate <name> - Destroy teammate
|
|
14
18
|
* /team:approve <request-id> - Approve permission request
|