@pencil-agent/nano-pencil 1.13.6 → 1.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/build-meta.json +3 -3
  2. package/dist/core/model-registry.js +2 -1
  3. package/dist/core/runtime/agent-session.js +29 -4
  4. package/dist/core/sub-agent/sub-agent-backend.js +39 -1
  5. package/dist/core/sub-agent/sub-agent-types.d.ts +4 -0
  6. package/dist/extensions/defaults/AGENT.md +2 -2
  7. package/dist/extensions/defaults/CLAUDE.md +1 -1
  8. package/dist/extensions/defaults/sal/README.md +5 -2
  9. package/dist/extensions/defaults/sal/eval/insforge-sink.d.ts +4 -1
  10. package/dist/extensions/defaults/sal/eval/insforge-sink.js +141 -49
  11. package/dist/extensions/defaults/sal/index.d.ts +4 -3
  12. package/dist/extensions/defaults/sal/index.js +45 -8
  13. package/dist/extensions/defaults/team/CLAUDE.md +14 -5
  14. package/dist/extensions/defaults/team/index.d.ts +7 -3
  15. package/dist/extensions/defaults/team/index.js +135 -4
  16. package/dist/extensions/defaults/team/team-dashboard.d.ts +9 -0
  17. package/dist/extensions/defaults/team/team-dashboard.js +103 -0
  18. package/dist/extensions/defaults/team/team-harness.d.ts +35 -0
  19. package/dist/extensions/defaults/team/team-harness.js +351 -0
  20. package/dist/extensions/defaults/team/team-parser.d.ts +14 -4
  21. package/dist/extensions/defaults/team/team-parser.js +57 -8
  22. package/dist/extensions/defaults/team/team-presets.d.ts +33 -0
  23. package/dist/extensions/defaults/team/team-presets.js +83 -0
  24. package/dist/extensions/defaults/team/team-psyche.d.ts +14 -0
  25. package/dist/extensions/defaults/team/team-psyche.js +130 -0
  26. package/dist/extensions/defaults/team/team-runtime.d.ts +5 -0
  27. package/dist/extensions/defaults/team/team-runtime.js +70 -2
  28. package/dist/extensions/defaults/team/team-types.d.ts +53 -2
  29. package/dist/extensions/defaults/team/team-types.js +1 -1
  30. package/dist/modes/interactive/components/footer.d.ts +4 -3
  31. package/dist/modes/interactive/components/footer.js +16 -8
  32. package/dist/modes/interactive/components/provider-selector.d.ts +18 -5
  33. package/dist/modes/interactive/components/provider-selector.js +128 -21
  34. package/dist/modes/interactive/interactive-mode.js +8 -9
  35. package/dist/node_modules/@pencil-agent/ai/models.generated.d.ts +381 -1
  36. package/dist/node_modules/@pencil-agent/ai/models.generated.js +422 -47
  37. package/docs/SAL/345/256/236/351/252/214/350/257/204/344/274/260/346/226/271/345/274/217/357/274/210/344/273/243/347/240/201/345/257/271/346/257/224/344/270/216/345/244/232worktree/357/274/211.md +2 -2
  38. package/docs/SAL/346/200/273/344/275/223/350/267/257/347/272/277/344/270/216/345/256/236/351/252/214/345/244/247/347/272/262.md +2 -2
  39. package/docs/loop 重构完成总结.md +251 -0
  40. package/docs/loop 重构完成报告.md +123 -0
  41. package/docs/loop 重构方案.md +1222 -0
  42. package/docs/loop 重构方案实现报告.md +158 -0
  43. package/docs/loop 重构方案对比分析.md +128 -0
  44. package/docs/loop 重构计划.md +321 -0
  45. package/docs/loop-usage-examples.md +215 -0
  46. package/docs/planmode.md +1987 -0
  47. package/package.json +1 -1
@@ -1,6 +1,6 @@
1
1
  {
2
- "version": "1.13.6",
3
- "commitHash": "d5ae966",
2
+ "version": "1.13.8",
3
+ "commitHash": "66ecf7f",
4
4
  "branch": "main",
5
- "builtAt": "2026-04-23T04:35:24.822Z"
5
+ "builtAt": "2026-04-26T15:41:55.811Z"
6
6
  }
@@ -207,8 +207,9 @@ export class ModelRegistry {
207
207
  // Keep built-in models even if custom models failed to load
208
208
  }
209
209
  const builtInModels = this.useOnlyCustomModels
210
- ? this.loadBuiltInModels(overrides, modelOverrides, new Set(["openrouter"]), {
210
+ ? this.loadBuiltInModels(overrides, modelOverrides, new Set(["openrouter", "zai"]), {
211
211
  openrouter: new Set(NANOPENCIL_OPENROUTER_BUILTIN_MODEL_IDS),
212
+ // zai not specified = load all zai models
212
213
  })
213
214
  : this.loadBuiltInModels(overrides, modelOverrides);
214
215
  let combined = this.mergeCustomModels(builtInModels, customModels);
@@ -1183,8 +1183,22 @@ export class AgentSession {
1183
1183
  this.agent.setModel(model);
1184
1184
  this.sessionManager.appendModelChange(model.provider, model.id);
1185
1185
  this.settingsManager.setDefaultModelAndProvider(model.provider, model.id);
1186
- // Re-clamp thinking level for new model's capabilities
1187
- this.setThinkingLevel(this.thinkingLevel);
1186
+ // Auto-select thinking level based on model capabilities
1187
+ const currentLevel = this.thinkingLevel;
1188
+ let newLevel;
1189
+ if (!model.reasoning) {
1190
+ // Model doesn't support thinking, force off
1191
+ newLevel = "off";
1192
+ }
1193
+ else if (currentLevel === "off") {
1194
+ // Model supports thinking but current level is off, default to medium
1195
+ newLevel = "medium";
1196
+ }
1197
+ else {
1198
+ // Keep current level but clamp to new model's capabilities
1199
+ newLevel = currentLevel;
1200
+ }
1201
+ this.setThinkingLevel(newLevel);
1188
1202
  await this._emitModelSelect(model, previousModel, "set");
1189
1203
  }
1190
1204
  /**
@@ -1290,8 +1304,19 @@ export class AgentSession {
1290
1304
  this.agent.setModel(nextModel);
1291
1305
  this.sessionManager.appendModelChange(nextModel.provider, nextModel.id);
1292
1306
  this.settingsManager.setDefaultModelAndProvider(nextModel.provider, nextModel.id);
1293
- // Re-clamp thinking level for new model's capabilities
1294
- this.setThinkingLevel(this.thinkingLevel);
1307
+ // Auto-select thinking level based on model capabilities
1308
+ const currentLevel = this.thinkingLevel;
1309
+ let newLevel;
1310
+ if (!nextModel.reasoning) {
1311
+ newLevel = "off";
1312
+ }
1313
+ else if (currentLevel === "off") {
1314
+ newLevel = "medium";
1315
+ }
1316
+ else {
1317
+ newLevel = currentLevel;
1318
+ }
1319
+ this.setThinkingLevel(newLevel);
1295
1320
  await this._emitModelSelect(nextModel, currentModel, "cycle");
1296
1321
  return {
1297
1322
  model: nextModel,
@@ -5,6 +5,8 @@
5
5
  * [HERE]: core/sub-agent/sub-agent-backend.ts - in-process SubAgent implementation
6
6
  */
7
7
  import { createAgentSession } from "../runtime/sdk.js";
8
+ import { readFile } from "node:fs/promises";
9
+ import { isAbsolute, resolve } from "node:path";
8
10
  /**
9
11
  * In-process SubAgent backend.
10
12
  * Wraps createAgentSession() to run SubAgent in the same process.
@@ -12,6 +14,7 @@ import { createAgentSession } from "../runtime/sdk.js";
12
14
  export class InProcessSubAgentBackend {
13
15
  async spawn(spec) {
14
16
  const id = crypto.randomUUID();
17
+ const prompt = await buildPromptWithContextFiles(spec);
15
18
  // Create an internal AbortController that can be triggered by external signal or timeout
16
19
  const internalAbortController = new AbortController();
17
20
  // Forward external signal abort to internal controller
@@ -56,7 +59,7 @@ export class InProcessSubAgentBackend {
56
59
  // Start the prompt
57
60
  const promptPromise = (async () => {
58
61
  try {
59
- await session.prompt(spec.prompt, {
62
+ await session.prompt(prompt, {
60
63
  images: spec.images,
61
64
  });
62
65
  status = "done";
@@ -87,6 +90,18 @@ export class InProcessSubAgentBackend {
87
90
  }
88
91
  }
89
92
  finally {
93
+ if (spec.exitHook && result) {
94
+ try {
95
+ await spec.exitHook(result);
96
+ }
97
+ catch (error) {
98
+ status = "error";
99
+ result = {
100
+ success: false,
101
+ error: `exitHook failed: ${error instanceof Error ? error.message : String(error)}`,
102
+ };
103
+ }
104
+ }
90
105
  if (timeoutId !== undefined) {
91
106
  clearTimeout(timeoutId);
92
107
  }
@@ -117,3 +132,26 @@ export class InProcessSubAgentBackend {
117
132
  };
118
133
  }
119
134
  }
135
+ async function buildPromptWithContextFiles(spec) {
136
+ if (!spec.contextFiles?.length) {
137
+ return spec.prompt;
138
+ }
139
+ const chunks = [];
140
+ for (const filePath of spec.contextFiles) {
141
+ const absolutePath = isAbsolute(filePath) ? filePath : resolve(spec.cwd, filePath);
142
+ try {
143
+ const content = await readFile(absolutePath, "utf8");
144
+ chunks.push(`### ${filePath}\n\`\`\`\n${content}\n\`\`\``);
145
+ }
146
+ catch (error) {
147
+ chunks.push(`### ${filePath}\n(unavailable: ${error instanceof Error ? error.message : String(error)})`);
148
+ }
149
+ }
150
+ return [
151
+ "The following files are injected as current task context. Treat them as read-only context unless the task instructions explicitly allow updates.",
152
+ "",
153
+ ...chunks,
154
+ "",
155
+ spec.prompt,
156
+ ].join("\n");
157
+ }
@@ -25,6 +25,10 @@ export interface SubAgentSpec {
25
25
  images?: ImageContent[];
26
26
  /** Model to use (reuses main session's model and auth) */
27
27
  model?: Model<any>;
28
+ /** Files to inject into the initial prompt as read-only context */
29
+ contextFiles?: string[];
30
+ /** Optional callback invoked after the run result is available */
31
+ exitHook?: (result: SubAgentResult) => Promise<void> | void;
28
32
  }
29
33
  /**
30
34
  * Result from a completed SubAgent run.
@@ -37,14 +37,14 @@ loop/scheduler-controller.ts: SchedulerController - in-memory recurring task sto
37
37
  loop/scheduler-parser.ts: Loop command parsing with flags/subcommands, parseSchedulerCommand/parseDurationSpec/buildSchedulerHelp, --name/--max/--quiet
38
38
  loop/scheduler-types.ts: Scheduled loop types, LoopPayloadKind/ScheduledLoopTask/LoopStartSpec/ParsedSchedulerCommand
39
39
  loop/README.md: Loop extension documentation - recurring scheduler usage and flags
40
- sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/run_end eval events through pluggable EvalSink; runtime no-op when --nosal is set
40
+ sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-ab/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/run_end eval events through pluggable EvalSink with best-effort shutdown flushing; writes local .memory-experiments sidecar anchors only when --sal-ab or NANOPENCIL_SAL_AB=1 is enabled; runtime no-op when --nosal is set
41
41
  sal/terrain.ts: TerrainSnapshot/TerrainNode/TerrainEdge model, buildTerrainIndex(), checkDipCoverage(), isSnapshotStale(), moduleIdForPath(), parses P2 AGENT.md and P3 file headers
42
42
  sal/anchors.ts: StructuralAnchor/AnchorResolution model, locateTask(), locateAction(), evidence-driven scoring with tunable SalWeights, CJK bigram tokenization
43
43
  sal/weights.ts: SalWeights interface, SAL_DEFAULT_WEIGHTS, loadSalWeights() reads sal-config.json from workspace or .memory-experiments/sal/
44
44
  sal/eval/index.ts: createEvalSink() factory + barrel re-exports; adapter selection via options.adapter or endpoint scheme inference (http(s)→insforge, file://|/|./|../→jsonl, missing→noop); ONLY entry point SAL imports from
45
45
  sal/eval/types.ts: EvalSink interface, EvalEventEnvelope/EvalEventType (run_start/run_end/turn_anchor), EvalAdapterId ("insforge"|"jsonl"|"noop"), CreateEvalSinkOptions, createEvalEvent factory; zero-dependency type surface
46
46
  sal/eval/noop-sink.ts: noopSink — silent EvalSink used when eval disabled or no adapter configured
47
- sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, run_end→eval_runs PATCH; allowSelfSigned TLS option, batching with default 2000ms interval
47
+ sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates) with legacy-schema fallback, writes turn_anchor/tool_trace/memory_recalls/run_end only after parent run confirmation, tool_trace→eval_tool_traces with PGRST204 fallback, memory_recalls→eval_memory_recalls batch INSERT, run_end→eval_runs PATCH; allowSelfSigned TLS option logs only in development runtime, batching with default 2000ms interval
48
48
  sal/eval/jsonl-sink.ts: JsonlEvalSink — append-only filesystem adapter, one JSON object per line, accepts file:// URLs or plain paths, auto-creates parent dir, batched writes
49
49
  sal/README.md: SAL extension usage, sidecar output layout, weights override, pluggability contract
50
50
  team/index.ts: AgentTeam extension entry, /team:/team:spawn/:send/:status/:stop/:terminate/:approve/:mode commands, TEAM_MESSAGE_TYPE renderer
@@ -47,7 +47,7 @@ sal/terrain.ts: TerrainSnapshot/TerrainNode/TerrainEdge model, async buildTerrai
47
47
  sal/anchors.ts: StructuralAnchor/AnchorResolution model, locateTask(), locateAction(), evidence-driven scoring with tunable SalWeights, CJK bigram tokenization
48
48
  sal/weights.ts: SalWeights interface, SAL_DEFAULT_WEIGHTS, loadSalWeights() reads sal-config.json from workspace or .memory-experiments/sal/
49
49
  sal/eval/index.ts: createEvalSink() factory + barrel re-exports; adapter selection via options.adapter or endpoint scheme inference (http(s)→insforge, file://|/|./|../→jsonl, missing→noop); ONLY entry point SAL imports from
50
- sal/eval/types.ts: EvalSink interface, EvalEventEnvelope/EvalEventType (run_start/run_end/turn_anchor/memory_recalls), EvalAdapterId ("insforge"|"jsonl"|"noop"), CreateEvalSinkOptions, createEvalEvent factory; zero-dependency type surface
50
+ sal/eval/types.ts: EvalSink interface, EvalEventEnvelope/EvalEventType (run_start/run_end/turn_anchor/memory_recalls/tool_trace), EvalAdapterId ("insforge"|"jsonl"|"noop"), CreateEvalSinkOptions, createEvalEvent factory; zero-dependency type surface
51
51
  sal/eval/noop-sink.ts: noopSink — silent EvalSink used when eval disabled or no adapter configured
52
52
  sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces bounded per-turn summaries (including no-tool turns and truncation counters), memory_recalls→eval_memory_recalls batch INSERT, run_end→eval_runs PATCH; allowSelfSigned TLS option, batching with default 2000ms interval
53
53
  sal/eval/jsonl-sink.ts: JsonlEvalSink — append-only filesystem adapter, one JSON object per line, accepts file:// URLs or plain paths, auto-creates parent dir, batched writes
@@ -14,11 +14,14 @@ SAL is **enabled by default** on every nanoPencil session.
14
14
  # SAL active (default)
15
15
  pencil -p "your prompt"
16
16
 
17
+ # SAL active with local A/B sidecar artifacts
18
+ pencil --sal-ab -p "your prompt"
19
+
17
20
  # SAL disabled — baseline memory mode
18
21
  pencil --nosal -p "your prompt"
19
22
  ```
20
23
 
21
- When `--nosal` is set, all hooks return early and zero work is performed.
24
+ When `--nosal` is set, all hooks return early and zero work is performed. When SAL is active without `--sal-ab`, it can still emit configured eval data to InsForge, but it does not create local `.memory-experiments` sidecar files.
22
25
 
23
26
  ## Terminal compatibility (Warp, block UIs)
24
27
 
@@ -33,7 +36,7 @@ SAL builds a **terrain snapshot** of the workspace (walk + read DIP headers). Th
33
36
 
34
37
  ## Sidecar output
35
38
 
36
- When enabled, every turn writes a JSON record to:
39
+ Local sidecar output is disabled by default. Enable it only for explicit SAL A/B experiments with `--sal-ab` or `NANOPENCIL_SAL_AB=1`. In that mode, every grounded turn writes a JSON record to:
37
40
 
38
41
  ```
39
42
  <workspace>/.memory-experiments/sal/anchors/turn-<timestamp>.json
@@ -2,7 +2,7 @@
2
2
  * [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
3
3
  * [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
4
4
  * [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
5
- * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
5
+ * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version with legacy fallback), turn_anchor→eval_turns + eval_sal_anchors×2 only after parent run confirmation, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
6
6
  *
7
7
  * Pluggable: nothing in this file may be imported from outside the eval/ directory.
8
8
  * To add a new backend, write a sibling file with the same EvalSink interface.
@@ -18,6 +18,8 @@ export declare class InsForgeEvalSink implements EvalSink {
18
18
  private flushTimer;
19
19
  private flushInFlight;
20
20
  private closed;
21
+ private confirmedRuns;
22
+ private failedRuns;
21
23
  constructor(options: CreateEvalSinkOptions);
22
24
  sendEvent(event: EvalEventEnvelope): Promise<void>;
23
25
  flush(): Promise<void>;
@@ -26,6 +28,7 @@ export declare class InsForgeEvalSink implements EvalSink {
26
28
  private scheduleFlush;
27
29
  private routeEvent;
28
30
  private handleRunStart;
31
+ private ensureRunExists;
29
32
  private handleTurnAnchor;
30
33
  private handleRunEnd;
31
34
  private handleMemoryRecalls;
@@ -2,14 +2,15 @@
2
2
  * [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
3
3
  * [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
4
4
  * [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
5
- * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
5
+ * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version with legacy fallback), turn_anchor→eval_turns + eval_sal_anchors×2 only after parent run confirmation, tool_trace→eval_tool_traces with legacy-schema fallback, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
6
6
  *
7
7
  * Pluggable: nothing in this file may be imported from outside the eval/ directory.
8
8
  * To add a new backend, write a sibling file with the same EvalSink interface.
9
9
  */
10
- import { request } from "node:https";
11
10
  import { request as httpRequest } from "node:http";
11
+ import { request } from "node:https";
12
12
  import { URL } from "node:url";
13
+ import { fileURLToPath } from "node:url";
13
14
  export class InsForgeEvalSink {
14
15
  enabled = true;
15
16
  base;
@@ -20,11 +21,13 @@ export class InsForgeEvalSink {
20
21
  flushTimer;
21
22
  flushInFlight;
22
23
  closed = false;
24
+ confirmedRuns = new Set();
25
+ failedRuns = new Set();
23
26
  constructor(options) {
24
27
  this.base = options.endpoint.replace(/\/+$/, "");
25
28
  this.batchIntervalMs = options.batchIntervalMs ?? 2000;
26
29
  this.allowSelfSigned = options.allowSelfSigned ?? false;
27
- if (this.allowSelfSigned) {
30
+ if (this.allowSelfSigned && isDevelopmentRuntime()) {
28
31
  console.warn("[sal][eval] TLS certificate verification disabled (allowSelfSigned=true)");
29
32
  }
30
33
  const h = {
@@ -51,13 +54,16 @@ export class InsForgeEvalSink {
51
54
  }
52
55
  async flush() {
53
56
  if (this.flushInFlight) {
54
- await this.flushInFlight;
57
+ await this.flushInFlight.catch(() => { });
55
58
  return;
56
59
  }
57
60
  this.flushInFlight = this.doFlush();
58
61
  try {
59
62
  await this.flushInFlight;
60
63
  }
64
+ catch (err) {
65
+ console.error("[sal][eval] flush failed:", err.message);
66
+ }
61
67
  finally {
62
68
  this.flushInFlight = undefined;
63
69
  }
@@ -78,7 +84,9 @@ export class InsForgeEvalSink {
78
84
  }
79
85
  async close() {
80
86
  this.closed = true;
81
- await this.flush();
87
+ await this.flush().catch((err) => {
88
+ console.error("[sal][eval] close flush failed:", err.message);
89
+ });
82
90
  }
83
91
  scheduleFlush() {
84
92
  if (this.flushTimer)
@@ -98,16 +106,20 @@ export class InsForgeEvalSink {
98
106
  await this.handleRunStart(event);
99
107
  break;
100
108
  case "turn_anchor":
101
- await this.handleTurnAnchor(event);
109
+ if (await this.ensureRunExists(event))
110
+ await this.handleTurnAnchor(event);
102
111
  break;
103
112
  case "memory_recalls":
104
- await this.handleMemoryRecalls(event);
113
+ if (await this.ensureRunExists(event))
114
+ await this.handleMemoryRecalls(event);
105
115
  break;
106
116
  case "tool_trace":
107
- await this.handleToolTrace(event);
117
+ if (await this.ensureRunExists(event))
118
+ await this.handleToolTrace(event);
108
119
  break;
109
120
  case "run_end":
110
- await this.handleRunEnd(event);
121
+ if (await this.ensureRunExists(event))
122
+ await this.handleRunEnd(event);
111
123
  break;
112
124
  }
113
125
  }
@@ -118,20 +130,61 @@ export class InsForgeEvalSink {
118
130
  // INSERT into eval_runs (merge-duplicates so a later run_start can update model)
119
131
  async handleRunStart(ev) {
120
132
  const p = ev.payload;
121
- await this.postJson(`${this.base}/api/database/records/eval_runs`, [{
122
- run_id: ev.run_id,
123
- variant: ev.variant,
124
- status: "running",
125
- task_description: strOrNull(p.task_description),
126
- task_file: strOrNull(p.task_file),
127
- model: strOrNull(p.model),
128
- thinking: p.thinking === true,
129
- pencil_version: strOrNull(p.pencil_version),
130
- commit_hash: strOrNull(p.commit, "unknown"),
131
- branch_name: strOrNull(p.branch, "unknown"),
132
- workspace_root: strOrNull(p.workspace_root),
133
- started_at: ev.ts,
134
- }], { prefer: "resolution=merge-duplicates" });
133
+ const row = {
134
+ run_id: ev.run_id,
135
+ variant: ev.variant,
136
+ status: "running",
137
+ task_description: strOrNull(p.task_description),
138
+ task_file: strOrNull(p.task_file),
139
+ model: strOrNull(p.model),
140
+ thinking: p.thinking === true,
141
+ pencil_version: strOrNull(p.pencil_version),
142
+ commit_hash: strOrNull(p.commit, "unknown"),
143
+ branch_name: strOrNull(p.branch, "unknown"),
144
+ workspace_root: strOrNull(p.workspace_root),
145
+ started_at: ev.ts,
146
+ };
147
+ const url = `${this.base}/api/database/records/eval_runs`;
148
+ const result = await this.postJson(url, [row], {
149
+ prefer: "resolution=merge-duplicates",
150
+ quietErrorCodes: ["PGRST204"],
151
+ });
152
+ if (result.ok) {
153
+ this.confirmedRuns.add(ev.run_id);
154
+ this.failedRuns.delete(ev.run_id);
155
+ return;
156
+ }
157
+ const fallback = await this.postJson(url, [toLegacyRunStartRow(row)], {
158
+ prefer: "resolution=merge-duplicates",
159
+ });
160
+ if (fallback.ok) {
161
+ this.confirmedRuns.add(ev.run_id);
162
+ this.failedRuns.delete(ev.run_id);
163
+ return;
164
+ }
165
+ this.failedRuns.add(ev.run_id);
166
+ }
167
+ async ensureRunExists(ev) {
168
+ if (this.confirmedRuns.has(ev.run_id))
169
+ return true;
170
+ if (!this.failedRuns.has(ev.run_id)) {
171
+ await this.handleRunStart({
172
+ ...ev,
173
+ event_type: "run_start",
174
+ payload: {
175
+ task_description: strOrNull(ev.payload.prompt_summary),
176
+ model: strOrNull(ev.metadata?.model) ?? "unknown",
177
+ thinking: false,
178
+ commit: "unknown",
179
+ branch: "unknown",
180
+ workspace_root: strOrNull(ev.metadata?.workspace_root),
181
+ },
182
+ });
183
+ if (this.confirmedRuns.has(ev.run_id))
184
+ return true;
185
+ }
186
+ console.error(`[sal][eval] skipping ${ev.event_type}: eval_runs row is not available for run_id=${ev.run_id}`);
187
+ return false;
135
188
  }
136
189
  // INSERT into eval_turns + eval_sal_anchors (task + action)
137
190
  async handleTurnAnchor(ev) {
@@ -222,25 +275,33 @@ export class InsForgeEvalSink {
222
275
  async handleToolTrace(ev) {
223
276
  const p = ev.payload;
224
277
  const taskSignals = p.task_signals;
225
- await this.postJson(`${this.base}/api/database/records/eval_tool_traces`, [{
226
- run_id: ev.run_id,
227
- turn_id: String(p.turn_id ?? 0),
228
- event_id: ev.event_id,
229
- tool_calls: p.tool_calls ? JSON.stringify(p.tool_calls) : null,
230
- tool_sequence: p.tool_sequence ? JSON.stringify(p.tool_sequence) : null,
231
- intent: strOrNull(taskSignals?.intent),
232
- prompt_length: String(taskSignals?.prompt_length ?? 0),
233
- has_error_trace: String(taskSignals?.has_error_trace === true),
234
- has_file_reference: String(taskSignals?.has_file_reference === true),
235
- has_tool_usage: String(p.has_tool_usage === true),
236
- total_tool_calls: String(p.total_tool_calls ?? 0),
237
- total_errors: String(p.total_errors ?? 0),
238
- completed_tool_calls: String(p.completed_tool_calls ?? 0),
239
- truncated_tool_calls: String(p.truncated_tool_calls ?? 0),
240
- truncated_tool_summary: String(p.truncated_tool_summary ?? 0),
241
- duration_ms: String(p.duration_ms ?? 0),
242
- recorded_at: ev.ts,
243
- }], { prefer: "resolution=ignore-duplicates" });
278
+ const row = {
279
+ run_id: ev.run_id,
280
+ turn_id: String(p.turn_id ?? 0),
281
+ event_id: ev.event_id,
282
+ tool_calls: p.tool_calls ? JSON.stringify(p.tool_calls) : null,
283
+ tool_sequence: p.tool_sequence ? JSON.stringify(p.tool_sequence) : null,
284
+ intent: strOrNull(taskSignals?.intent),
285
+ prompt_length: String(taskSignals?.prompt_length ?? 0),
286
+ has_error_trace: String(taskSignals?.has_error_trace === true),
287
+ has_file_reference: String(taskSignals?.has_file_reference === true),
288
+ has_tool_usage: String(p.has_tool_usage === true),
289
+ total_tool_calls: String(p.total_tool_calls ?? 0),
290
+ total_errors: String(p.total_errors ?? 0),
291
+ completed_tool_calls: String(p.completed_tool_calls ?? 0),
292
+ truncated_tool_calls: String(p.truncated_tool_calls ?? 0),
293
+ truncated_tool_summary: String(p.truncated_tool_summary ?? 0),
294
+ duration_ms: String(p.duration_ms ?? 0),
295
+ recorded_at: ev.ts,
296
+ };
297
+ const url = `${this.base}/api/database/records/eval_tool_traces`;
298
+ const result = await this.postJson(url, [row], {
299
+ prefer: "resolution=ignore-duplicates",
300
+ quietErrorCodes: ["PGRST204"],
301
+ });
302
+ if (!result.ok && result.errorCode === "PGRST204") {
303
+ await this.postJson(url, [toLegacyToolTraceRow(row)], { prefer: "resolution=ignore-duplicates" });
304
+ }
244
305
  }
245
306
  // ------------------------------------------------------------------
246
307
  // HTTP helpers
@@ -249,12 +310,12 @@ export class InsForgeEvalSink {
249
310
  const extraHeaders = {};
250
311
  if (extra?.prefer)
251
312
  extraHeaders["Prefer"] = extra.prefer;
252
- return this.httpJson("POST", url, body, extraHeaders);
313
+ return this.httpJson("POST", url, body, extraHeaders, extra?.quietErrorCodes);
253
314
  }
254
315
  patchJson(url, body) {
255
316
  return this.httpJson("PATCH", url, body, {});
256
317
  }
257
- httpJson(method, url, body, extraHeaders) {
318
+ httpJson(method, url, body, extraHeaders, quietErrorCodes = []) {
258
319
  return new Promise((resolve) => {
259
320
  const payload = JSON.stringify(body);
260
321
  let parsed;
@@ -263,7 +324,7 @@ export class InsForgeEvalSink {
263
324
  }
264
325
  catch {
265
326
  console.error(`[sal][eval] invalid URL: ${url}`);
266
- resolve(false);
327
+ resolve({ ok: false });
267
328
  return;
268
329
  }
269
330
  const isHttps = parsed.protocol === "https:";
@@ -287,20 +348,21 @@ export class InsForgeEvalSink {
287
348
  res.on("data", (chunk) => { rawBody += chunk; });
288
349
  res.on("end", () => {
289
350
  const ok = res.statusCode !== undefined && res.statusCode < 300;
290
- if (!ok) {
351
+ const errorCode = parsePostgrestErrorCode(rawBody);
352
+ if (!ok && !quietErrorCodes.includes(errorCode ?? "")) {
291
353
  console.error(`[sal][eval] HTTP ${res.statusCode} ${method} ${parsed.pathname} — ${rawBody.slice(0, 300)}`);
292
354
  }
293
- resolve(ok);
355
+ resolve({ ok, statusCode: res.statusCode, body: rawBody, errorCode });
294
356
  });
295
357
  });
296
358
  req.on("error", (err) => {
297
359
  console.error(`[sal][eval] network error → ${parsed.hostname}: ${err.message}`);
298
- resolve(false);
360
+ resolve({ ok: false });
299
361
  });
300
362
  req.on("timeout", () => {
301
363
  console.error(`[sal][eval] timeout ${method} ${parsed.pathname}`);
302
364
  req.destroy();
303
- resolve(false);
365
+ resolve({ ok: false });
304
366
  });
305
367
  req.write(payload);
306
368
  req.end();
@@ -321,3 +383,33 @@ function numOrNull(v) {
321
383
  const n = Number(v);
322
384
  return isNaN(n) ? null : n;
323
385
  }
386
+ function parsePostgrestErrorCode(rawBody) {
387
+ try {
388
+ const parsed = JSON.parse(rawBody);
389
+ return typeof parsed?.code === "string" ? parsed.code : undefined;
390
+ }
391
+ catch {
392
+ return undefined;
393
+ }
394
+ }
395
+ function isDevelopmentRuntime() {
396
+ if (process.env.NODE_ENV === "development")
397
+ return true;
398
+ if (process.env.NODE_ENV === "production")
399
+ return false;
400
+ try {
401
+ const currentFile = fileURLToPath(import.meta.url).replace(/\\/g, "/");
402
+ return !currentFile.includes("/dist/");
403
+ }
404
+ catch {
405
+ return false;
406
+ }
407
+ }
408
+ function toLegacyRunStartRow(row) {
409
+ const { pencil_version: _pencilVersion, commit_hash: _commitHash, branch_name: _branchName, workspace_root: _workspaceRoot, ...legacyRow } = row;
410
+ return legacyRow;
411
+ }
412
+ function toLegacyToolTraceRow(row) {
413
+ const { has_tool_usage: _hasToolUsage, completed_tool_calls: _completedToolCalls, truncated_tool_calls: _truncatedToolCalls, truncated_tool_summary: _truncatedToolSummary, ...legacyRow } = row;
414
+ return legacyRow;
415
+ }
@@ -1,8 +1,8 @@
1
1
  /**
2
- * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
2
+ * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-ab/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
3
3
  * [FROM]: Depends on core/extensions/types.ts (ToolExecutionStartEvent, ToolExecutionEndEvent), core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
4
4
  * [TO]: Loaded by builtin-extensions.ts as a default extension entry point
5
- * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
5
+ * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events with best-effort flush/close isolation; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
6
6
  */
7
7
  import { type EvalAdapterId } from "./eval/index.js";
8
8
  import type { ExtensionAPI } from "../../../core/extensions/types.js";
@@ -42,5 +42,6 @@ declare function resolveSalSidecarDir(workspaceRoot: string, experimentId?: stri
42
42
  type TaskIntent = "fix" | "feat" | "refactor" | "explain" | "explore" | "unknown";
43
43
  declare function inferIntent(prompt: string): TaskIntent;
44
44
  declare function buildToolTracePayload(turn: TurnState, turnDuration: number): Record<string, unknown>;
45
+ declare function resolveSalAbEnabled(flagValue: unknown): boolean;
45
46
  export default function salExtension(api: ExtensionAPI): Promise<void>;
46
- export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveStaleCleanupEnabled, };
47
+ export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveSalAbEnabled, resolveStaleCleanupEnabled, };