@pencil-agent/nano-pencil 1.13.5 → 1.13.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ {
2
+ "version": "1.13.6",
3
+ "commitHash": "d5ae966",
4
+ "branch": "main",
5
+ "builtAt": "2026-04-23T04:35:24.822Z"
6
+ }
@@ -42,14 +42,14 @@ plan/exit-plan-mode-tool.ts: createExitPlanModeTool() - ExitPlanMode tool with p
42
42
  plan/plan-agents.ts: Explore/Plan subagent definitions with read-only tools for plan mode workflow
43
43
  plan/plan-validation.ts: validatePlan() - validates plan has required sections (Context, Approach, Files, Verification)
44
44
  plan/teammate-approval.ts: isInTeammateContext(), submitPlanToLeader(), formatPlanSubmittedMessage() - teammate plan approval integration
45
- sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/memory_recalls/run_end eval events through pluggable EvalSink; reads memoryRecallSnapshot from turn-context bus in agent_end; runtime no-op when --nosal is set
45
+ sal/index.ts: SAL extension entry, enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; /sal:setup writes ~/.memory-experiments/credentials.json with adapter inference (insforge/jsonl/noop); publishes structuralAnchor via core/runtime/turn-context (no SAL-specific globals); emits run_start/turn_anchor/memory_recalls/tool_trace/run_end eval events through pluggable EvalSink; reads memoryRecallSnapshot from turn-context bus in agent_end; runtime no-op when --nosal is set; auto-injects pencil_version from build-meta.json into run_start; emergency flush on beforeExit/SIGHUP/SIGTERM; stale run cleanup is opt-in via NANOPENCIL_EVAL_CLEANUP_STALE_RUNS / credentials cleanup_stale_runs; tool_trace is a bounded per-turn summary and includes no-tool turns
46
46
  sal/terrain.ts: TerrainSnapshot/TerrainNode/TerrainEdge model, async buildTerrainIndex()/isSnapshotStale() (fs/promises + periodic yields so TUI can flush under block terminals like Warp), checkDipCoverage(), moduleIdForPath(), parses P2 CLAUDE.md and P3 file headers
47
47
  sal/anchors.ts: StructuralAnchor/AnchorResolution model, locateTask(), locateAction(), evidence-driven scoring with tunable SalWeights, CJK bigram tokenization
48
48
  sal/weights.ts: SalWeights interface, SAL_DEFAULT_WEIGHTS, loadSalWeights() reads sal-config.json from workspace or .memory-experiments/sal/
49
49
  sal/eval/index.ts: createEvalSink() factory + barrel re-exports; adapter selection via options.adapter or endpoint scheme inference (http(s)→insforge, file://|/|./|../→jsonl, missing→noop); ONLY entry point SAL imports from
50
50
  sal/eval/types.ts: EvalSink interface, EvalEventEnvelope/EvalEventType (run_start/run_end/turn_anchor/memory_recalls/tool_trace), EvalAdapterId ("insforge"|"jsonl"|"noop"), CreateEvalSinkOptions, createEvalEvent factory; zero-dependency type surface
51
51
  sal/eval/noop-sink.ts: noopSink — silent EvalSink used when eval disabled or no adapter configured
52
- sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, memory_recalls→eval_memory_recalls batch INSERT, run_end→eval_runs PATCH; allowSelfSigned TLS option, batching with default 2000ms interval
52
+ sal/eval/insforge-sink.ts: InsForgeEvalSink — PostgREST adapter, routes run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces bounded per-turn summaries (including no-tool turns and truncation counters), memory_recalls→eval_memory_recalls batch INSERT, run_end→eval_runs PATCH; allowSelfSigned TLS option, batching with default 2000ms interval
53
53
  sal/eval/jsonl-sink.ts: JsonlEvalSink — append-only filesystem adapter, one JSON object per line, accepts file:// URLs or plain paths, auto-creates parent dir, batched writes
54
54
  sal/README.md: SAL extension usage, sidecar output layout, weights override, pluggability contract
55
55
  team/index.ts: AgentTeam extension entry, /team:/team:spawn/:send/:status/:stop/:terminate/:approve/:mode commands, TEAM_MESSAGE_TYPE renderer
@@ -64,4 +64,4 @@ team/TESTING.md: Manual & smoke-test guide for Phase B AgentTeam
64
64
 
65
65
  Rule: Members complete, one item per line, parent links valid, precise terms first
66
66
 
67
- [COVENANT]: Update this file header on changes and verify against parent CLAUDE.md
67
+ [COVENANT]: Update this file header on changes and verify against parent CLAUDE.md
@@ -2,7 +2,7 @@
2
2
  * [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
3
3
  * [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
4
4
  * [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
5
- * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, run_end→eval_runs PATCH
5
+ * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
6
6
  *
7
7
  * Pluggable: nothing in this file may be imported from outside the eval/ directory.
8
8
  * To add a new backend, write a sibling file with the same EvalSink interface.
@@ -29,6 +29,7 @@ export declare class InsForgeEvalSink implements EvalSink {
29
29
  private handleTurnAnchor;
30
30
  private handleRunEnd;
31
31
  private handleMemoryRecalls;
32
+ private handleToolTrace;
32
33
  private postJson;
33
34
  private patchJson;
34
35
  private httpJson;
@@ -2,7 +2,7 @@
2
2
  * [WHO]: Provides InsForgeEvalSink (PostgREST-backed adapter)
3
3
  * [FROM]: Depends on node:https, node:http, node:url; ./types.js for EvalSink/EvalEventEnvelope/CreateEvalSinkOptions
4
4
  * [TO]: Constructed by eval/index.ts factory when adapter resolves to "insforge"
5
- * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates), turn_anchor→eval_turns + eval_sal_anchors×2, run_end→eval_runs PATCH
5
+ * [HERE]: extensions/defaults/sal/eval/insforge-sink.ts - InsForge-specific routing: run_start→eval_runs INSERT (merge-duplicates, includes pencil_version), turn_anchor→eval_turns + eval_sal_anchors×2, tool_trace→eval_tool_traces, memory_recalls→eval_memory_recalls, run_end→eval_runs PATCH
6
6
  *
7
7
  * Pluggable: nothing in this file may be imported from outside the eval/ directory.
8
8
  * To add a new backend, write a sibling file with the same EvalSink interface.
@@ -103,6 +103,9 @@ export class InsForgeEvalSink {
103
103
  case "memory_recalls":
104
104
  await this.handleMemoryRecalls(event);
105
105
  break;
106
+ case "tool_trace":
107
+ await this.handleToolTrace(event);
108
+ break;
106
109
  case "run_end":
107
110
  await this.handleRunEnd(event);
108
111
  break;
@@ -123,6 +126,7 @@ export class InsForgeEvalSink {
123
126
  task_file: strOrNull(p.task_file),
124
127
  model: strOrNull(p.model),
125
128
  thinking: p.thinking === true,
129
+ pencil_version: strOrNull(p.pencil_version),
126
130
  commit_hash: strOrNull(p.commit, "unknown"),
127
131
  branch_name: strOrNull(p.branch, "unknown"),
128
132
  workspace_root: strOrNull(p.workspace_root),
@@ -213,6 +217,31 @@ export class InsForgeEvalSink {
213
217
  }));
214
218
  await this.postJson(`${this.base}/api/database/records/eval_memory_recalls`, rows, { prefer: "resolution=ignore-duplicates" });
215
219
  }
220
+ // INSERT into eval_tool_traces — one row per turn with tool usage summary
221
+ // InsForge columns are all TEXT; JSONB fields must be serialized to strings.
222
+ async handleToolTrace(ev) {
223
+ const p = ev.payload;
224
+ const taskSignals = p.task_signals;
225
+ await this.postJson(`${this.base}/api/database/records/eval_tool_traces`, [{
226
+ run_id: ev.run_id,
227
+ turn_id: String(p.turn_id ?? 0),
228
+ event_id: ev.event_id,
229
+ tool_calls: p.tool_calls ? JSON.stringify(p.tool_calls) : null,
230
+ tool_sequence: p.tool_sequence ? JSON.stringify(p.tool_sequence) : null,
231
+ intent: strOrNull(taskSignals?.intent),
232
+ prompt_length: String(taskSignals?.prompt_length ?? 0),
233
+ has_error_trace: String(taskSignals?.has_error_trace === true),
234
+ has_file_reference: String(taskSignals?.has_file_reference === true),
235
+ has_tool_usage: String(p.has_tool_usage === true),
236
+ total_tool_calls: String(p.total_tool_calls ?? 0),
237
+ total_errors: String(p.total_errors ?? 0),
238
+ completed_tool_calls: String(p.completed_tool_calls ?? 0),
239
+ truncated_tool_calls: String(p.truncated_tool_calls ?? 0),
240
+ truncated_tool_summary: String(p.truncated_tool_summary ?? 0),
241
+ duration_ms: String(p.duration_ms ?? 0),
242
+ recorded_at: ev.ts,
243
+ }], { prefer: "resolution=ignore-duplicates" });
244
+ }
216
245
  // ------------------------------------------------------------------
217
246
  // HTTP helpers
218
247
  // ------------------------------------------------------------------
@@ -5,7 +5,7 @@
5
5
  * [HERE]: extensions/defaults/sal/eval/types.ts - transport-agnostic event types and the EvalSink contract; concrete adapters live in sibling files
6
6
  */
7
7
  export type EvalVariant = "sal" | "control" | "baseline";
8
- export type EvalEventType = "run_start" | "run_end" | "turn_anchor" | "memory_recalls";
8
+ export type EvalEventType = "run_start" | "run_end" | "turn_anchor" | "memory_recalls" | "tool_trace";
9
9
  /** Wire format for eval events. Adapter implementations decide how to materialize. */
10
10
  export interface EvalEventEnvelope {
11
11
  run_id: string;
@@ -1,12 +1,46 @@
1
1
  /**
2
- * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; runtime no-op when --nosal is set
3
- * [FROM]: Depends on core/extensions/types.ts, core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
2
+ * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
3
+ * [FROM]: Depends on core/extensions/types.ts (ToolExecutionStartEvent, ToolExecutionEndEvent), core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
4
4
  * [TO]: Loaded by builtin-extensions.ts as a default extension entry point
5
- * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/run_end eval events; /sal:setup writes ~/.memory-experiments/credentials.json
5
+ * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
6
6
  */
7
+ import { type EvalAdapterId } from "./eval/index.js";
7
8
  import type { ExtensionAPI } from "../../../core/extensions/types.js";
9
+ import { type AnchorResolution } from "./anchors.js";
8
10
  import { SAL_DEFAULT_WEIGHTS } from "./weights.js";
11
+ interface ToolCallRecord {
12
+ toolCallId: string;
13
+ tool: string;
14
+ startMs: number;
15
+ endMs?: number;
16
+ isError?: boolean;
17
+ }
18
+ interface TurnState {
19
+ turnId: number;
20
+ startedAtMs: number;
21
+ taskResolution?: AnchorResolution;
22
+ touchedFiles: Set<string>;
23
+ toolCalls: ToolCallRecord[];
24
+ prompt?: string;
25
+ }
26
+ interface EvalCredentials {
27
+ insforge_url?: string;
28
+ endpoint?: string;
29
+ api_key?: string;
30
+ anon_key?: string;
31
+ api_key_header?: string;
32
+ headers?: Record<string, string>;
33
+ enabled?: boolean;
34
+ allow_self_signed?: boolean;
35
+ cleanup_stale_runs?: boolean;
36
+ /** Adapter selector. When omitted, inferred from endpoint scheme (http→insforge, file/path→jsonl). */
37
+ adapter?: EvalAdapterId;
38
+ }
39
+ declare function resolveStaleCleanupEnabled(envValue: string | undefined, credentials: EvalCredentials | undefined): boolean;
9
40
  declare function normalizeExperimentId(experimentId?: string): string | undefined;
10
41
  declare function resolveSalSidecarDir(workspaceRoot: string, experimentId?: string): string;
42
+ type TaskIntent = "fix" | "feat" | "refactor" | "explain" | "explore" | "unknown";
43
+ declare function inferIntent(prompt: string): TaskIntent;
44
+ declare function buildToolTracePayload(turn: TurnState, turnDuration: number): Record<string, unknown>;
11
45
  export default function salExtension(api: ExtensionAPI): Promise<void>;
12
- export { SAL_DEFAULT_WEIGHTS, normalizeExperimentId, resolveSalSidecarDir };
46
+ export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveStaleCleanupEnabled, };
@@ -1,17 +1,53 @@
1
1
  /**
2
- * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/agent_end hooks; runtime no-op when --nosal is set
3
- * [FROM]: Depends on core/extensions/types.ts, core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
2
+ * [WHO]: SAL extension entry - enabled by default, registers --nosal/--sal-rebuild-terrain flags, /sal:coverage /sal:status /sal:setup commands, before_agent_start/tool_execution_start/tool_execution_end/agent_end hooks; runtime no-op when --nosal is set
3
+ * [FROM]: Depends on core/extensions/types.ts (ToolExecutionStartEvent, ToolExecutionEndEvent), core/runtime/turn-context.ts (publishes structuralAnchor), extensions/defaults/sal/terrain.ts, anchors.ts, weights.ts, eval/index.ts (pluggable adapters)
4
4
  * [TO]: Loaded by builtin-extensions.ts as a default extension entry point
5
- * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/run_end eval events; /sal:setup writes ~/.memory-experiments/credentials.json
5
+ * [HERE]: extensions/defaults/sal/index.ts - pluggable Structural Anchor Localization (SAL) extension; emits run_start/turn_anchor/tool_trace/run_end eval events; tool_trace captures per-turn tool usage profile (call counts, sequences, intent, errors) for self-awareness analytics
6
6
  */
7
7
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
8
8
  import { homedir } from "node:os";
9
- import { isAbsolute, join, relative } from "node:path";
9
+ import { isAbsolute, join, dirname, relative } from "node:path";
10
+ import { fileURLToPath } from "node:url";
10
11
  import { createEvalEvent, createEvalSink, } from "./eval/index.js";
11
12
  import { getTurnContext, resetTurnContext, setTurnContext } from "../../../core/runtime/turn-context.js";
12
13
  import { locateAction, locateTask } from "./anchors.js";
13
14
  import { buildTerrainIndex, checkDipCoverage, isSnapshotStale, toPosixPath, } from "./terrain.js";
14
15
  import { loadSalWeights, SAL_DEFAULT_WEIGHTS } from "./weights.js";
16
/**
 * Resolve build metadata (version, commit, branch) for eval reporting.
 *
 * Lookup order, relative to the directory of this module:
 *   1. `../../../build-meta.json` — dist layout:
 *      dist/extensions/defaults/sal/index.js → dist/build-meta.json
 *   2. Dev fallback: `package.json` three, then four, directories up, accepted
 *      only when its `name` is "@pencil-agent/nano-pencil".
 *   3. `{ version: "dev" }`.
 *
 * Every candidate is read independently: a missing, unreadable, or malformed
 * file only skips that candidate instead of aborting the whole lookup.
 * Non-string or empty metadata values are treated as absent.
 *
 * @param {string} [baseDir] Directory the candidates are resolved from.
 *   Defaults to the directory containing this module.
 * @returns {{ version: string, commitHash?: string, branch?: string }}
 *   Never throws.
 */
function loadBuildMeta(baseDir) {
    const fallback = { version: "dev" };
    // Parse a JSON file into an object; any failure means "candidate unusable".
    const readJsonObject = (file) => {
        try {
            if (!existsSync(file)) {
                return undefined;
            }
            const parsed = JSON.parse(readFileSync(file, "utf-8"));
            return parsed !== null && typeof parsed === "object" ? parsed : undefined;
        }
        catch {
            // Non-fatal by design: build metadata is best-effort telemetry.
            return undefined;
        }
    };
    const nonEmptyString = (value) => (typeof value === "string" && value.length > 0 ? value : undefined);
    let thisDir;
    try {
        thisDir = baseDir ?? dirname(fileURLToPath(import.meta.url));
    }
    catch {
        return fallback;
    }
    const threeUp = join(thisDir, "..", "..", "..");
    const distMeta = readJsonObject(join(threeUp, "build-meta.json"));
    if (distMeta) {
        return {
            version: nonEmptyString(distMeta.version) ?? fallback.version,
            commitHash: nonEmptyString(distMeta.commitHash),
            branch: nonEmptyString(distMeta.branch),
        };
    }
    const pkgCandidates = [
        join(threeUp, "package.json"), // from extensions/defaults/sal/
        join(threeUp, "..", "package.json"), // one level higher, for deeper source layouts
    ];
    for (const candidate of pkgCandidates) {
        const pkg = readJsonObject(candidate);
        if (pkg?.name === "@pencil-agent/nano-pencil") {
            return { version: nonEmptyString(pkg.version) ?? fallback.version };
        }
    }
    return fallback;
}
50
+ const BUILD_META = loadBuildMeta();
15
51
  const NOSAL_FLAG = "nosal";
16
52
  const SAL_REBUILD_FLAG = "sal-rebuild-terrain";
17
53
  const SAL_CONTEXT_BUDGET_TOKENS = 800;
@@ -25,6 +61,14 @@ const EVAL_API_KEY_ENV = "NANOPENCIL_EVAL_API_KEY";
25
61
  const EVAL_API_KEY_HEADER_ENV = "NANOPENCIL_EVAL_API_KEY_HEADER";
26
62
  const EVAL_HEADERS_JSON_ENV = "NANOPENCIL_EVAL_HEADERS_JSON";
27
63
  const EVAL_CREDENTIALS_FILE_ENV = "NANOPENCIL_EVAL_CREDENTIALS_FILE";
64
+ const EVAL_STALE_CLEANUP_ENV = "NANOPENCIL_EVAL_CLEANUP_STALE_RUNS";
65
+ const MAX_TOOL_SEQUENCE = 32;
66
+ const MAX_TOOL_SUMMARY_TOOLS = 16;
67
/**
 * Decide whether opt-in stale-run cleanup is enabled.
 *
 * The environment value wins whenever it is defined (even when it evaluates
 * to false); otherwise the credentials file must set `cleanup_stale_runs`
 * to exactly `true`.
 *
 * @param {string | undefined} envValue Raw environment variable value.
 * @param {{ cleanup_stale_runs?: boolean } | undefined} credentials Parsed credentials, if any.
 * @returns {boolean}
 */
function resolveStaleCleanupEnabled(envValue, credentials) {
    const envIsSet = envValue !== undefined;
    return envIsSet ? isTruthy(envValue) : credentials?.cleanup_stale_runs === true;
}
28
72
  function isTruthy(value) {
29
73
  if (!value)
30
74
  return false;
@@ -202,6 +246,86 @@ function extractToolFilePaths(toolName, args, workspaceRoot) {
202
246
  }
203
247
  return out;
204
248
  }
249
// Ordered intent table: the first intent with a matching pattern wins, so
// earlier entries take precedence when a prompt matches several (e.g. a prompt
// containing both "error" and "add" is classified as "fix").
// Each intent has an English pattern and a Chinese pattern.
const INTENT_PATTERNS = [
    ["fix", [
            /\b(fix|bug|error|issue|broken|crash|fail|wrong|debug|patch|repair)\b/i,
            /(修复|报错|问题|异常|崩溃|失败|错误)/,
        ]],
    ["refactor", [
            /\b(refactor|rename|extract|move|split|merge|clean\s?up|restructure)\b/i,
            /(重构|整理|拆分|重命名|抽取)/,
        ]],
    ["explain", [
            /\b(explain|how does|what is|why does|understand|read|review|audit|tell me|describe)\b/i,
            /(解释|为什么|怎么|什么|看一下|看下|评审|核审|说明)/,
        ]],
    ["feat", [
            /\b(add|implement|create|build|new|feature|support|enable|integrate)\b/i,
            /(增加|新增|实现|添加|功能|支持|接入)/,
        ]],
    ["explore", [
            /\b(find|search|look for|where|locate|explore|check|investigate|list)\b/i,
            /(查找|找|搜|在哪|检查|排查)/,
        ]],
];
/**
 * Classify a user prompt into a coarse task intent using keyword heuristics.
 *
 * @param {string} prompt Raw user prompt.
 * @returns {"fix" | "feat" | "refactor" | "explain" | "explore" | "unknown"}
 *   "unknown" when the prompt is not a string, is shorter than 4 characters,
 *   or matches no pattern.
 */
function inferIntent(prompt) {
    // Guard against non-string input: RegExp.prototype.test would otherwise
    // coerce arrays/objects to strings and classify their stringified form.
    if (typeof prompt !== "string" || prompt.length < 4) {
        return "unknown";
    }
    for (const [intent, patterns] of INTENT_PATTERNS) {
        if (patterns.some((pattern) => pattern.test(prompt))) {
            return intent;
        }
    }
    return "unknown";
}
282
/**
 * Build the bounded per-turn `tool_trace` payload.
 *
 * Aggregates the turn's tool calls per tool name (count, errors, average
 * duration of completed calls), keeps the MAX_TOOL_SUMMARY_TOOLS most-used
 * tools (ties broken by name) and the first MAX_TOOL_SEQUENCE tool names in
 * call order, and reports how much was truncated. Turns without tool calls
 * still produce a payload (`has_tool_usage: false`).
 *
 * @param {{ turnId: number, prompt?: string, toolCalls: Array<{ tool: string, startMs: number, endMs?: number, isError?: boolean }> }} turn
 *   Turn state; only `turnId`, `prompt`, and `toolCalls` are read.
 * @param {number} turnDuration Wall-clock duration of the turn in milliseconds.
 * @returns {Record<string, unknown>} Plain JSON-serializable payload.
 */
function buildToolTracePayload(turn, turnDuration) {
    // A non-string prompt is treated as empty rather than coerced by the regexes below.
    const prompt = typeof turn.prompt === "string" ? turn.prompt : "";
    const statsByTool = new Map();
    let totalErrors = 0;
    let completedToolCalls = 0;
    for (const call of turn.toolCalls) {
        const stats = statsByTool.get(call.tool) ?? { count: 0, errors: 0, totalMs: 0, completed: 0 };
        stats.count += 1;
        if (call.isError) {
            stats.errors += 1;
            totalErrors += 1;
        }
        if (call.endMs != null) {
            // Date.now() is not monotonic; clamp so a clock step cannot yield a negative duration.
            stats.totalMs += Math.max(0, call.endMs - call.startMs);
            stats.completed += 1;
            completedToolCalls += 1;
        }
        statsByTool.set(call.tool, stats);
    }
    const summarizedTools = Array.from(statsByTool.entries())
        .sort((a, b) => b[1].count - a[1].count || a[0].localeCompare(b[0]))
        .slice(0, MAX_TOOL_SUMMARY_TOOLS)
        .map(([tool, stats]) => ({
        tool,
        count: stats.count,
        errors: stats.errors,
        // null (not 0) when no call of this tool finished, so "unknown" stays distinguishable.
        avg_ms: stats.completed > 0 ? Math.round(stats.totalMs / stats.completed) : null,
        completed_calls: stats.completed,
    }));
    const sequence = turn.toolCalls.slice(0, MAX_TOOL_SEQUENCE).map((call) => call.tool);
    return {
        turn_id: turn.turnId,
        tool_calls: summarizedTools,
        tool_sequence: sequence,
        task_signals: {
            prompt_length: prompt.length,
            has_error_trace: /\b(error|exception|stack\s?trace|traceback|panic)\b/i.test(prompt),
            // Trailing \b: the extension must end at a word boundary, so host names
            // such as "docs.google.com" no longer match through ".go".
            has_file_reference: /[\w./-]+\.(tsx?|jsx?|py|go|rs|md|jsonl?)\b/.test(prompt),
            intent: inferIntent(prompt),
        },
        has_tool_usage: turn.toolCalls.length > 0,
        total_tool_calls: turn.toolCalls.length,
        total_errors: totalErrors,
        completed_tool_calls: completedToolCalls,
        truncated_tool_calls: Math.max(0, turn.toolCalls.length - sequence.length),
        truncated_tool_summary: Math.max(0, statsByTool.size - summarizedTools.length),
        duration_ms: turnDuration,
    };
}
205
329
  function buildContextInjection(resolution, snapshot) {
206
330
  if (!resolution.selected || resolution.candidates.length === 0)
207
331
  return undefined;
@@ -298,6 +422,63 @@ async function emitEval(runtime, eventType, salEnabled, payload) {
298
422
  console.error("[sal][eval] failed to emit event:", err.message);
299
423
  }
300
424
  }
425
+ /**
426
+ * Fire-and-forget PATCH to mark stale "running" eval runs as "abandoned".
427
+ * Uses raw HTTP so it stays independent of the EvalSink batching pipeline.
428
+ * Fully async — callers should void-call this and never await on the hot path.
429
+ */
430
+ async function cleanupStaleRuns(runtime) {
431
+ if (!runtime.evalEndpoint)
432
+ return;
433
+ const base = runtime.evalEndpoint.replace(/\/+$/, "");
434
+ // Mark runs from this workspace that are still "running" (but not the current run)
435
+ const url = `${base}/api/database/records/eval_runs?` +
436
+ `status=eq.running&` +
437
+ `workspace_root=eq.${encodeURIComponent(runtime.workspaceRoot)}&` +
438
+ `run_id=neq.${encodeURIComponent(runtime.evalRunId)}`;
439
+ const headers = { "Content-Type": "application/json" };
440
+ if (runtime.evalAnonKey) {
441
+ headers["apikey"] = runtime.evalAnonKey;
442
+ headers["Authorization"] = `Bearer ${runtime.evalAnonKey}`;
443
+ }
444
+ if (runtime.evalApiKey) {
445
+ headers[runtime.evalApiKeyHeader ?? "x-api-key"] = runtime.evalApiKey;
446
+ if (!runtime.evalAnonKey) {
447
+ headers["Authorization"] = `Bearer ${runtime.evalApiKey}`;
448
+ }
449
+ }
450
+ Object.assign(headers, runtime.evalHeaders);
451
+ const body = JSON.stringify({
452
+ status: "abandoned",
453
+ ended_at: new Date().toISOString(),
454
+ });
455
+ const { request: httpsRequest } = await import("node:https");
456
+ const { request: httpRequest } = await import("node:http");
457
+ const { URL } = await import("node:url");
458
+ const parsed = new URL(url);
459
+ const isHttps = parsed.protocol === "https:";
460
+ const reqFn = isHttps ? httpsRequest : httpRequest;
461
+ const port = parsed.port ? Number(parsed.port) : (isHttps ? 443 : 80);
462
+ return new Promise((resolve) => {
463
+ const req = reqFn({
464
+ hostname: parsed.hostname,
465
+ port,
466
+ path: parsed.pathname + parsed.search,
467
+ method: "PATCH",
468
+ headers: { ...headers, "Content-Length": Buffer.byteLength(body) },
469
+ timeout: 5000,
470
+ ...(isHttps && runtime.evalAllowSelfSigned ? { rejectUnauthorized: false } : {}),
471
+ }, (res) => {
472
+ // Drain response body (required to free the socket)
473
+ res.resume();
474
+ res.on("end", () => resolve());
475
+ });
476
+ req.on("error", () => resolve());
477
+ req.on("timeout", () => { req.destroy(); resolve(); });
478
+ req.write(body);
479
+ req.end();
480
+ });
481
+ }
301
482
  export default async function salExtension(api) {
302
483
  api.registerFlag(NOSAL_FLAG, {
303
484
  type: "boolean",
@@ -345,6 +526,7 @@ export default async function salExtension(api) {
345
526
  }
346
527
  const evalAllowSelfSigned = isTruthy(process.env["NANOPENCIL_EVAL_ALLOW_SELF_SIGNED"]) ||
347
528
  (credentials?.allow_self_signed ?? false);
529
+ const allowStaleCleanup = resolveStaleCleanupEnabled(process.env[EVAL_STALE_CLEANUP_ENV], credentials);
348
530
  const evalSink = createEvalSink({
349
531
  enabled: evalCollectionEnabled && !!evalEndpoint,
350
532
  adapter: evalAdapter,
@@ -360,7 +542,7 @@ export default async function salExtension(api) {
360
542
  workspaceRoot,
361
543
  weights,
362
544
  weightsSource,
363
- turn: { turnId: 0, startedAtMs: Date.now(), touchedFiles: new Set() },
545
+ turn: { turnId: 0, startedAtMs: Date.now(), touchedFiles: new Set(), toolCalls: [] },
364
546
  sidecarDir,
365
547
  evalSink,
366
548
  evalAdapter,
@@ -369,16 +551,21 @@ export default async function salExtension(api) {
369
551
  evalAnonKey,
370
552
  evalApiKeyHeader,
371
553
  evalHeaders,
554
+ evalAllowSelfSigned,
372
555
  evalEnabled: evalSink.enabled,
373
556
  evalRunId,
374
557
  evalVariantOverride,
375
558
  evalStartedAtMs: Date.now(),
376
559
  evalRunStarted: false,
377
560
  turnCounter: 0,
561
+ allowStaleCleanup,
378
562
  evalMetadata: {
379
563
  workspace_root: workspaceRoot,
380
564
  session_id: evalRunId,
381
565
  },
566
+ buildMeta: BUILD_META,
567
+ staleCleanupDone: false,
568
+ pendingRebuild: false,
382
569
  };
383
570
  const isEnabled = () => !api.getFlag(NOSAL_FLAG);
384
571
  api.registerCommand("sal:coverage", {
@@ -506,11 +693,24 @@ export default async function salExtension(api) {
506
693
  },
507
694
  });
508
695
  api.on("before_agent_start", async (event, ctx) => {
696
+ // ---------------------------------------------------------------
697
+ // ZERO-I/O CONTRACT: this handler must NEVER await filesystem work.
698
+ // runner.ts enforces a 1500ms timeout on before_agent_start; any
699
+ // I/O (terrain build, staleness probe, HTTP) risks timeout which
700
+ // silently drops SAL's appendSystemPrompt injection.
701
+ //
702
+ // All terrain building and refresh happens in the background:
703
+ // 1. Extension load → setImmediate prewarm
704
+ // 2. agent_end → async staleness check + rebuild
705
+ // 3. --sal-rebuild-terrain → flag read here, rebuild in agent_end
706
+ //
707
+ // This handler only reads runtime.snapshot (in-memory) and runs
708
+ // locateTask() (pure computation, <5ms).
709
+ // ---------------------------------------------------------------
509
710
  // Yield once so any UI frame queued via process.nextTick right before
510
- // session.prompt() (notably the user-message bubble) flushes to stdout
511
- // BEFORE we start the terrain work below. Without this, GPU block
512
- // terminals (Warp) can coalesce the whole turn's render into one block
513
- // that only paints when the turn ends.
711
+ // session.prompt() (notably the user-message bubble) flushes to stdout.
712
+ // Without this, GPU block terminals (Warp) coalesce the whole turn's
713
+ // render into one block that only paints when the turn ends.
514
714
  await new Promise((resolve) => setImmediate(resolve));
515
715
  resetTurnContext();
516
716
  runtime.turnCounter += 1;
@@ -518,20 +718,25 @@ export default async function salExtension(api) {
518
718
  turnId: runtime.turnCounter,
519
719
  startedAtMs: Date.now(),
520
720
  touchedFiles: new Set(),
721
+ toolCalls: [],
521
722
  prompt: event.prompt,
522
723
  };
523
724
  if (!runtime.evalRunStarted && runtime.evalEnabled) {
524
725
  runtime.evalRunStarted = true;
525
726
  runtime.evalMetadata.model = runtime.evalMetadata.model ?? ctx.model?.id ?? ctx.model?.name;
526
- await emitEval(runtime, "run_start", isEnabled(), {
727
+ // emitEval pushes to a batching queue; does NOT await HTTP.
728
+ void emitEval(runtime, "run_start", isEnabled(), {
527
729
  task_description: (event.prompt ?? "").slice(0, 500),
528
730
  task_file: process.env.NANOPENCIL_EXPERIMENT_TASK_FILE,
529
731
  model: runtime.evalMetadata.model ?? "unknown",
530
732
  thinking: false,
531
- commit: process.env.NANOPENCIL_EVAL_COMMIT ?? "unknown",
532
- branch: process.env.NANOPENCIL_EVAL_BRANCH ?? "unknown",
733
+ pencil_version: runtime.buildMeta.version,
734
+ commit: process.env.NANOPENCIL_EVAL_COMMIT ?? runtime.buildMeta.commitHash ?? "unknown",
735
+ branch: process.env.NANOPENCIL_EVAL_BRANCH ?? runtime.buildMeta.branch ?? "unknown",
533
736
  workspace_root: runtime.workspaceRoot,
534
737
  });
738
+ // Strategy B: optional fire-and-forget stale run cleanup.
739
+ scheduleStaleCleanup();
535
740
  }
536
741
  if (!isEnabled())
537
742
  return undefined;
@@ -541,10 +746,15 @@ export default async function salExtension(api) {
541
746
  const prompt = (event.prompt ?? "").trim();
542
747
  if (prompt.length < 12)
543
748
  return undefined;
544
- const forceRebuild = Boolean(api.getFlag(SAL_REBUILD_FLAG));
545
- const snapshot = await ensureSnapshot(runtime, forceRebuild);
749
+ // Pure memory read — if prewarm hasn't finished yet, snapshot is
750
+ // undefined and we gracefully skip SAL for this turn.
751
+ const snapshot = runtime.snapshot;
546
752
  if (!snapshot)
547
753
  return undefined;
754
+ // Record if user wants a rebuild; agent_end will act on it.
755
+ if (api.getFlag(SAL_REBUILD_FLAG)) {
756
+ runtime.pendingRebuild = true;
757
+ }
548
758
  const resolution = locateTask({
549
759
  prompt,
550
760
  cwd: runtime.workspaceRoot,
@@ -572,6 +782,18 @@ export default async function salExtension(api) {
572
782
  const paths = extractToolFilePaths(event.toolName, event.args, runtime.workspaceRoot);
573
783
  for (const p of paths)
574
784
  runtime.turn.touchedFiles.add(p);
785
+ runtime.turn.toolCalls.push({
786
+ toolCallId: event.toolCallId,
787
+ tool: event.toolName,
788
+ startMs: Date.now(),
789
+ });
790
+ });
791
+ api.on("tool_execution_end", async (event, _ctx) => {
792
+ const record = runtime.turn.toolCalls.find((tc) => tc.toolCallId === event.toolCallId);
793
+ if (record) {
794
+ record.endMs = Date.now();
795
+ record.isError = event.isError;
796
+ }
575
797
  });
576
798
  api.on("agent_end", async (_event, _ctx) => {
577
799
  const turnDuration = Math.max(0, Date.now() - runtime.turn.startedAtMs);
@@ -612,6 +834,9 @@ export default async function salExtension(api) {
612
834
  });
613
835
  }
614
836
  }
837
+ // Emit tool usage trace for self-awareness analytics.
838
+ // Always emit a bounded summary, including no-tool turns.
839
+ await emitEval(runtime, "tool_trace", isEnabled(), buildToolTracePayload(runtime.turn, turnDuration));
615
840
  if (actionRes) {
616
841
  persistTurnRecord(runtime, taskRes, actionRes);
617
842
  }
@@ -619,7 +844,32 @@ export default async function salExtension(api) {
619
844
  turnId: runtime.turn.turnId,
620
845
  startedAtMs: Date.now(),
621
846
  touchedFiles: new Set(),
847
+ toolCalls: [],
622
848
  };
849
+ // ---------------------------------------------------------------
850
+ // Background terrain refresh — runs AFTER the turn is done.
851
+ // agent_end has no timeout, so async I/O is safe here.
852
+ // This keeps the snapshot fresh for the NEXT before_agent_start
853
+ // without ever blocking the hook that has the 1500ms deadline.
854
+ // ---------------------------------------------------------------
855
+ if (isEnabled()) {
856
+ const wantRebuild = runtime.pendingRebuild;
857
+ runtime.pendingRebuild = false;
858
+ // Fire-and-forget: don't block agent_end from finishing.
859
+ void (async () => {
860
+ try {
861
+ if (wantRebuild) {
862
+ await ensureSnapshot(runtime, true);
863
+ }
864
+ else if (runtime.snapshot && await isSnapshotStale(runtime.snapshot)) {
865
+ await ensureSnapshot(runtime, true);
866
+ }
867
+ }
868
+ catch {
869
+ // Non-fatal; snapshotErrored flag is set inside ensureSnapshot.
870
+ }
871
+ })();
872
+ }
623
873
  });
624
874
  api.on("session_shutdown", async () => {
625
875
  if (!runtime.evalEnabled)
@@ -632,6 +882,56 @@ export default async function salExtension(api) {
632
882
  await runtime.evalSink.flush();
633
883
  await runtime.evalSink.close();
634
884
  });
885
+ // ------------------------------------------------------------------
886
+ // Strategy A: Emergency flush on abnormal exit.
887
+ // Best-effort — these may not complete if the process is killed hard,
888
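+ // but they cover natural event-loop drain (beforeExit) and SIGHUP/SIGTERM.
+ // Node does not emit "beforeExit" on uncaught exceptions or process.exit(),
+ // so those paths are NOT covered here.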
889
+ // IMPORTANT: no sync I/O — all async, fire-and-forget.
890
+ // ------------------------------------------------------------------
891
+ let emergencyFlushed = false;
892
+ const emergencyFlush = () => {
893
+ if (emergencyFlushed || !runtime.evalEnabled || !runtime.evalRunStarted)
894
+ return;
895
+ emergencyFlushed = true;
896
+ void emitEval(runtime, "run_end", isEnabled(), {
897
+ status: "interrupted",
898
+ turn_count: runtime.turnCounter,
899
+ total_duration_ms: Math.max(0, Date.now() - runtime.evalStartedAtMs),
900
+ })
901
+ .then(() => runtime.evalSink.flush())
902
+ .catch(() => { });
903
+ };
904
+ process.on("beforeExit", emergencyFlush);
905
+ // SIGINT is already handled by interactive-mode (double Ctrl+C → shutdown).
906
+ // We only add SIGHUP/SIGTERM as secondary safety nets; they do not replace
907
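+ // the primary session_shutdown flow.
+ // NOTE(review): installing a SIGHUP/SIGTERM listener removes Node's default
+ // exit-on-signal behavior; confirm another handler terminates the process.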
908
+ const signalFlush = () => { emergencyFlush(); };
909
+ process.on("SIGHUP", signalFlush);
910
+ process.on("SIGTERM", signalFlush);
911
+ // ------------------------------------------------------------------
912
+ // Strategy B: Opt-in stale run cleanup on first turn.
913
+ // Disabled by default because workspace_root alone cannot distinguish
914
+ // a dead run from another live nanoPencil instance in the same repo.
915
+ // Operators may re-enable it explicitly in single-run environments.
916
+ //
917
+ // When enabled, on the first before_agent_start, fire-and-forget a PATCH
918
+ // to mark stale "running" runs from the same workspace as "abandoned".
919
+ // Runs fully async — does NOT block the before_agent_start return,
920
+ // so the TUI renders the user's message immediately in GPU block
921
+ // terminals (Warp, etc.).
922
+ // ------------------------------------------------------------------
923
// Kept as a function declaration: it is hoisted and called from the
// before_agent_start handler registered earlier in salExtension.
function scheduleStaleCleanup() {
    const eligible = !runtime.staleCleanupDone &&
        runtime.evalEnabled &&
        runtime.evalEndpoint &&
        runtime.allowStaleCleanup;
    if (!eligible) {
        return;
    }
    // Mark as done before scheduling so the cleanup is attempted at most once.
    runtime.staleCleanupDone = true;
    // Defer via setImmediate so the current hook returns before any HTTP work starts.
    setImmediate(() => {
        // Failures are deliberately ignored: cleanup is best-effort.
        cleanupStaleRuns(runtime).catch(() => { });
    });
}
635
935
  // Background prewarm: start the first terrain build as soon as the TUI has
636
936
  // painted the initial frame. setImmediate defers it past the current stack
637
937
  // and any process.nextTick callbacks, so startup rendering is untouched.
@@ -646,4 +946,4 @@ export default async function salExtension(api) {
646
946
  });
647
947
  });
648
948
  }
649
- export { SAL_DEFAULT_WEIGHTS, normalizeExperimentId, resolveSalSidecarDir };
949
+ export { SAL_DEFAULT_WEIGHTS, buildToolTracePayload, inferIntent, normalizeExperimentId, resolveSalSidecarDir, resolveStaleCleanupEnabled, };