@cmetech/otto 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/resources/.managed-resources-content-hash +1 -1
  2. package/dist/resources/extensions/ollama/model-capabilities.js +15 -4
  3. package/dist/resources/extensions/ollama/ollama-discovery.js +29 -15
  4. package/dist/resources/extensions/otto/commands/release-notes/_data.js +18 -2
  5. package/dist/resources/extensions/workflow/auto-verification.js +9 -2
  6. package/dist/resources/extensions/workflow/bootstrap/crash-log.js +3 -4
  7. package/dist/resources/extensions/workflow/worktree-state-projection.js +29 -0
  8. package/package.json +6 -6
  9. package/packages/contracts/package.json +1 -1
  10. package/packages/daemon/package.json +3 -3
  11. package/packages/mcp-server/package.json +3 -3
  12. package/packages/native/package.json +1 -1
  13. package/packages/pi-agent-core/package.json +1 -1
  14. package/packages/pi-ai/package.json +1 -1
  15. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.js +1 -1
  16. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.js.map +1 -1
  17. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.test.d.ts +2 -0
  18. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.test.d.ts.map +1 -0
  19. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.test.js +43 -0
  20. package/packages/pi-coding-agent/dist/modes/interactive/components/config-selector.test.js.map +1 -0
  21. package/packages/pi-coding-agent/package.json +2 -2
  22. package/packages/pi-coding-agent/src/modes/interactive/components/config-selector.test.ts +58 -0
  23. package/packages/pi-coding-agent/src/modes/interactive/components/config-selector.ts +1 -1
  24. package/packages/pi-coding-agent/tsconfig.tsbuildinfo +1 -1
  25. package/packages/pi-tui/dist/__tests__/terminal-image.test.d.ts +2 -0
  26. package/packages/pi-tui/dist/__tests__/terminal-image.test.d.ts.map +1 -0
  27. package/packages/pi-tui/dist/__tests__/terminal-image.test.js +57 -0
  28. package/packages/pi-tui/dist/__tests__/terminal-image.test.js.map +1 -0
  29. package/packages/pi-tui/dist/terminal-image.d.ts.map +1 -1
  30. package/packages/pi-tui/dist/terminal-image.js +4 -0
  31. package/packages/pi-tui/dist/terminal-image.js.map +1 -1
  32. package/packages/pi-tui/package.json +1 -1
  33. package/packages/pi-tui/src/__tests__/terminal-image.test.ts +57 -0
  34. package/packages/pi-tui/src/terminal-image.ts +5 -0
  35. package/packages/pi-tui/tsconfig.tsbuildinfo +1 -1
  36. package/packages/rpc-client/package.json +2 -2
  37. package/pkg/package.json +1 -1
  38. package/scripts/install.js +8 -1
  39. package/src/resources/extensions/ollama/model-capabilities.ts +15 -4
  40. package/src/resources/extensions/ollama/ollama-discovery.ts +28 -12
  41. package/src/resources/extensions/ollama/tests/model-capabilities.test.ts +47 -2
  42. package/src/resources/extensions/ollama/tests/ollama-discovery-priority.test.ts +93 -0
  43. package/src/resources/extensions/otto/commands/release-notes/_data.ts +18 -2
  44. package/src/resources/extensions/workflow/auto-verification.ts +11 -2
  45. package/src/resources/extensions/workflow/bootstrap/crash-log.ts +3 -4
  46. package/src/resources/extensions/workflow/tests/crash-handler-secondary.test.ts +22 -0
  47. package/src/resources/extensions/workflow/tests/post-exec-retry-bypass.test.ts +23 -10
  48. package/src/resources/extensions/workflow/tests/worktree-state-projection.test.ts +41 -1
  49. package/src/resources/extensions/workflow/worktree-state-projection.ts +33 -0
@@ -24,11 +24,18 @@ export interface ModelCapability {
24
24
  * Keys are matched as prefixes against the model name (before the colon/tag).
25
25
  * More specific entries should appear first.
26
26
  */
27
- // Note: ollamaOptions.num_ctx is set for known model families where the context
28
- // window is authoritative. For unknown/estimated models, num_ctx is NOT sent
29
- // to avoid OOM risk — Ollama uses its own safe default instead.
27
+ // Note: ollamaOptions.num_ctx is set when the context window has an authoritative
28
+ // source — either a KNOWN_MODELS table entry, or /api/show returning context_length
29
+ // at runtime (ollama-discovery.ts syncs num_ctx with the /api/show value when present).
30
+ // When neither source provides a context window, num_ctx is NOT sent and ollama
31
+ // uses its own safe default to avoid OOM on constrained hosts.
30
32
  const KNOWN_MODELS: Array<[pattern: string, caps: ModelCapability]> = [
31
33
  // ─── Reasoning models ───────────────────────────────────────────────
34
+ // Long-variants listed before the bare `deepseek-v4` base to avoid prefix shadowing.
35
+ // Same invariant as qwen3-coder / glm / kimi / minimax families.
36
+ ["deepseek-v4-pro", { contextWindow: 1048576, reasoning: true, ollamaOptions: { num_ctx: 1048576 } }],
37
+ ["deepseek-v4-flash", { contextWindow: 1048576, reasoning: true, ollamaOptions: { num_ctx: 1048576 } }],
38
+ ["deepseek-v4", { contextWindow: 1048576, reasoning: true, ollamaOptions: { num_ctx: 1048576 } }],
32
39
  ["deepseek-r1", { contextWindow: 131072, reasoning: true, ollamaOptions: { num_ctx: 131072 } }],
33
40
  ["qwq", { contextWindow: 131072, reasoning: true, ollamaOptions: { num_ctx: 131072 } }],
34
41
 
@@ -90,11 +97,15 @@ const KNOWN_MODELS: Array<[pattern: string, caps: ModelCapability]> = [
90
97
 
91
98
  // ─── MiniMax M2 (Ollama Cloud) ─────────────────────────────────────
92
99
  // ref: minimax-m2 1M ctx — https://www.minimax.io/news/minimax-m2
93
- ["minimax-m2.7", { contextWindow: 1048576, maxTokens: 16384, ollamaOptions: { num_ctx: 1048576 } }],
100
+ // minimax-m2.7:cloud reports 196608 via /api/show despite the M2 announcement
101
+ // quoting 1M context. Cloud deployment truncates / OOMs at the announced
102
+ // number; trust the deployed backend.
103
+ ["minimax-m2.7", { contextWindow: 196608, maxTokens: 16384, reasoning: true, ollamaOptions: { num_ctx: 196608 } }],
94
104
  ["minimax-m2.5", { contextWindow: 1048576, maxTokens: 16384, ollamaOptions: { num_ctx: 1048576 } }],
95
105
  ["minimax-m2", { contextWindow: 1048576, maxTokens: 16384, ollamaOptions: { num_ctx: 1048576 } }],
96
106
 
97
107
  // ─── Gemma family ───────────────────────────────────────────────────
108
+ ["gemma4", { contextWindow: 262144, reasoning: true, ollamaOptions: { num_ctx: 262144 } }],
98
109
  ["gemma3", { contextWindow: 131072, maxTokens: 16384, ollamaOptions: { num_ctx: 131072 } }],
99
110
  ["gemma2", { contextWindow: 8192, maxTokens: 8192, ollamaOptions: { num_ctx: 8192 } }],
100
111
 
@@ -57,22 +57,28 @@ async function enrichModel(info: OllamaModelInfo, deps: ClientDeps): Promise<Dis
57
57
  const caps = getModelCapabilities(info.name);
58
58
  const parameterSize = info.details?.parameter_size ?? "";
59
59
 
60
- // /api/tags doesn't include context length; /api/show does via "{arch}.context_length" in model_info.
60
+ // /api/tags doesn't include context length; /api/show does via "{arch}.context_length"
61
+ // in model_info. Call /api/show unconditionally so its value can override a stale
62
+ // KNOWN_MODELS entry — see the priority resolution below.
61
63
  let showContextWindow: number | undefined;
62
- if (caps.contextWindow === undefined) {
63
- try {
64
- const showData = await deps.showModel(info.name);
65
- showContextWindow = extractContextFromModelInfo(showData.model_info);
66
- } catch (err) {
67
- // non-fatal: fall through to estimate
68
- if ((process.env.OTTO_DEBUG ?? process.env.OTTO_DEBUG)) console.warn(`[ollama] /api/show failed for ${info.name}:`, err instanceof Error ? err.message : String(err));
69
- }
64
+ try {
65
+ const showData = await deps.showModel(info.name);
66
+ showContextWindow = extractContextFromModelInfo(showData.model_info);
67
+ } catch (err) {
68
+ // non-fatal: fall through to table/estimate
69
+ if ((process.env.OTTO_DEBUG ?? process.env.OTTO_DEBUG)) console.warn(`[ollama] /api/show failed for ${info.name}:`, err instanceof Error ? err.message : String(err));
70
70
  }
71
71
 
72
- // Determine context window: known table > /api/show > estimate from param size > default
72
+ // Determine context window: /api/show (authoritative ollama metadata) >
73
+ // known table (fallback for old ollama versions / network failure) >
74
+ // estimate from parameter size > default. Earlier priority order put
75
+ // known table first, but the table fell behind reality on several
76
+ // model families (deepseek-v4-* missing, minimax-m2.7 1048576 vs
77
+ // real 196608). /api/show is the source of truth when reachable;
78
+ // the table only fills the gap when it isn't.
73
79
  const contextWindow =
74
- caps.contextWindow ??
75
80
  showContextWindow ??
81
+ caps.contextWindow ??
76
82
  (parameterSize ? estimateContextFromParams(parameterSize) : 8192);
77
83
 
78
84
  // Determine max tokens: known table > fraction of context > default
@@ -87,6 +93,16 @@ async function enrichModel(info: OllamaModelInfo, deps: ClientDeps): Promise<Dis
87
93
  // Detect reasoning from known table
88
94
  const reasoning = caps.reasoning ?? false;
89
95
 
96
+ // Sync num_ctx with the authoritative contextWindow. When /api/show
97
+ // wins, the table's static num_ctx would otherwise be stale and sent
98
+ // on every chat request — the very drift this priority flip was
99
+ // designed to eliminate. Keep all other ollamaOptions (num_gpu,
100
+ // sampling params, keep_alive) from the table.
101
+ const ollamaOptions =
102
+ showContextWindow !== undefined
103
+ ? { ...caps.ollamaOptions, num_ctx: showContextWindow }
104
+ : caps.ollamaOptions;
105
+
90
106
  return {
91
107
  id: info.name,
92
108
  name: humanizeModelName(info.name),
@@ -97,7 +113,7 @@ async function enrichModel(info: OllamaModelInfo, deps: ClientDeps): Promise<Dis
97
113
  maxTokens,
98
114
  sizeBytes: info.size,
99
115
  parameterSize,
100
- ollamaOptions: caps.ollamaOptions,
116
+ ollamaOptions,
101
117
  };
102
118
  }
103
119
 
@@ -149,9 +149,8 @@ describe("getModelCapabilities — long-variant overrides aren't shadowed (#4991
149
149
  assert.equal(caps.contextWindow, 262144);
150
150
  });
151
151
 
152
- it("minimax-m2.5:cloud and minimax-m2.7:cloud report 1M", () => {
152
+ it("minimax-m2.5:cloud reports 1M", () => {
153
153
  assert.equal(getModelCapabilities("minimax-m2.5:cloud").contextWindow, 1048576);
154
- assert.equal(getModelCapabilities("minimax-m2.7:cloud").contextWindow, 1048576);
155
154
  });
156
155
 
157
156
  it("minimax-m2 base resolves to 1M", () => {
@@ -256,3 +255,49 @@ describe("formatModelSize", () => {
256
255
  assert.equal(formatModelSize(500_000), "500 KB");
257
256
  });
258
257
  });
258
+
259
+ // ─── deepseek-v4 prefix-shadowing regression ────────────────────────────────
260
+ //
261
+ // deepseek-v4-pro:cloud and deepseek-v4-flash:cloud must be listed before the
262
+ // bare `deepseek-v4` entry in KNOWN_MODELS, otherwise the linear startsWith
263
+ // scan resolves any deepseek-v4-* query to the family base. Same invariant
264
+ // as the qwen3-coder / glm / kimi families already pin elsewhere.
265
+
266
+ describe("getModelCapabilities — deepseek-v4 long-variants aren't shadowed", () => {
267
+ it("deepseek-v4-pro:cloud and deepseek-v4-flash:cloud resolve to 1M (long-variants beat deepseek-v4 base)", () => {
268
+ assert.equal(getModelCapabilities("deepseek-v4-pro:cloud").contextWindow, 1048576);
269
+ assert.equal(getModelCapabilities("deepseek-v4-flash:cloud").contextWindow, 1048576);
270
+ });
271
+
272
+ it("deepseek-v4 base also resolves to 1M (parity with long-variants)", () => {
273
+ const caps = getModelCapabilities("deepseek-v4:671b");
274
+ assert.equal(caps.contextWindow, 1048576);
275
+ });
276
+
277
+ it("ollamaOptions.num_ctx mirrors contextWindow for all deepseek-v4 / gemma4 entries", () => {
278
+ // Inference time: num_ctx is what gets sent to Ollama on each chat.
279
+ // If contextWindow is right but num_ctx is stale, the model still
280
+ // gets truncated. Pin both sides.
281
+ for (const name of [
282
+ "deepseek-v4-pro:cloud",
283
+ "deepseek-v4-flash:cloud",
284
+ "deepseek-v4:671b",
285
+ "gemma4:31b",
286
+ ]) {
287
+ const caps = getModelCapabilities(name);
288
+ assert.equal(caps.ollamaOptions?.num_ctx, caps.contextWindow,
289
+ `${name}: num_ctx ${caps.ollamaOptions?.num_ctx} != contextWindow ${caps.contextWindow}`);
290
+ }
291
+ });
292
+ });
293
+
294
+ describe("getModelCapabilities — minimax-m2.7 reflects /api/show truth", () => {
295
+ it("minimax-m2.7 contextWindow is 196608, not the official-spec 1048576", () => {
296
+ // minimax-m2.7:cloud reports 196608 via /api/show even though the
297
+ // MiniMax M2 announcement quoted 1M context. Trust the deployed
298
+ // backend, not marketing — a 1M num_ctx would silently truncate
299
+ // or OOM under cloud-routing.
300
+ assert.equal(getModelCapabilities("minimax-m2.7:cloud").contextWindow, 196608);
301
+ assert.equal(getModelCapabilities("minimax-m2.7:cloud").ollamaOptions?.num_ctx, 196608);
302
+ });
303
+ });
@@ -0,0 +1,93 @@
1
+ // OTTO — Tests for ollama-discovery /api/show priority and num_ctx sync
2
+ //
3
+ // Ported from gsd-pi fc39cdc. Pins the showContextWindow > caps resolution
4
+ // order so a stale KNOWN_MODELS entry cannot mask the authoritative
5
+ // /api/show value, and pins the num_ctx mirror invariant so the priority
6
+ // flip propagates to inference requests.
7
+ import { describe, it } from "node:test";
8
+ import assert from "node:assert/strict";
9
+ import { discoverModels } from "../ollama-discovery.js";
10
+ import type { OllamaModelInfo, OllamaShowResponse, OllamaTagsResponse } from "../types.js";
11
+
12
+ function makeDeps(showResp: Partial<OllamaShowResponse>, modelInfo: Partial<OllamaModelInfo> = {}) {
13
+ return {
14
+ listModels: async (): Promise<OllamaTagsResponse> => ({
15
+ models: [{
16
+ name: "test-model:latest",
17
+ model: "test-model:latest",
18
+ modified_at: "",
19
+ size: 1_000_000,
20
+ digest: "abc",
21
+ details: { parent_model: "", format: "", family: "", families: [], parameter_size: "7B", quantization_level: "" },
22
+ ...modelInfo,
23
+ } as OllamaModelInfo],
24
+ }),
25
+ showModel: async () => ({
26
+ modelfile: "",
27
+ parameters: "",
28
+ template: "",
29
+ details: { parent_model: "", format: "", family: "", families: [], parameter_size: "7B", quantization_level: "" },
30
+ model_info: {},
31
+ ...showResp,
32
+ } as OllamaShowResponse),
33
+ };
34
+ }
35
+
36
+ describe("enrichModel — /api/show context priority", () => {
37
+ it("uses /api/show context_length over a stale KNOWN_MODELS value", async () => {
38
+ // llama3.1 in KNOWN_MODELS = 131072. If /api/show says 262144, trust it.
39
+ const deps = makeDeps({ model_info: { "llama.context_length": 262144 } }, { name: "llama3.1:8b" });
40
+ const [m] = await discoverModels(deps);
41
+ assert.equal(m.contextWindow, 262144);
42
+ });
43
+
44
+ it("falls back to KNOWN_MODELS when /api/show provides no context_length", async () => {
45
+ const deps = makeDeps({ model_info: {} }, { name: "llama3.1:8b" });
46
+ const [m] = await discoverModels(deps);
47
+ assert.equal(m.contextWindow, 131072); // KNOWN_MODELS llama3.1
48
+ });
49
+ });
50
+
51
+ describe("enrichModel — num_ctx sync with /api/show", () => {
52
+ it("syncs ollamaOptions.num_ctx with showContextWindow when /api/show wins", async () => {
53
+ const deps = makeDeps({ model_info: { "llama.context_length": 262144 } }, { name: "llama3.1:8b" });
54
+ const [m] = await discoverModels(deps);
55
+ assert.equal(m.ollamaOptions?.num_ctx, 262144,
56
+ "num_ctx must mirror the authoritative contextWindow; sending stale num_ctx defeats the priority flip");
57
+ });
58
+
59
+ it("preserves sibling ollamaOptions fields when /api/show flips num_ctx", async () => {
60
+ // Drive enrichModel with a synthetic capabilities stub: model name matches a known
61
+ // table entry, but we mock the table indirectly by injecting deps that simulate
62
+ // what enrichModel would receive. Since enrichModel resolves caps internally via
63
+ // getModelCapabilities, the cleanest assertion is at the discoverModels output:
64
+ // the returned ollamaOptions must contain ALL fields from caps.ollamaOptions plus
65
+ // the synced num_ctx — confirmed by checking the num_ctx is overridden AND the
66
+ // returned ollamaOptions object reference is NOT equal to caps.ollamaOptions (it
67
+ // must be a fresh object from the spread). This catches a naive replacement
68
+ // `ollamaOptions = { num_ctx: showContextWindow }` that drops siblings.
69
+ const deps = makeDeps({ model_info: { "llama.context_length": 262144 } }, { name: "llama3.1:8b" });
70
+ const [m] = await discoverModels(deps);
71
+ // Sanity: num_ctx flipped to /api/show value
72
+ assert.equal(m.ollamaOptions?.num_ctx, 262144);
73
+ // Real coverage: the returned object must be a spread, not a literal {num_ctx}.
74
+ // We verify this structurally by checking that every key from the original
75
+ // caps.ollamaOptions (looked up directly from the source table) is present.
76
+ // llama3.1 table currently only has num_ctx — if/when sibling fields are added,
77
+ // this test will catch a regression where the spread is removed.
78
+ // For now we pin the spread invariant: ollamaOptions must be the fresh
79
+ // shallow-spread object, not a reference to caps.ollamaOptions.
80
+ const { getModelCapabilities } = await import("../model-capabilities.js");
81
+ const tableCaps = getModelCapabilities("llama3.1:8b");
82
+ const tableNumCtx = tableCaps.ollamaOptions?.num_ctx;
83
+ assert.notEqual(tableNumCtx, 262144, "test precondition: table num_ctx differs from /api/show value");
84
+ assert.notEqual(m.ollamaOptions, tableCaps.ollamaOptions,
85
+ "returned ollamaOptions must be a fresh spread object, not a reference to the table — otherwise a future direct replacement `{num_ctx}` would silently drop sibling fields");
86
+ });
87
+
88
+ it("preserves KNOWN_MODELS num_ctx when /api/show returns no context_length", async () => {
89
+ const deps = makeDeps({ model_info: {} }, { name: "llama3.1:8b" });
90
+ const [m] = await discoverModels(deps);
91
+ assert.equal(m.ollamaOptions?.num_ctx, 131072); // unchanged from table
92
+ });
93
+ });
@@ -33,13 +33,29 @@ export interface ReleaseNotesManifest {
33
33
 
34
34
  export const RELEASE_NOTES_MANIFEST: ReleaseNotesManifest = {
35
35
  truncated: false,
36
- total: 18,
36
+ total: 19,
37
37
  oldestBundled: '1.0.0',
38
- newestBundled: '1.3.2',
38
+ newestBundled: '1.3.3',
39
39
  historyUrl: 'https://github.com/cmetech/otto-cli/blob/main/CHANGELOG.md',
40
40
  };
41
41
 
42
42
  export const RELEASE_NOTES: ReleaseNote[] = [
43
+ {
44
+ version: '1.3.3',
45
+ date: '2026-06-08',
46
+ headline: 'Maintenance patch rolling up six upstream-ported fixes that landed on `main` since 1.3.2: TUI rendering on JetBrains terminals, pattern-resolution basedir, project-root artifact placement when running inside worktrees, verification-pause diagnostics, per-PID crash-log isolation, and Ollama context-window trust.',
47
+ fixed: [
48
+ '**JetBrains terminal capabilities.** TUI rendering now provides the correct capability set when running under JetBrains\' embedded terminal (`packages/pi-tui`), eliminating layout glitches reported on IntelliJ / WebStorm / GoLand. Closes #31 (ported via PR #77).',
49
+ '**Pattern basedir resolution.** Pattern lookups now resolve against the correct base directory, restoring expected matching behavior for relative glob patterns. Closes #53 (ported via PR #74).',
50
+ '**Project root artifacts in worktrees.** Workflow runs invoked from a `git worktree` now project root-level artifacts (lockfile, configs, generated files) into the worktree itself instead of leaking into the primary checkout. Closes #90 (PR #370).',
51
+ '**Verification pause message shows failing check.** When a workflow pauses after an execution step, the message now surfaces *which* check failed instead of a generic pause string, dramatically shortening the debug loop. Closes #99 (PR #371).',
52
+ '**Crash logs append to single per-PID file.** Crash diagnostics now append to one file per process rather than fragmenting across multiple files, making post-mortem inspection coherent. Closes #343 (PR #374).',
53
+ '**Ollama `/api/show` context + `num_ctx` sync.** The Ollama integration now trusts the model\'s reported context window from `/api/show`, keeps `num_ctx` in lockstep, and corrects `KNOWN_MODELS` drift — preventing silent truncation when a model\'s real context exceeds the hard-coded table. Closes #345 (PR #375).',
54
+ ],
55
+ notes: [
56
+ 'Internal: upstream-swarm orchestrator skill + autonomy hardening (PRs #75, #76, #78, #79, #80, #81, #82) landed in this window but are tooling-only and have no runtime impact for end users.',
57
+ ],
58
+ },
43
59
  {
44
60
  version: '1.3.2',
45
61
  date: '2026-06-04',
@@ -600,6 +600,7 @@ export async function runPostUnitVerification(
600
600
  // ── Post-execution checks (run after main verification passes for execute-task units) ──
601
601
  let postExecChecks: PostExecutionCheckJSON[] | undefined;
602
602
  let postExecBlockingFailure = false;
603
+ let postExecFailureSummary: string | null = null;
603
604
 
604
605
  if (result.passed && mid && sid && tid) {
605
606
  // Check preferences — respect enhanced_verification and enhanced_verification_post
@@ -696,6 +697,13 @@ export async function runPostUnitVerification(
696
697
  const blockingCount = postExecResult.checks.filter(
697
698
  (c) => !c.passed && c.blocking
698
699
  ).length;
700
+ const firstBlockingFailure = postExecResult.checks.find(
701
+ (c) => !c.passed && c.blocking
702
+ );
703
+ if (firstBlockingFailure) {
704
+ postExecFailureSummary =
705
+ `[${firstBlockingFailure.category}] ${firstBlockingFailure.target}: ${firstBlockingFailure.message}`;
706
+ }
699
707
  ctx.ui.notify(
700
708
  `Post-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found`,
701
709
  "error"
@@ -810,12 +818,13 @@ export async function runPostUnitVerification(
810
818
  s.verificationRetryCount.delete(retryKey);
811
819
  s.verificationRetryFailureHashes.delete(retryKey);
812
820
  s.pendingVerificationRetry = null;
821
+ const failureDetail = postExecFailureSummary ?? "unknown post-execution check failure";
813
822
  ctx.ui.notify(
814
- `Post-execution checks failed — cross-task consistency issue detected, pausing for human review`,
823
+ `Post-execution checks failed (${failureDetail}) — pausing for human review`,
815
824
  "error",
816
825
  );
817
826
  await pauseAuto(ctx, pi, {
818
- message: "Post-execution checks failed: cross-task consistency issue detected.",
827
+ message: `Post-execution checks failed: ${failureDetail}.`,
819
828
  category: "unknown",
820
829
  });
821
830
  return "pause";
@@ -1,5 +1,5 @@
1
1
  /**
2
- * crash-log.ts — Write crash diagnostics to ~/.otto/crash/<timestamp>.log
2
+ * crash-log.ts — Write crash diagnostics to ~/.otto/crash/pid-<pid>.log
3
3
  *
4
4
  * Zero cross-dependencies: only uses Node.js built-ins so it can be imported
5
5
  * safely from uncaughtException / unhandledRejection handlers and from tests
@@ -11,15 +11,14 @@ import { homedir } from "node:os";
11
11
  import { join } from "node:path";
12
12
 
13
13
  /**
14
- * Write a crash log to ~/.otto/crash/<timestamp>.log (or $OTTO_HOME/crash/).
14
+ * Write a crash log to ~/.otto/crash/pid-<pid>.log (or $OTTO_HOME/crash/).
15
15
  * Never throws — must be safe to call from any error handler.
16
16
  */
17
17
  export function writeCrashLog(err: Error, source: string): void {
18
18
  try {
19
19
  const crashDir = join((process.env.OTTO_HOME ?? process.env.OTTO_HOME) ?? join(homedir(), ".otto"), "crash");
20
20
  mkdirSync(crashDir, { recursive: true });
21
- const ts = new Date().toISOString().replace(/[:.]/g, "-");
22
- const logPath = join(crashDir, `${ts}.log`);
21
+ const logPath = join(crashDir, `pid-${process.pid}.log`);
23
22
  const lines = [
24
23
  `[otto] ${source}: ${err.message}`,
25
24
  `timestamp: ${new Date().toISOString()}`,
@@ -49,6 +49,28 @@ describe('register-extension crash handler secondary fixes (#3348)', () => {
49
49
  }
50
50
  });
51
51
 
52
+ test('writeCrashLog appends repeated crashes from one process to a single file', async () => {
53
+ const tmpHome = join(tmpdir(), `otto-crash-test-${randomUUID()}`);
54
+ const origHome = process.env.OTTO_HOME;
55
+ process.env.OTTO_HOME = tmpHome;
56
+ try {
57
+ const { writeCrashLog } = await import('../bootstrap/crash-log.ts');
58
+ writeCrashLog(new Error('first crash'), 'uncaughtException');
59
+ writeCrashLog(new Error('second crash'), 'unhandledRejection');
60
+
61
+ const crashDir = join(tmpHome, 'crash');
62
+ const logs = readdirSync(crashDir).filter((f) => f.endsWith('.log'));
63
+ assert.equal(logs.length, 1, 'repeated writes in one process should share one crash log');
64
+
65
+ const content = readFileSync(join(crashDir, logs[0]), 'utf-8');
66
+ assert.ok(content.includes('first crash'), 'log should contain first error message');
67
+ assert.ok(content.includes('second crash'), 'log should contain second error message');
68
+ } finally {
69
+ process.env.OTTO_HOME = origHome;
70
+ rmSync(tmpHome, { recursive: true, force: true });
71
+ }
72
+ });
73
+
52
74
  test('_gsdRejectionGuard is registered for unhandledRejection', () => {
53
75
  installEpipeGuard();
54
76
  const listener = process.listeners("unhandledRejection").find((candidate) =>
@@ -359,13 +359,8 @@ describe("Post-execution blocking failure retry bypass", () => {
359
359
  assert.ok(messages.some((m: string) => m.includes("Verification failed") && m.includes("auto-fix attempt 1/2")));
360
360
  });
361
361
 
362
- test("post-exec failure notification mentions cross-task consistency", async () => {
363
- // This test verifies that the notification for post-exec failures includes
364
- // the appropriate message about cross-task consistency issues.
365
- // The actual post-exec failure would require specific file/output state
366
- // that's harder to set up in a unit test, but we can verify the code path exists.
367
-
368
- createBasicTask();
362
+ test("post-exec failure notification includes failing check details", async () => {
363
+ createPostExecFailureTask();
369
364
  writePreferences({
370
365
  enhanced_verification: true,
371
366
  enhanced_verification_post: true,
@@ -381,9 +376,27 @@ describe("Post-execution blocking failure retry bypass", () => {
381
376
  const vctx: VerificationContext = { s, ctx, pi };
382
377
  const result = await runPostUnitVerification(vctx, pauseAutoMock);
383
378
 
384
- // The verification should pass with our simple "echo pass" task
385
- // This test mainly confirms the wiring is correct
386
- assert.equal(result, "continue");
379
+ assert.equal(result, "pause");
380
+ assert.equal(pauseAutoMock.mock.callCount(), 1);
381
+ const notifyMessages = ctx.ui.notify.mock.calls.map((c: { arguments: unknown[] }) =>
382
+ String(c.arguments[0])
383
+ );
384
+ assert.ok(
385
+ notifyMessages.some(
386
+ (m: string) =>
387
+ m.includes("Post-execution checks failed ([import] src/broken.ts:1") &&
388
+ m.includes("pausing for human review")
389
+ )
390
+ );
391
+
392
+ const pauseCallArgs = (pauseAutoMock.mock.calls[0]?.arguments as unknown as unknown[])?.[2] as
393
+ | { message?: string }
394
+ | undefined;
395
+ assert.ok(
396
+ pauseCallArgs?.message?.includes(
397
+ "Post-execution checks failed: [import] src/broken.ts:1"
398
+ )
399
+ );
387
400
  });
388
401
 
389
402
  test("uok gate runner persists post-execution gate failures when enabled", async () => {
@@ -2,7 +2,7 @@
2
2
  // File Purpose: Worktree State Projection Module — typed-Interface contract tests for projectRootToWorktree (ADR-016).
3
3
  import test from "node:test";
4
4
  import assert from "node:assert/strict";
5
- import { mkdtempSync, rmSync, mkdirSync } from "node:fs";
5
+ import { existsSync, mkdtempSync, readFileSync, rmSync, mkdirSync, writeFileSync } from "node:fs";
6
6
  import { join } from "node:path";
7
7
  import { tmpdir } from "node:os";
8
8
  import { WorktreeStateProjection } from "../worktree-state-projection.js";
@@ -60,6 +60,46 @@ test("projectRootToWorktree is idempotent — repeated calls do not throw", () =
60
60
  }
61
61
  });
62
62
 
63
+ test("projectRootToWorktree forwards root PROJECT.md into isolated worktrees", () => {
64
+ const { dir, cleanup } = makeProjectRoot();
65
+ try {
66
+ const worktree = join(dir, ".otto/workflow/worktrees/M001");
67
+ mkdirSync(join(dir, ".otto/workflow/milestones/M001"), { recursive: true });
68
+ mkdirSync(join(worktree, ".otto/workflow"), { recursive: true });
69
+
70
+ const projectContent = [
71
+ "# Project",
72
+ "",
73
+ "## Milestone Sequence",
74
+ "",
75
+ "- [ ] M001: Foundation — Establish the runnable slice.",
76
+ "",
77
+ ].join("\n");
78
+ writeFileSync(join(dir, ".otto/workflow/PROJECT.md"), projectContent);
79
+ writeFileSync(join(dir, ".otto/workflow/REQUIREMENTS.md"), "# Requirements\n");
80
+ writeFileSync(
81
+ join(dir, ".otto/workflow/milestones/M001/M001-ROADMAP.md"),
82
+ "# M001\n",
83
+ );
84
+
85
+ const workspace = createWorkspace(worktree);
86
+ const scope = scopeMilestone(workspace, "M001");
87
+ const projection = new WorktreeStateProjection();
88
+
89
+ projection.projectRootToWorktree(scope);
90
+
91
+ const projectedProject = join(worktree, ".otto/workflow/PROJECT.md");
92
+ assert.ok(existsSync(projectedProject), "PROJECT.md is available to worktree-bound units");
93
+ assert.equal(readFileSync(projectedProject, "utf-8"), projectContent);
94
+ assert.ok(
95
+ existsSync(join(worktree, ".otto/workflow/milestones/M001/M001-ROADMAP.md")),
96
+ "milestone artifacts still project into the worktree",
97
+ );
98
+ } finally {
99
+ cleanup();
100
+ }
101
+ });
102
+
63
103
  // ─── projectWorktreeToRoot — Module contract ────────────────────────────────
64
104
 
65
105
  test("projectWorktreeToRoot exists and accepts a MilestoneScope", () => {
@@ -172,6 +172,35 @@ const ROOT_DIAGNOSTIC_FILES = [
172
172
  "metrics.json",
173
173
  ] as const;
174
174
 
175
+ /**
176
+ * Root-level .otto/workflow/ projections copied from project root into worktrees for
177
+ * compatibility reads. Project root remains authoritative; copy-back still
178
+ * excludes these markdown projections.
179
+ */
180
+ const ROOT_FORWARD_PROJECTION_FILES = [
181
+ "DECISIONS.md",
182
+ "REQUIREMENTS.md",
183
+ "PROJECT.md",
184
+ "KNOWLEDGE.md",
185
+ "OVERRIDES.md",
186
+ "QUEUE.md",
187
+ "completed-units.json",
188
+ "metrics.json",
189
+ "mcp.json",
190
+ ] as const;
191
+
192
+ function syncRootProjectionFilesToWorktree(prGsd: string, wtGsd: string): void {
193
+ mkdirSync(wtGsd, { recursive: true });
194
+
195
+ for (const file of ROOT_FORWARD_PROJECTION_FILES) {
196
+ const src = join(prGsd, file);
197
+ const dst = join(wtGsd, file);
198
+ if (!existsSync(src) || existsSync(dst)) continue;
199
+
200
+ safeCopy(src, dst, { force: false });
201
+ }
202
+ }
203
+
175
204
  // ─── Implementation cores ────────────────────────────────────────────────
176
205
  //
177
206
  // The `_*Impl` exports take raw paths so the deprecated path-string
@@ -204,6 +233,10 @@ export function _projectRootToWorktreeImpl(
204
233
  // Compare realpaths and skip when they resolve to the same physical path (#2184).
205
234
  if (isSamePath(prGsd, wtGsd)) return;
206
235
 
236
+ // Root PROJECT/REQUIREMENTS/DECISIONS projections must be readable from a
237
+ // worktree-bound unit; the project root remains authoritative.
238
+ syncRootProjectionFilesToWorktree(prGsd, wtGsd);
239
+
207
240
  // Copy milestone directory from project root to worktree — additive only.
208
241
  // force:false prevents cpSync from overwriting existing worktree files.
209
242
  // Without this, worktree-local files (e.g. VALIDATION.md written