@oh-my-pi/pi-coding-agent 15.10.2 → 15.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -1
- package/dist/types/cli/gallery-fixtures/types.d.ts +7 -1
- package/dist/types/edit/index.d.ts +0 -1
- package/dist/types/lsp/index.d.ts +0 -5
- package/dist/types/main.d.ts +11 -0
- package/dist/types/modes/components/assistant-message.d.ts +0 -9
- package/dist/types/modes/components/late-diagnostics-message.d.ts +20 -0
- package/dist/types/modes/components/read-tool-group.d.ts +6 -0
- package/dist/types/modes/components/session-selector.d.ts +16 -7
- package/dist/types/modes/components/tool-execution.d.ts +0 -18
- package/dist/types/modes/types.d.ts +4 -0
- package/dist/types/session/messages.d.ts +11 -8
- package/dist/types/session/yield-queue.d.ts +10 -1
- package/dist/types/tools/eval-render.d.ts +0 -1
- package/dist/types/tools/index.d.ts +31 -0
- package/dist/types/tools/path-utils.d.ts +5 -1
- package/dist/types/tools/read.d.ts +2 -1
- package/dist/types/tools/render-utils.d.ts +3 -1
- package/dist/types/tools/renderers.d.ts +0 -15
- package/dist/types/tools/write.d.ts +0 -2
- package/dist/types/tui/code-cell.d.ts +0 -2
- package/dist/types/tui/hyperlink.d.ts +5 -7
- package/dist/types/tui/output-block.d.ts +0 -18
- package/package.json +9 -9
- package/src/cli/gallery-cli.ts +4 -0
- package/src/cli/gallery-fixtures/codeintel.ts +0 -1
- package/src/cli/gallery-fixtures/fs.ts +68 -1
- package/src/cli/gallery-fixtures/types.ts +8 -1
- package/src/commit/agentic/agent.ts +1 -0
- package/src/edit/hashline/diff.ts +86 -0
- package/src/edit/hashline/execute.ts +14 -1
- package/src/edit/index.ts +31 -17
- package/src/edit/renderer.ts +116 -31
- package/src/eval/js/shared/prelude.txt +26 -10
- package/src/internal-urls/docs-index.generated.ts +4 -4
- package/src/lsp/index.ts +128 -52
- package/src/main.ts +54 -14
- package/src/modes/components/assistant-message.ts +3 -15
- package/src/modes/components/late-diagnostics-message.ts +60 -0
- package/src/modes/components/plan-review-overlay.ts +26 -5
- package/src/modes/components/read-tool-group.ts +415 -35
- package/src/modes/components/session-selector.ts +89 -35
- package/src/modes/components/tool-execution.ts +7 -49
- package/src/modes/components/transcript-container.ts +108 -32
- package/src/modes/controllers/event-controller.ts +6 -1
- package/src/modes/controllers/input-controller.ts +10 -2
- package/src/modes/types.ts +4 -0
- package/src/modes/utils/ui-helpers.ts +26 -5
- package/src/prompts/system/manual-continue.md +7 -0
- package/src/prompts/system/plan-mode-active.md +56 -72
- package/src/prompts/tools/eval.md +3 -1
- package/src/prompts/tools/lsp-late-diagnostic.md +8 -0
- package/src/sdk.ts +59 -1
- package/src/session/agent-session.ts +5 -3
- package/src/session/messages.ts +21 -14
- package/src/session/session-manager.ts +2 -2
- package/src/session/yield-queue.ts +20 -2
- package/src/task/executor.ts +1 -0
- package/src/tiny/title-client.ts +6 -1
- package/src/tools/bash.ts +0 -7
- package/src/tools/eval-render.ts +4 -23
- package/src/tools/find.ts +148 -106
- package/src/tools/index.ts +32 -0
- package/src/tools/path-utils.ts +19 -22
- package/src/tools/read.ts +16 -8
- package/src/tools/render-utils.ts +3 -1
- package/src/tools/renderers.ts +0 -15
- package/src/tools/ssh.ts +0 -1
- package/src/tools/todo.ts +1 -0
- package/src/tools/write.ts +3 -12
- package/src/tui/code-cell.ts +1 -6
- package/src/tui/hyperlink.ts +13 -23
- package/src/tui/output-block.ts +2 -97
|
@@ -1,125 +1,109 @@
|
|
|
1
1
|
<critical>
|
|
2
|
-
Plan mode active. You MUST perform READ-ONLY
|
|
2
|
+
Plan mode is active. You MUST perform READ-ONLY work only:
|
|
3
|
+
- You NEVER create, edit, or delete files — except the single plan file named below.
|
|
4
|
+
- You NEVER run state-changing commands (`git commit`, `npm install`, migrations) or make any other system change.
|
|
3
5
|
|
|
4
|
-
|
|
5
|
-
- Create, edit, or delete files (except plan file below)
|
|
6
|
-
- Run state-changing commands (git commit, npm install, etc.)
|
|
7
|
-
- Make any system changes
|
|
6
|
+
To leave plan mode and implement: call `resolve` with `action: "apply"`, a `reason`, and `extra: { title: "<slug>" }`, where `<slug>` matches your `local://<slug>-plan.md`. The user then picks an execution option and full write access is restored. `<slug>` may contain only letters, numbers, underscores, and hyphens.
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
You NEVER ask the user to exit plan mode for you; you MUST call `resolve` yourself.
|
|
8
|
+
You NEVER ask the user to exit plan mode, and you NEVER request approval in prose or via `{{askToolName}}` — approval happens ONLY through `resolve`.
|
|
12
9
|
</critical>
|
|
13
10
|
|
|
14
|
-
##
|
|
11
|
+
## What a plan is
|
|
12
|
+
|
|
13
|
+
The plan is an **execution spec**, not a design doc. After approval the planning conversation may be cleared or compacted, and a different engineer or a fresh agent implements straight from the file. The bar is absolute: **a competent implementer who never saw this conversation executes the file top to bottom and makes ZERO design decisions.** Every choice is already made; the file alone carries it.
|
|
15
14
|
|
|
16
|
-
|
|
15
|
+
Detail exists to remove the implementer's decisions — not to look thorough. A document padded with Non-Goals, Alternatives, or risk matrices yet leaving one real decision open is a FAILED plan. So is a short plan that reads cleanly but forces the implementer to choose. When brevity and decision-completeness collide, completeness wins.
|
|
17
16
|
|
|
18
|
-
## Plan
|
|
17
|
+
## Plan file
|
|
19
18
|
|
|
20
19
|
{{#if planExists}}
|
|
21
|
-
|
|
20
|
+
A plan already exists at `{{planFilePath}}` — read it, then update it incrementally with `{{editToolName}}`. If this request is a different task, leave that plan in place and start a fresh `local://<slug>-plan.md`.
|
|
22
21
|
{{else}}
|
|
23
|
-
Choose a short kebab-case `<slug>`
|
|
22
|
+
Choose a short kebab-case `<slug>` naming this task and write the plan to `local://<slug>-plan.md` (e.g. `local://auth-token-refresh-plan.md`). The file is never renamed on approval, so the name you choose persists — pass that same `<slug>` as `title` when you `resolve`.
|
|
24
23
|
{{/if}}
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
Use `{{editToolName}}` for incremental edits and `{{writeToolName}}` only to create or fully replace the file. You MUST write findings into the plan as you learn them — you NEVER batch all writing to the end.
|
|
27
26
|
|
|
28
|
-
##
|
|
27
|
+
## Ground every claim
|
|
29
28
|
|
|
30
|
-
You
|
|
29
|
+
You eliminate unknowns by discovering facts, not by asking.
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
- **
|
|
34
|
-
- **Preferences and tradeoffs** — intent, UX, scope boundaries, performance-vs-simplicity: not derivable from code. You MUST surface these early via `{{askToolName}}` with 2–4 mutually exclusive options and a recommended default. If left unanswered, proceed with the default and record it under Assumptions.
|
|
31
|
+
- **Discoverable facts** (file locations, current behavior, signatures, configs): you MUST find them yourself with `find`, `search`, `read`, or parallel `explore` subagents. Every path, symbol, signature, and behavior the plan states as fact MUST come from something you actually read this session. Anything you could not confirm you mark inline (`unverified — confirm first`); you NEVER present a guess as settled. Ask only when several real candidates survive exploration — then present them with a recommendation.
|
|
32
|
+
- **Preferences and tradeoffs** (intent, UX, scope edges, performance-vs-simplicity): not derivable from code. Surface these early via `{{askToolName}}` with 2–4 mutually exclusive options and a recommended default. Left unanswered → proceed with the default and record it under Assumptions.
|
|
35
33
|
|
|
36
|
-
Every question MUST
|
|
34
|
+
Every question MUST change the plan or settle a load-bearing choice. Batch them. You NEVER ask what exploration answers, and you NEVER ask filler.
|
|
37
35
|
|
|
38
36
|
{{#if reentry}}
|
|
39
37
|
## Re-entry
|
|
40
38
|
|
|
41
39
|
<procedure>
|
|
42
40
|
1. Read the existing plan.
|
|
43
|
-
2.
|
|
44
|
-
3.
|
|
45
|
-
- **Different task** → overwrite the plan.
|
|
46
|
-
- **Same task, continuing** → update and delete outdated sections.
|
|
41
|
+
2. Compare the new request against it.
|
|
42
|
+
3. Different task → overwrite it. Same task continuing → update it and delete outdated sections.
|
|
47
43
|
4. Call `resolve` with `action: "apply"` and `extra: { title }` when complete.
|
|
48
44
|
</procedure>
|
|
49
45
|
{{/if}}
|
|
50
46
|
|
|
51
47
|
{{#if iterative}}
|
|
52
|
-
## Workflow —
|
|
48
|
+
## Workflow — iterative
|
|
53
49
|
|
|
54
50
|
<procedure>
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
You MUST use `{{askToolName}}` to resolve preferences and tradeoffs (see Resolving Unknowns). Batch questions; never ask what exploration answers.
|
|
60
|
-
|
|
61
|
-
### 3. Update incrementally
|
|
62
|
-
You MUST use `{{editToolName}}` to revise the plan file as you learn.
|
|
63
|
-
|
|
64
|
-
### 4. Calibrate
|
|
65
|
-
- Large, unspecified task → multiple interview rounds.
|
|
66
|
-
- Small, well-specified task → few or no questions.
|
|
51
|
+
1. **Explore** — use `find`/`search`/`read` to ground in the real code; hunt for existing functions, utilities, and conventions to reuse before proposing anything new.
|
|
52
|
+
2. **Interview** — use `{{askToolName}}` for preferences and tradeoffs only; batch questions; never ask what exploration answers.
|
|
53
|
+
3. **Update** — revise the plan with `{{editToolName}}` as you learn.
|
|
54
|
+
4. **Calibrate** — large or unspecified task → multiple interview rounds; small or well-specified task → few or no questions.
|
|
67
55
|
</procedure>
|
|
68
56
|
{{else}}
|
|
69
|
-
## Workflow —
|
|
57
|
+
## Workflow — parallel
|
|
70
58
|
|
|
71
59
|
<procedure>
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
You MUST draft an approach from your exploration, weigh trade-offs briefly, then commit to one. For large or cross-cutting changes you MAY spawn a planning/critique subagent to pressure-test the approach before you commit.
|
|
77
|
-
|
|
78
|
-
### Phase 3 — Review
|
|
79
|
-
You MUST read the critical files you intend to touch to confirm the approach holds against the real code. You MUST verify the plan still matches the original request. You SHOULD use `{{askToolName}}` to close remaining preference questions.
|
|
80
|
-
|
|
81
|
-
### Phase 4 — Write the plan
|
|
82
|
-
You MUST write the plan file (see **Plan File** above) per **The Plan** below.
|
|
60
|
+
1. **Understand** — focus on the request and the code behind it. Launch parallel `explore` subagents (via `task`) when scope spans areas; give each a distinct focus (existing implementations, related components, test patterns). Hunt for reusable code before proposing new.
|
|
61
|
+
2. **Design** — draft one approach from what you found, weigh tradeoffs briefly, then commit. For large or cross-cutting work you MAY spawn a critique subagent to pressure-test it before committing.
|
|
62
|
+
3. **Review** — read the files you intend to touch and confirm the approach holds against the real code; confirm the plan still answers the literal request; use `{{askToolName}}` to close any remaining preference questions.
|
|
63
|
+
4. **Write** — write the plan per **Plan contents** below.
|
|
83
64
|
</procedure>
|
|
84
65
|
{{/if}}
|
|
85
66
|
|
|
86
|
-
##
|
|
67
|
+
## Plan contents
|
|
87
68
|
|
|
88
|
-
|
|
69
|
+
Write scannable markdown using these sections. Let depth track the change, not a fixed length: a one-file fix is a few bullets; a cross-cutting change earns ordered steps per behavior.
|
|
89
70
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
-
|
|
93
|
-
-
|
|
94
|
-
-
|
|
95
|
-
-
|
|
96
|
-
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
71
|
+
- **Context** — restate the literal ask, why it is needed, and the intended end state, in 2–4 sentences. Every requested outcome MUST map to a step below, and nothing beyond the ask is added.
|
|
72
|
+
- **Approach** — the load-bearing section: the ordered steps that make the change. Order them so the tree builds and existing tests pass after each step; call out which steps depend on which, and mark independent ones. Group steps by behavior, never one-per-file. For each step:
|
|
73
|
+
- State the concrete edit — verb + exact target + the new behavior — never just an area to "update" or "handle".
|
|
74
|
+
- Name existing functions/utilities to reuse, with paths; introduce new code only with a one-line note that no existing equivalent was found.
|
|
75
|
+
- For a new or changed symbol whose callers must fit it, or whose value is load-bearing (enum member, error/log string, config key, wire/JSON field), give the exact signature or literal.
|
|
76
|
+
- For a rename, signature change, or removal, list every callsite to update (or the exact `search` that returns exactly them) and what to delete — default to a clean cutover with no dead code or compatibility aliases.
|
|
77
|
+
- When rival patterns exist, name the one to copy and the one to avoid.
|
|
78
|
+
- Specify the edge and failure handling for each new path (empty, missing, conflict, error), or state that none is needed and why.
|
|
79
|
+
- **Critical files & anchors** — the ≤5 files that disambiguate non-obvious work, each as path + the symbol or region + a one-line reason. Line numbers are hints; the implementer re-reads before editing. Skip files already obvious from the Approach.
|
|
80
|
+
- **Verification** — how to prove it works end-to-end. Include at least one check that exercises the NEW behavior (concrete input → expected observable output), not only build/typecheck or the existing suite. Give exact commands plus what they need to run: working directory, env vars, fixtures, and how to reach a manual UI or state. Tie a risky step's check to that step.
|
|
81
|
+
- **Assumptions & contingencies** — only the decisions you made that the user might want to override; you NEVER park in this section a decision the implementer must make — that belongs in Approach. For any load-bearing assumption that could prove false during execution, pre-decide the fallback ("if reality is X, do Y instead") so the implementer never stalls with the conversation gone.
|
|
82
|
+
|
|
83
|
+
Cut anything that removes no decision: restated invariants, unaffected behavior, mechanical repetition, narration. Spell out anything an implementer would otherwise have to invent.
|
|
100
84
|
|
|
101
85
|
<directives>
|
|
102
|
-
- You NEVER include sections
|
|
103
|
-
- You NEVER
|
|
104
|
-
- You NEVER
|
|
86
|
+
- You NEVER include decision-free sections — Non-Goals, Out of Scope, Alternatives Considered, Risks/Mitigations, Future Work. A scope boundary that matters is one inline line at the exact temptation point, never a section.
|
|
87
|
+
- You NEVER reference the planning conversation ("the option we chose above", "as discussed") — the reader will not have it. State the choice and its reason inline.
|
|
88
|
+
- You NEVER invent schema, precedence, or fallback policy the request did not establish, unless it prevents a concrete implementation mistake — then state it as a decision, not an open question.
|
|
105
89
|
</directives>
|
|
106
90
|
|
|
107
91
|
<caution>
|
|
108
|
-
|
|
92
|
+
On approval the user picks one execution mode:
|
|
109
93
|
- **Approve and execute** — execution starts in fresh context (session cleared).
|
|
110
|
-
- **Approve and compact context** — distills this discussion into a summary, then executes
|
|
111
|
-
- **Approve and keep context** — executes
|
|
94
|
+
- **Approve and compact context** — distills this discussion into a summary, then executes here.
|
|
95
|
+
- **Approve and keep context** — executes here, preserving exploration history.
|
|
112
96
|
|
|
113
|
-
All three rely on the
|
|
97
|
+
All three rely on the file being self-contained.
|
|
114
98
|
</caution>
|
|
115
99
|
|
|
116
100
|
<critical>
|
|
117
|
-
|
|
101
|
+
Before you `resolve`, apply the test: an engineer who never saw this conversation executes every step without making one design decision and can tell, at each step, whether it worked. If any step would force a choice or leave "done" ambiguous, deepen it first.
|
|
118
102
|
|
|
119
103
|
Your turn ends ONLY by:
|
|
120
|
-
1. Using `{{askToolName}}` to gather
|
|
121
|
-
2. Calling `resolve` with `action: "apply"`, `reason`, and `extra: { title: "<slug>" }` (the slug of your `local://<slug>-plan.md`)
|
|
104
|
+
1. Using `{{askToolName}}` to gather requirements or choose between approaches, OR
|
|
105
|
+
2. Calling `resolve` with `action: "apply"`, `reason`, and `extra: { title: "<slug>" }` (the slug of your `local://<slug>-plan.md`).
|
|
122
106
|
|
|
123
|
-
You NEVER
|
|
107
|
+
You NEVER request plan approval via prose or `{{askToolName}}`; you MUST use `resolve`.
|
|
124
108
|
You MUST keep going until the plan is decision-complete.
|
|
125
109
|
</critical>
|
|
@@ -22,7 +22,7 @@ Cell fields:
|
|
|
22
22
|
</instruction>
|
|
23
23
|
|
|
24
24
|
<prelude>
|
|
25
|
-
{{#ifAll py js}}Same helpers in both runtimes with the same positional argument order. Python: trailing options as keyword args. JavaScript: trailing options
|
|
25
|
+
{{#ifAll py js}}Same helpers in both runtimes with the same positional argument order. Python: trailing options as keyword args. JavaScript: trailing options are a single trailing object literal, never positional — passing options positionally (or any extra positional arg) throws. JavaScript helpers are async and `await`able; Python helpers run synchronously.{{else}}{{#if py}}Helpers run synchronously. Trailing options are keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are a single trailing object literal, never positional — passing options positionally (or any extra positional arg) throws.{{/if}}{{/ifAll}}
|
|
26
26
|
```
|
|
27
27
|
display(value) → None
|
|
28
28
|
Render a value in the current cell output.
|
|
@@ -48,6 +48,8 @@ llm(prompt, model?="default", system?=None, schema?=None) → str | dict
|
|
|
48
48
|
Oneshot, stateless LLM call (no history, no tools). `model` picks a tier: "smol" (fast), "default" (this session's model), "slow" (most capable). Pass `system` for a system prompt. Pass a JSON-Schema `schema` to force structured output and get the parsed object back; otherwise returns the completion text.
|
|
49
49
|
{{#if spawns}}agent(prompt, agent_type?="task", model?=None, context?=None, label?=None, schema?=None) → str | dict
|
|
50
50
|
Run a subagent and return its final output. Defaults to the bundled "task" agent; pass `agent_type`/`agentType` for another discovered agent. Pass a JSON-Schema `schema` to force structured output and get the parsed object back.
|
|
51
|
+
{{#if js}} In JS, pass options as one trailing object — never positional: agent(prompt, { agentType, context, schema }).
|
|
52
|
+
{{/if}}
|
|
51
53
|
{{/if}}
|
|
52
54
|
parallel(thunks) → list
|
|
53
55
|
Run thunks (callables) through a bounded pool, preserving input order. The pool is as wide as a `task` tool batch (tracks the `task.maxConcurrency` setting), so fan out as wide as the work divides — don't pre-shrink it. Barrier: returns once all finish; a thunk that throws propagates.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<system-notice>
|
|
2
|
+
{{#if multiple}}Late LSP diagnostics arrived for {{files.length}} files after their edits returned:
|
|
3
|
+
{{else}}Late LSP diagnostics arrived after the edit returned:
|
|
4
|
+
{{/if}}
|
|
5
|
+
{{#each files}}{{this.path}} — {{this.summary}}
|
|
6
|
+
{{#each this.messages}}{{this}}
|
|
7
|
+
{{/each}}{{#unless @last}}
|
|
8
|
+
{{/unless}}{{/each}}</system-notice>
|
package/src/sdk.ts
CHANGED
|
@@ -91,6 +91,7 @@ import { discoverAndLoadMCPTools, MCPManager, type MCPToolsLoadResult } from "./
|
|
|
91
91
|
import { resolveMemoryBackend } from "./memory-backend";
|
|
92
92
|
import type { MnemopiSessionState } from "./mnemopi/state";
|
|
93
93
|
import asyncResultTemplate from "./prompts/tools/async-result.md" with { type: "text" };
|
|
94
|
+
import lateDiagnosticTemplate from "./prompts/tools/lsp-late-diagnostic.md" with { type: "text" };
|
|
94
95
|
import { AgentRegistry, MAIN_AGENT_ID } from "./registry/agent-registry";
|
|
95
96
|
import {
|
|
96
97
|
collectEnvSecrets,
|
|
@@ -110,7 +111,12 @@ import {
|
|
|
110
111
|
type SnapshotResponse,
|
|
111
112
|
writeAuthBrokerSnapshotCache,
|
|
112
113
|
} from "./session/auth-storage";
|
|
113
|
-
import {
|
|
114
|
+
import {
|
|
115
|
+
type CustomMessage,
|
|
116
|
+
convertToLlm,
|
|
117
|
+
LSP_LATE_DIAGNOSTIC_MESSAGE_TYPE,
|
|
118
|
+
wrapSteeringForModel,
|
|
119
|
+
} from "./session/messages";
|
|
114
120
|
import { getRestorableSessionModels, SessionManager } from "./session/session-manager";
|
|
115
121
|
import { closeAllConnections } from "./ssh/connection-manager";
|
|
116
122
|
import { unmountAll } from "./ssh/sshfs-mount";
|
|
@@ -143,6 +149,7 @@ import {
|
|
|
143
149
|
BUILTIN_TOOLS,
|
|
144
150
|
computeEssentialBuiltinNames,
|
|
145
151
|
createTools,
|
|
152
|
+
type DeferredDiagnosticsEntry,
|
|
146
153
|
discoverStartupLspServers,
|
|
147
154
|
EditTool,
|
|
148
155
|
EvalTool,
|
|
@@ -229,6 +236,42 @@ function buildAsyncResultBatchMessage(entries: AsyncResultEntry[]): CustomMessag
|
|
|
229
236
|
};
|
|
230
237
|
}
|
|
231
238
|
|
|
239
|
+
/** Structured payload attached to a late-diagnostics message: one record per reported file. */
type LateDiagnosticsDetails = {
	files: Array<{ path: string; summary: string; errored: boolean; messages: string[] }>;
};

/**
 * Fold a batch of deferred diagnostics entries into a single custom message.
 *
 * @param entries - Deferred diagnostics entries to report together.
 * @returns `null` when the batch is empty; otherwise a `custom` message whose
 * `content` is the rendered late-diagnostic template and whose `details` carry
 * the per-file records.
 */
function buildLateDiagnosticsBatchMessage(
	entries: DeferredDiagnosticsEntry[],
): CustomMessage<LateDiagnosticsDetails> | null {
	if (entries.length === 0) {
		return null;
	}

	// Records handed to the template; only these four fields are copied from each entry.
	const files = entries.map(({ path, summary, messages, errored }) => ({ path, summary, messages, errored }));

	// `details` receives its own record objects instead of sharing the template's.
	const details: LateDiagnosticsDetails = {
		files: files.map(({ path, summary, errored, messages }) => ({ path, summary, errored, messages })),
	};

	// The template switches its heading on `multiple` (more than one file in the batch).
	const content = prompt.render(lateDiagnosticTemplate, { multiple: files.length > 1, files });

	return {
		role: "custom",
		customType: LSP_LATE_DIAGNOSTIC_MESSAGE_TYPE,
		content,
		display: true,
		attribution: "agent",
		details,
		timestamp: Date.now(),
	};
}
|
|
274
|
+
|
|
232
275
|
function buildMcpNotificationBatchMessage(entries: McpNotificationEntry[]): AgentMessage | null {
|
|
233
276
|
const resources: McpNotificationEntry[] = [];
|
|
234
277
|
const seen = new Set<string>();
|
|
@@ -1267,6 +1310,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1267
1310
|
if (model) return formatModelString(model);
|
|
1268
1311
|
return undefined;
|
|
1269
1312
|
};
|
|
1313
|
+
// Per-path mutation counter shared across edit/write tools. Late-diagnostics
|
|
1314
|
+
// entries capture it at fetch time and are dropped at injection if a newer
|
|
1315
|
+
// mutation (any tool) bumped it in the meantime.
|
|
1316
|
+
const fileMutationVersions = new Map<string, number>();
|
|
1270
1317
|
const toolSession: ToolSession = {
|
|
1271
1318
|
get cwd() {
|
|
1272
1319
|
return sessionManager.getCwd();
|
|
@@ -1312,6 +1359,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1312
1359
|
recordEvalSubagentUsage: output => sessionManager.recordEvalSubagentOutput(output),
|
|
1313
1360
|
getClientBridge: () => session?.clientBridge,
|
|
1314
1361
|
getCompactContext: () => session.formatCompactContext(),
|
|
1362
|
+
queueDeferredDiagnostics: entry => session?.yieldQueue.enqueue(LSP_LATE_DIAGNOSTIC_MESSAGE_TYPE, entry),
|
|
1363
|
+
bumpFileMutationVersion: path => {
|
|
1364
|
+
const next = (fileMutationVersions.get(path) ?? 0) + 1;
|
|
1365
|
+
fileMutationVersions.set(path, next);
|
|
1366
|
+
return next;
|
|
1367
|
+
},
|
|
1368
|
+
getFileMutationVersion: path => fileMutationVersions.get(path) ?? 0,
|
|
1315
1369
|
getTodoPhases: () => session.getTodoPhases(),
|
|
1316
1370
|
setTodoPhases: phases => session.setTodoPhases(phases),
|
|
1317
1371
|
isMCPDiscoveryEnabled: () => session.isMCPDiscoveryEnabled(),
|
|
@@ -2167,6 +2221,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
2167
2221
|
session.yieldQueue.register<McpNotificationEntry>("mcp-notification", {
|
|
2168
2222
|
build: buildMcpNotificationBatchMessage,
|
|
2169
2223
|
});
|
|
2224
|
+
session.yieldQueue.register<DeferredDiagnosticsEntry>(LSP_LATE_DIAGNOSTIC_MESSAGE_TYPE, {
|
|
2225
|
+
isStale: entry => entry.isStale(),
|
|
2226
|
+
build: buildLateDiagnosticsBatchMessage,
|
|
2227
|
+
});
|
|
2170
2228
|
|
|
2171
2229
|
// Attach the live session to the pre-registered ref so peers can route IRC
|
|
2172
2230
|
// messages here. Refresh sessionFile in case it was unavailable at pre-register
|
|
@@ -1174,7 +1174,6 @@ export class AgentSession {
|
|
|
1174
1174
|
this.agent.setRawSseEventInterceptor(this.#onSseEvent);
|
|
1175
1175
|
this.yieldQueue = new YieldQueue({
|
|
1176
1176
|
isStreaming: () => this.isStreaming,
|
|
1177
|
-
injectStreaming: message => this.agent.followUp(message),
|
|
1178
1177
|
injectIdle: async messages => {
|
|
1179
1178
|
const first = messages[0];
|
|
1180
1179
|
if (!first) return;
|
|
@@ -1189,7 +1188,10 @@ export class AgentSession {
|
|
|
1189
1188
|
);
|
|
1190
1189
|
},
|
|
1191
1190
|
});
|
|
1192
|
-
|
|
1191
|
+
// Background-job completions / late diagnostics are pulled into the run at
|
|
1192
|
+
// each step boundary as non-interrupting asides (see Agent.getAsideMessages),
|
|
1193
|
+
// so they reach the model between requests without waiting for a yield.
|
|
1194
|
+
this.agent.setAsideMessageProvider(() => this.yieldQueue.drainLazy());
|
|
1193
1195
|
this.#convertToLlm = config.convertToLlm ?? convertToLlm;
|
|
1194
1196
|
this.#rebuildSystemPrompt = config.rebuildSystemPrompt;
|
|
1195
1197
|
this.#getMcpServerInstructions = config.getMcpServerInstructions;
|
|
@@ -3040,7 +3042,7 @@ export class AgentSession {
|
|
|
3040
3042
|
this.#isDisposed = true;
|
|
3041
3043
|
this.#pendingBackgroundExchanges = [];
|
|
3042
3044
|
this.yieldQueue.clear();
|
|
3043
|
-
this.agent.
|
|
3045
|
+
this.agent.setAsideMessageProvider(undefined);
|
|
3044
3046
|
this.#evalExecutionDisposing = true;
|
|
3045
3047
|
try {
|
|
3046
3048
|
if (this.#extensionRunner?.hasHandlers("session_shutdown")) {
|
package/src/session/messages.ts
CHANGED
|
@@ -34,6 +34,7 @@ import type { OutputMeta } from "../tools/output-meta";
|
|
|
34
34
|
import { formatOutputNotice } from "../tools/output-meta";
|
|
35
35
|
|
|
36
36
|
export const SKILL_PROMPT_MESSAGE_TYPE = "skill-prompt";
|
|
37
|
+
export const LSP_LATE_DIAGNOSTIC_MESSAGE_TYPE = "lsp-late-diagnostic";
|
|
37
38
|
|
|
38
39
|
export interface SkillPromptDetails {
|
|
39
40
|
name: string;
|
|
@@ -71,21 +72,29 @@ export function isSilentAbort(errorMessage: string | undefined): boolean {
|
|
|
71
72
|
}
|
|
72
73
|
|
|
73
74
|
/** Reason threaded through `AbortController.abort(reason)` when the user aborts
|
|
74
|
-
* the turn with Esc (see `AgentSession.abort`). The agent
|
|
75
|
-
*
|
|
76
|
-
* a deliberate
|
|
75
|
+
* the turn with Esc (see `AgentSession.abort`). The agent keeps it on the
|
|
76
|
+
* aborted assistant message's `errorMessage` so queued follow-ups/tool-result
|
|
77
|
+
* placeholders can distinguish a deliberate interrupt from a bare lifecycle
|
|
78
|
+
* abort, but interactive renderers suppress this redundant transcript line. */
|
|
77
79
|
export const USER_INTERRUPT_LABEL = "Interrupted by user";
|
|
78
80
|
|
|
81
|
+
/**
 * Report whether an abort's `errorMessage` is the user-interrupt label.
 *
 * @param errorMessage - The `errorMessage` of an aborted assistant message, if any.
 * @returns `true` only when `errorMessage` is exactly `USER_INTERRUPT_LABEL`.
 */
export function isUserInterruptAbort(errorMessage: string | undefined): boolean {
	const matchesInterruptLabel = USER_INTERRUPT_LABEL === errorMessage;
	return matchesInterruptLabel;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Decide whether an abort reason should be rendered.
 *
 * @param errorMessage - The `errorMessage` of an aborted assistant message, if any.
 * @returns `false` when the abort is a silent abort or a user-interrupt abort;
 * `true` otherwise.
 */
export function shouldRenderAbortReason(errorMessage: string | undefined): boolean {
	if (isSilentAbort(errorMessage)) {
		return false;
	}
	return !isUserInterruptAbort(errorMessage);
}
|
|
88
|
+
|
|
79
89
|
/** Sentinel `errorMessage` the agent stamps on any abort that carried no custom
|
|
80
90
|
* reason (bare `abort()`). Renderers treat it as "no specific reason given". */
|
|
81
91
|
const GENERIC_ABORT_SENTINEL = "Request was aborted";
|
|
82
92
|
|
|
83
93
|
/** Resolve the operator-facing label for an aborted assistant turn. A custom
|
|
84
|
-
* abort reason
|
|
85
|
-
*
|
|
86
|
-
*
|
|
87
|
-
*
|
|
88
|
-
* they stay in lockstep. */
|
|
94
|
+
* abort reason threaded onto `errorMessage` is returned verbatim; aborts with
|
|
95
|
+
* no threaded reason fall back to the retry-aware generic label. Call
|
|
96
|
+
* `shouldRenderAbortReason` before rendering when user interrupts should stay
|
|
97
|
+
* visually quiet. */
|
|
89
98
|
export function resolveAbortLabel(errorMessage: string | undefined, retryAttempt = 0): string {
|
|
90
99
|
if (errorMessage && errorMessage !== GENERIC_ABORT_SENTINEL && !isSilentAbort(errorMessage)) {
|
|
91
100
|
return errorMessage;
|
|
@@ -524,7 +533,7 @@ export function convertToLlm(messages: AgentMessage[]): Message[] {
|
|
|
524
533
|
case "custom":
|
|
525
534
|
case "hookMessage": {
|
|
526
535
|
const content = typeof m.content === "string" ? [{ type: "text" as const, text: m.content }] : m.content;
|
|
527
|
-
const role = "
|
|
536
|
+
const role = "developer";
|
|
528
537
|
const attribution = m.attribution;
|
|
529
538
|
return {
|
|
530
539
|
role,
|
|
@@ -564,17 +573,15 @@ export function convertToLlm(messages: AgentMessage[]): Message[] {
|
|
|
564
573
|
const inner = file.content ? `\n${file.content}\n` : "\n";
|
|
565
574
|
return `<file path="${file.path}">${inner}</file>`;
|
|
566
575
|
})
|
|
567
|
-
.join("\n
|
|
568
|
-
const content: (TextContent | ImageContent)[] = [
|
|
569
|
-
{ type: "text" as const, text: `<system-reminder>\n${fileContents}\n</system-reminder>` },
|
|
570
|
-
];
|
|
576
|
+
.join("\n");
|
|
577
|
+
const content: (TextContent | ImageContent)[] = [{ type: "text" as const, text: fileContents }];
|
|
571
578
|
for (const file of m.files) {
|
|
572
579
|
if (file.image) {
|
|
573
580
|
content.push(file.image);
|
|
574
581
|
}
|
|
575
582
|
}
|
|
576
583
|
return {
|
|
577
|
-
role: "
|
|
584
|
+
role: "developer",
|
|
578
585
|
content,
|
|
579
586
|
attribution: "user",
|
|
580
587
|
timestamp: m.timestamp,
|
|
@@ -753,8 +753,8 @@ export function buildSessionContext(
|
|
|
753
753
|
// turn's tool results are off the selected path: its result children live on a
|
|
754
754
|
// sibling branch, or it is the leaf itself (results are children below it). Left
|
|
755
755
|
// in place, `transformMessages` fabricates one synthetic "aborted"/"No result
|
|
756
|
-
// provided" result per dangling call
|
|
757
|
-
//
|
|
756
|
+
// provided" result per dangling call, which render as phantom failed calls and
|
|
757
|
+
// re-inject the failed batch into the model's
|
|
758
758
|
// context — the rewind/restore loop.
|
|
759
759
|
//
|
|
760
760
|
// Stripping is necessary but not sufficient: a *modified* assistant turn that still
|
|
@@ -10,7 +10,7 @@ export interface YieldDispatcher<P> {
|
|
|
10
10
|
|
|
11
11
|
export interface YieldQueueOptions {
|
|
12
12
|
isStreaming: () => boolean;
|
|
13
|
-
injectStreaming(msg: AgentMessage): void;
|
|
13
|
+
injectStreaming?(msg: AgentMessage): void;
|
|
14
14
|
injectIdle(messages: AgentMessage[]): Promise<void>;
|
|
15
15
|
scheduleIdleFlush(run: () => Promise<void>): void;
|
|
16
16
|
}
|
|
@@ -85,7 +85,7 @@ export class YieldQueue {
|
|
|
85
85
|
if (!message) continue;
|
|
86
86
|
if (mode === "streaming") {
|
|
87
87
|
try {
|
|
88
|
-
this.#options.injectStreaming(message);
|
|
88
|
+
this.#options.injectStreaming?.(message);
|
|
89
89
|
} catch (error) {
|
|
90
90
|
logger.warn("Yield queue streaming dispatch failed", { kind, error: formatError(error) });
|
|
91
91
|
}
|
|
@@ -102,6 +102,24 @@ export class YieldQueue {
|
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Snapshot and remove all queued entries, returning one lazy thunk per kind.
|
|
107
|
+
* Each thunk applies the dispatcher's staleness filter and builds the batched
|
|
108
|
+
* message only when called — so the consumer (the agent loop) decides, at the
|
|
109
|
+
* moment it injects, whether the message is still worth delivering (a thunk may
|
|
110
|
+
* return null to skip). Background-job completions and late diagnostics reach
|
|
111
|
+
* the model between requests without the agent having to stop.
|
|
112
|
+
*/
|
|
113
|
+
drainLazy(): Array<() => AgentMessage | null> {
|
|
114
|
+
const thunks: Array<() => AgentMessage | null> = [];
|
|
115
|
+
for (const [kind, dispatcher] of this.#dispatchers) {
|
|
116
|
+
const entries = this.#drain(kind);
|
|
117
|
+
if (entries.length === 0) continue;
|
|
118
|
+
thunks.push(() => this.#build(kind, dispatcher, entries));
|
|
119
|
+
}
|
|
120
|
+
return thunks;
|
|
121
|
+
}
|
|
122
|
+
|
|
105
123
|
clear(): void {
|
|
106
124
|
this.#entries.clear();
|
|
107
125
|
this.#idleFlushPending = false;
|
package/src/task/executor.ts
CHANGED
|
@@ -1501,6 +1501,7 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
|
|
|
1501
1501
|
await awaitAbortable(
|
|
1502
1502
|
session.prompt(reminder, {
|
|
1503
1503
|
attribution: "agent",
|
|
1504
|
+
synthetic: true,
|
|
1504
1505
|
...(isFinalRetry && reminderToolChoice ? { toolChoice: reminderToolChoice } : {}),
|
|
1505
1506
|
}),
|
|
1506
1507
|
);
|
package/src/tiny/title-client.ts
CHANGED
|
@@ -39,7 +39,12 @@ export interface TinyTitleDownloadOptions {
|
|
|
39
39
|
onProgress?: (event: TinyTitleProgressEvent) => void;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
-
|
|
42
|
+
// Cold-starting the worker subprocess from a compiled binary (decompress + module
|
|
43
|
+
// graph load) is slow on contended CI runners — the macos-15-intel release smoke
|
|
44
|
+
// blew past 5s while arm64/linux/win passed. The probe only needs to prove the
|
|
45
|
+
// worker spawns and ponges at all (a dead worker never ponges regardless), so a
|
|
46
|
+
// generous bound removes the flake without weakening the check.
|
|
47
|
+
const SMOKE_TEST_TIMEOUT_MS = 30_000;
|
|
43
48
|
|
|
44
49
|
/**
|
|
45
50
|
* Hidden subcommand on the main CLI that boots the tiny-model worker in the
|
package/src/tools/bash.ts
CHANGED
|
@@ -14,7 +14,6 @@ import { type BashResult, executeBash } from "../exec/bash-executor";
|
|
|
14
14
|
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
15
15
|
import { InternalUrlRouter } from "../internal-urls";
|
|
16
16
|
import { truncateToVisualLines } from "../modes/components/visual-truncate";
|
|
17
|
-
import { shimmerEnabled } from "../modes/theme/shimmer";
|
|
18
17
|
import { highlightCode, type Theme } from "../modes/theme/theme";
|
|
19
18
|
import bashDescription from "../prompts/tools/bash.md" with { type: "text" };
|
|
20
19
|
import type { ClientBridgeTerminalExitStatus, ClientBridgeTerminalOutput } from "../session/client-bridge";
|
|
@@ -1130,7 +1129,6 @@ export function createShellRenderer<TArgs>(config: ShellRendererConfig<TArgs>) {
|
|
|
1130
1129
|
state: "pending",
|
|
1131
1130
|
sections: [{ lines: capPreviewLines(cmdLines, uiTheme, { expanded: options.expanded }) }],
|
|
1132
1131
|
width,
|
|
1133
|
-
animate: true,
|
|
1134
1132
|
},
|
|
1135
1133
|
uiTheme,
|
|
1136
1134
|
),
|
|
@@ -1261,11 +1259,6 @@ export function createShellRenderer<TArgs>(config: ShellRendererConfig<TArgs>) {
|
|
|
1261
1259
|
{ label: uiTheme.fg("toolTitle", "Output"), lines: outputLines },
|
|
1262
1260
|
],
|
|
1263
1261
|
width,
|
|
1264
|
-
// Don't animate once the command has been backgrounded: the block
|
|
1265
|
-
// gets committed to scrollback and finalizes later via the async
|
|
1266
|
-
// update path, so a mid-sweep frame would freeze a stray dark
|
|
1267
|
-
// border segment.
|
|
1268
|
-
animate: options.isPartial && shimmerEnabled() && details?.async?.state !== "running",
|
|
1269
1262
|
},
|
|
1270
1263
|
uiTheme,
|
|
1271
1264
|
);
|
package/src/tools/eval-render.ts
CHANGED
|
@@ -16,9 +16,8 @@ import type { EvalCellResult, EvalLanguage, EvalStatusEvent, EvalToolDetails } f
|
|
|
16
16
|
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
17
17
|
import { formatContextUsage } from "../modes/components/status-line/context-thresholds";
|
|
18
18
|
import { truncateToVisualLines } from "../modes/components/visual-truncate";
|
|
19
|
-
import { shimmerEnabled } from "../modes/theme/shimmer";
|
|
20
19
|
import { getMarkdownTheme, type Theme } from "../modes/theme/theme";
|
|
21
|
-
import {
|
|
20
|
+
import { markFramedBlockComponent, renderCodeCell } from "../tui";
|
|
22
21
|
import {
|
|
23
22
|
JSON_TREE_MAX_DEPTH_COLLAPSED,
|
|
24
23
|
JSON_TREE_MAX_DEPTH_EXPANDED,
|
|
@@ -491,8 +490,7 @@ export const evalToolRenderer = {
|
|
|
491
490
|
|
|
492
491
|
return markFramedBlockComponent({
|
|
493
492
|
render: (width: number): string[] => {
|
|
494
|
-
const
|
|
495
|
-
const key = `${animate ? borderShimmerTick() : 0}|${options.expanded ? 1 : 0}|${cells.map(c => `${c.language}:${c.title ?? ""}:${c.code.length}`).join("|")}`;
|
|
493
|
+
const key = `${options.expanded ? 1 : 0}|${cells.map(c => `${c.language}:${c.title ?? ""}:${c.code.length}`).join("|")}`;
|
|
496
494
|
if (cached && cached.key === key && cached.width === width) {
|
|
497
495
|
return cached.result;
|
|
498
496
|
}
|
|
@@ -510,13 +508,9 @@ export const evalToolRenderer = {
|
|
|
510
508
|
status: "pending",
|
|
511
509
|
width,
|
|
512
510
|
// Always render the full source: the code is fixed input, not the
|
|
513
|
-
// streaming part, so it is never compacted.
|
|
514
|
-
// (args streaming) the block is not yet committed to native
|
|
515
|
-
// scrollback — its head is only committed once a result exists and
|
|
516
|
-
// the code has finalized (see `isStreamingPreviewAppendOnly`).
|
|
511
|
+
// streaming part, so it is never compacted.
|
|
517
512
|
codeMaxLines: Number.POSITIVE_INFINITY,
|
|
518
513
|
expanded: options.expanded,
|
|
519
|
-
animate,
|
|
520
514
|
},
|
|
521
515
|
uiTheme,
|
|
522
516
|
);
|
|
@@ -579,8 +573,7 @@ export const evalToolRenderer = {
|
|
|
579
573
|
render: (width: number): string[] => {
|
|
580
574
|
const expanded = options.renderContext?.expanded ?? options.expanded;
|
|
581
575
|
const previewLines = options.renderContext?.previewLines ?? EVAL_DEFAULT_PREVIEW_LINES;
|
|
582
|
-
const
|
|
583
|
-
const key = `${expanded}|${previewLines}|${options.spinnerFrame}|${animate ? borderShimmerTick() : 0}`;
|
|
576
|
+
const key = `${expanded}|${previewLines}|${options.spinnerFrame}`;
|
|
584
577
|
if (cached && cached.key === key && cached.width === width) {
|
|
585
578
|
return cached.result;
|
|
586
579
|
}
|
|
@@ -622,7 +615,6 @@ export const evalToolRenderer = {
|
|
|
622
615
|
codeMaxLines: Number.POSITIVE_INFINITY,
|
|
623
616
|
expanded,
|
|
624
617
|
width,
|
|
625
|
-
animate,
|
|
626
618
|
},
|
|
627
619
|
uiTheme,
|
|
628
620
|
);
|
|
@@ -752,17 +744,6 @@ export const evalToolRenderer = {
|
|
|
752
744
|
};
|
|
753
745
|
},
|
|
754
746
|
|
|
755
|
-
// Append-only once a result exists (args complete → code finalized). The code
|
|
756
|
-
// is rendered in full as a fixed top-anchored prefix, and the streamed stdout
|
|
757
|
-
// below it only appends rows at the bottom, so the scrolled-off head commits
|
|
758
|
-
// to native scrollback instead of being yanked — collapsed or expanded, since
|
|
759
|
-
// the collapsed output cap keeps its sliding tail in the bottom live region.
|
|
760
|
-
// Returns false while still pending: the code is mid-stream (args incomplete)
|
|
761
|
-
// and its header still reads "pending", so committing it would strand a stale
|
|
762
|
-
// pending preview in history.
|
|
763
|
-
isStreamingPreviewAppendOnly(_args: EvalRenderArgs, _options: RenderResultOptions, result?: unknown): boolean {
|
|
764
|
-
return result != null;
|
|
765
|
-
},
|
|
766
747
|
mergeCallAndResult: true,
|
|
767
748
|
inline: true,
|
|
768
749
|
};
|