npm - @oh-my-pi/pi-coding-agent - Versions diffs - 16.1.1 → 16.1.3 - Mend

@oh-my-pi/pi-coding-agent 16.1.1 → 16.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (108)

package/CHANGELOG.md +50 -0
package/dist/cli.js +3090 -3115
package/dist/types/cli/bench-cli.d.ts +2 -1
package/dist/types/config/model-resolver.d.ts +3 -3
package/dist/types/config/settings-schema.d.ts +1 -1
package/dist/types/main.d.ts +2 -0
package/dist/types/mnemopi/embed-client.d.ts +70 -0
package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
package/dist/types/mnemopi/embed-worker.d.ts +12 -0
package/dist/types/mnemopi/state.d.ts +9 -1
package/dist/types/modes/components/assistant-message.d.ts +12 -0
package/dist/types/modes/components/welcome.d.ts +1 -1
package/dist/types/sdk.d.ts +19 -2
package/dist/types/session/agent-storage.d.ts +2 -0
package/dist/types/session/auth-broker-config.d.ts +34 -6
package/dist/types/session/history-storage.d.ts +1 -1
package/dist/types/system-prompt.d.ts +5 -1
package/dist/types/task/executor.d.ts +10 -0
package/dist/types/tools/find.d.ts +0 -2
package/dist/types/tools/image-gen.d.ts +2 -2
package/dist/types/tools/search.d.ts +3 -3
package/dist/types/utils/image-loading.d.ts +1 -1
package/dist/types/utils/ipc.d.ts +22 -0
package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
package/package.json +12 -12
package/scripts/measure-prompt-tokens.ts +63 -0
package/src/cli/bench-cli.ts +64 -3
package/src/cli/startup-cwd.ts +3 -13
package/src/cli.ts +8 -0
package/src/commands/token.ts +52 -33
package/src/config/append-only-context-mode.ts +45 -0
package/src/config/model-discovery.ts +3 -0
package/src/config/model-registry.ts +21 -3
package/src/config/model-resolver.ts +31 -8
package/src/config/settings-schema.ts +1 -1
package/src/cursor.ts +1 -1
package/src/debug/raw-sse-buffer.ts +31 -10
package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
package/src/eval/py/prelude.py +1 -1
package/src/export/html/tool-views.generated.js +1 -1
package/src/extensibility/extensions/runner.ts +8 -2
package/src/internal-urls/docs-index.generated.txt +1 -1
package/src/lsp/client.ts +24 -0
package/src/main.ts +29 -9
package/src/mnemopi/backend.ts +49 -3
package/src/mnemopi/embed-client.ts +401 -0
package/src/mnemopi/embed-protocol.ts +35 -0
package/src/mnemopi/embed-worker.ts +113 -0
package/src/mnemopi/state.ts +29 -1
package/src/modes/components/assistant-message.ts +86 -0
package/src/modes/components/custom-editor.ts +1 -1
package/src/modes/components/model-selector.ts +2 -2
package/src/modes/components/tips.txt +2 -1
package/src/modes/components/welcome.ts +87 -9
package/src/modes/controllers/event-controller.ts +9 -1
package/src/modes/controllers/selector-controller.ts +2 -2
package/src/modes/theme/theme.ts +69 -0
package/src/prompts/system/personalities/default.md +8 -16
package/src/prompts/system/system-prompt.md +101 -115
package/src/prompts/tools/ast-edit.md +10 -12
package/src/prompts/tools/ast-grep.md +14 -18
package/src/prompts/tools/bash.md +19 -21
package/src/prompts/tools/browser.md +24 -24
package/src/prompts/tools/checkpoint.md +0 -1
package/src/prompts/tools/debug.md +11 -15
package/src/prompts/tools/eval.md +27 -27
package/src/prompts/tools/find.md +6 -10
package/src/prompts/tools/github.md +11 -15
package/src/prompts/tools/goal.md +0 -7
package/src/prompts/tools/inspect-image.md +0 -1
package/src/prompts/tools/irc.md +15 -24
package/src/prompts/tools/job.md +5 -8
package/src/prompts/tools/learn.md +2 -2
package/src/prompts/tools/lsp.md +27 -30
package/src/prompts/tools/manage-skill.md +4 -4
package/src/prompts/tools/read.md +21 -23
package/src/prompts/tools/replace.md +0 -1
package/src/prompts/tools/resolve.md +4 -9
package/src/prompts/tools/rewind.md +1 -1
package/src/prompts/tools/search.md +8 -10
package/src/prompts/tools/task.md +33 -38
package/src/prompts/tools/todo.md +14 -18
package/src/prompts/tools/web-search.md +0 -4
package/src/prompts/tools/write.md +1 -1
package/src/sdk.ts +53 -102
package/src/session/agent-session.ts +25 -2
package/src/session/agent-storage.ts +14 -0
package/src/session/auth-broker-config.ts +37 -76
package/src/session/history-storage.ts +13 -1
package/src/session/session-history-format.ts +1 -1
package/src/session/session-manager.ts +33 -6
package/src/stt/asr-client.ts +2 -7
package/src/system-prompt.ts +28 -8
package/src/task/executor.ts +57 -0
package/src/task/index.ts +15 -1
package/src/tiny/title-client.ts +2 -7
package/src/tools/browser.ts +1 -1
package/src/tools/eval.ts +1 -1
package/src/tools/find.ts +4 -17
package/src/tools/image-gen.ts +4 -8
package/src/tools/memory-edit.ts +1 -1
package/src/tools/render-utils.ts +4 -1
package/src/tools/search.ts +5 -5
package/src/tts/tts-client.ts +2 -7
package/src/utils/image-loading.ts +12 -2
package/src/utils/ipc.ts +38 -0
package/src/web/search/providers/perplexity-auth.ts +133 -0
package/src/web/search/providers/perplexity.ts +2 -125

package/src/prompts/tools/search.md CHANGED Viewed

@@ -1,24 +1,22 @@
-Searches files using powerful regex matching.
+Searches files using regex.
 <instruction>
-- Supports Rust regex syntax (RE2-style — no lookaround or backreferences). Use line anchors or post-filters instead of (?!…)/(?<!…)
-- `paths` accepts either one string or an array of files, directories, globs, or internal URLs. Optional: when omitted or empty it searches the workspace root (`.`). Prefer scoping to specific paths when you know them.
-- For multiple targets, pass an array with one target per element: `["src", "tests"]`.
-- Cross-line patterns are detected from literal `\n` or escaped `\\n` in `pattern`
+- Rust regex (RE2-style) — no lookaround/backreferences; use line anchors or post-filters instead of (?!…)/(?<!…).
+- `paths`: SHOULD scope to known paths (e.g. `["src","tests"]`).
+- Cross-line patterns detected from literal `\n` or `\\n` in `pattern`.
 </instruction>
 <output>
 {{#if IS_HL_MODE}}
-- Text output emits a file snapshot tag header per matched file plus numbered lines: `[src/login.ts#1A2B]`, `*42:if (user.id) {` (match), ` 43:return user;` (context). Copy the header for anchored edits; ops use bare line numbers.
+- Per matched file: snapshot tag header + numbered lines: `[src/login.ts#1A2B]`, `*42:if (user.id) {` (match), ` 43:return user;` (context). Copy header for anchored edits; ops use bare line numbers.
 {{else}}
 {{#if IS_LINE_NUMBER_MODE}}
-- Text output is line-number-prefixed
+- Output is line-number-prefixed.
 {{/if}}
 {{/if}}
 </output>
 <critical>
-- You MUST use the built-in `search` tool for any content search. NEVER shell out to `grep`, `rg`, `ripgrep`, `ag`, `ack`, `git grep`, `awk`, `sed`-for-search, or any other CLI search via Bash — even for a single match, even "just to check quickly", even piped through other commands.
-- Bash `grep`/`rg` loses `.gitignore` semantics, bypasses result limits, and wastes tokens. The `search` tool is faster, structured, and already wired into the workspace — there is no scenario where Bash search is preferable.
-- If the search is open-ended, requiring multiple rounds, you MUST use the Task tool with the explore subagent instead of chaining `search` calls yourself.
+- MUST use built-in `search` for any content search. NEVER shell out to `grep`, `rg`, `ripgrep`, `ag`, `ack`, `git grep`, `awk`, `sed`-for-search, or any CLI search via Bash — not even for one match or a quick check.
+- Open-ended search needing multiple rounds? MUST use the Task tool with the explore subagent, NOT chained `search` calls.
 </critical>

package/src/prompts/tools/task.md CHANGED Viewed

@@ -1,73 +1,68 @@
-{{#if asyncEnabled}}{{#if batchEnabled}}Spawns subagents to work in the background — one per `tasks[]` item; a single spawn is a one-item batch.{{else}}Spawns ONE subagent per call to work in the background.{{/if}}
+{{#if asyncEnabled}}{{#if batchEnabled}}Spawns subagents in the background — one per `tasks[]` item; single spawn = one-item batch.{{else}}Spawns ONE subagent per call in the background.{{/if}}
-- Spawning is non-blocking: the call returns immediately with the agent id{{#if batchEnabled}}s{{/if}} and job id{{#if batchEnabled}}s{{/if}}; each result is delivered automatically when that agent yields.
-- Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. To launch several subagents, you MUST batch them into a single call's `tasks[]` — they share `context` once instead of duplicating it. Separate `task` calls in one message are ONLY for spawns needing a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}. Concurrency is bounded at {{MAX_CONCURRENCY}} running subagents per session.
-- If genuinely blocked on a result, wait with `job poll`; otherwise keep working. `job cancel` terminates a task and **cannot carry a message** — only for stalled/abandoned work.
-{{else}}{{#if batchEnabled}}Runs subagents synchronously — one per `tasks[]` item; a single spawn is a one-item batch.{{else}}Runs ONE subagent synchronously per call.{{/if}}
+- Non-blocking: returns agent id{{#if batchEnabled}}s{{/if}} + job id{{#if batchEnabled}}s{{/if}} immediately; each result auto-delivered on yield.
+- Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. MUST batch into one `tasks[]` (share `context` once). Separate `task` calls ONLY for a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}.
+- Blocked on a result? `job poll`; else keep working. `job cancel` kills a task, **cannot carry a message** — only for stalled/abandoned work.
+{{else}}{{#if batchEnabled}}Runs subagents synchronously — one per `tasks[]` item; single spawn = one-item batch.{{else}}Runs ONE subagent synchronously per call.{{/if}}
-- Spawning is blocking: the call returns only after the agent{{#if batchEnabled}}s{{/if}} finish; results arrive inline.
-- Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. To launch several subagents, you MUST batch them into a single call's `tasks[]` — they share `context` once instead of duplicating it. Separate `task` calls in one message are ONLY for spawns needing a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}. Concurrency is bounded at {{MAX_CONCURRENCY}} running subagents per session.
+- Blocking: returns only after the agent{{#if batchEnabled}}s{{/if}} finish; results arrive inline.
+- Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. MUST batch into one `tasks[]` (share `context` once). Separate `task` calls ONLY for a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}.
 {{/if}}
 {{#if ircEnabled}}
-- Coordinate with agents via `irc` using their ids. Agents reach you and their siblings live the same way.
+- Coordinate via `irc` by agent id; agents reach you + siblings live.
 {{/if}}
-<lifecycle>
-- Finished agents stay alive: `idle` first, then `parked` after a TTL.{{#if ircEnabled}} Both remain addressable and revivable: messaging one via `irc` wakes it and runs your message as a follow-up turn. **Prefer messaging an agent that already holds the relevant context over spawning fresh** — check `irc` op:"list" for candidates.{{/if}}
-- `history://<id>` is the agent's transcript; `agent://<id>` its latest output artifact.
-</lifecycle>
 <parameters>
 - `agent`: agent type to spawn
 {{#if batchEnabled}}
-- `context`: shared background prepended to every assignment — goal, constraints, shared contract (see context-fmt); REQUIRED, session-specific only
-- `tasks`: tasks to spawn — one subagent per item, all in parallel:
-  - `assignment`: complete self-contained instructions; one-liners and missing acceptance criteria are PROHIBITED
-  - `id`: stable agent id, CamelCase, ≤32 chars; generated when omitted
+- `context`: background prepended to every assignment — goal, constraints, contract (see context-fmt); REQUIRED, session-specific only
+- `tasks`: one subagent per item, all in parallel:
+  - `assignment`: complete self-contained instructions; one-liners / missing acceptance criteria PROHIBITED
+  - `id`: stable agent id, CamelCase, ≤32 chars; auto when omitted
   - `description`: UI label only — subagent never sees it
-  - `role`: specialist identity this subagent embodies (e.g. "Auth-flow security reviewer") — sets its system-prompt persona and roster display name; tailor every spawn rather than cloning a generic worker
+  - `role`: specialist identity (e.g. "Auth-flow security reviewer") — sets system-prompt persona + roster name
 {{#if isolationEnabled}}
-  - `isolated`: run this spawn in an isolated env; returns patches. Isolated agents are torn down at completion — not addressable afterwards
+  - `isolated`: run spawn in isolated env; returns patches. Torn down at completion — not addressable after
 {{/if}}
 {{else}}
-- `id`: stable agent id, CamelCase, ≤32 chars; generated when omitted
+- `id`: stable agent id, CamelCase, ≤32 chars; auto when omitted
 - `description`: UI label only — subagent never sees it
-- `role`: specialist identity this subagent embodies (e.g. "Auth-flow security reviewer") — sets its system-prompt persona and roster display name; tailor every spawn rather than cloning a generic worker
-- `assignment`: complete self-contained instructions; one-liners and missing acceptance criteria are PROHIBITED
+- `role`: specialist identity (e.g. "Auth-flow security reviewer") — sets system-prompt persona + roster name
+- `assignment`: complete self-contained instructions; one-liners / missing acceptance criteria PROHIBITED
 {{#if isolationEnabled}}
-- `isolated`: run in isolated env; returns patches. Isolated agents are torn down at completion — not addressable afterwards
+- `isolated`: run in isolated env; returns patches. Torn down at completion — not addressable after
 {{/if}}
 {{/if}}
 </parameters>
 <rules>
-- **Maximize fan-out.** Issue the widest {{#if batchEnabled}}`tasks[]` batch{{else}}set of parallel `task` calls{{/if}} the work decomposes into. NEVER serialize work that could run concurrently.
-- **Subagents do not verify, lint, or format.** Every assignment MUST instruct the subagent to skip all gates, formatters, and project-wide build/test/lint. You run them once at the end across the union of changed files.
+- **Maximize fan-out.** Widest {{#if batchEnabled}}`tasks[]` batch{{else}}set of parallel `task` calls{{/if}} the work decomposes into. NEVER serialize parallelizable work.
+- **Subagents do not verify, lint, or format.** Each assignment MUST tell the subagent: skip all gates, formatters, project-wide build/test/lint. You run them once at the end across changed files.
 - No globs, no "update all", no package-wide scope. Fan out.
-- **Tailor every spawn with a `role`.** A role naming the specialist (e.g. "Parser edge-case tester", "SSE backpressure specialist") makes a sharper agent than a bare generic `task`/`quick_task` worker; decompose into named specialists, never clones of one generic worker. A role-less generic spawn is the exception.
-- NEVER slow down or serialize because tasks might overlap on some files. Agents resolve collisions among themselves in real time.
-- Subagents have no conversation history. Every fact, file path, and direction they need MUST be explicit in {{#if batchEnabled}}`context` or the item's `assignment`{{else}}the `assignment`{{/if}}.
+- **Tailor every spawn with a `role`.** A named specialist (e.g. "Parser edge-case tester", "SSE backpressure specialist") beats a generic `task`/`quick_task` worker; decompose into specialists, never clones. Role-less spawn is the exception.
+- NEVER serialize over possible file overlap. Agents self-resolve collisions in real time.
+- Subagents have no conversation history. Every fact, file path, direction MUST be explicit in {{#if batchEnabled}}`context` or the item's `assignment`{{else}}the `assignment`{{/if}}.
 {{#if batchEnabled}}
-- **Shared background** lives in `context` once — never duplicated across assignments. Pass large payloads via `local://<path>` URIs, not inline.
+- **Shared background** in `context` once, never per assignment. Large payloads via `local://<path>` URIs, not inline.
 {{else}}
-- **Shared background**: write it ONCE to a `local://` file (e.g. `local://ctx.md`) and reference that path in each assignment. Pass large payloads via `local://<path>` URIs, not inline.
+- **Shared background**: write ONCE to a `local://` file (e.g. `local://ctx.md`), reference it in each assignment. Large payloads via `local://<path>` URIs, not inline.
 {{/if}}
-- Prefer agents that investigate **and** edit in one pass; only spin a read-only discovery step when affected files are genuinely unknown.
-- **Read-only agents**: Agents tagged READ-ONLY (e.g. `explore`) have no edit/write/command tools. NEVER hand them an assignment that requires changing files or running commands. Use them to investigate and report back; do the edits yourself or delegate to a writing agent (`task`, `oracle`, `designer`).
-- **No reasoning offload**: NEVER offload reasoning, analysis, design, or decision-making to `quick_task` or `explore` — they run minimal-effort / small models for mechanical lookups and data collection only. Keep judgment and synthesis in your own context; delegate hard thinking to `task`, `plan`, or `oracle`.
+- Prefer agents that investigate **and** edit in one pass; spin a read-only discovery step only when affected files unknown.
+- **Read-only agents** (e.g. `explore`): no edit/write/command tools. NEVER assign them file changes or commands. Use to investigate + report; delegate edits to a writing agent (`task`/`oracle`/`designer`) or do them yourself.
+- **No reasoning offload**: NEVER route reasoning, analysis, design, or decisions to `quick_task`/`explore` — minimal-effort / small models for mechanical lookups + data collection only. Keep judgment + synthesis in your own context; delegate hard thinking to `task`/`plan`/`oracle`.
 </rules>
 <parallelization>
 {{#if ircEnabled}}
-Test: can task B run correctly without seeing A's output? If no, sequence A → B — **unless** B can reasonably ask A for the missing piece over `irc`. Live coordination beats a serial waterfall when the contract is small and easy to describe in a DM.
-Still sequence when one task produces a large, evolving contract (generated types, schema migration, core module API) the other consumes wholesale — IRC round-trips do not replace a finished artifact.
-Parallel when tasks touch disjoint files, are independent refactors/tests, or only need occasional clarification that can be resolved peer-to-peer.
+Test: can B run without A's output? No → sequence A → B — **unless** B can ask A over `irc`. Live coordination beats a waterfall when the contract is small + DM-able.
+Still sequence when a task produces a large evolving contract (generated types, schema migration, core module API) consumed wholesale — IRC round-trips don't replace a finished artifact.
+Parallel when tasks touch disjoint files, are independent refactors/tests, or need only occasional peer clarification.
 {{else}}
-Test: can task B run correctly without seeing A's output? If no, sequence A → B.
+Test: can B run without A's output? No → sequence A → B.
 Sequential when one task produces a contract (types, API, schema, core module) the other consumes.
 Parallel when tasks touch disjoint files or are independent refactors/tests.
 {{/if}}
-{{#if ircEnabled}}Sequenced follow-ups SHOULD message the agent that produced the prerequisite — it already holds the context.{{/if}}
+{{#if ircEnabled}}Sequenced follow-ups SHOULD message the prerequisite's producer — it holds the context.{{/if}}
 </parallelization>
 {{#if batchEnabled}}

package/src/prompts/tools/todo.md CHANGED Viewed

@@ -1,43 +1,39 @@
-**Tasks are referenced by their verbatim content string, not by any auto-generated ID. There is no "task-1"/"task-N" identifier — the tool never emits one. Pass the task's content text in the `task` field.**
+**Tasks referenced by verbatim content string, NEVER an auto-generated ID — no "task-1"/"task-N" exists. Pass the content text in the `task` field.**
-Manages a phased task list. Pass `ops`: a flat array of operations.
-The next pending task is auto-promoted to `in_progress` after each completion.
-Allowed `op` values are only `init`, `start`, `done`, `drop`, `rm`, `append`, and `view`. `pending` is a task status, not an `op`; leave not-yet-started tasks implicit in `init`/`append` lists.
+Manages a phased task list. Pass `ops`: flat array of operations. Next pending task auto-promotes to `in_progress` on each completion. `pending` is a status, not an `op` — leave not-yet-started tasks implicit in `init`/`append`.
 ## Operations
 |`op`|Required fields|Effect|
 |---|---|---|
-|`init`|`list: [{phase, items: string[]}]`|Initialize the full list (replaces any existing list)|
+|`init`|`list: [{phase, items: string[]}]`|Initialize full list (replaces existing)|
 |`init`|`items: string[]`|Flattened single-phase init|
 |`start`|`task`|Mark in progress|
 |`done`|`task` or `phase`|Mark completed|
 |`drop`|`task` or `phase`|Mark abandoned|
-|`rm`|`task` or `phase` (optional)|Remove task or phase's tasks; omit both to clear the entire list|
+|`rm`|`task` or `phase` (optional)|Remove task or phase's tasks; omit both to clear the list|
 |`append`|`phase`, `items: string[]`|Append tasks to `phase`; lazily creates phase|
-|`view`|—|Read-only: echo the current list without modifying it|
+|`view`|—|Read-only: echo the list, no modify|
 ## Anatomy
-- **Task content**: 5–10 words, what is being done, not how. Used as the task identifier — unique.
-- **Phase name**: short noun phrase (e.g. `Foundation`, `Auth`, `Verification`). Used as the phase identifier — unique. Do not add prefixes like `1.`, `A)`, `Phase 1:`, etc.
+- **Task content**: 5–10 words; what, not how. Unique identifier.
+- **Phase name**: short noun phrase (e.g. `Foundation`, `Auth`, `Verification`). Unique identifier. NEVER prefix `1.`, `A)`, `Phase 1:`.
 ## Rules
 - Mark tasks done immediately after finishing.
 - Complete phases in order.
-- On blockers, `append` a new task to the active phase to unblock yourself, or `drop`.
-- `task` and `phase` fields reference content/name verbatim; keep them stable once introduced.
-- Lost track of exact task text? `view` echoes the full list — NEVER guess content from memory; a mismatched `task` string is an error.
+- Blocked? `append` a task to the active phase to unblock, or `drop`.
+- Keep `task`/`phase` strings stable once introduced.
+- Lost the exact task text? `view` echoes the list — NEVER guess from memory; a mismatched `task` string is an error.
 ## When to create a list
 - Task requires 3+ distinct steps
 - User explicitly requests one
-- User provides a set of tasks to complete
+- User provides a set of tasks
 - New instructions arrive mid-task — capture before proceeding
 <critical>
-When the user hands you a multi-step plan — a phased todo, a numbered or bulleted checklist, or "N bugs/items/tasks" to work through:
-- You MUST `init` the list with EVERY item as its own task before doing the work.
-- Enumerate all of them;
-- NEVER summarize the plan into fewer tasks, sample "the important ones", drop items, or rely on memory to track the rest.
-The entire point is to remember every one.
+User hands you a multi-step plan — phased todo, numbered/bulleted checklist, or "N bugs/items/tasks":
+- You MUST `init` the list with EVERY item as its own task before working.
+- Enumerate all; NEVER summarize into fewer tasks, sample "the important ones", drop items, or track the rest from memory.
 </critical>

package/src/prompts/tools/web-search.md CHANGED Viewed

@@ -4,7 +4,3 @@ Searches the web for up-to-date information beyond knowledge cutoff.
 - You SHOULD prefer primary sources (papers, official docs) and corroborate key claims with multiple sources
 - You MUST include links for cited sources in the final response
 </instruction>
-<caution>
-Searches are performed automatically within a single API call—no pagination or follow-up requests needed.
-</caution>

package/src/prompts/tools/write.md CHANGED Viewed

@@ -8,7 +8,7 @@ Creates or overwrites file at specified path.
 </conditions>
 <critical>
-- You SHOULD use Edit tool for modifying existing files (more precise, preserves formatting)
+- You SHOULD use Edit tool for modifying existing files
 - You NEVER create documentation files (*.md, README) unless explicitly requested
 - You NEVER use emojis unless requested
 </critical>

package/src/sdk.ts CHANGED Viewed

@@ -22,18 +22,7 @@ import {
 } from "@oh-my-pi/pi-ai/providers/openai-codex-responses";
 import { FALLBACK_DIALECT, preferredDialect } from "@oh-my-pi/pi-catalog/identity";
 import type { Component } from "@oh-my-pi/pi-tui";
-import {
-	$env,
-	$flag,
-	getAgentDbPath,
-	getAgentDir,
-	getAuthBrokerSnapshotCachePath,
-	getProjectDir,
-	logger,
-	postmortem,
-	prompt,
-	Snowflake,
-} from "@oh-my-pi/pi-utils";
+import { $env, $flag, getAgentDir, getProjectDir, logger, postmortem, prompt, Snowflake } from "@oh-my-pi/pi-utils";
 import { INTENT_FIELD } from "@oh-my-pi/pi-wire";
 import { ADVISOR_READONLY_TOOL_NAMES, discoverWatchdogFiles } from "./advisor";
 import { type AsyncJob, AsyncJobManager } from "./async";
@@ -56,11 +45,6 @@ import { loadPromptTemplates as loadPromptTemplatesInternal, type PromptTemplate
 import { Settings, type SkillsSettings } from "./config/settings";
 import { CursorExecHandlers } from "./cursor";
 import "./discovery";
-import { AuthBrokerClient } from "@oh-my-pi/pi-ai/auth-broker/client";
-import { RemoteAuthCredentialStore } from "@oh-my-pi/pi-ai/auth-broker/remote-store";
-import { readAuthBrokerSnapshotCache, writeAuthBrokerSnapshotCache } from "@oh-my-pi/pi-ai/auth-broker/snapshot-cache";
-import { DEFAULT_SNAPSHOT_CACHE_TTL_MS, type SnapshotResponse } from "@oh-my-pi/pi-ai/auth-broker/types";
-import { resolveConfigValue } from "./config/resolve-config-value";
 import { initializeWithSettings } from "./discovery";
 import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./eval/py/executor";
 import { defaultEvalSessionId } from "./eval/session-id";
@@ -119,8 +103,8 @@ import {
 	SecretObfuscator,
 } from "./secrets";
 import { AgentSession } from "./session/agent-session";
-import { resolveAuthBrokerConfig } from "./session/auth-broker-config";
-import { AuthStorage } from "./session/auth-storage";
+import { discoverAuthStorage as discoverAuthStorageFromConfig } from "./session/auth-broker-config";
+import type { AuthStorage } from "./session/auth-storage";
 import {
 	type CustomMessage,
 	convertToLlm,
@@ -405,8 +389,12 @@ export interface CreateAgentSessionOptions {
 	/** Models available for cycling (Ctrl+P in interactive mode) */
 	scopedModels?: Array<{ model: Model; thinkingLevel?: ThinkingLevel }>;
-	/** System prompt blocks. Array replaces default, function receives default blocks and returns final blocks. */
+	/** Provider-facing system prompt override. Replaces the fully rendered default blocks. */
 	systemPrompt?: string | string[] | ((defaultPrompt: string[]) => string | string[]);
+	/** Already-loaded custom prompt text rendered through the bundled custom system prompt template. */
+	customSystemPrompt?: string;
+	/** Already-loaded text appended through the bundled system prompt templates. */
+	appendSystemPrompt?: string;
 	/** Optional provider-facing session identifier for prompt caches and sticky auth selection.
 	 * Keeps persisted session files isolated while reusing provider-side caches. */
 	providerSessionId?: string;
@@ -545,6 +533,16 @@ export interface CreateAgentSessionOptions {
 	 */
 	telemetry?: AgentTelemetryConfig;
+	/**
+	 * Fired once, when the agent loop hands its first request to the provider
+	 * transport (i.e. the `streamFn` wrapper is first invoked). Used to measure
+	 * subagent launch latency — the boundary between "session built" and "model
+	 * call dispatched". This is the loop's dispatch point, slightly before the
+	 * actual provider HTTP call (per-request prep, identical across all
+	 * requests, follows it), which is the right granularity for launch timing.
+	 */
+	onFirstChatDispatch?: () => void;
 	/** Whether to auto-approve all tool calls (--auto-approve CLI flag). Default: false */
 	autoApprove?: boolean;
 }
@@ -617,21 +615,6 @@ export {
 // Helper Functions
-function getDefaultAgentDir(): string {
-	return getAgentDir();
-}
-function resolveSnapshotTtlMs(): number {
-	const raw = process.env.OMP_AUTH_BROKER_SNAPSHOT_TTL_MS;
-	if (raw === undefined) return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
-	const value = raw.trim();
-	if (value === "") return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
-	const ttlMs = Number(value);
-	if (Number.isFinite(ttlMs) && ttlMs >= 0) return ttlMs;
-	logger.warn("Invalid OMP_AUTH_BROKER_SNAPSHOT_TTL_MS; using default", { value: raw });
-	return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
-}
 // Discovery Functions
 /**
@@ -644,70 +627,12 @@ function resolveSnapshotTtlMs(): number {
  * the client receives access tokens with `refresh = "__remote__"` and calls
  * back into the broker through the {@link AuthStorageOptions.refreshOAuthCredential}
  * override to re-mint access tokens when needed.
+ *
+ * Delegates to {@link ./session/auth-broker-config} so the TUI and the catalog
+ * generator share the same credential-discovery logic.
  */
-export async function discoverAuthStorage(agentDir: string = getDefaultAgentDir()): Promise<AuthStorage> {
-	const brokerConfigPromise = resolveAuthBrokerConfig();
-	const cachePath = getAuthBrokerSnapshotCachePath();
-	// Warm the encrypted snapshot cache into the page cache while the broker
-	// config resolves (it may shell out for a `!command` token). Decryption
-	// needs the resolved token, so the real cache read cannot start earlier.
-	void Bun.file(cachePath)
-		.arrayBuffer()
-		.catch(() => undefined);
-	const brokerConfig = await brokerConfigPromise;
-	if (brokerConfig) {
-		const client = new AuthBrokerClient({ url: brokerConfig.url, token: brokerConfig.token });
-		const ttlMs = resolveSnapshotTtlMs();
-		const persist =
-			ttlMs > 0
-				? (snapshot: SnapshotResponse): void => {
-						void writeAuthBrokerSnapshotCache({
-							path: cachePath,
-							token: brokerConfig.token,
-							url: brokerConfig.url,
-							snapshot,
-						}).catch(error => {
-							logger.debug("auth-broker snapshot cache write failed", { error: String(error) });
-						});
-					}
-				: undefined;
-		let initialSnapshot: SnapshotResponse | undefined;
-		if (ttlMs > 0) {
-			initialSnapshot =
-				(await readAuthBrokerSnapshotCache({
-					path: cachePath,
-					token: brokerConfig.token,
-					url: brokerConfig.url,
-					ttlMs,
-				}).catch(error => {
-					logger.debug("auth-broker snapshot cache read failed", { error: String(error) });
-					return null;
-				})) ?? undefined;
-		}
-		if (!initialSnapshot) {
-			const initialResult = await client.fetchSnapshot();
-			if (initialResult.status !== 200) throw new Error("Auth broker returned no initial snapshot");
-			initialSnapshot = initialResult.snapshot;
-			persist?.(initialSnapshot);
-		}
-		const store = new RemoteAuthCredentialStore({ client, initialSnapshot, onSnapshot: persist });
-		// Refresh + usage hooks live on RemoteAuthCredentialStore; AuthStorage
-		// discovers them automatically when no explicit option overrides them.
-		const storage = new AuthStorage(store, {
-			configValueResolver: resolveConfigValue,
-			sourceLabel: `broker ${brokerConfig.url}`,
-		});
-		await storage.reload();
-		return storage;
-	}
-	const dbPath = getAgentDbPath(agentDir);
-	const storage = await AuthStorage.create(dbPath, {
-		configValueResolver: resolveConfigValue,
-		sourceLabel: `local ${dbPath}`,
-	});
-	await storage.reload();
-	return storage;
+export async function discoverAuthStorage(agentDir: string = getAgentDir()): Promise<AuthStorage> {
+	return discoverAuthStorageFromConfig(agentDir);
 }
 /**
@@ -795,7 +720,7 @@ export async function discoverContextFiles(
 export async function discoverPromptTemplates(cwd?: string, agentDir?: string): Promise<PromptTemplate[]> {
 	return await loadPromptTemplatesInternal({
 		cwd: cwd ?? getProjectDir(), // omitted cwd falls back to getProjectDir()
-		agentDir: agentDir ?? getDefaultAgentDir(),
+		agentDir: agentDir ?? getAgentDir(), // omitted agentDir falls back to getAgentDir()
 	});
 }
@@ -811,7 +736,7 @@ export async function discoverSlashCommands(cwd?: string): Promise<FileSlashComm
  */
 export async function discoverCustomTSCommands(cwd?: string, agentDir?: string): Promise<CustomCommandsLoadResult> {
 	const resolvedCwd = cwd ?? getProjectDir();
-	const resolvedAgentDir = agentDir ?? getDefaultAgentDir();
+	const resolvedAgentDir = agentDir ?? getAgentDir();
 	return loadCustomCommandsInternal({
 		cwd: resolvedCwd,
@@ -837,6 +762,7 @@ export interface BuildSystemPromptOptions {
 	skills?: Skill[];
 	contextFiles?: Array<{ path: string; content: string }>;
 	cwd?: string;
+	customPrompt?: string;
 	appendPrompt?: string;
 	inlineToolDescriptors?: boolean;
 }
@@ -850,6 +776,7 @@ export interface BuildSystemPromptOptions {
 export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}): Promise<BuildSystemPromptResult> {
 	return await buildSystemPromptInternal({
 		cwd: options.cwd,
+		customPrompt: options.customPrompt,
 		skills: options.skills,
 		contextFiles: options.contextFiles,
 		appendSystemPrompt: options.appendPrompt,
@@ -1112,7 +1039,7 @@ function buildMCPPromptCommands(manager: MCPManager): LoadedCustomCommand[] {
  */
 export async function createAgentSession(options: CreateAgentSessionOptions = {}): Promise<CreateAgentSessionResult> {
 	const cwd = options.cwd ?? getProjectDir();
-	const agentDir = options.agentDir ?? getDefaultAgentDir();
+	const agentDir = options.agentDir ?? getAgentDir();
 	const eventBus = options.eventBus ?? new EventBus();
 	registerSshCleanup();
@@ -1124,6 +1051,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	const modelRegistry =
 		options.modelRegistry ??
 		new ModelRegistry(options.authStorage ?? (await logger.time("discoverModels", discoverAuthStorage, agentDir)));
+	// Track whether we internally created the authStorage so we can close it
+	// if construction fails before the session takes ownership.
+	const ownsAuthStorage = !options.authStorage && !options.modelRegistry;
 	const authStorage = modelRegistry.authStorage;
 	if (options.authStorage && options.authStorage !== authStorage) {
 		throw new Error(
@@ -2201,16 +2131,22 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			// Owned/in-band tool dialects (non-native) require the catalog as `# Tool:`
 			// sections; native tool calling lets the compact name list suffice.
 			const nativeTools = resolveDialect(settings.get("tools.format"), agent?.state.model ?? model) === undefined;
+			if (options.appendSystemPrompt) {
+				appendPrompt = appendPrompt
+					? `${appendPrompt}\n\n${options.appendSystemPrompt}`
+					: options.appendSystemPrompt;
+			}
 			const defaultPrompt = await buildSystemPromptInternal({
 				cwd,
+				resolvedCustomPrompt: options.customSystemPrompt,
 				skills,
 				contextFiles,
 				tools: promptTools,
 				toolNames,
 				rules: rulebookRules,
 				alwaysApplyRules,
+				resolvedAppendSystemPrompt: appendPrompt,
 				skillsSettings: settings.getGroup("skills"),
-				appendSystemPrompt: appendPrompt,
 				inlineToolDescriptors,
 				nativeTools,
 				intentField,
@@ -2475,6 +2411,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 				? undefined
 				: serviceTierSetting;
+		// One-shot launch-latency marker: fired the first time the loop dispatches
+		// a chat request to the provider transport. See onFirstChatDispatch.
+		let notifyFirstChatDispatch = options.onFirstChatDispatch;
 		agent = new Agent({
 			initialState: {
 				systemPrompt,
@@ -2508,6 +2447,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			getToolContext: tc => toolContextStore.getContext(tc),
 			getApiKey: requestModel => modelRegistry.resolver(requestModel, agent.sessionId),
 			streamFn: (streamModel, context, streamOptions) => {
+				if (notifyFirstChatDispatch) {
+					const cb = notifyFirstChatDispatch;
+					notifyFirstChatDispatch = undefined;
+					try {
+						cb();
+					} catch (err) {
+						logger.warn("onFirstChatDispatch hook threw", {
+							error: err instanceof Error ? err.message : String(err),
+						});
+					}
+				}
 				const openrouterRoutingPreset = settings.get("providers.openrouterVariant");
 				const openrouterVariant =
 					openrouterRoutingPreset && openrouterRoutingPreset !== "default" ? openrouterRoutingPreset : undefined;
@@ -2907,6 +2857,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 					await asyncJobManager.dispose({ timeoutMs: 3_000 });
 				}
 				await disposeKernelSessionsByOwner(evalKernelOwnerId);
+				if (ownsAuthStorage) authStorage.close();
 			}
 		} catch (cleanupError) {
 			logger.warn("Failed to clean up createAgentSession resources after startup error", {

package/src/session/agent-session.ts CHANGED Viewed

@@ -104,6 +104,7 @@ import {
 	streamSimple,
 } from "@oh-my-pi/pi-ai";
 import { stripToolDescriptions } from "@oh-my-pi/pi-ai/utils/schema";
+import { THINKING_LOOP_ERROR_MARKER } from "@oh-my-pi/pi-ai/utils/thinking-loop";
 import { getSupportedEfforts } from "@oh-my-pi/pi-catalog/model-thinking";
 import { modelsAreEqual } from "@oh-my-pi/pi-catalog/models";
 import { MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
@@ -205,6 +206,7 @@ import type { HindsightSessionState } from "../hindsight/state";
 import { type LocalProtocolOptions, resolveLocalUrlToPath } from "../internal-urls";
 import { IrcBus, type IrcMessage } from "../irc/bus";
 import { resolveMemoryBackend } from "../memory-backend";
+import { shutdownMnemopiEmbedClient } from "../mnemopi/embed-client";
 import { getMnemopiSessionState, type MnemopiSessionState, setMnemopiSessionState } from "../mnemopi/state";
 import { containsOrchestrate, ORCHESTRATE_NOTICE } from "../modes/orchestrate";
 import { getCurrentThemeName, theme } from "../modes/theme/theme";
@@ -3860,7 +3862,11 @@ export class AgentSession {
 		if (event.type === "agent_start") {
 			this.#turnIndex = 0;
 			await this.#extensionRunner.emit({ type: "agent_start" });
-		} else if (event.type === "agent_end") {
+			return;
+		}
+		if (!this.#extensionRunner.hasHandlers(event.type)) return;
+		if (event.type === "agent_end") {
 			// `agent_end` extension notification is emitted from the settled
 			// agent_end maintenance path so `session_stop` control hooks are not
 			// blocked by unrelated notification-only work.
@@ -4204,6 +4210,11 @@ export class AgentSession {
 		hindsightState?.dispose();
 		const mnemopiState = setMnemopiSessionState(this, undefined);
 		await mnemopiState?.dispose();
+		// Tear down the embeddings subprocess AFTER mnemopi state.dispose:
+		// consolidate-on-dispose may still call `embed()` to store the final
+		// memories, and that round-trips through the worker we are about to
+		// hard-kill (issue #3031).
+		await shutdownMnemopiEmbedClient();
 		this.#disconnectFromAgent();
 		if (this.#unsubscribeAppendOnly) {
 			this.#unsubscribeAppendOnly();
@@ -7645,13 +7656,24 @@ export class AgentSession {
 			// both take the summarizer path (the latter loudly).
 			const wantsSnapcompact =
 				compactionPrep.kind !== "fromHook" && effectiveSettings.strategy === "snapcompact" && !customInstructions;
-			const snapcompactReady = wantsSnapcompact && this.model.input.includes("image");
+			let snapcompactReady = wantsSnapcompact && this.model.input.includes("image");
 			if (wantsSnapcompact && !snapcompactReady) {
 				this.emitNotice(
 					"warning",
 					`snapcompact needs a vision-capable model (${this.model.id} is text-only) — using an LLM summary instead`,
 					"compaction",
 				);
+			} else if (snapcompactReady) {
+				const text = snapcompact.serializeConversation(convertToLlm(preparation.messagesToSummarize));
+				const renderScan = snapcompact.scanRenderability(text);
+				if (!renderScan.isSafe) {
+					this.emitNotice(
+						"warning",
+						`snapcompact disabled: high non-ASCII rate detected (${(renderScan.unrenderableRatio * 100).toFixed(1)}%). Falling back to an LLM summary to prevent data loss.`,
+						"compaction",
+					);
+					snapcompactReady = false;
+				}
 			}
 			let summary: string;
@@ -9962,6 +9984,7 @@ export class AgentSession {
 		if (this.#isProviderErrorFinishReasonBeforeToolUse(message)) return true;
 		if (this.#isMalformedFunctionCallError(message)) return true;
 		if (this.#hasReplayUnsafeToolOutput(message)) return false;
+		if (message.errorMessage.includes(THINKING_LOOP_ERROR_MARKER)) return true;
 		if (this.#isStaleOpenAIResponsesReplayError(message)) return true;
 		const err = message.errorMessage;

package/src/session/agent-storage.ts CHANGED Viewed

@@ -247,6 +247,20 @@ FROM model_usage_legacy
 			{ cause: lastError },
 		);
 	}
+	/** @internal Test-only reset: closes every storage in `instances` via #close() (which also closes its database), then clears `instances`. */
+	static resetInstance(): void {
+		for (const storage of instances.values()) storage.#close(); // NOTE(review): a throw here skips the remaining storages and the clear() below
+		instances.clear();
+	}
+	#close(): void { // Releases this storage's statements, then its auth store; the only caller visible here is resetInstance()
+		this.#listSettingsStmt.finalize();
+		this.#upsertModelUsageStmt.finalize();
+		this.#listModelUsageStmt.finalize();
+		// Order matters: SqliteAuthCredentialStore.close() finalizes its own statements
+		// and closes the shared #db handle, so it must run after our statements finalize.
+		this.#authStore.close();
+	}
 	/**
 	 * Reads legacy settings persisted in the agent.db `settings` table.