@oh-my-pi/pi-coding-agent 16.1.1 → 16.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/CHANGELOG.md +50 -0
  2. package/dist/cli.js +3090 -3115
  3. package/dist/types/cli/bench-cli.d.ts +2 -1
  4. package/dist/types/config/model-resolver.d.ts +3 -3
  5. package/dist/types/config/settings-schema.d.ts +1 -1
  6. package/dist/types/main.d.ts +2 -0
  7. package/dist/types/mnemopi/embed-client.d.ts +70 -0
  8. package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
  9. package/dist/types/mnemopi/embed-worker.d.ts +12 -0
  10. package/dist/types/mnemopi/state.d.ts +9 -1
  11. package/dist/types/modes/components/assistant-message.d.ts +12 -0
  12. package/dist/types/modes/components/welcome.d.ts +1 -1
  13. package/dist/types/sdk.d.ts +19 -2
  14. package/dist/types/session/agent-storage.d.ts +2 -0
  15. package/dist/types/session/auth-broker-config.d.ts +34 -6
  16. package/dist/types/session/history-storage.d.ts +1 -1
  17. package/dist/types/system-prompt.d.ts +5 -1
  18. package/dist/types/task/executor.d.ts +10 -0
  19. package/dist/types/tools/find.d.ts +0 -2
  20. package/dist/types/tools/image-gen.d.ts +2 -2
  21. package/dist/types/tools/search.d.ts +3 -3
  22. package/dist/types/utils/image-loading.d.ts +1 -1
  23. package/dist/types/utils/ipc.d.ts +22 -0
  24. package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
  25. package/package.json +12 -12
  26. package/scripts/measure-prompt-tokens.ts +63 -0
  27. package/src/cli/bench-cli.ts +64 -3
  28. package/src/cli/startup-cwd.ts +3 -13
  29. package/src/cli.ts +8 -0
  30. package/src/commands/token.ts +52 -33
  31. package/src/config/append-only-context-mode.ts +45 -0
  32. package/src/config/model-discovery.ts +3 -0
  33. package/src/config/model-registry.ts +21 -3
  34. package/src/config/model-resolver.ts +31 -8
  35. package/src/config/settings-schema.ts +1 -1
  36. package/src/cursor.ts +1 -1
  37. package/src/debug/raw-sse-buffer.ts +31 -10
  38. package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
  39. package/src/eval/py/prelude.py +1 -1
  40. package/src/export/html/tool-views.generated.js +1 -1
  41. package/src/extensibility/extensions/runner.ts +8 -2
  42. package/src/internal-urls/docs-index.generated.txt +1 -1
  43. package/src/lsp/client.ts +24 -0
  44. package/src/main.ts +29 -9
  45. package/src/mnemopi/backend.ts +49 -3
  46. package/src/mnemopi/embed-client.ts +401 -0
  47. package/src/mnemopi/embed-protocol.ts +35 -0
  48. package/src/mnemopi/embed-worker.ts +113 -0
  49. package/src/mnemopi/state.ts +29 -1
  50. package/src/modes/components/assistant-message.ts +86 -0
  51. package/src/modes/components/custom-editor.ts +1 -1
  52. package/src/modes/components/model-selector.ts +2 -2
  53. package/src/modes/components/tips.txt +2 -1
  54. package/src/modes/components/welcome.ts +87 -9
  55. package/src/modes/controllers/event-controller.ts +9 -1
  56. package/src/modes/controllers/selector-controller.ts +2 -2
  57. package/src/modes/theme/theme.ts +69 -0
  58. package/src/prompts/system/personalities/default.md +8 -16
  59. package/src/prompts/system/system-prompt.md +101 -115
  60. package/src/prompts/tools/ast-edit.md +10 -12
  61. package/src/prompts/tools/ast-grep.md +14 -18
  62. package/src/prompts/tools/bash.md +19 -21
  63. package/src/prompts/tools/browser.md +24 -24
  64. package/src/prompts/tools/checkpoint.md +0 -1
  65. package/src/prompts/tools/debug.md +11 -15
  66. package/src/prompts/tools/eval.md +27 -27
  67. package/src/prompts/tools/find.md +6 -10
  68. package/src/prompts/tools/github.md +11 -15
  69. package/src/prompts/tools/goal.md +0 -7
  70. package/src/prompts/tools/inspect-image.md +0 -1
  71. package/src/prompts/tools/irc.md +15 -24
  72. package/src/prompts/tools/job.md +5 -8
  73. package/src/prompts/tools/learn.md +2 -2
  74. package/src/prompts/tools/lsp.md +27 -30
  75. package/src/prompts/tools/manage-skill.md +4 -4
  76. package/src/prompts/tools/read.md +21 -23
  77. package/src/prompts/tools/replace.md +0 -1
  78. package/src/prompts/tools/resolve.md +4 -9
  79. package/src/prompts/tools/rewind.md +1 -1
  80. package/src/prompts/tools/search.md +8 -10
  81. package/src/prompts/tools/task.md +33 -38
  82. package/src/prompts/tools/todo.md +14 -18
  83. package/src/prompts/tools/web-search.md +0 -4
  84. package/src/prompts/tools/write.md +1 -1
  85. package/src/sdk.ts +53 -102
  86. package/src/session/agent-session.ts +25 -2
  87. package/src/session/agent-storage.ts +14 -0
  88. package/src/session/auth-broker-config.ts +37 -76
  89. package/src/session/history-storage.ts +13 -1
  90. package/src/session/session-history-format.ts +1 -1
  91. package/src/session/session-manager.ts +33 -6
  92. package/src/stt/asr-client.ts +2 -7
  93. package/src/system-prompt.ts +28 -8
  94. package/src/task/executor.ts +57 -0
  95. package/src/task/index.ts +15 -1
  96. package/src/tiny/title-client.ts +2 -7
  97. package/src/tools/browser.ts +1 -1
  98. package/src/tools/eval.ts +1 -1
  99. package/src/tools/find.ts +4 -17
  100. package/src/tools/image-gen.ts +4 -8
  101. package/src/tools/memory-edit.ts +1 -1
  102. package/src/tools/render-utils.ts +4 -1
  103. package/src/tools/search.ts +5 -5
  104. package/src/tts/tts-client.ts +2 -7
  105. package/src/utils/image-loading.ts +12 -2
  106. package/src/utils/ipc.ts +38 -0
  107. package/src/web/search/providers/perplexity-auth.ts +133 -0
  108. package/src/web/search/providers/perplexity.ts +2 -125
@@ -1,24 +1,22 @@
1
- Searches files using powerful regex matching.
1
+ Searches files using regex.
2
2
 
3
3
  <instruction>
4
- - Supports Rust regex syntax (RE2-style — no lookaround or backreferences). Use line anchors or post-filters instead of (?!…)/(?<!…)
5
- - `paths` accepts either one string or an array of files, directories, globs, or internal URLs. Optional: when omitted or empty it searches the workspace root (`.`). Prefer scoping to specific paths when you know them.
6
- - For multiple targets, pass an array with one target per element: `["src", "tests"]`.
7
- - Cross-line patterns are detected from literal `\n` or escaped `\\n` in `pattern`
4
+ - Rust regex (RE2-style) — no lookaround/backreferences; use line anchors or post-filters instead of (?!…)/(?<!…).
5
+ - `paths`: SHOULD scope to known paths (e.g. `["src","tests"]`).
6
+ - Cross-line patterns detected from literal `\n` or `\\n` in `pattern`.
8
7
  </instruction>
9
8
 
10
9
  <output>
11
10
  {{#if IS_HL_MODE}}
12
- - Text output emits a file snapshot tag header per matched file plus numbered lines: `[src/login.ts#1A2B]`, `*42:if (user.id) {` (match), ` 43:return user;` (context). Copy the header for anchored edits; ops use bare line numbers.
11
+ - Per matched file: snapshot tag header + numbered lines: `[src/login.ts#1A2B]`, `*42:if (user.id) {` (match), ` 43:return user;` (context). Copy header for anchored edits; ops use bare line numbers.
13
12
  {{else}}
14
13
  {{#if IS_LINE_NUMBER_MODE}}
15
- - Text output is line-number-prefixed
14
+ - Output is line-number-prefixed.
16
15
  {{/if}}
17
16
  {{/if}}
18
17
  </output>
19
18
 
20
19
  <critical>
21
- - You MUST use the built-in `search` tool for any content search. NEVER shell out to `grep`, `rg`, `ripgrep`, `ag`, `ack`, `git grep`, `awk`, `sed`-for-search, or any other CLI search via Bash — even for a single match, even "just to check quickly", even piped through other commands.
22
- - Bash `grep`/`rg` loses `.gitignore` semantics, bypasses result limits, and wastes tokens. The `search` tool is faster, structured, and already wired into the workspace — there is no scenario where Bash search is preferable.
23
- - If the search is open-ended, requiring multiple rounds, you MUST use the Task tool with the explore subagent instead of chaining `search` calls yourself.
20
+ - MUST use built-in `search` for any content search. NEVER shell out to `grep`, `rg`, `ripgrep`, `ag`, `ack`, `git grep`, `awk`, `sed`-for-search, or any CLI search via Bash — not even for one match or a quick check.
21
+ - Open-ended search needing multiple rounds? MUST use the Task tool with the explore subagent, NOT chained `search` calls.
24
22
  </critical>
@@ -1,73 +1,68 @@
1
- {{#if asyncEnabled}}{{#if batchEnabled}}Spawns subagents to work in the background — one per `tasks[]` item; a single spawn is a one-item batch.{{else}}Spawns ONE subagent per call to work in the background.{{/if}}
1
+ {{#if asyncEnabled}}{{#if batchEnabled}}Spawns subagents in the background — one per `tasks[]` item; single spawn = one-item batch.{{else}}Spawns ONE subagent per call in the background.{{/if}}
2
2
 
3
- - Spawning is non-blocking: the call returns immediately with the agent id{{#if batchEnabled}}s{{/if}} and job id{{#if batchEnabled}}s{{/if}}; each result is delivered automatically when that agent yields.
4
- - Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. To launch several subagents, you MUST batch them into a single call's `tasks[]` — they share `context` once instead of duplicating it. Separate `task` calls in one message are ONLY for spawns needing a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}. Concurrency is bounded at {{MAX_CONCURRENCY}} running subagents per session.
5
- - If genuinely blocked on a result, wait with `job poll`; otherwise keep working. `job cancel` terminates a task and **cannot carry a message** — only for stalled/abandoned work.
6
- {{else}}{{#if batchEnabled}}Runs subagents synchronously — one per `tasks[]` item; a single spawn is a one-item batch.{{else}}Runs ONE subagent synchronously per call.{{/if}}
3
+ - Non-blocking: returns agent id{{#if batchEnabled}}s{{/if}} + job id{{#if batchEnabled}}s{{/if}} immediately; each result auto-delivered on yield.
4
+ - Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. MUST batch into one `tasks[]` (share `context` once). Separate `task` calls ONLY for a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}.
5
+ - Blocked on a result? `job poll`; else keep working. `job cancel` kills a task, **cannot carry a message** — only for stalled/abandoned work.
6
+ {{else}}{{#if batchEnabled}}Runs subagents synchronously — one per `tasks[]` item; single spawn = one-item batch.{{else}}Runs ONE subagent synchronously per call.{{/if}}
7
7
 
8
- - Spawning is blocking: the call returns only after the agent{{#if batchEnabled}}s{{/if}} finish; results arrive inline.
9
- - Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. To launch several subagents, you MUST batch them into a single call's `tasks[]` — they share `context` once instead of duplicating it. Separate `task` calls in one message are ONLY for spawns needing a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}. Concurrency is bounded at {{MAX_CONCURRENCY}} running subagents per session.
8
+ - Blocking: returns only after the agent{{#if batchEnabled}}s{{/if}} finish; results arrive inline.
9
+ - Parallelism = {{#if batchEnabled}}multiple `tasks[]` items in ONE call. MUST batch into one `tasks[]` (share `context` once). Separate `task` calls ONLY for a different `agent` type or unrelated `context`{{else}}multiple `task` calls in one assistant message{{/if}}.
10
10
  {{/if}}
11
11
  {{#if ircEnabled}}
12
- - Coordinate with agents via `irc` using their ids. Agents reach you and their siblings live the same way.
12
+ - Coordinate via `irc` by agent id; agents reach you + siblings live.
13
13
  {{/if}}
14
14
 
15
- <lifecycle>
16
- - Finished agents stay alive: `idle` first, then `parked` after a TTL.{{#if ircEnabled}} Both remain addressable and revivable: messaging one via `irc` wakes it and runs your message as a follow-up turn. **Prefer messaging an agent that already holds the relevant context over spawning fresh** — check `irc` op:"list" for candidates.{{/if}}
17
- - `history://<id>` is the agent's transcript; `agent://<id>` its latest output artifact.
18
- </lifecycle>
19
-
20
15
  <parameters>
21
16
  - `agent`: agent type to spawn
22
17
  {{#if batchEnabled}}
23
- - `context`: shared background prepended to every assignment — goal, constraints, shared contract (see context-fmt); REQUIRED, session-specific only
24
- - `tasks`: tasks to spawn — one subagent per item, all in parallel:
25
- - `assignment`: complete self-contained instructions; one-liners and missing acceptance criteria are PROHIBITED
26
- - `id`: stable agent id, CamelCase, ≤32 chars; generated when omitted
18
+ - `context`: background prepended to every assignment — goal, constraints, contract (see context-fmt); REQUIRED, session-specific only
19
+ - `tasks`: one subagent per item, all in parallel:
20
+ - `assignment`: complete self-contained instructions; one-liners / missing acceptance criteria PROHIBITED
21
+ - `id`: stable agent id, CamelCase, ≤32 chars; auto when omitted
27
22
  - `description`: UI label only — subagent never sees it
28
- - `role`: specialist identity this subagent embodies (e.g. "Auth-flow security reviewer") — sets its system-prompt persona and roster display name; tailor every spawn rather than cloning a generic worker
23
+ - `role`: specialist identity (e.g. "Auth-flow security reviewer") — sets system-prompt persona + roster name
29
24
  {{#if isolationEnabled}}
30
- - `isolated`: run this spawn in an isolated env; returns patches. Isolated agents are torn down at completion — not addressable afterwards
25
+ - `isolated`: run spawn in isolated env; returns patches. Torn down at completion — not addressable after
31
26
  {{/if}}
32
27
  {{else}}
33
- - `id`: stable agent id, CamelCase, ≤32 chars; generated when omitted
28
+ - `id`: stable agent id, CamelCase, ≤32 chars; auto when omitted
34
29
  - `description`: UI label only — subagent never sees it
35
- - `role`: specialist identity this subagent embodies (e.g. "Auth-flow security reviewer") — sets its system-prompt persona and roster display name; tailor every spawn rather than cloning a generic worker
36
- - `assignment`: complete self-contained instructions; one-liners and missing acceptance criteria are PROHIBITED
30
+ - `role`: specialist identity (e.g. "Auth-flow security reviewer") — sets system-prompt persona + roster name
31
+ - `assignment`: complete self-contained instructions; one-liners / missing acceptance criteria PROHIBITED
37
32
  {{#if isolationEnabled}}
38
- - `isolated`: run in isolated env; returns patches. Isolated agents are torn down at completion — not addressable afterwards
33
+ - `isolated`: run in isolated env; returns patches. Torn down at completion — not addressable after
39
34
  {{/if}}
40
35
  {{/if}}
41
36
  </parameters>
42
37
 
43
38
  <rules>
44
- - **Maximize fan-out.** Issue the widest {{#if batchEnabled}}`tasks[]` batch{{else}}set of parallel `task` calls{{/if}} the work decomposes into. NEVER serialize work that could run concurrently.
45
- - **Subagents do not verify, lint, or format.** Every assignment MUST instruct the subagent to skip all gates, formatters, and project-wide build/test/lint. You run them once at the end across the union of changed files.
39
+ - **Maximize fan-out.** Widest {{#if batchEnabled}}`tasks[]` batch{{else}}set of parallel `task` calls{{/if}} the work decomposes into. NEVER serialize parallelizable work.
40
+ - **Subagents do not verify, lint, or format.** Each assignment MUST tell the subagent: skip all gates, formatters, project-wide build/test/lint. You run them once at the end across changed files.
46
41
  - No globs, no "update all", no package-wide scope. Fan out.
47
- - **Tailor every spawn with a `role`.** A role naming the specialist (e.g. "Parser edge-case tester", "SSE backpressure specialist") makes a sharper agent than a bare generic `task`/`quick_task` worker; decompose into named specialists, never clones of one generic worker. A role-less generic spawn is the exception.
48
- - NEVER slow down or serialize because tasks might overlap on some files. Agents resolve collisions among themselves in real time.
49
- - Subagents have no conversation history. Every fact, file path, and direction they need MUST be explicit in {{#if batchEnabled}}`context` or the item's `assignment`{{else}}the `assignment`{{/if}}.
42
+ - **Tailor every spawn with a `role`.** A named specialist (e.g. "Parser edge-case tester", "SSE backpressure specialist") beats a generic `task`/`quick_task` worker; decompose into specialists, never clones. Role-less spawn is the exception.
43
+ - NEVER serialize over possible file overlap. Agents self-resolve collisions in real time.
44
+ - Subagents have no conversation history. Every fact, file path, direction MUST be explicit in {{#if batchEnabled}}`context` or the item's `assignment`{{else}}the `assignment`{{/if}}.
50
45
  {{#if batchEnabled}}
51
- - **Shared background** lives in `context` once — never duplicated across assignments. Pass large payloads via `local://<path>` URIs, not inline.
46
+ - **Shared background** in `context` once, never per assignment. Large payloads via `local://<path>` URIs, not inline.
52
47
  {{else}}
53
- - **Shared background**: write it ONCE to a `local://` file (e.g. `local://ctx.md`) and reference that path in each assignment. Pass large payloads via `local://<path>` URIs, not inline.
48
+ - **Shared background**: write ONCE to a `local://` file (e.g. `local://ctx.md`), reference it in each assignment. Large payloads via `local://<path>` URIs, not inline.
54
49
  {{/if}}
55
- - Prefer agents that investigate **and** edit in one pass; only spin a read-only discovery step when affected files are genuinely unknown.
56
- - **Read-only agents**: Agents tagged READ-ONLY (e.g. `explore`) have no edit/write/command tools. NEVER hand them an assignment that requires changing files or running commands. Use them to investigate and report back; do the edits yourself or delegate to a writing agent (`task`, `oracle`, `designer`).
57
- - **No reasoning offload**: NEVER offload reasoning, analysis, design, or decision-making to `quick_task` or `explore` — they run minimal-effort / small models for mechanical lookups and data collection only. Keep judgment and synthesis in your own context; delegate hard thinking to `task`, `plan`, or `oracle`.
50
+ - Prefer agents that investigate **and** edit in one pass; spin a read-only discovery step only when affected files unknown.
51
+ - **Read-only agents** (e.g. `explore`): no edit/write/command tools. NEVER assign them file changes or commands. Use to investigate + report; delegate edits to a writing agent (`task`/`oracle`/`designer`) or do them yourself.
52
+ - **No reasoning offload**: NEVER route reasoning, analysis, design, or decisions to `quick_task`/`explore` — minimal-effort / small models for mechanical lookups + data collection only. Keep judgment + synthesis in your own context; delegate hard thinking to `task`/`plan`/`oracle`.
58
53
  </rules>
59
54
 
60
55
  <parallelization>
61
56
  {{#if ircEnabled}}
62
- Test: can task B run correctly without seeing A's output? If no, sequence A → B — **unless** B can reasonably ask A for the missing piece over `irc`. Live coordination beats a serial waterfall when the contract is small and easy to describe in a DM.
63
- Still sequence when one task produces a large, evolving contract (generated types, schema migration, core module API) the other consumes wholesale — IRC round-trips do not replace a finished artifact.
64
- Parallel when tasks touch disjoint files, are independent refactors/tests, or only need occasional clarification that can be resolved peer-to-peer.
57
+ Test: can B run without A's output? If no, sequence A → B — **unless** B can ask A over `irc`. Live coordination beats a waterfall when the contract is small + DM-able.
58
+ Still sequence when a task produces a large evolving contract (generated types, schema migration, core module API) consumed wholesale — IRC round-trips don't replace a finished artifact.
59
+ Parallel when tasks touch disjoint files, are independent refactors/tests, or need only occasional peer clarification.
65
60
  {{else}}
66
- Test: can task B run correctly without seeing A's output? If no, sequence A → B.
61
+ Test: can B run without A's output? If no, sequence A → B.
67
62
  Sequential when one task produces a contract (types, API, schema, core module) the other consumes.
68
63
  Parallel when tasks touch disjoint files or are independent refactors/tests.
69
64
  {{/if}}
70
- {{#if ircEnabled}}Sequenced follow-ups SHOULD message the agent that produced the prerequisite — it already holds the context.{{/if}}
65
+ {{#if ircEnabled}}Sequenced follow-ups SHOULD message the prerequisite's producer — it holds the context.{{/if}}
71
66
  </parallelization>
72
67
 
73
68
  {{#if batchEnabled}}
@@ -1,43 +1,39 @@
1
- **Tasks are referenced by their verbatim content string, not by any auto-generated ID. There is no "task-1"/"task-N" identifier — the tool never emits one. Pass the task's content text in the `task` field.**
1
+ **Tasks referenced by verbatim content string, NEVER an auto-generated ID — no "task-1"/"task-N" exists. Pass the content text in the `task` field.**
2
2
 
3
- Manages a phased task list. Pass `ops`: a flat array of operations.
4
- The next pending task is auto-promoted to `in_progress` after each completion.
5
- Allowed `op` values are only `init`, `start`, `done`, `drop`, `rm`, `append`, and `view`. `pending` is a task status, not an `op`; leave not-yet-started tasks implicit in `init`/`append` lists.
3
+ Manages a phased task list. Pass `ops`: flat array of operations. Next pending task auto-promotes to `in_progress` on each completion. `pending` is a status, not an `op` — leave not-yet-started tasks implicit in `init`/`append`.
6
4
 
7
5
  ## Operations
8
6
 
9
7
  |`op`|Required fields|Effect|
10
8
  |---|---|---|
11
- |`init`|`list: [{phase, items: string[]}]`|Initialize the full list (replaces any existing list)|
9
+ |`init`|`list: [{phase, items: string[]}]`|Initialize full list (replaces existing)|
12
10
  |`init`|`items: string[]`|Flattened single-phase init|
13
11
  |`start`|`task`|Mark in progress|
14
12
  |`done`|`task` or `phase`|Mark completed|
15
13
  |`drop`|`task` or `phase`|Mark abandoned|
16
- |`rm`|`task` or `phase` (optional)|Remove task or phase's tasks; omit both to clear the entire list|
14
+ |`rm`|`task` or `phase` (optional)|Remove task or phase's tasks; omit both to clear the list|
17
15
  |`append`|`phase`, `items: string[]`|Append tasks to `phase`; lazily creates phase|
18
- |`view`|—|Read-only: echo the current list without modifying it|
16
+ |`view`|—|Read-only: echo the list, no modify|
19
17
 
20
18
  ## Anatomy
21
- - **Task content**: 5–10 words, what is being done, not how. Used as the task identifier — unique.
22
- - **Phase name**: short noun phrase (e.g. `Foundation`, `Auth`, `Verification`). Used as the phase identifier — unique. Do not add prefixes like `1.`, `A)`, `Phase 1:`, etc.
19
+ - **Task content**: 5–10 words; what, not how. Unique identifier.
20
+ - **Phase name**: short noun phrase (e.g. `Foundation`, `Auth`, `Verification`). Unique identifier. NEVER prefix `1.`, `A)`, `Phase 1:`.
23
21
 
24
22
  ## Rules
25
23
  - Mark tasks done immediately after finishing.
26
24
  - Complete phases in order.
27
- - On blockers, `append` a new task to the active phase to unblock yourself, or `drop`.
28
- - `task` and `phase` fields reference content/name verbatim; keep them stable once introduced.
29
- - Lost track of exact task text? `view` echoes the full list — NEVER guess content from memory; a mismatched `task` string is an error.
25
+ - Blocked? `append` a task to the active phase to unblock, or `drop`.
26
+ - Keep `task`/`phase` strings stable once introduced.
27
+ - Lost the exact task text? `view` echoes the list — NEVER guess from memory; a mismatched `task` string is an error.
30
28
 
31
29
  ## When to create a list
32
30
  - Task requires 3+ distinct steps
33
31
  - User explicitly requests one
34
- - User provides a set of tasks to complete
32
+ - User provides a set of tasks
35
33
  - New instructions arrive mid-task — capture before proceeding
36
34
 
37
35
  <critical>
38
- When the user hands you a multi-step plan — a phased todo, a numbered or bulleted checklist, or "N bugs/items/tasks" to work through:
39
- - You MUST `init` the list with EVERY item as its own task before doing the work.
40
- - Enumerate all of them;
41
- - NEVER summarize the plan into fewer tasks, sample "the important ones", drop items, or rely on memory to track the rest.
42
- The entire point is to remember every one.
36
+ User hands you a multi-step plan — phased todo, numbered/bulleted checklist, or "N bugs/items/tasks":
37
+ - You MUST `init` the list with EVERY item as its own task before working.
38
+ - Enumerate all; NEVER summarize into fewer tasks, sample "the important ones", drop items, or track the rest from memory.
43
39
  </critical>
@@ -4,7 +4,3 @@ Searches the web for up-to-date information beyond knowledge cutoff.
4
4
  - You SHOULD prefer primary sources (papers, official docs) and corroborate key claims with multiple sources
5
5
  - You MUST include links for cited sources in the final response
6
6
  </instruction>
7
-
8
- <caution>
9
- Searches are performed automatically within a single API call—no pagination or follow-up requests needed.
10
- </caution>
@@ -8,7 +8,7 @@ Creates or overwrites file at specified path.
8
8
  </conditions>
9
9
 
10
10
  <critical>
11
- - You SHOULD use Edit tool for modifying existing files (more precise, preserves formatting)
11
+ - You SHOULD use Edit tool for modifying existing files
12
12
  - You NEVER create documentation files (*.md, README) unless explicitly requested
13
13
  - You NEVER use emojis unless requested
14
14
  </critical>
package/src/sdk.ts CHANGED
@@ -22,18 +22,7 @@ import {
22
22
  } from "@oh-my-pi/pi-ai/providers/openai-codex-responses";
23
23
  import { FALLBACK_DIALECT, preferredDialect } from "@oh-my-pi/pi-catalog/identity";
24
24
  import type { Component } from "@oh-my-pi/pi-tui";
25
- import {
26
- $env,
27
- $flag,
28
- getAgentDbPath,
29
- getAgentDir,
30
- getAuthBrokerSnapshotCachePath,
31
- getProjectDir,
32
- logger,
33
- postmortem,
34
- prompt,
35
- Snowflake,
36
- } from "@oh-my-pi/pi-utils";
25
+ import { $env, $flag, getAgentDir, getProjectDir, logger, postmortem, prompt, Snowflake } from "@oh-my-pi/pi-utils";
37
26
  import { INTENT_FIELD } from "@oh-my-pi/pi-wire";
38
27
  import { ADVISOR_READONLY_TOOL_NAMES, discoverWatchdogFiles } from "./advisor";
39
28
  import { type AsyncJob, AsyncJobManager } from "./async";
@@ -56,11 +45,6 @@ import { loadPromptTemplates as loadPromptTemplatesInternal, type PromptTemplate
56
45
  import { Settings, type SkillsSettings } from "./config/settings";
57
46
  import { CursorExecHandlers } from "./cursor";
58
47
  import "./discovery";
59
- import { AuthBrokerClient } from "@oh-my-pi/pi-ai/auth-broker/client";
60
- import { RemoteAuthCredentialStore } from "@oh-my-pi/pi-ai/auth-broker/remote-store";
61
- import { readAuthBrokerSnapshotCache, writeAuthBrokerSnapshotCache } from "@oh-my-pi/pi-ai/auth-broker/snapshot-cache";
62
- import { DEFAULT_SNAPSHOT_CACHE_TTL_MS, type SnapshotResponse } from "@oh-my-pi/pi-ai/auth-broker/types";
63
- import { resolveConfigValue } from "./config/resolve-config-value";
64
48
  import { initializeWithSettings } from "./discovery";
65
49
  import { disposeAllKernelSessions, disposeKernelSessionsByOwner } from "./eval/py/executor";
66
50
  import { defaultEvalSessionId } from "./eval/session-id";
@@ -119,8 +103,8 @@ import {
119
103
  SecretObfuscator,
120
104
  } from "./secrets";
121
105
  import { AgentSession } from "./session/agent-session";
122
- import { resolveAuthBrokerConfig } from "./session/auth-broker-config";
123
- import { AuthStorage } from "./session/auth-storage";
106
+ import { discoverAuthStorage as discoverAuthStorageFromConfig } from "./session/auth-broker-config";
107
+ import type { AuthStorage } from "./session/auth-storage";
124
108
  import {
125
109
  type CustomMessage,
126
110
  convertToLlm,
@@ -405,8 +389,12 @@ export interface CreateAgentSessionOptions {
405
389
  /** Models available for cycling (Ctrl+P in interactive mode) */
406
390
  scopedModels?: Array<{ model: Model; thinkingLevel?: ThinkingLevel }>;
407
391
 
408
- /** System prompt blocks. Array replaces default, function receives default blocks and returns final blocks. */
392
+ /** Provider-facing system prompt override. Replaces the fully rendered default blocks. */
409
393
  systemPrompt?: string | string[] | ((defaultPrompt: string[]) => string | string[]);
394
+ /** Already-loaded custom prompt text rendered through the bundled custom system prompt template. */
395
+ customSystemPrompt?: string;
396
+ /** Already-loaded text appended through the bundled system prompt templates. */
397
+ appendSystemPrompt?: string;
410
398
  /** Optional provider-facing session identifier for prompt caches and sticky auth selection.
411
399
  * Keeps persisted session files isolated while reusing provider-side caches. */
412
400
  providerSessionId?: string;
@@ -545,6 +533,16 @@ export interface CreateAgentSessionOptions {
545
533
  */
546
534
  telemetry?: AgentTelemetryConfig;
547
535
 
536
+ /**
537
+ * Fired once, when the agent loop hands its first request to the provider
538
+ * transport (i.e. the `streamFn` wrapper is first invoked). Used to measure
539
+ * subagent launch latency — the boundary between "session built" and "model
540
+ * call dispatched". This is the loop's dispatch point, slightly before the
541
+ * actual provider HTTP call (per-request prep, identical across all
542
+ * requests, follows it), which is the right granularity for launch timing.
543
+ */
544
+ onFirstChatDispatch?: () => void;
545
+
548
546
  /** Whether to auto-approve all tool calls (--auto-approve CLI flag). Default: false */
549
547
  autoApprove?: boolean;
550
548
  }
@@ -617,21 +615,6 @@ export {
617
615
 
618
616
  // Helper Functions
619
617
 
620
- function getDefaultAgentDir(): string {
621
- return getAgentDir();
622
- }
623
-
624
- function resolveSnapshotTtlMs(): number {
625
- const raw = process.env.OMP_AUTH_BROKER_SNAPSHOT_TTL_MS;
626
- if (raw === undefined) return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
627
- const value = raw.trim();
628
- if (value === "") return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
629
- const ttlMs = Number(value);
630
- if (Number.isFinite(ttlMs) && ttlMs >= 0) return ttlMs;
631
- logger.warn("Invalid OMP_AUTH_BROKER_SNAPSHOT_TTL_MS; using default", { value: raw });
632
- return DEFAULT_SNAPSHOT_CACHE_TTL_MS;
633
- }
634
-
635
618
  // Discovery Functions
636
619
 
637
620
  /**
@@ -644,70 +627,12 @@ function resolveSnapshotTtlMs(): number {
644
627
  * the client receives access tokens with `refresh = "__remote__"` and calls
645
628
  * back into the broker through the {@link AuthStorageOptions.refreshOAuthCredential}
646
629
  * override to re-mint access tokens when needed.
630
+ *
631
+ * Delegates to {@link ./session/auth-broker-config} so the TUI and the catalog
632
+ * generator share the same credential-discovery logic.
647
633
  */
648
- export async function discoverAuthStorage(agentDir: string = getDefaultAgentDir()): Promise<AuthStorage> {
649
- const brokerConfigPromise = resolveAuthBrokerConfig();
650
- const cachePath = getAuthBrokerSnapshotCachePath();
651
- // Warm the encrypted snapshot cache into the page cache while the broker
652
- // config resolves (it may shell out for a `!command` token). Decryption
653
- // needs the resolved token, so the real cache read cannot start earlier.
654
- void Bun.file(cachePath)
655
- .arrayBuffer()
656
- .catch(() => undefined);
657
- const brokerConfig = await brokerConfigPromise;
658
- if (brokerConfig) {
659
- const client = new AuthBrokerClient({ url: brokerConfig.url, token: brokerConfig.token });
660
- const ttlMs = resolveSnapshotTtlMs();
661
- const persist =
662
- ttlMs > 0
663
- ? (snapshot: SnapshotResponse): void => {
664
- void writeAuthBrokerSnapshotCache({
665
- path: cachePath,
666
- token: brokerConfig.token,
667
- url: brokerConfig.url,
668
- snapshot,
669
- }).catch(error => {
670
- logger.debug("auth-broker snapshot cache write failed", { error: String(error) });
671
- });
672
- }
673
- : undefined;
674
-
675
- let initialSnapshot: SnapshotResponse | undefined;
676
- if (ttlMs > 0) {
677
- initialSnapshot =
678
- (await readAuthBrokerSnapshotCache({
679
- path: cachePath,
680
- token: brokerConfig.token,
681
- url: brokerConfig.url,
682
- ttlMs,
683
- }).catch(error => {
684
- logger.debug("auth-broker snapshot cache read failed", { error: String(error) });
685
- return null;
686
- })) ?? undefined;
687
- }
688
- if (!initialSnapshot) {
689
- const initialResult = await client.fetchSnapshot();
690
- if (initialResult.status !== 200) throw new Error("Auth broker returned no initial snapshot");
691
- initialSnapshot = initialResult.snapshot;
692
- persist?.(initialSnapshot);
693
- }
694
- const store = new RemoteAuthCredentialStore({ client, initialSnapshot, onSnapshot: persist });
695
- // Refresh + usage hooks live on RemoteAuthCredentialStore; AuthStorage
696
- // discovers them automatically when no explicit option overrides them.
697
- const storage = new AuthStorage(store, {
698
- configValueResolver: resolveConfigValue,
699
- sourceLabel: `broker ${brokerConfig.url}`,
700
- });
701
- await storage.reload();
702
- return storage;
703
- }
704
- const dbPath = getAgentDbPath(agentDir);
705
- const storage = await AuthStorage.create(dbPath, {
706
- configValueResolver: resolveConfigValue,
707
- sourceLabel: `local ${dbPath}`,
708
- });
709
- await storage.reload();
710
- return storage;
634
+ export async function discoverAuthStorage(agentDir: string = getAgentDir()): Promise<AuthStorage> {
635
+ return discoverAuthStorageFromConfig(agentDir);
711
636
  }
712
637
 
713
638
  /**
@@ -795,7 +720,7 @@ export async function discoverContextFiles(
795
720
  export async function discoverPromptTemplates(cwd?: string, agentDir?: string): Promise<PromptTemplate[]> {
796
721
  return await loadPromptTemplatesInternal({
797
722
  cwd: cwd ?? getProjectDir(),
798
- agentDir: agentDir ?? getDefaultAgentDir(),
723
+ agentDir: agentDir ?? getAgentDir(),
799
724
  });
800
725
  }
801
726
 
@@ -811,7 +736,7 @@ export async function discoverSlashCommands(cwd?: string): Promise<FileSlashComm
811
736
  */
812
737
  export async function discoverCustomTSCommands(cwd?: string, agentDir?: string): Promise<CustomCommandsLoadResult> {
813
738
  const resolvedCwd = cwd ?? getProjectDir();
814
- const resolvedAgentDir = agentDir ?? getDefaultAgentDir();
739
+ const resolvedAgentDir = agentDir ?? getAgentDir();
815
740
 
816
741
  return loadCustomCommandsInternal({
817
742
  cwd: resolvedCwd,
@@ -837,6 +762,7 @@ export interface BuildSystemPromptOptions {
837
762
  skills?: Skill[];
838
763
  contextFiles?: Array<{ path: string; content: string }>;
839
764
  cwd?: string;
765
+ customPrompt?: string;
840
766
  appendPrompt?: string;
841
767
  inlineToolDescriptors?: boolean;
842
768
  }
@@ -850,6 +776,7 @@ export interface BuildSystemPromptOptions {
850
776
  export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}): Promise<BuildSystemPromptResult> {
851
777
  return await buildSystemPromptInternal({
852
778
  cwd: options.cwd,
779
+ customPrompt: options.customPrompt,
853
780
  skills: options.skills,
854
781
  contextFiles: options.contextFiles,
855
782
  appendSystemPrompt: options.appendPrompt,
@@ -1112,7 +1039,7 @@ function buildMCPPromptCommands(manager: MCPManager): LoadedCustomCommand[] {
1112
1039
  */
1113
1040
  export async function createAgentSession(options: CreateAgentSessionOptions = {}): Promise<CreateAgentSessionResult> {
1114
1041
  const cwd = options.cwd ?? getProjectDir();
1115
- const agentDir = options.agentDir ?? getDefaultAgentDir();
1042
+ const agentDir = options.agentDir ?? getAgentDir();
1116
1043
  const eventBus = options.eventBus ?? new EventBus();
1117
1044
 
1118
1045
  registerSshCleanup();
@@ -1124,6 +1051,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1124
1051
  const modelRegistry =
1125
1052
  options.modelRegistry ??
1126
1053
  new ModelRegistry(options.authStorage ?? (await logger.time("discoverModels", discoverAuthStorage, agentDir)));
1054
+ // Track whether we internally created the authStorage so we can close it
1055
+ // if construction fails before the session takes ownership.
1056
+ const ownsAuthStorage = !options.authStorage && !options.modelRegistry;
1127
1057
  const authStorage = modelRegistry.authStorage;
1128
1058
  if (options.authStorage && options.authStorage !== authStorage) {
1129
1059
  throw new Error(
@@ -2201,16 +2131,22 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2201
2131
  // Owned/in-band tool dialects (non-native) require the catalog as `# Tool:`
2202
2132
  // sections; native tool calling lets the compact name list suffice.
2203
2133
  const nativeTools = resolveDialect(settings.get("tools.format"), agent?.state.model ?? model) === undefined;
2134
+ if (options.appendSystemPrompt) {
2135
+ appendPrompt = appendPrompt
2136
+ ? `${appendPrompt}\n\n${options.appendSystemPrompt}`
2137
+ : options.appendSystemPrompt;
2138
+ }
2204
2139
  const defaultPrompt = await buildSystemPromptInternal({
2205
2140
  cwd,
2141
+ resolvedCustomPrompt: options.customSystemPrompt,
2206
2142
  skills,
2207
2143
  contextFiles,
2208
2144
  tools: promptTools,
2209
2145
  toolNames,
2210
2146
  rules: rulebookRules,
2211
2147
  alwaysApplyRules,
2148
+ resolvedAppendSystemPrompt: appendPrompt,
2212
2149
  skillsSettings: settings.getGroup("skills"),
2213
- appendSystemPrompt: appendPrompt,
2214
2150
  inlineToolDescriptors,
2215
2151
  nativeTools,
2216
2152
  intentField,
@@ -2475,6 +2411,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2475
2411
  ? undefined
2476
2412
  : serviceTierSetting;
2477
2413
 
2414
+ // One-shot launch-latency marker: fired the first time the loop dispatches
2415
+ // a chat request to the provider transport. See onFirstChatDispatch.
2416
+ let notifyFirstChatDispatch = options.onFirstChatDispatch;
2478
2417
  agent = new Agent({
2479
2418
  initialState: {
2480
2419
  systemPrompt,
@@ -2508,6 +2447,17 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2508
2447
  getToolContext: tc => toolContextStore.getContext(tc),
2509
2448
  getApiKey: requestModel => modelRegistry.resolver(requestModel, agent.sessionId),
2510
2449
  streamFn: (streamModel, context, streamOptions) => {
2450
+ if (notifyFirstChatDispatch) {
2451
+ const cb = notifyFirstChatDispatch;
2452
+ notifyFirstChatDispatch = undefined;
2453
+ try {
2454
+ cb();
2455
+ } catch (err) {
2456
+ logger.warn("onFirstChatDispatch hook threw", {
2457
+ error: err instanceof Error ? err.message : String(err),
2458
+ });
2459
+ }
2460
+ }
2511
2461
  const openrouterRoutingPreset = settings.get("providers.openrouterVariant");
2512
2462
  const openrouterVariant =
2513
2463
  openrouterRoutingPreset && openrouterRoutingPreset !== "default" ? openrouterRoutingPreset : undefined;
@@ -2907,6 +2857,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
2907
2857
  await asyncJobManager.dispose({ timeoutMs: 3_000 });
2908
2858
  }
2909
2859
  await disposeKernelSessionsByOwner(evalKernelOwnerId);
2860
+ if (ownsAuthStorage) authStorage.close();
2910
2861
  }
2911
2862
  } catch (cleanupError) {
2912
2863
  logger.warn("Failed to clean up createAgentSession resources after startup error", {
@@ -104,6 +104,7 @@ import {
104
104
  streamSimple,
105
105
  } from "@oh-my-pi/pi-ai";
106
106
  import { stripToolDescriptions } from "@oh-my-pi/pi-ai/utils/schema";
107
+ import { THINKING_LOOP_ERROR_MARKER } from "@oh-my-pi/pi-ai/utils/thinking-loop";
107
108
  import { getSupportedEfforts } from "@oh-my-pi/pi-catalog/model-thinking";
108
109
  import { modelsAreEqual } from "@oh-my-pi/pi-catalog/models";
109
110
  import { MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
@@ -205,6 +206,7 @@ import type { HindsightSessionState } from "../hindsight/state";
205
206
  import { type LocalProtocolOptions, resolveLocalUrlToPath } from "../internal-urls";
206
207
  import { IrcBus, type IrcMessage } from "../irc/bus";
207
208
  import { resolveMemoryBackend } from "../memory-backend";
209
+ import { shutdownMnemopiEmbedClient } from "../mnemopi/embed-client";
208
210
  import { getMnemopiSessionState, type MnemopiSessionState, setMnemopiSessionState } from "../mnemopi/state";
209
211
  import { containsOrchestrate, ORCHESTRATE_NOTICE } from "../modes/orchestrate";
210
212
  import { getCurrentThemeName, theme } from "../modes/theme/theme";
@@ -3860,7 +3862,11 @@ export class AgentSession {
3860
3862
  if (event.type === "agent_start") {
3861
3863
  this.#turnIndex = 0;
3862
3864
  await this.#extensionRunner.emit({ type: "agent_start" });
3863
- } else if (event.type === "agent_end") {
3865
+ return;
3866
+ }
3867
+
3868
+ if (!this.#extensionRunner.hasHandlers(event.type)) return;
3869
+ if (event.type === "agent_end") {
3864
3870
  // `agent_end` extension notification is emitted from the settled
3865
3871
  // agent_end maintenance path so `session_stop` control hooks are not
3866
3872
  // blocked by unrelated notification-only work.
@@ -4204,6 +4210,11 @@ export class AgentSession {
4204
4210
  hindsightState?.dispose();
4205
4211
  const mnemopiState = setMnemopiSessionState(this, undefined);
4206
4212
  await mnemopiState?.dispose();
4213
+ // Tear down the embeddings subprocess AFTER mnemopi state.dispose:
4214
+ // consolidate-on-dispose may still call `embed()` to store the final
4215
+ // memories, and that round-trips through the worker we are about to
4216
+ // hard-kill (issue #3031).
4217
+ await shutdownMnemopiEmbedClient();
4207
4218
  this.#disconnectFromAgent();
4208
4219
  if (this.#unsubscribeAppendOnly) {
4209
4220
  this.#unsubscribeAppendOnly();
@@ -7645,13 +7656,24 @@ export class AgentSession {
7645
7656
  // both take the summarizer path (the latter loudly).
7646
7657
  const wantsSnapcompact =
7647
7658
  compactionPrep.kind !== "fromHook" && effectiveSettings.strategy === "snapcompact" && !customInstructions;
7648
- const snapcompactReady = wantsSnapcompact && this.model.input.includes("image");
7659
+ let snapcompactReady = wantsSnapcompact && this.model.input.includes("image");
7649
7660
  if (wantsSnapcompact && !snapcompactReady) {
7650
7661
  this.emitNotice(
7651
7662
  "warning",
7652
7663
  `snapcompact needs a vision-capable model (${this.model.id} is text-only) — using an LLM summary instead`,
7653
7664
  "compaction",
7654
7665
  );
7666
+ } else if (snapcompactReady) {
7667
+ const text = snapcompact.serializeConversation(convertToLlm(preparation.messagesToSummarize));
7668
+ const renderScan = snapcompact.scanRenderability(text);
7669
+ if (!renderScan.isSafe) {
7670
+ this.emitNotice(
7671
+ "warning",
7672
+ `snapcompact disabled: high non-ASCII rate detected (${(renderScan.unrenderableRatio * 100).toFixed(1)}%). Falling back to an LLM summary to prevent data loss.`,
7673
+ "compaction",
7674
+ );
7675
+ snapcompactReady = false;
7676
+ }
7655
7677
  }
7656
7678
 
7657
7679
  let summary: string;
@@ -9962,6 +9984,7 @@ export class AgentSession {
9962
9984
  if (this.#isProviderErrorFinishReasonBeforeToolUse(message)) return true;
9963
9985
  if (this.#isMalformedFunctionCallError(message)) return true;
9964
9986
  if (this.#hasReplayUnsafeToolOutput(message)) return false;
9987
+ if (message.errorMessage.includes(THINKING_LOOP_ERROR_MARKER)) return true;
9965
9988
  if (this.#isStaleOpenAIResponsesReplayError(message)) return true;
9966
9989
 
9967
9990
  const err = message.errorMessage;
@@ -247,6 +247,20 @@ FROM model_usage_legacy
247
247
  { cause: lastError },
248
248
  );
249
249
  }
250
+ /** @internal Reset all singletons and close their databases — test-only. */
251
+ static resetInstance(): void {
252
+ for (const storage of instances.values()) storage.#close();
253
+ instances.clear();
254
+ }
255
+
256
+ #close(): void {
257
+ this.#listSettingsStmt.finalize();
258
+ this.#upsertModelUsageStmt.finalize();
259
+ this.#listModelUsageStmt.finalize();
260
+ // SqliteAuthCredentialStore.close() finalizes its own statements and
261
+ // closes the shared #db handle — must run after our statements finalize.
262
+ this.#authStore.close();
263
+ }
250
264
 
251
265
  /**
252
266
  * Reads legacy settings persisted in the agent.db `settings` table.