@oh-my-pi/pi-coding-agent 16.0.4 → 16.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +94 -0
- package/dist/cli.js +2027 -1396
- package/dist/types/advisor/advise-tool.d.ts +31 -19
- package/dist/types/autoresearch/tools/init-experiment.d.ts +13 -17
- package/dist/types/autoresearch/tools/log-experiment.d.ts +17 -19
- package/dist/types/autoresearch/tools/run-experiment.d.ts +3 -4
- package/dist/types/autoresearch/tools/update-notes.d.ts +4 -5
- package/dist/types/cli/args.d.ts +1 -0
- package/dist/types/cli/bench-cli.d.ts +6 -0
- package/dist/types/cli/ttsr-cli.d.ts +39 -0
- package/dist/types/commands/launch.d.ts +3 -0
- package/dist/types/commands/ttsr.d.ts +57 -0
- package/dist/types/commit/agentic/tools/analyze-file.d.ts +4 -5
- package/dist/types/commit/agentic/tools/git-file-diff.d.ts +4 -5
- package/dist/types/commit/agentic/tools/git-hunk.d.ts +5 -6
- package/dist/types/commit/agentic/tools/git-overview.d.ts +4 -5
- package/dist/types/commit/agentic/tools/propose-changelog.d.ts +23 -24
- package/dist/types/commit/agentic/tools/propose-commit.d.ts +11 -32
- package/dist/types/commit/agentic/tools/recent-commits.d.ts +3 -4
- package/dist/types/commit/agentic/tools/schemas.d.ts +6 -27
- package/dist/types/commit/agentic/tools/split-commit.d.ts +28 -49
- package/dist/types/commit/changelog/generate.d.ts +12 -13
- package/dist/types/commit/shared-llm.d.ts +10 -37
- package/dist/types/config/config-file.d.ts +4 -4
- package/dist/types/config/keybindings.d.ts +5 -0
- package/dist/types/config/models-config-schema.d.ts +625 -990
- package/dist/types/config/models-config.d.ts +229 -217
- package/dist/types/config/settings-schema.d.ts +144 -25
- package/dist/types/edit/hashline/params.d.ts +7 -11
- package/dist/types/edit/index.d.ts +2 -1
- package/dist/types/edit/modes/apply-patch.d.ts +4 -5
- package/dist/types/edit/modes/patch.d.ts +15 -24
- package/dist/types/edit/modes/replace.d.ts +16 -17
- package/dist/types/eval/js/index.d.ts +1 -0
- package/dist/types/extensibility/custom-commands/types.d.ts +6 -3
- package/dist/types/extensibility/custom-tools/types.d.ts +8 -5
- package/dist/types/extensibility/extensions/runner.d.ts +5 -2
- package/dist/types/extensibility/extensions/types.d.ts +14 -10
- package/dist/types/extensibility/hooks/types.d.ts +7 -4
- package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +13 -5
- package/dist/types/extensibility/legacy-pi-coding-agent-shim.d.ts +17 -0
- package/dist/types/extensibility/shared-events.d.ts +22 -1
- package/dist/types/extensibility/typebox.d.ts +80 -58
- package/dist/types/goals/tools/goal-tool.d.ts +11 -24
- package/dist/types/index.d.ts +2 -0
- package/dist/types/lsp/index.d.ts +11 -26
- package/dist/types/lsp/types.d.ts +12 -28
- package/dist/types/main.d.ts +1 -0
- package/dist/types/mcp/client.d.ts +8 -0
- package/dist/types/modes/components/btw-panel.d.ts +1 -0
- package/dist/types/modes/components/custom-editor.d.ts +3 -1
- package/dist/types/modes/components/status-line/component.d.ts +1 -1
- package/dist/types/modes/components/status-line/context-thresholds.d.ts +0 -1
- package/dist/types/modes/controllers/btw-controller.d.ts +2 -0
- package/dist/types/modes/controllers/input-controller.d.ts +1 -0
- package/dist/types/modes/interactive-mode.d.ts +3 -0
- package/dist/types/modes/rpc/rpc-types.d.ts +1 -1
- package/dist/types/modes/setup-wizard/index.d.ts +1 -0
- package/dist/types/modes/setup-wizard/startup-splash.d.ts +7 -0
- package/dist/types/modes/theme/theme.d.ts +1 -1
- package/dist/types/modes/types.d.ts +3 -0
- package/dist/types/modes/utils/context-usage.d.ts +12 -0
- package/dist/types/sdk.d.ts +8 -1
- package/dist/types/session/agent-session.d.ts +24 -0
- package/dist/types/session/session-persistence.d.ts +4 -0
- package/dist/types/startup-splash.d.ts +12 -0
- package/dist/types/task/types.d.ts +47 -48
- package/dist/types/tools/ask.d.ts +26 -27
- package/dist/types/tools/ast-edit.d.ts +17 -17
- package/dist/types/tools/ast-grep.d.ts +12 -13
- package/dist/types/tools/bash.d.ts +20 -17
- package/dist/types/tools/browser.d.ts +46 -71
- package/dist/types/tools/checkpoint.d.ts +14 -15
- package/dist/types/tools/debug.d.ts +82 -145
- package/dist/types/tools/eval.d.ts +30 -40
- package/dist/types/tools/find.d.ts +17 -18
- package/dist/types/tools/gh.d.ts +49 -78
- package/dist/types/tools/image-gen.d.ts +20 -36
- package/dist/types/tools/inspect-image.d.ts +10 -11
- package/dist/types/tools/irc.d.ts +22 -33
- package/dist/types/tools/job.d.ts +11 -12
- package/dist/types/tools/learn.d.ts +21 -28
- package/dist/types/tools/manage-skill.d.ts +13 -22
- package/dist/types/tools/memory-edit.d.ts +15 -24
- package/dist/types/tools/memory-recall.d.ts +7 -8
- package/dist/types/tools/memory-reflect.d.ts +9 -10
- package/dist/types/tools/memory-retain.d.ts +13 -14
- package/dist/types/tools/read.d.ts +8 -8
- package/dist/types/tools/resolve.d.ts +11 -18
- package/dist/types/tools/review.d.ts +9 -15
- package/dist/types/tools/search-tool-bm25.d.ts +9 -10
- package/dist/types/tools/search.d.ts +16 -17
- package/dist/types/tools/ssh.d.ts +14 -15
- package/dist/types/tools/todo.d.ts +27 -43
- package/dist/types/tools/tts.d.ts +8 -9
- package/dist/types/tools/write.d.ts +9 -10
- package/dist/types/tui/code-cell.d.ts +2 -0
- package/dist/types/tui/index.d.ts +1 -0
- package/dist/types/tui/width-aware-text.d.ts +23 -0
- package/dist/types/utils/image-vision-fallback.d.ts +28 -0
- package/dist/types/utils/markit.d.ts +10 -1
- package/dist/types/web/search/index.d.ts +17 -28
- package/dist/types/web/search/providers/base.d.ts +1 -0
- package/dist/types/web/search/providers/gemini.d.ts +1 -0
- package/dist/types/web/search/providers/perplexity.d.ts +0 -2
- package/dist/types/web/search/types.d.ts +32 -26
- package/package.json +14 -13
- package/scripts/omp +1 -1
- package/src/advisor/__tests__/advisor.test.ts +103 -1
- package/src/advisor/advise-tool.ts +47 -11
- package/src/autoresearch/tools/init-experiment.ts +13 -16
- package/src/autoresearch/tools/log-experiment.ts +15 -18
- package/src/autoresearch/tools/run-experiment.ts +3 -3
- package/src/autoresearch/tools/update-notes.ts +4 -4
- package/src/cli/args.ts +1 -0
- package/src/cli/bench-cli.ts +30 -7
- package/src/cli/flag-tables.ts +8 -0
- package/src/cli/ttsr-cli.ts +995 -0
- package/src/cli-commands.ts +1 -0
- package/src/cli.ts +7 -1
- package/src/collab/host.ts +2 -2
- package/src/commands/launch.ts +3 -0
- package/src/commands/ttsr.ts +125 -0
- package/src/commit/agentic/tools/analyze-file.ts +4 -4
- package/src/commit/agentic/tools/git-file-diff.ts +4 -4
- package/src/commit/agentic/tools/git-hunk.ts +7 -5
- package/src/commit/agentic/tools/git-overview.ts +4 -4
- package/src/commit/agentic/tools/propose-changelog.ts +18 -15
- package/src/commit/agentic/tools/propose-commit.ts +6 -6
- package/src/commit/agentic/tools/recent-commits.ts +3 -3
- package/src/commit/agentic/tools/schemas.ts +8 -20
- package/src/commit/agentic/tools/split-commit.ts +19 -23
- package/src/commit/analysis/summary.ts +7 -5
- package/src/commit/changelog/generate.ts +15 -11
- package/src/commit/shared-llm.ts +17 -24
- package/src/config/config-file.ts +13 -15
- package/src/config/keybindings.ts +6 -0
- package/src/config/models-config-schema.ts +206 -179
- package/src/config/settings-schema.ts +118 -2
- package/src/discovery/builtin-rules/index.ts +2 -0
- package/src/discovery/builtin-rules/ts-import-type.md +2 -2
- package/src/discovery/builtin-rules/ts-no-any.md +11 -2
- package/src/discovery/builtin-rules/ts-no-inline-cast-access.md +55 -0
- package/src/edit/hashline/params.ts +12 -11
- package/src/edit/index.ts +5 -4
- package/src/edit/modes/apply-patch.ts +4 -4
- package/src/edit/modes/patch.ts +15 -18
- package/src/edit/modes/replace.ts +13 -17
- package/src/edit/renderer.ts +0 -1
- package/src/eval/agent-bridge.ts +11 -13
- package/src/eval/completion-bridge.ts +25 -17
- package/src/eval/js/context-manager.ts +17 -2
- package/src/eval/js/index.ts +1 -1
- package/src/eval/py/executor.ts +2 -2
- package/src/eval/py/runner.py +44 -0
- package/src/extensibility/custom-commands/loader.ts +5 -3
- package/src/extensibility/custom-commands/types.ts +6 -3
- package/src/extensibility/custom-tools/loader.ts +4 -2
- package/src/extensibility/custom-tools/types.ts +8 -5
- package/src/extensibility/extensions/loader.ts +4 -2
- package/src/extensibility/extensions/runner.ts +20 -2
- package/src/extensibility/extensions/types.ts +22 -8
- package/src/extensibility/hooks/loader.ts +5 -2
- package/src/extensibility/hooks/types.ts +7 -4
- package/src/extensibility/legacy-pi-ai-shim.ts +42 -5
- package/src/extensibility/legacy-pi-coding-agent-shim.ts +113 -0
- package/src/extensibility/plugins/legacy-pi-compat.ts +13 -13
- package/src/extensibility/shared-events.ts +24 -0
- package/src/extensibility/tool-proxy.ts +4 -1
- package/src/extensibility/typebox.ts +778 -251
- package/src/goals/guided-setup.ts +12 -3
- package/src/goals/tools/goal-tool.ts +6 -6
- package/src/index.ts +2 -0
- package/src/internal-urls/docs-index.generated.ts +15 -13
- package/src/lsp/types.ts +13 -27
- package/src/main.ts +29 -21
- package/src/mcp/client.ts +38 -13
- package/src/mcp/render.ts +102 -89
- package/src/modes/components/agent-hub.ts +11 -4
- package/src/modes/components/branch-summary-message.ts +1 -0
- package/src/modes/components/btw-panel.ts +5 -1
- package/src/modes/components/collab-prompt-message.ts +9 -7
- package/src/modes/components/compaction-summary-message.ts +1 -0
- package/src/modes/components/custom-editor.ts +18 -0
- package/src/modes/components/custom-message.ts +1 -0
- package/src/modes/components/footer.ts +6 -5
- package/src/modes/components/hook-message.ts +1 -0
- package/src/modes/components/read-tool-group.ts +9 -3
- package/src/modes/components/skill-message.ts +1 -0
- package/src/modes/components/status-line/component.ts +139 -15
- package/src/modes/components/status-line/context-thresholds.ts +0 -1
- package/src/modes/components/todo-reminder.ts +1 -0
- package/src/modes/components/tool-execution.ts +17 -10
- package/src/modes/components/ttsr-notification.ts +1 -0
- package/src/modes/components/user-message.ts +6 -6
- package/src/modes/controllers/btw-controller.ts +69 -1
- package/src/modes/controllers/event-controller.ts +2 -7
- package/src/modes/controllers/input-controller.ts +29 -0
- package/src/modes/controllers/selector-controller.ts +10 -3
- package/src/modes/interactive-mode.ts +42 -10
- package/src/modes/rpc/rpc-types.ts +1 -1
- package/src/modes/setup-wizard/index.ts +1 -0
- package/src/modes/setup-wizard/scenes/sign-in.ts +77 -5
- package/src/modes/setup-wizard/startup-splash.ts +107 -0
- package/src/modes/theme/theme.ts +133 -143
- package/src/modes/types.ts +3 -0
- package/src/modes/utils/context-usage.ts +37 -20
- package/src/modes/utils/hotkeys-markdown.ts +1 -0
- package/src/prompts/system/system-prompt.md +1 -0
- package/src/prompts/tools/image-attachment-describe-system.md +8 -0
- package/src/prompts/tools/image-attachment-describe.md +10 -0
- package/src/sdk.ts +35 -22
- package/src/session/agent-session.ts +715 -255
- package/src/session/session-history-format.ts +11 -2
- package/src/session/session-loader.ts +19 -32
- package/src/session/session-persistence.ts +27 -11
- package/src/session/snapcompact-inline.ts +1 -1
- package/src/slash-commands/builtin-registry.ts +4 -11
- package/src/ssh/connection-manager.ts +3 -2
- package/src/startup-splash.ts +19 -0
- package/src/task/executor.ts +12 -7
- package/src/task/types.ts +44 -41
- package/src/tool-discovery/tool-index.ts +17 -4
- package/src/tools/ask.ts +14 -14
- package/src/tools/ast-edit.ts +17 -14
- package/src/tools/ast-grep.ts +10 -9
- package/src/tools/bash.ts +15 -10
- package/src/tools/browser/launch.ts +13 -0
- package/src/tools/browser.ts +26 -32
- package/src/tools/checkpoint.ts +7 -7
- package/src/tools/debug.ts +72 -69
- package/src/tools/eval.ts +18 -19
- package/src/tools/find.ts +20 -13
- package/src/tools/gh.ts +29 -49
- package/src/tools/image-gen.ts +94 -57
- package/src/tools/inspect-image.ts +8 -9
- package/src/tools/irc.ts +12 -12
- package/src/tools/job.ts +6 -6
- package/src/tools/learn.ts +11 -14
- package/src/tools/manage-skill.ts +19 -23
- package/src/tools/memory-edit.ts +8 -8
- package/src/tools/memory-recall.ts +4 -4
- package/src/tools/memory-reflect.ts +5 -5
- package/src/tools/memory-retain.ts +9 -11
- package/src/tools/puppeteer/02_stealth_hairline.txt +1 -1
- package/src/tools/puppeteer/04_stealth_iframe.txt +4 -4
- package/src/tools/puppeteer/05_stealth_webgl.txt +1 -1
- package/src/tools/puppeteer/10_stealth_plugins.txt +6 -4
- package/src/tools/puppeteer/12_stealth_codecs.txt +2 -2
- package/src/tools/puppeteer/13_stealth_worker.txt +1 -1
- package/src/tools/read.ts +197 -19
- package/src/tools/report-tool-issue.ts +6 -6
- package/src/tools/resolve.ts +6 -6
- package/src/tools/review.ts +10 -12
- package/src/tools/search-tool-bm25.ts +5 -5
- package/src/tools/search.ts +20 -29
- package/src/tools/ssh.ts +8 -8
- package/src/tools/todo.ts +16 -19
- package/src/tools/tts.ts +16 -15
- package/src/tools/write.ts +5 -5
- package/src/tui/code-cell.ts +44 -3
- package/src/tui/index.ts +1 -0
- package/src/tui/width-aware-text.ts +58 -0
- package/src/utils/image-vision-fallback.ts +197 -0
- package/src/utils/markit.ts +17 -2
- package/src/web/search/index.ts +21 -9
- package/src/web/search/providers/base.ts +1 -0
- package/src/web/search/providers/gemini.ts +56 -18
- package/src/web/search/providers/perplexity.ts +373 -126
- package/src/web/search/types.ts +28 -48
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from "bun:test";
|
|
2
|
-
import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
|
|
2
|
+
import type { AgentMessage, AgentTelemetryConfig } from "@oh-my-pi/pi-agent-core";
|
|
3
|
+
import { type } from "arktype";
|
|
3
4
|
import { createAdvisorMessageCard } from "../../modes/components/advisor-message";
|
|
4
5
|
import { getThemeByName } from "../../modes/theme/theme";
|
|
5
6
|
import { formatSessionHistoryMarkdown } from "../../session/session-history-format";
|
|
@@ -11,7 +12,9 @@ import {
|
|
|
11
12
|
type AdvisorNote,
|
|
12
13
|
AdvisorRuntime,
|
|
13
14
|
type AdvisorRuntimeHost,
|
|
15
|
+
deriveAdvisorTelemetry,
|
|
14
16
|
formatAdvisorBatchContent,
|
|
17
|
+
isAdvisorInterruptImmuneTurnActive,
|
|
15
18
|
isInterruptingSeverity,
|
|
16
19
|
resolveAdvisorDeliveryChannel,
|
|
17
20
|
} from "..";
|
|
@@ -114,6 +117,16 @@ describe("advisor", () => {
|
|
|
114
117
|
expect(result.details).toEqual({ note: "x", severity: "concern" });
|
|
115
118
|
expect(result.useless).toBe(true);
|
|
116
119
|
});
|
|
120
|
+
|
|
121
|
+
it("validates parameters using ArkType", () => {
|
|
122
|
+
const onAdvice = vi.fn();
|
|
123
|
+
const tool = new AdviseTool(onAdvice);
|
|
124
|
+
const valid = tool.parameters({ note: "x", severity: "concern" });
|
|
125
|
+
expect(valid instanceof type.errors).toBe(false);
|
|
126
|
+
|
|
127
|
+
const invalid = tool.parameters({ note: 123, severity: "invalid" as any });
|
|
128
|
+
expect(invalid instanceof type.errors).toBe(true);
|
|
129
|
+
});
|
|
117
130
|
});
|
|
118
131
|
|
|
119
132
|
describe("advice delivery policy", () => {
|
|
@@ -124,6 +137,44 @@ describe("advisor", () => {
|
|
|
124
137
|
expect(isInterruptingSeverity(undefined)).toBe(false);
|
|
125
138
|
});
|
|
126
139
|
|
|
140
|
+
it("keeps the interrupt-immune turn fence half-open for the configured window", () => {
|
|
141
|
+
expect(
|
|
142
|
+
isAdvisorInterruptImmuneTurnActive({
|
|
143
|
+
completedTurns: 4,
|
|
144
|
+
immuneTurnStart: undefined,
|
|
145
|
+
immuneTurns: 2,
|
|
146
|
+
}),
|
|
147
|
+
).toBe(false);
|
|
148
|
+
expect(
|
|
149
|
+
isAdvisorInterruptImmuneTurnActive({
|
|
150
|
+
completedTurns: 4,
|
|
151
|
+
immuneTurnStart: 5,
|
|
152
|
+
immuneTurns: 0,
|
|
153
|
+
}),
|
|
154
|
+
).toBe(false);
|
|
155
|
+
expect(
|
|
156
|
+
isAdvisorInterruptImmuneTurnActive({
|
|
157
|
+
completedTurns: 4,
|
|
158
|
+
immuneTurnStart: 5,
|
|
159
|
+
immuneTurns: 2,
|
|
160
|
+
}),
|
|
161
|
+
).toBe(true);
|
|
162
|
+
expect(
|
|
163
|
+
isAdvisorInterruptImmuneTurnActive({
|
|
164
|
+
completedTurns: 6,
|
|
165
|
+
immuneTurnStart: 5,
|
|
166
|
+
immuneTurns: 2,
|
|
167
|
+
}),
|
|
168
|
+
).toBe(true);
|
|
169
|
+
expect(
|
|
170
|
+
isAdvisorInterruptImmuneTurnActive({
|
|
171
|
+
completedTurns: 7,
|
|
172
|
+
immuneTurnStart: 5,
|
|
173
|
+
immuneTurns: 2,
|
|
174
|
+
}),
|
|
175
|
+
).toBe(false);
|
|
176
|
+
});
|
|
177
|
+
|
|
127
178
|
it("wraps each note in an advisory tag with severity as an attribute and escapes the body", () => {
|
|
128
179
|
const content = formatAdvisorBatchContent([
|
|
129
180
|
{ note: "first note" },
|
|
@@ -142,6 +193,37 @@ describe("advisor", () => {
|
|
|
142
193
|
});
|
|
143
194
|
});
|
|
144
195
|
|
|
196
|
+
describe("deriveAdvisorTelemetry", () => {
|
|
197
|
+
it("returns undefined when the primary has no telemetry so the advisor stays a no-op", () => {
|
|
198
|
+
expect(deriveAdvisorTelemetry(undefined, { id: "s-advisor", name: "Advisor" })).toBeUndefined();
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
it("inherits the primary's usage/cost hooks but restamps identity and clears the conversation", () => {
|
|
202
|
+
const onChatUsage = vi.fn();
|
|
203
|
+
const costEstimator = vi.fn();
|
|
204
|
+
const primary: AgentTelemetryConfig = {
|
|
205
|
+
agent: { id: "main", name: "Main" },
|
|
206
|
+
conversationId: "session-1",
|
|
207
|
+
attributes: { "deployment.id": "prod" },
|
|
208
|
+
onChatUsage,
|
|
209
|
+
costEstimator,
|
|
210
|
+
};
|
|
211
|
+
const identity = { id: "session-1-advisor", name: "Advisor", description: "anthropic/claude-sonnet-4-5" };
|
|
212
|
+
|
|
213
|
+
const derived = deriveAdvisorTelemetry(primary, identity);
|
|
214
|
+
|
|
215
|
+
// Usage/cost hooks are inherited so the advisor model's calls report through
|
|
216
|
+
// the same pipeline as the primary — the whole point of the fix.
|
|
217
|
+
expect(derived?.onChatUsage).toBe(onChatUsage);
|
|
218
|
+
expect(derived?.costEstimator).toBe(costEstimator);
|
|
219
|
+
expect(derived?.attributes).toEqual({ "deployment.id": "prod" });
|
|
220
|
+
// Advisor identity replaces the primary's so spans are attributable to the advisor.
|
|
221
|
+
expect(derived?.agent).toEqual(identity);
|
|
222
|
+
// Conversation cleared so the advisor loop falls back to its own `-advisor` session id.
|
|
223
|
+
expect(derived?.conversationId).toBeUndefined();
|
|
224
|
+
});
|
|
225
|
+
});
|
|
226
|
+
|
|
145
227
|
describe("AdvisorRuntime", () => {
|
|
146
228
|
function makeAgent(promptInputs: string[]): AdvisorAgent {
|
|
147
229
|
return {
|
|
@@ -688,6 +770,26 @@ describe("advisor", () => {
|
|
|
688
770
|
}
|
|
689
771
|
});
|
|
690
772
|
|
|
773
|
+
it("routes interrupting notes to the aside queue during immune turns without overriding preservation", () => {
|
|
774
|
+
expect(
|
|
775
|
+
resolveAdvisorDeliveryChannel({
|
|
776
|
+
severity: "concern",
|
|
777
|
+
autoResumeSuppressed: false,
|
|
778
|
+
streaming: true,
|
|
779
|
+
aborting: false,
|
|
780
|
+
interruptImmuneTurnActive: true,
|
|
781
|
+
}),
|
|
782
|
+
).toBe("aside");
|
|
783
|
+
expect(
|
|
784
|
+
resolveAdvisorDeliveryChannel({
|
|
785
|
+
severity: "blocker",
|
|
786
|
+
autoResumeSuppressed: true,
|
|
787
|
+
streaming: false,
|
|
788
|
+
aborting: false,
|
|
789
|
+
interruptImmuneTurnActive: true,
|
|
790
|
+
}),
|
|
791
|
+
).toBe("preserve");
|
|
792
|
+
});
|
|
691
793
|
it("preserves an interrupting note while suppressed AND idle (no auto-resume of a stopped run)", () => {
|
|
692
794
|
for (const severity of ["concern", "blocker"] as const) {
|
|
693
795
|
expect(
|
|
@@ -1,19 +1,23 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
AgentIdentity,
|
|
3
|
+
AgentTelemetryConfig,
|
|
4
|
+
AgentTool,
|
|
5
|
+
AgentToolContext,
|
|
6
|
+
AgentToolResult,
|
|
7
|
+
AgentToolUpdateCallback,
|
|
8
|
+
} from "@oh-my-pi/pi-agent-core";
|
|
2
9
|
import { escapeXmlText } from "@oh-my-pi/pi-utils";
|
|
3
|
-
import {
|
|
10
|
+
import { type } from "arktype";
|
|
4
11
|
import adviseDescription from "../prompts/advisor/advise-tool.md" with { type: "text" };
|
|
5
12
|
|
|
6
|
-
const adviseSchema =
|
|
7
|
-
note:
|
|
8
|
-
.
|
|
9
|
-
|
|
10
|
-
severity:
|
|
11
|
-
.enum(["nit", "concern", "blocker"])
|
|
12
|
-
.optional()
|
|
13
|
-
.describe("How strongly to weigh this. Omit for a plain nit."),
|
|
13
|
+
const adviseSchema = type({
|
|
14
|
+
note: type("string").describe(
|
|
15
|
+
"One concrete piece of advice for the agent you are watching. Terse, specific, actionable.",
|
|
16
|
+
),
|
|
17
|
+
"severity?": type("'nit' | 'concern' | 'blocker'").describe("How strongly to weigh this. Omit for a plain nit."),
|
|
14
18
|
});
|
|
15
19
|
|
|
16
|
-
export type AdviseParams =
|
|
20
|
+
export type AdviseParams = typeof adviseSchema.infer;
|
|
17
21
|
|
|
18
22
|
export type AdvisorSeverity = "nit" | "concern" | "blocker";
|
|
19
23
|
|
|
@@ -68,6 +72,15 @@ export function isInterruptingSeverity(severity: AdvisorSeverity | undefined): b
|
|
|
68
72
|
|
|
69
73
|
/** How an advisor note is routed to the primary. */
|
|
70
74
|
export type AdvisorDeliveryChannel = "aside" | "steer" | "preserve";
|
|
75
|
+
/** Half-open turn-count fence for the post-interrupt cooldown. */
|
|
76
|
+
export function isAdvisorInterruptImmuneTurnActive(opts: {
|
|
77
|
+
completedTurns: number;
|
|
78
|
+
immuneTurnStart: number | undefined;
|
|
79
|
+
immuneTurns: number;
|
|
80
|
+
}): boolean {
|
|
81
|
+
if (opts.immuneTurnStart === undefined || opts.immuneTurns <= 0) return false;
|
|
82
|
+
return opts.completedTurns < opts.immuneTurnStart + opts.immuneTurns;
|
|
83
|
+
}
|
|
71
84
|
|
|
72
85
|
/**
|
|
73
86
|
* Decide how one advisor note reaches the primary agent.
|
|
@@ -84,18 +97,41 @@ export type AdvisorDeliveryChannel = "aside" | "steer" | "preserve";
|
|
|
84
97
|
* auto-resume anything, so it is delivered live. Parking it during an active
|
|
85
98
|
* run instead strands it (it never reaches the running agent) and the withheld
|
|
86
99
|
* notes dump as one burst at the next user prompt — the bug this guards.
|
|
100
|
+
* - During the post-interrupt immune-turn window, further `concern`/`blocker`
|
|
101
|
+
* notes are downgraded to asides; suppression preservation still wins.
|
|
87
102
|
*/
|
|
88
103
|
export function resolveAdvisorDeliveryChannel(opts: {
|
|
89
104
|
severity: AdvisorSeverity | undefined;
|
|
90
105
|
autoResumeSuppressed: boolean;
|
|
91
106
|
streaming: boolean;
|
|
92
107
|
aborting: boolean;
|
|
108
|
+
interruptImmuneTurnActive?: boolean;
|
|
93
109
|
}): AdvisorDeliveryChannel {
|
|
94
110
|
if (!isInterruptingSeverity(opts.severity)) return "aside";
|
|
95
111
|
if (opts.autoResumeSuppressed && (opts.aborting || !opts.streaming)) return "preserve";
|
|
112
|
+
if (opts.interruptImmuneTurnActive) return "aside";
|
|
96
113
|
return "steer";
|
|
97
114
|
}
|
|
98
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Derive the advisor loop's telemetry from the primary session's config so the
|
|
118
|
+
* advisor model's GenAI spans and usage/cost hooks (onChatUsage, onCostDelta,
|
|
119
|
+
* costEstimator) fire under the same pipeline as every other model call —
|
|
120
|
+
* stamped with the advisor's own agent identity. `conversationId` is cleared so
|
|
121
|
+
* the advisor loop falls back to its own `-advisor` session id for
|
|
122
|
+
* `gen_ai.conversation.id` instead of inheriting the primary's conversation.
|
|
123
|
+
*
|
|
124
|
+
* Returns undefined when the primary has no telemetry (instrumentation off), so
|
|
125
|
+
* the advisor `Agent` stays a zero-overhead no-op as well.
|
|
126
|
+
*/
|
|
127
|
+
export function deriveAdvisorTelemetry(
|
|
128
|
+
primaryTelemetry: AgentTelemetryConfig | undefined,
|
|
129
|
+
identity: AgentIdentity,
|
|
130
|
+
): AgentTelemetryConfig | undefined {
|
|
131
|
+
if (!primaryTelemetry) return undefined;
|
|
132
|
+
return { ...primaryTelemetry, agent: identity, conversationId: undefined };
|
|
133
|
+
}
|
|
134
|
+
|
|
99
135
|
/**
|
|
100
136
|
* Side-effect-free investigation tools handed to the advisor agent so it can
|
|
101
137
|
* inspect the workspace before weighing in. Names match the primary session's
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as path from "node:path";
|
|
2
2
|
|
|
3
3
|
import { Text } from "@oh-my-pi/pi-tui";
|
|
4
|
-
import {
|
|
4
|
+
import { type } from "arktype";
|
|
5
5
|
import type { ToolDefinition } from "../../extensibility/extensions";
|
|
6
6
|
import type { Theme } from "../../modes/theme/theme";
|
|
7
7
|
import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
|
|
@@ -16,21 +16,18 @@ export const HARNESS_FILENAME = "autoresearch.sh";
|
|
|
16
16
|
export const DEFAULT_HARNESS_COMMAND = `bash ${HARNESS_FILENAME}`;
|
|
17
17
|
const HARNESS_COMMIT_TITLE = "autoresearch: harness setup";
|
|
18
18
|
|
|
19
|
-
const initExperimentSchema =
|
|
20
|
-
name:
|
|
21
|
-
goal:
|
|
22
|
-
primary_metric:
|
|
23
|
-
metric_unit:
|
|
24
|
-
direction:
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
constraints: z.array(z.string()).describe("free-form constraints").optional(),
|
|
32
|
-
max_iterations: z.number().describe("soft iteration cap per segment").optional(),
|
|
33
|
-
new_segment: z.boolean().describe("bump to a new segment in existing session").optional(),
|
|
19
|
+
const initExperimentSchema = type({
|
|
20
|
+
name: type("string").describe("experiment name"),
|
|
21
|
+
"goal?": type("string").describe("session goal"),
|
|
22
|
+
primary_metric: type("string").describe("primary metric name"),
|
|
23
|
+
"metric_unit?": type("string").describe("metric unit (e.g. ms, µs, mb)"),
|
|
24
|
+
"direction?": type("'lower' | 'higher'").describe("better direction (default lower)"),
|
|
25
|
+
"secondary_metrics?": type("string[]").describe("secondary metric names"),
|
|
26
|
+
"scope_paths?": type("string[]").describe("expected-to-modify paths"),
|
|
27
|
+
"off_limits?": type("string[]").describe("off-limits paths"),
|
|
28
|
+
"constraints?": type("string[]").describe("free-form constraints"),
|
|
29
|
+
"max_iterations?": type("number").describe("soft iteration cap per segment"),
|
|
30
|
+
"new_segment?": type("boolean").describe("bump to a new segment in existing session"),
|
|
34
31
|
});
|
|
35
32
|
|
|
36
33
|
interface InitExperimentDetails {
|
|
@@ -2,7 +2,7 @@ import * as fs from "node:fs";
|
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
|
|
4
4
|
import { Text } from "@oh-my-pi/pi-tui";
|
|
5
|
-
import {
|
|
5
|
+
import { type } from "arktype";
|
|
6
6
|
import type { ToolDefinition } from "../../extensibility/extensions";
|
|
7
7
|
import type { Theme } from "../../modes/theme/theme";
|
|
8
8
|
import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
|
|
@@ -36,23 +36,20 @@ import type {
|
|
|
36
36
|
|
|
37
37
|
const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
|
|
38
38
|
|
|
39
|
-
const logExperimentSchema =
|
|
40
|
-
metric:
|
|
41
|
-
status:
|
|
42
|
-
description:
|
|
43
|
-
metrics:
|
|
44
|
-
asi:
|
|
45
|
-
commit:
|
|
46
|
-
justification:
|
|
47
|
-
flag_runs:
|
|
48
|
-
.
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
)
|
|
54
|
-
.describe("flag earlier runs as suspect")
|
|
55
|
-
.optional(),
|
|
39
|
+
const logExperimentSchema = type({
|
|
40
|
+
metric: type("number").describe("primary metric value"),
|
|
41
|
+
status: type("'keep'|'discard'|'crash'|'checks_failed'").describe("run outcome"),
|
|
42
|
+
description: type("string").describe("short run description"),
|
|
43
|
+
"metrics?": type({ "[string]": "number" }).describe("secondary metrics"),
|
|
44
|
+
"asi?": type({ "[string]": "unknown" }).describe("free-form structured metadata"),
|
|
45
|
+
"commit?": type("string").describe("override recorded commit hash"),
|
|
46
|
+
"justification?": type("string").describe("required when keeping a scope-deviating run"),
|
|
47
|
+
"flag_runs?": type({
|
|
48
|
+
run_id: type("number.integer").describe("run id to flag"),
|
|
49
|
+
reason: type("string").describe("why this run is suspect"),
|
|
50
|
+
})
|
|
51
|
+
.array()
|
|
52
|
+
.describe("flag earlier runs as suspect"),
|
|
56
53
|
});
|
|
57
54
|
|
|
58
55
|
export function createLogExperimentTool(
|
|
@@ -2,7 +2,7 @@ import * as fs from "node:fs";
|
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import { Text } from "@oh-my-pi/pi-tui";
|
|
4
4
|
import { formatBytes } from "@oh-my-pi/pi-utils";
|
|
5
|
-
import {
|
|
5
|
+
import { type } from "arktype";
|
|
6
6
|
import { executeBash } from "../../exec/bash-executor";
|
|
7
7
|
import type { ToolDefinition } from "../../extensibility/extensions";
|
|
8
8
|
import type { Theme } from "../../modes/theme/theme";
|
|
@@ -25,8 +25,8 @@ import { openAutoresearchStorageIfExists } from "../storage";
|
|
|
25
25
|
import type { AutoresearchToolFactoryOptions, RunDetails, RunExperimentProgressDetails } from "../types";
|
|
26
26
|
import { DEFAULT_HARNESS_COMMAND } from "./init-experiment";
|
|
27
27
|
|
|
28
|
-
const runExperimentSchema =
|
|
29
|
-
timeout_seconds:
|
|
28
|
+
const runExperimentSchema = type({
|
|
29
|
+
"timeout_seconds?": type("number").describe("timeout in seconds (default 600)"),
|
|
30
30
|
});
|
|
31
31
|
|
|
32
32
|
interface ProcessExecutionResult {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Text } from "@oh-my-pi/pi-tui";
|
|
2
|
-
import {
|
|
2
|
+
import { type } from "arktype";
|
|
3
3
|
import type { ToolDefinition } from "../../extensibility/extensions";
|
|
4
4
|
import type { Theme } from "../../modes/theme/theme";
|
|
5
5
|
import { replaceTabs, truncateToWidth } from "../../tools/render-utils";
|
|
@@ -8,9 +8,9 @@ import { buildExperimentState } from "../state";
|
|
|
8
8
|
import { openAutoresearchStorageIfExists } from "../storage";
|
|
9
9
|
import type { AutoresearchToolFactoryOptions } from "../types";
|
|
10
10
|
|
|
11
|
-
const updateNotesSchema =
|
|
12
|
-
body:
|
|
13
|
-
append_idea:
|
|
11
|
+
const updateNotesSchema = type({
|
|
12
|
+
body: type("string").describe("replacement notes body"),
|
|
13
|
+
"append_idea?": type("string").describe("append as bullet under Ideas instead of replacing body"),
|
|
14
14
|
});
|
|
15
15
|
|
|
16
16
|
interface UpdateNotesDetails {
|
package/src/cli/args.ts
CHANGED
package/src/cli/bench-cli.ts
CHANGED
|
@@ -17,7 +17,12 @@ import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
|
|
|
17
17
|
import chalk from "chalk";
|
|
18
18
|
import type { ApiKeyResolverModel } from "../config/api-key-resolver";
|
|
19
19
|
import { type CanonicalModelQueryOptions, ModelRegistry } from "../config/model-registry";
|
|
20
|
-
import {
|
|
20
|
+
import {
|
|
21
|
+
formatModelSelectorValue,
|
|
22
|
+
formatModelString,
|
|
23
|
+
getModelMatchPreferences,
|
|
24
|
+
resolveCliModel,
|
|
25
|
+
} from "../config/model-resolver";
|
|
21
26
|
import { Settings } from "../config/settings";
|
|
22
27
|
import benchPrompt from "../prompts/bench.md" with { type: "text" };
|
|
23
28
|
import { discoverAuthStorage } from "../sdk";
|
|
@@ -144,9 +149,15 @@ function isFirstTokenEvent(event: AssistantMessageEvent): boolean {
|
|
|
144
149
|
* latency does not dilute throughput. Falls back to total duration when the
|
|
145
150
|
* response arrived as a single chunk (TTFT ~ duration).
|
|
146
151
|
*/
|
|
147
|
-
function computeTokensPerSecond(
|
|
152
|
+
export function computeTokensPerSecond(
|
|
153
|
+
outputTokens: number,
|
|
154
|
+
durationMs: number,
|
|
155
|
+
ttftMs: number,
|
|
156
|
+
deltaChunkCount: number,
|
|
157
|
+
): number {
|
|
148
158
|
const decodeMs = durationMs - ttftMs;
|
|
149
|
-
|
|
159
|
+
// Fall back to total duration when the response arrived as a single chunk/non-streaming.
|
|
160
|
+
const windowMs = decodeMs > 0 && deltaChunkCount >= 2 ? decodeMs : durationMs;
|
|
150
161
|
return windowMs > 0 ? (outputTokens * 1000) / windowMs : 0;
|
|
151
162
|
}
|
|
152
163
|
|
|
@@ -193,10 +204,17 @@ async function runBenchRequest(
|
|
|
193
204
|
headers: model.provider === "openrouter" ? { "X-OpenRouter-Cache": "false" } : undefined,
|
|
194
205
|
});
|
|
195
206
|
let message: AssistantMessage | undefined;
|
|
207
|
+
let deltaChunkCount = 0;
|
|
196
208
|
for await (const event of stream) {
|
|
197
209
|
if (firstTokenAt === undefined && isFirstTokenEvent(event)) {
|
|
198
210
|
firstTokenAt = now();
|
|
199
211
|
}
|
|
212
|
+
if (
|
|
213
|
+
(event.type === "text_delta" || event.type === "thinking_delta" || event.type === "toolcall_delta") &&
|
|
214
|
+
event.delta.length > 0
|
|
215
|
+
) {
|
|
216
|
+
deltaChunkCount++;
|
|
217
|
+
}
|
|
200
218
|
if (event.type === "error") {
|
|
201
219
|
return { ok: false, error: event.error.errorMessage ?? "request failed" };
|
|
202
220
|
}
|
|
@@ -218,7 +236,7 @@ async function runBenchRequest(
|
|
|
218
236
|
ttftMs,
|
|
219
237
|
durationMs,
|
|
220
238
|
outputTokens,
|
|
221
|
-
tokensPerSecond: computeTokensPerSecond(outputTokens, durationMs, ttftMs),
|
|
239
|
+
tokensPerSecond: computeTokensPerSecond(outputTokens, durationMs, ttftMs, deltaChunkCount),
|
|
222
240
|
};
|
|
223
241
|
} catch (error) {
|
|
224
242
|
return { ok: false, error: getErrorMessage(error) };
|
|
@@ -244,6 +262,10 @@ function buildModelReport(
|
|
|
244
262
|
return { selector, model: formatModelString(model), thinking, results, average };
|
|
245
263
|
}
|
|
246
264
|
|
|
265
|
+
function formatBenchModelLabel(report: BenchModelReport): string {
|
|
266
|
+
return formatModelSelectorValue(report.model, report.thinking);
|
|
267
|
+
}
|
|
268
|
+
|
|
247
269
|
function formatMs(ms: number): string {
|
|
248
270
|
return formatDuration(Math.max(0, Math.round(ms)));
|
|
249
271
|
}
|
|
@@ -264,7 +286,7 @@ export function formatBenchTable(summary: BenchSummary): string {
|
|
|
264
286
|
return b.average.tokensPerSecond - a.average.tokensPerSecond;
|
|
265
287
|
});
|
|
266
288
|
const rows = ranked.map(report => ({
|
|
267
|
-
model: report
|
|
289
|
+
model: formatBenchModelLabel(report),
|
|
268
290
|
ttft: report.average ? formatMs(report.average.ttftMs) : "-",
|
|
269
291
|
tps: report.average ? `${report.average.tokensPerSecond.toFixed(1)}/s` : "-",
|
|
270
292
|
tokens: report.average ? String(Math.round(report.average.outputTokens)) : "-",
|
|
@@ -382,8 +404,9 @@ export async function runBenchCommand(command: BenchCommandArgs, deps: BenchDepe
|
|
|
382
404
|
const reports: BenchModelReport[] = [];
|
|
383
405
|
for (const { selector, model, thinking } of targets) {
|
|
384
406
|
if (!json) {
|
|
385
|
-
const
|
|
386
|
-
|
|
407
|
+
const resolvedModel = formatModelSelectorValue(formatModelString(model), thinking);
|
|
408
|
+
const resolvedNote = selector === resolvedModel ? "" : chalk.dim(` (${selector})`);
|
|
409
|
+
writeStdout(`${chalk.bold(resolvedModel)}${resolvedNote}\n`);
|
|
387
410
|
}
|
|
388
411
|
const results: BenchRunResult[] = [];
|
|
389
412
|
for (let index = 0; index < runs; index++) {
|
package/src/cli/flag-tables.ts
CHANGED
|
@@ -120,6 +120,14 @@ export const STRING_SETTERS: Record<string, StringSetter> = {
|
|
|
120
120
|
"--plan": (result, value) => {
|
|
121
121
|
result.plan = value;
|
|
122
122
|
},
|
|
123
|
+
"--max-time": (result, value, deps) => {
|
|
124
|
+
const seconds = Number(value);
|
|
125
|
+
if (Number.isFinite(seconds) && seconds > 0) {
|
|
126
|
+
result.maxTime = seconds;
|
|
127
|
+
} else {
|
|
128
|
+
deps.logger.warn("Invalid seconds passed to --max-time", { value });
|
|
129
|
+
}
|
|
130
|
+
},
|
|
123
131
|
"--api-key": (result, value) => {
|
|
124
132
|
result.apiKey = value;
|
|
125
133
|
},
|