@oh-my-pi/pi-coding-agent 15.1.2 → 15.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/dist/types/cli/auth-broker-cli.d.ts +25 -0
- package/dist/types/cli/auth-gateway-cli.d.ts +18 -0
- package/dist/types/cli/grievances-cli.d.ts +12 -0
- package/dist/types/commands/auth-broker.d.ts +54 -0
- package/dist/types/commands/auth-gateway.d.ts +32 -0
- package/dist/types/commands/grievances.d.ts +1 -1
- package/dist/types/commit/agentic/tools/propose-commit.d.ts +9 -1
- package/dist/types/commit/agentic/tools/schemas.d.ts +9 -1
- package/dist/types/commit/agentic/tools/split-commit.d.ts +9 -1
- package/dist/types/config/model-registry.d.ts +3 -0
- package/dist/types/config/models-config-schema.d.ts +1 -0
- package/dist/types/config/settings-schema.d.ts +46 -0
- package/dist/types/discovery/agents.d.ts +12 -1
- package/dist/types/edit/renderer.d.ts +3 -0
- package/dist/types/eval/index.d.ts +0 -2
- package/dist/types/goals/tools/goal-tool.d.ts +10 -2
- package/dist/types/index.d.ts +0 -1
- package/dist/types/internal-urls/index.d.ts +1 -1
- package/dist/types/internal-urls/{pi-protocol.d.ts → omp-protocol.d.ts} +3 -3
- package/dist/types/internal-urls/types.d.ts +1 -1
- package/dist/types/modes/acp/acp-agent.d.ts +1 -0
- package/dist/types/modes/emoji-autocomplete.d.ts +16 -0
- package/dist/types/modes/interactive-mode.d.ts +1 -1
- package/dist/types/modes/prompt-action-autocomplete.d.ts +4 -0
- package/dist/types/plan-mode/approved-plan.d.ts +4 -0
- package/dist/types/sdk.d.ts +10 -3
- package/dist/types/session/agent-session.d.ts +1 -1
- package/dist/types/session/auth-broker-config.d.ts +13 -0
- package/dist/types/session/auth-storage.d.ts +1 -1
- package/dist/types/tools/eval.d.ts +41 -7
- package/dist/types/tools/irc.d.ts +8 -2
- package/dist/types/tools/report-tool-issue.d.ts +118 -1
- package/dist/types/tools/resolve.d.ts +8 -2
- package/examples/custom-tools/README.md +3 -12
- package/examples/extensions/README.md +2 -15
- package/examples/extensions/api-demo.ts +1 -7
- package/package.json +7 -7
- package/src/autoresearch/tools/init-experiment.ts +11 -33
- package/src/autoresearch/tools/log-experiment.ts +10 -24
- package/src/autoresearch/tools/run-experiment.ts +1 -1
- package/src/autoresearch/tools/update-notes.ts +2 -9
- package/src/cli/auth-broker-cli.ts +746 -0
- package/src/cli/auth-gateway-cli.ts +342 -0
- package/src/cli/grievances-cli.ts +109 -16
- package/src/cli.ts +4 -2
- package/src/commands/auth-broker.ts +96 -0
- package/src/commands/auth-gateway.ts +61 -0
- package/src/commands/grievances.ts +13 -8
- package/src/commands/launch.ts +1 -1
- package/src/commit/agentic/agent.ts +2 -0
- package/src/commit/agentic/tools/analyze-file.ts +2 -2
- package/src/commit/agentic/tools/git-file-diff.ts +2 -2
- package/src/commit/agentic/tools/git-hunk.ts +3 -3
- package/src/commit/agentic/tools/git-overview.ts +2 -2
- package/src/commit/agentic/tools/propose-changelog.ts +1 -3
- package/src/commit/agentic/tools/recent-commits.ts +1 -1
- package/src/commit/agentic/tools/schemas.ts +1 -9
- package/src/config/model-equivalence.ts +279 -174
- package/src/config/model-registry.ts +37 -6
- package/src/config/model-resolver.ts +13 -8
- package/src/config/models-config-schema.ts +8 -0
- package/src/config/settings-schema.ts +52 -0
- package/src/cursor.ts +1 -1
- package/src/debug/log-formatting.ts +1 -1
- package/src/debug/log-viewer.ts +1 -1
- package/src/debug/profiler.ts +4 -0
- package/src/debug/raw-sse-buffer.ts +100 -59
- package/src/debug/raw-sse.ts +1 -1
- package/src/discovery/agents.ts +15 -4
- package/src/edit/modes/apply-patch.ts +1 -5
- package/src/edit/modes/patch.ts +5 -5
- package/src/edit/modes/replace.ts +5 -5
- package/src/edit/renderer.ts +2 -1
- package/src/edit/streaming.ts +1 -1
- package/src/eval/index.ts +0 -2
- package/src/eval/js/shared/runtime.ts +25 -0
- package/src/eval/py/kernel.ts +1 -1
- package/src/exa/researcher.ts +4 -4
- package/src/exa/search.ts +10 -22
- package/src/exa/websets.ts +33 -33
- package/src/goals/tools/goal-tool.ts +3 -3
- package/src/index.ts +0 -3
- package/src/internal-urls/docs-index.generated.ts +21 -18
- package/src/internal-urls/index.ts +1 -1
- package/src/internal-urls/{pi-protocol.ts → omp-protocol.ts} +10 -10
- package/src/internal-urls/router.ts +3 -3
- package/src/internal-urls/types.ts +1 -1
- package/src/lsp/types.ts +8 -11
- package/src/main.ts +3 -0
- package/src/mcp/tool-bridge.ts +3 -3
- package/src/modes/acp/acp-agent.ts +88 -25
- package/src/modes/components/bash-execution.ts +1 -1
- package/src/modes/components/diff.ts +1 -2
- package/src/modes/components/eval-execution.ts +1 -1
- package/src/modes/components/oauth-selector.ts +38 -2
- package/src/modes/components/tool-execution.ts +1 -2
- package/src/modes/controllers/command-controller.ts +95 -34
- package/src/modes/controllers/input-controller.ts +4 -3
- package/src/modes/data/emojis.json +1 -0
- package/src/modes/emoji-autocomplete.ts +285 -0
- package/src/modes/interactive-mode.ts +92 -19
- package/src/modes/print-mode.ts +3 -3
- package/src/modes/prompt-action-autocomplete.ts +14 -0
- package/src/plan-mode/approved-plan.ts +9 -0
- package/src/prompts/system/system-prompt.md +1 -1
- package/src/prompts/system/ttsr-tool-reminder.md +5 -0
- package/src/prompts/tools/eval.md +25 -26
- package/src/prompts/tools/read.md +1 -1
- package/src/prompts/tools/resolve.md +1 -1
- package/src/prompts/tools/search.md +1 -1
- package/src/prompts/tools/web-search.md +1 -1
- package/src/sdk.ts +78 -7
- package/src/session/agent-session.ts +176 -77
- package/src/session/agent-storage.ts +7 -2
- package/src/session/auth-broker-config.ts +102 -0
- package/src/session/auth-storage.ts +7 -1
- package/src/session/streaming-output.ts +1 -1
- package/src/task/types.ts +10 -35
- package/src/tools/bash-interactive.ts +4 -1
- package/src/tools/bash-pty-selection.ts +2 -2
- package/src/tools/browser.ts +12 -20
- package/src/tools/eval.ts +77 -100
- package/src/tools/gh.ts +21 -45
- package/src/tools/hindsight-recall.ts +1 -1
- package/src/tools/hindsight-reflect.ts +2 -2
- package/src/tools/hindsight-retain.ts +3 -7
- package/src/tools/index.ts +8 -1
- package/src/tools/inspect-image.ts +4 -1
- package/src/tools/irc.ts +4 -12
- package/src/tools/job.ts +3 -11
- package/src/tools/report-tool-issue.ts +462 -17
- package/src/tools/resolve.ts +2 -7
- package/src/tools/todo-write.ts +8 -15
- package/src/utils/title-generator.ts +3 -0
- package/src/web/search/index.ts +6 -6
- package/dist/types/eval/parse.d.ts +0 -28
- package/dist/types/eval/sniff.d.ts +0 -11
- package/src/eval/eval.lark +0 -36
- package/src/eval/parse.ts +0 -407
- package/src/eval/sniff.ts +0 -28
|
@@ -6,8 +6,32 @@ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
|
6
6
|
import { type Theme } from "../modes/theme/theme";
|
|
7
7
|
import { type ToolSession } from ".";
|
|
8
8
|
export declare const EVAL_DEFAULT_PREVIEW_LINES = 10;
|
|
9
|
+
/**
|
|
10
|
+
* Per-cell input. Each cell runs in order; state persists within a language
|
|
11
|
+
* across cells and across tool calls.
|
|
12
|
+
*/
|
|
13
|
+
declare const evalCellSchema: z.ZodObject<{
|
|
14
|
+
language: z.ZodEnum<{
|
|
15
|
+
js: "js";
|
|
16
|
+
py: "py";
|
|
17
|
+
}>;
|
|
18
|
+
code: z.ZodString;
|
|
19
|
+
title: z.ZodOptional<z.ZodString>;
|
|
20
|
+
timeout: z.ZodOptional<z.ZodNumber>;
|
|
21
|
+
reset: z.ZodOptional<z.ZodBoolean>;
|
|
22
|
+
}, z.core.$strip>;
|
|
23
|
+
export type EvalCellInput = z.infer<typeof evalCellSchema>;
|
|
9
24
|
export declare const evalSchema: z.ZodObject<{
|
|
10
|
-
|
|
25
|
+
cells: z.ZodArray<z.ZodObject<{
|
|
26
|
+
language: z.ZodEnum<{
|
|
27
|
+
js: "js";
|
|
28
|
+
py: "py";
|
|
29
|
+
}>;
|
|
30
|
+
code: z.ZodString;
|
|
31
|
+
title: z.ZodOptional<z.ZodString>;
|
|
32
|
+
timeout: z.ZodOptional<z.ZodNumber>;
|
|
33
|
+
reset: z.ZodOptional<z.ZodBoolean>;
|
|
34
|
+
}, z.core.$strip>>;
|
|
11
35
|
}, z.core.$strip>;
|
|
12
36
|
export type EvalToolParams = z.infer<typeof evalSchema>;
|
|
13
37
|
export type EvalToolResult = {
|
|
@@ -35,20 +59,30 @@ export declare class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
35
59
|
readonly label = "Eval";
|
|
36
60
|
get description(): string;
|
|
37
61
|
readonly parameters: z.ZodObject<{
|
|
38
|
-
|
|
62
|
+
cells: z.ZodArray<z.ZodObject<{
|
|
63
|
+
language: z.ZodEnum<{
|
|
64
|
+
js: "js";
|
|
65
|
+
py: "py";
|
|
66
|
+
}>;
|
|
67
|
+
code: z.ZodString;
|
|
68
|
+
title: z.ZodOptional<z.ZodString>;
|
|
69
|
+
timeout: z.ZodOptional<z.ZodNumber>;
|
|
70
|
+
reset: z.ZodOptional<z.ZodBoolean>;
|
|
71
|
+
}, z.core.$strip>>;
|
|
39
72
|
}, z.core.$strip>;
|
|
40
73
|
readonly concurrency = "exclusive";
|
|
41
74
|
readonly strict = true;
|
|
42
75
|
readonly intent: (args: Partial<z.infer<typeof evalSchema>>) => string | undefined;
|
|
43
|
-
get customFormat(): {
|
|
44
|
-
syntax: "lark";
|
|
45
|
-
definition: string;
|
|
46
|
-
};
|
|
47
76
|
constructor(session: ToolSession | null, options?: EvalToolOptions);
|
|
48
77
|
execute(_toolCallId: string, params: z.infer<typeof evalSchema>, signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback, _ctx?: AgentToolContext): Promise<AgentToolResult<EvalToolDetails | undefined>>;
|
|
49
78
|
}
|
|
79
|
+
interface EvalRenderCellArg {
|
|
80
|
+
language?: string;
|
|
81
|
+
code?: string;
|
|
82
|
+
title?: string;
|
|
83
|
+
}
|
|
50
84
|
interface EvalRenderArgs {
|
|
51
|
-
|
|
85
|
+
cells?: EvalRenderCellArg[];
|
|
52
86
|
__partialJson?: string;
|
|
53
87
|
}
|
|
54
88
|
interface EvalRenderContext {
|
|
@@ -21,7 +21,10 @@ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallb
|
|
|
21
21
|
import * as z from "zod/v4";
|
|
22
22
|
import type { ToolSession } from ".";
|
|
23
23
|
declare const ircSchema: z.ZodObject<{
|
|
24
|
-
op: z.
|
|
24
|
+
op: z.ZodEnum<{
|
|
25
|
+
list: "list";
|
|
26
|
+
send: "send";
|
|
27
|
+
}>;
|
|
25
28
|
to: z.ZodOptional<z.ZodString>;
|
|
26
29
|
message: z.ZodOptional<z.ZodString>;
|
|
27
30
|
awaitReply: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -59,7 +62,10 @@ export declare class IrcTool implements AgentTool<typeof ircSchema, IrcDetails>
|
|
|
59
62
|
readonly summary = "Send and receive messages between agents over IRC-like channels";
|
|
60
63
|
readonly description: string;
|
|
61
64
|
readonly parameters: z.ZodObject<{
|
|
62
|
-
op: z.
|
|
65
|
+
op: z.ZodEnum<{
|
|
66
|
+
list: "list";
|
|
67
|
+
send: "send";
|
|
68
|
+
}>;
|
|
63
69
|
to: z.ZodOptional<z.ZodString>;
|
|
64
70
|
message: z.ZodOptional<z.ZodString>;
|
|
65
71
|
awaitReply: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -1,6 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* report_tool_issue — automated QA tool for tracking unexpected tool behavior.
|
|
3
|
+
*
|
|
4
|
+
* Enabled by default; gated behind PI_AUTO_QA=1 / `dev.autoqa` so a user
|
|
5
|
+
* who flips the setting off short-circuits injection entirely.
|
|
6
|
+
* Always injected into every agent (including subagents) regardless of tool selection.
|
|
7
|
+
* Records grievances to a local SQLite database; never throws.
|
|
8
|
+
*
|
|
9
|
+
* Before the first record lands, the user's consent is checked. If they've
|
|
10
|
+
* never been asked (`dev.autoqa.consent === "unset"`) the process-global
|
|
11
|
+
* consent handler — wired by `InteractiveMode` to a Yes/No popup — is
|
|
12
|
+
* invoked exactly once and the decision is persisted. Subsequent calls
|
|
13
|
+
* (including from subagents) read the cached decision without prompting.
|
|
14
|
+
*
|
|
15
|
+
* When the user grants consent, push is automatically active against the
|
|
16
|
+
* bundled endpoint (`dev.autoqaPush.endpoint`, default `qa.omp.sh`). Each
|
|
17
|
+
* insert schedules a background flush that POSTs pending rows and deletes
|
|
18
|
+
* them on HTTP 2xx. `PI_AUTO_QA_PUSH=1` forces push in non-interactive
|
|
19
|
+
* environments where the consent dialog never fires. Tool execution is
|
|
20
|
+
* never blocked on the network and never throws.
|
|
21
|
+
*/
|
|
22
|
+
import { Database } from "bun:sqlite";
|
|
1
23
|
import type { AgentTool } from "@oh-my-pi/pi-agent-core";
|
|
2
24
|
import type { Settings } from "..";
|
|
3
25
|
import type { ToolSession } from "./index";
|
|
4
26
|
export declare function isAutoQaEnabled(settings?: Settings): boolean;
|
|
27
|
+
/**
|
|
28
|
+
* Resolver for the user's "share grievances?" consent.
|
|
29
|
+
*
|
|
30
|
+
* Return values:
|
|
31
|
+
* - `true` — user agreed; record + ship for this run and persist.
|
|
32
|
+
* - `false` — user declined; suppress for this run and persist.
|
|
33
|
+
* - `null` — user dismissed the dialog (ESC, click-away, …) without
|
|
34
|
+
* picking an option. The decision is NOT cached or persisted,
|
|
35
|
+
* so the next `report_tool_issue` invocation re-prompts.
|
|
36
|
+
*
|
|
37
|
+
* Persistence is the tool's job (so subagent invocations can persist into
|
|
38
|
+
* the disk-backed `Settings` instance the host registered alongside the
|
|
39
|
+
* handler), not the handler's. Implementations live in hosts that have UI
|
|
40
|
+
* affordances — today only `InteractiveMode`. When no handler is
|
|
41
|
+
* registered (CLI subcommands, tests, non-interactive runs) consent
|
|
42
|
+
* defaults to `false` — the explicit "don't collect by default" stance.
|
|
43
|
+
*/
|
|
44
|
+
export type AutoQaConsentHandler = () => Promise<boolean | null>;
|
|
45
|
+
/**
|
|
46
|
+
* Register the consent handler and the persistent {@link Settings} instance
|
|
47
|
+
* the decision should be written to. Passing `null` clears the handler
|
|
48
|
+
* (e.g. on `InteractiveMode` teardown). Re-registration is authoritative.
|
|
49
|
+
*/
|
|
50
|
+
export declare function setAutoQaConsentHandler(handler: AutoQaConsentHandler | null, persistentSettings?: Settings | null): void;
|
|
51
|
+
/** Test-only: clear consent cache + handler. Never call from production code. */
|
|
52
|
+
export declare function __resetAutoQaConsentForTests(): void;
|
|
53
|
+
/**
|
|
54
|
+
* Resolve the user's consent for `report_tool_issue` grievances.
|
|
55
|
+
*
|
|
56
|
+
* Precedence (highest first):
|
|
57
|
+
* 1. Process-global cache (set on first successful resolution).
|
|
58
|
+
* 2. Persistent setting (`dev.autoqa.consent` on the supplied `Settings`).
|
|
59
|
+
* 3. Persistent setting on the registered host `Settings`.
|
|
60
|
+
* 4. Consent handler popup (single-flight; persists the answer).
|
|
61
|
+
* 5. Default-deny when no handler is registered.
|
|
62
|
+
*
|
|
63
|
+
* Never throws — handler errors degrade to "denied for this call" without
|
|
64
|
+
* caching, so a subsequent invocation can re-prompt instead of being
|
|
65
|
+
* permanently locked into the false branch.
|
|
66
|
+
*/
|
|
67
|
+
export declare function resolveAutoQaConsent(settings: Settings | undefined): Promise<boolean>;
|
|
5
68
|
export declare function getAutoQaDbPath(): string;
|
|
6
|
-
|
|
69
|
+
/**
|
|
70
|
+
* Open (or return the cached handle for) the auto-QA SQLite database at
|
|
71
|
+
* `~/.omp/agent/autoqa.db`. Idempotently runs schema creation, the
|
|
72
|
+
* `pushed`-column migration, and index setup so every consumer — tool
|
|
73
|
+
* execute path, manual `omp grievances push`, future debug scripts —
|
|
74
|
+
* sees the same prepared schema. Returns `null` only on a hard open
|
|
75
|
+
* failure (filesystem permissions, etc.); a missing file is created.
|
|
76
|
+
*
|
|
77
|
+
* Exported because the `omp grievances` CLI handlers need the migrated
|
|
78
|
+
* handle too — having a second `openDb` in the CLI led to the column
|
|
79
|
+
* never being added on the manual-push path.
|
|
80
|
+
*/
|
|
81
|
+
export declare function openAutoQaDb(): Database | null;
|
|
82
|
+
export interface FlushResult {
|
|
83
|
+
pushed: number;
|
|
84
|
+
ok: boolean;
|
|
85
|
+
skipped?: boolean;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Optional per-flush controls. Used by `omp grievances push` to surface
|
|
89
|
+
* progress to a TTY and to skip the user-facing consent gate (manual
|
|
90
|
+
* pushes are the user's explicit intent, not a side effect of a tool call).
|
|
91
|
+
*/
|
|
92
|
+
export interface FlushOptions {
|
|
93
|
+
/**
|
|
94
|
+
* Skip the `dev.autoqa.consent === "granted"` gate in
|
|
95
|
+
* {@link resolvePushConfig}. Endpoint configuration is still required.
|
|
96
|
+
* Reserved for explicit user-driven pushes (CLI `grievances push`,
|
|
97
|
+
* future debug recipes); never set from the tool's auto-flush path.
|
|
98
|
+
*/
|
|
99
|
+
bypassConsent?: boolean;
|
|
100
|
+
/**
|
|
101
|
+
* Fires once at the start of the loop with the snapshot count of
|
|
102
|
+
* unpushed rows. Subsequent inserts won't be reflected (the count is
|
|
103
|
+
* a planning hint for progress reporters, not a live total).
|
|
104
|
+
*/
|
|
105
|
+
onStart?: (totalUnpushed: number) => void;
|
|
106
|
+
/**
|
|
107
|
+
* Fires after every successfully shipped batch with the running pushed
|
|
108
|
+
* count. Reporters compare against the `totalUnpushed` they saw in
|
|
109
|
+
* `onStart` to advance their bar.
|
|
110
|
+
*/
|
|
111
|
+
onProgress?: (pushedSoFar: number) => void;
|
|
112
|
+
}
|
|
113
|
+
/** Test-only: clear single-flight + cooldown state. Never call from production code. */
|
|
114
|
+
export declare function __resetAutoQaFlushStateForTests(): void;
|
|
115
|
+
/**
|
|
116
|
+
* Flush queued grievances to the configured backend.
|
|
117
|
+
*
|
|
118
|
+
* Single-flight: concurrent callers share the in-flight promise. After a
|
|
119
|
+
* failed push, retries are skipped for {@link FAILURE_COOLDOWN_MS} ms.
|
|
120
|
+
* Never throws — all errors are caught and routed to the logger.
|
|
121
|
+
*/
|
|
122
|
+
export declare function flushGrievances(db?: Database, settings?: Settings, options?: FlushOptions): Promise<FlushResult>;
|
|
123
|
+
export declare function createReportToolIssueTool(session: ToolSession, activeBuiltinNames?: readonly string[]): AgentTool;
|
|
@@ -5,7 +5,10 @@ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
|
5
5
|
import type { Theme } from "../modes/theme/theme";
|
|
6
6
|
import type { ToolSession } from ".";
|
|
7
7
|
declare const resolveSchema: z.ZodObject<{
|
|
8
|
-
action: z.
|
|
8
|
+
action: z.ZodEnum<{
|
|
9
|
+
apply: "apply";
|
|
10
|
+
discard: "discard";
|
|
11
|
+
}>;
|
|
9
12
|
reason: z.ZodString;
|
|
10
13
|
extra: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
11
14
|
}, z.core.$strip>;
|
|
@@ -54,7 +57,10 @@ export declare class ResolveTool implements AgentTool<typeof resolveSchema, Reso
|
|
|
54
57
|
readonly hidden = true;
|
|
55
58
|
readonly description: string;
|
|
56
59
|
readonly parameters: z.ZodObject<{
|
|
57
|
-
action: z.
|
|
60
|
+
action: z.ZodEnum<{
|
|
61
|
+
apply: "apply";
|
|
62
|
+
discard: "discard";
|
|
63
|
+
}>;
|
|
58
64
|
reason: z.ZodString;
|
|
59
65
|
extra: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
60
66
|
}, z.core.$strip>;
|
|
@@ -47,7 +47,6 @@ See [docs/custom-tools.md](../../docs/custom-tools.md) for full documentation.
|
|
|
47
47
|
**Factory pattern:**
|
|
48
48
|
|
|
49
49
|
```typescript
|
|
50
|
-
import { StringEnum } from "@oh-my-pi/pi-ai";
|
|
51
50
|
import { Text } from "@oh-my-pi/pi-tui";
|
|
52
51
|
import type { CustomToolFactory } from "@oh-my-pi/pi-coding-agent";
|
|
53
52
|
|
|
@@ -56,7 +55,7 @@ const factory: CustomToolFactory = (pi) => ({
|
|
|
56
55
|
label: "My Tool",
|
|
57
56
|
description: "Tool description for LLM",
|
|
58
57
|
parameters: pi.zod.object({
|
|
59
|
-
action:
|
|
58
|
+
action: pi.zod.enum(["list", "add"]),
|
|
60
59
|
}),
|
|
61
60
|
|
|
62
61
|
// Called on session start/switch/branch/clear
|
|
@@ -76,9 +75,6 @@ const factory: CustomToolFactory = (pi) => ({
|
|
|
76
75
|
|
|
77
76
|
export default factory;
|
|
78
77
|
```
|
|
79
|
-
|
|
80
|
-
**Legacy:** `parameters: pi.typebox.Type.Object({ ... })` still works; the injected `typebox` is a small Zod-backed shim, and schemas flow through the same Zod pipeline as `pi.zod` schemas.
|
|
81
|
-
|
|
82
78
|
**Custom rendering:**
|
|
83
79
|
|
|
84
80
|
```typescript
|
|
@@ -97,17 +93,12 @@ renderResult(result, { expanded, isPartial }, theme) {
|
|
|
97
93
|
},
|
|
98
94
|
```
|
|
99
95
|
|
|
100
|
-
**Use `
|
|
96
|
+
**Use `z.enum` for discriminated string tool args:**
|
|
101
97
|
|
|
102
98
|
```typescript
|
|
103
|
-
import { StringEnum } from "@oh-my-pi/pi-ai";
|
|
104
|
-
|
|
105
99
|
const { z } = pi.zod;
|
|
106
100
|
|
|
107
|
-
// Good — Google-safe enum wiring
|
|
108
101
|
parameters: z.object({
|
|
109
|
-
action:
|
|
102
|
+
action: z.enum(["list", "add"]),
|
|
110
103
|
});
|
|
111
|
-
|
|
112
|
-
// Avoid raw union-of-literals patterns that don't degrade well for strict JSON Schema providers
|
|
113
104
|
```
|
|
@@ -108,29 +108,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
108
108
|
});
|
|
109
109
|
}
|
|
110
110
|
```
|
|
111
|
-
|
|
112
|
-
**Legacy TypeBox-style schemas** (`pi.typebox`) remain available for older extensions and are backed by a tiny Zod-shim — prefer `pi.zod` directly for new code.
|
|
113
|
-
|
|
114
|
-
```typescript
|
|
115
|
-
const { Type } = pi.typebox;
|
|
116
|
-
parameters: Type.Object({ name: Type.String() });
|
|
117
|
-
```
|
|
118
|
-
|
|
119
111
|
## Key Patterns
|
|
120
112
|
|
|
121
|
-
**Use `
|
|
113
|
+
**Use `z.enum` for discriminated string tool args:**
|
|
122
114
|
|
|
123
115
|
```typescript
|
|
124
|
-
import { StringEnum } from "@oh-my-pi/pi-ai";
|
|
125
|
-
|
|
126
116
|
const { z } = pi.zod;
|
|
127
117
|
|
|
128
|
-
// Good — Google-safe enum wiring
|
|
129
118
|
parameters: z.object({
|
|
130
|
-
action:
|
|
119
|
+
action: z.enum(["list", "add"]),
|
|
131
120
|
});
|
|
132
|
-
|
|
133
|
-
// Avoid raw union-of-literals patterns that don't degrade well for strict JSON Schema providers
|
|
134
121
|
```
|
|
135
122
|
|
|
136
123
|
**State persistence via details:**
|
|
@@ -10,9 +10,6 @@ import type { ExtensionAPI } from "@oh-my-pi/pi-coding-agent";
|
|
|
10
10
|
export default function (pi: ExtensionAPI) {
|
|
11
11
|
const { z } = pi.zod;
|
|
12
12
|
|
|
13
|
-
// Access shared schema helpers from package exports (e.g. StringEnum for Google-safe enums)
|
|
14
|
-
const { StringEnum } = pi.pi;
|
|
15
|
-
|
|
16
13
|
// Access the logger for debugging
|
|
17
14
|
pi.logger.debug("API demo extension loaded");
|
|
18
15
|
|
|
@@ -22,10 +19,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
22
19
|
description: "Demonstrates ExtensionAPI capabilities: logger, zod, and pi module access",
|
|
23
20
|
parameters: z.object({
|
|
24
21
|
message: z.string().describe("Test message"),
|
|
25
|
-
logLevel:
|
|
26
|
-
description: "Log level to use",
|
|
27
|
-
default: "debug",
|
|
28
|
-
}),
|
|
22
|
+
logLevel: z.enum(["error", "warn", "debug"]).default("debug").describe("Log level to use"),
|
|
29
23
|
}),
|
|
30
24
|
|
|
31
25
|
async execute(_toolCallId, params, _onUpdate, ctx, _signal) {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-coding-agent",
|
|
4
|
-
"version": "15.1.
|
|
4
|
+
"version": "15.1.3",
|
|
5
5
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -47,12 +47,12 @@
|
|
|
47
47
|
"@agentclientprotocol/sdk": "0.21.0",
|
|
48
48
|
"@babel/parser": "^7.29.3",
|
|
49
49
|
"@mozilla/readability": "^0.6.0",
|
|
50
|
-
"@oh-my-pi/omp-stats": "15.1.
|
|
51
|
-
"@oh-my-pi/pi-agent-core": "15.1.
|
|
52
|
-
"@oh-my-pi/pi-ai": "15.1.
|
|
53
|
-
"@oh-my-pi/pi-natives": "15.1.
|
|
54
|
-
"@oh-my-pi/pi-tui": "15.1.
|
|
55
|
-
"@oh-my-pi/pi-utils": "15.1.
|
|
50
|
+
"@oh-my-pi/omp-stats": "15.1.3",
|
|
51
|
+
"@oh-my-pi/pi-agent-core": "15.1.3",
|
|
52
|
+
"@oh-my-pi/pi-ai": "15.1.3",
|
|
53
|
+
"@oh-my-pi/pi-natives": "15.1.3",
|
|
54
|
+
"@oh-my-pi/pi-tui": "15.1.3",
|
|
55
|
+
"@oh-my-pi/pi-utils": "15.1.3",
|
|
56
56
|
"@puppeteer/browsers": "^2.13.0",
|
|
57
57
|
"@types/turndown": "5.0.6",
|
|
58
58
|
"@xterm/headless": "^6.0.0",
|
|
@@ -17,42 +17,20 @@ export const DEFAULT_HARNESS_COMMAND = `bash ${HARNESS_FILENAME}`;
|
|
|
17
17
|
const HARNESS_COMMIT_TITLE = "autoresearch: harness setup";
|
|
18
18
|
|
|
19
19
|
const initExperimentSchema = z.object({
|
|
20
|
-
name: z.string().describe("
|
|
21
|
-
goal: z.string().describe("
|
|
22
|
-
primary_metric: z
|
|
23
|
-
|
|
24
|
-
.describe(
|
|
25
|
-
"Primary metric name shown in the dashboard. Match the `METRIC <name>=<value>` lines printed by the benchmark.",
|
|
26
|
-
),
|
|
27
|
-
metric_unit: z.string().describe("Unit for the primary metric (e.g. ms, µs, mb). Empty when unitless.").optional(),
|
|
20
|
+
name: z.string().describe("experiment name"),
|
|
21
|
+
goal: z.string().describe("session goal").optional(),
|
|
22
|
+
primary_metric: z.string().describe("primary metric name"),
|
|
23
|
+
metric_unit: z.string().describe("metric unit (e.g. ms, µs, mb)").optional(),
|
|
28
24
|
direction: z
|
|
29
25
|
.enum(["lower", "higher"] as const)
|
|
30
|
-
.describe("
|
|
31
|
-
.optional(),
|
|
32
|
-
secondary_metrics: z
|
|
33
|
-
.array(z.string())
|
|
34
|
-
.describe("Names of secondary metrics tracked alongside the primary metric.")
|
|
35
|
-
.optional(),
|
|
36
|
-
scope_paths: z
|
|
37
|
-
.array(z.string())
|
|
38
|
-
.describe(
|
|
39
|
-
"Files or directories the agent expects to modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
|
|
40
|
-
)
|
|
41
|
-
.optional(),
|
|
42
|
-
off_limits: z
|
|
43
|
-
.array(z.string())
|
|
44
|
-
.describe(
|
|
45
|
-
"Paths the agent SHOULD NOT modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
|
|
46
|
-
)
|
|
47
|
-
.optional(),
|
|
48
|
-
constraints: z.array(z.string()).describe("Free-form constraints (e.g. 'no api break').").optional(),
|
|
49
|
-
max_iterations: z.number().describe("Soft cap on iterations per segment. Optional.").optional(),
|
|
50
|
-
new_segment: z
|
|
51
|
-
.boolean()
|
|
52
|
-
.describe(
|
|
53
|
-
"When true, bump to a new segment even when an active session exists. New baselines and best-metric reset.",
|
|
54
|
-
)
|
|
26
|
+
.describe("better direction (default lower)")
|
|
55
27
|
.optional(),
|
|
28
|
+
secondary_metrics: z.array(z.string()).describe("secondary metric names").optional(),
|
|
29
|
+
scope_paths: z.array(z.string()).describe("expected-to-modify paths").optional(),
|
|
30
|
+
off_limits: z.array(z.string()).describe("off-limits paths").optional(),
|
|
31
|
+
constraints: z.array(z.string()).describe("free-form constraints").optional(),
|
|
32
|
+
max_iterations: z.number().describe("soft iteration cap per segment").optional(),
|
|
33
|
+
new_segment: z.boolean().describe("bump to a new segment in existing session").optional(),
|
|
56
34
|
});
|
|
57
35
|
|
|
58
36
|
interface InitExperimentDetails {
|
|
@@ -37,35 +37,21 @@ import type {
|
|
|
37
37
|
const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
|
|
38
38
|
|
|
39
39
|
const logExperimentSchema = z.object({
|
|
40
|
-
metric: z
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
.object({})
|
|
48
|
-
.passthrough()
|
|
49
|
-
.describe("Free-form structured metadata captured for this run (hypothesis, learnings, etc.).")
|
|
50
|
-
.optional(),
|
|
51
|
-
commit: z
|
|
52
|
-
.string()
|
|
53
|
-
.describe("Override the commit hash recorded for this run. Defaults to the current HEAD.")
|
|
54
|
-
.optional(),
|
|
55
|
-
justification: z
|
|
56
|
-
.string()
|
|
57
|
-
.describe(
|
|
58
|
-
"Required when the run modifies paths outside scope or inside off-limits and you still want it kept. Free-form explanation.",
|
|
59
|
-
)
|
|
60
|
-
.optional(),
|
|
40
|
+
metric: z.number().describe("primary metric value"),
|
|
41
|
+
status: z.enum(["keep", "discard", "crash", "checks_failed"] as const).describe("run outcome"),
|
|
42
|
+
description: z.string().describe("short run description"),
|
|
43
|
+
metrics: z.record(z.string(), z.number()).describe("secondary metrics").optional(),
|
|
44
|
+
asi: z.object({}).passthrough().describe("free-form structured metadata").optional(),
|
|
45
|
+
commit: z.string().describe("override recorded commit hash").optional(),
|
|
46
|
+
justification: z.string().describe("required when keeping a scope-deviating run").optional(),
|
|
61
47
|
flag_runs: z
|
|
62
48
|
.array(
|
|
63
49
|
z.object({
|
|
64
|
-
run_id: z.number().describe("
|
|
65
|
-
reason: z.string().describe("
|
|
50
|
+
run_id: z.number().describe("run id to flag"),
|
|
51
|
+
reason: z.string().describe("why this run is suspect"),
|
|
66
52
|
}),
|
|
67
53
|
)
|
|
68
|
-
.describe("
|
|
54
|
+
.describe("flag earlier runs as suspect")
|
|
69
55
|
.optional(),
|
|
70
56
|
});
|
|
71
57
|
|
|
@@ -27,7 +27,7 @@ import type { AutoresearchToolFactoryOptions, RunDetails, RunExperimentProgressD
|
|
|
27
27
|
import { DEFAULT_HARNESS_COMMAND } from "./init-experiment";
|
|
28
28
|
|
|
29
29
|
const runExperimentSchema = z.object({
|
|
30
|
-
timeout_seconds: z.number().describe("
|
|
30
|
+
timeout_seconds: z.number().describe("timeout in seconds (default 600)").optional(),
|
|
31
31
|
});
|
|
32
32
|
|
|
33
33
|
interface ProcessExecutionResult {
|
|
@@ -9,15 +9,8 @@ import { openAutoresearchStorageIfExists } from "../storage";
|
|
|
9
9
|
import type { AutoresearchToolFactoryOptions } from "../types";
|
|
10
10
|
|
|
11
11
|
const updateNotesSchema = z.object({
|
|
12
|
-
body: z
|
|
13
|
-
|
|
14
|
-
.describe("Replacement markdown body for the active autoresearch session's notes (your durable playbook)."),
|
|
15
|
-
append_idea: z
|
|
16
|
-
.string()
|
|
17
|
-
.describe(
|
|
18
|
-
"When set, append this string as a new bullet under an Ideas section instead of replacing the body. `body` is ignored.",
|
|
19
|
-
)
|
|
20
|
-
.optional(),
|
|
12
|
+
body: z.string().describe("replacement notes body"),
|
|
13
|
+
append_idea: z.string().describe("append as bullet under Ideas instead of replacing body").optional(),
|
|
21
14
|
});
|
|
22
15
|
|
|
23
16
|
interface UpdateNotesDetails {
|