@oh-my-pi/pi-coding-agent 15.1.2 → 15.1.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (141):
  1. package/CHANGELOG.md +42 -0
  2. package/dist/types/cli/auth-broker-cli.d.ts +25 -0
  3. package/dist/types/cli/auth-gateway-cli.d.ts +18 -0
  4. package/dist/types/cli/grievances-cli.d.ts +12 -0
  5. package/dist/types/commands/auth-broker.d.ts +54 -0
  6. package/dist/types/commands/auth-gateway.d.ts +32 -0
  7. package/dist/types/commands/grievances.d.ts +1 -1
  8. package/dist/types/commit/agentic/tools/propose-commit.d.ts +9 -1
  9. package/dist/types/commit/agentic/tools/schemas.d.ts +9 -1
  10. package/dist/types/commit/agentic/tools/split-commit.d.ts +9 -1
  11. package/dist/types/config/model-registry.d.ts +3 -0
  12. package/dist/types/config/models-config-schema.d.ts +1 -0
  13. package/dist/types/config/settings-schema.d.ts +46 -0
  14. package/dist/types/discovery/agents.d.ts +12 -1
  15. package/dist/types/edit/renderer.d.ts +3 -0
  16. package/dist/types/eval/index.d.ts +0 -2
  17. package/dist/types/goals/tools/goal-tool.d.ts +10 -2
  18. package/dist/types/index.d.ts +0 -1
  19. package/dist/types/internal-urls/index.d.ts +1 -1
  20. package/dist/types/internal-urls/{pi-protocol.d.ts → omp-protocol.d.ts} +3 -3
  21. package/dist/types/internal-urls/types.d.ts +1 -1
  22. package/dist/types/modes/acp/acp-agent.d.ts +1 -0
  23. package/dist/types/modes/emoji-autocomplete.d.ts +16 -0
  24. package/dist/types/modes/interactive-mode.d.ts +1 -1
  25. package/dist/types/modes/prompt-action-autocomplete.d.ts +4 -0
  26. package/dist/types/plan-mode/approved-plan.d.ts +4 -0
  27. package/dist/types/sdk.d.ts +10 -3
  28. package/dist/types/session/agent-session.d.ts +1 -1
  29. package/dist/types/session/auth-broker-config.d.ts +13 -0
  30. package/dist/types/session/auth-storage.d.ts +1 -1
  31. package/dist/types/tools/eval.d.ts +41 -7
  32. package/dist/types/tools/irc.d.ts +8 -2
  33. package/dist/types/tools/report-tool-issue.d.ts +118 -1
  34. package/dist/types/tools/resolve.d.ts +8 -2
  35. package/examples/custom-tools/README.md +3 -12
  36. package/examples/extensions/README.md +2 -15
  37. package/examples/extensions/api-demo.ts +1 -7
  38. package/package.json +7 -7
  39. package/src/autoresearch/tools/init-experiment.ts +11 -33
  40. package/src/autoresearch/tools/log-experiment.ts +10 -24
  41. package/src/autoresearch/tools/run-experiment.ts +1 -1
  42. package/src/autoresearch/tools/update-notes.ts +2 -9
  43. package/src/cli/auth-broker-cli.ts +746 -0
  44. package/src/cli/auth-gateway-cli.ts +342 -0
  45. package/src/cli/grievances-cli.ts +109 -16
  46. package/src/cli.ts +4 -2
  47. package/src/commands/auth-broker.ts +96 -0
  48. package/src/commands/auth-gateway.ts +61 -0
  49. package/src/commands/grievances.ts +13 -8
  50. package/src/commands/launch.ts +1 -1
  51. package/src/commit/agentic/agent.ts +2 -0
  52. package/src/commit/agentic/tools/analyze-file.ts +2 -2
  53. package/src/commit/agentic/tools/git-file-diff.ts +2 -2
  54. package/src/commit/agentic/tools/git-hunk.ts +3 -3
  55. package/src/commit/agentic/tools/git-overview.ts +2 -2
  56. package/src/commit/agentic/tools/propose-changelog.ts +1 -3
  57. package/src/commit/agentic/tools/recent-commits.ts +1 -1
  58. package/src/commit/agentic/tools/schemas.ts +1 -9
  59. package/src/config/model-equivalence.ts +279 -174
  60. package/src/config/model-registry.ts +37 -6
  61. package/src/config/model-resolver.ts +13 -8
  62. package/src/config/models-config-schema.ts +8 -0
  63. package/src/config/settings-schema.ts +52 -0
  64. package/src/cursor.ts +1 -1
  65. package/src/debug/log-formatting.ts +1 -1
  66. package/src/debug/log-viewer.ts +1 -1
  67. package/src/debug/profiler.ts +4 -0
  68. package/src/debug/raw-sse-buffer.ts +100 -59
  69. package/src/debug/raw-sse.ts +1 -1
  70. package/src/discovery/agents.ts +15 -4
  71. package/src/edit/modes/apply-patch.ts +1 -5
  72. package/src/edit/modes/patch.ts +5 -5
  73. package/src/edit/modes/replace.ts +5 -5
  74. package/src/edit/renderer.ts +2 -1
  75. package/src/edit/streaming.ts +1 -1
  76. package/src/eval/index.ts +0 -2
  77. package/src/eval/js/shared/runtime.ts +25 -0
  78. package/src/eval/py/kernel.ts +1 -1
  79. package/src/exa/researcher.ts +4 -4
  80. package/src/exa/search.ts +10 -22
  81. package/src/exa/websets.ts +33 -33
  82. package/src/goals/tools/goal-tool.ts +3 -3
  83. package/src/index.ts +0 -3
  84. package/src/internal-urls/docs-index.generated.ts +21 -18
  85. package/src/internal-urls/index.ts +1 -1
  86. package/src/internal-urls/{pi-protocol.ts → omp-protocol.ts} +10 -10
  87. package/src/internal-urls/router.ts +3 -3
  88. package/src/internal-urls/types.ts +1 -1
  89. package/src/lsp/types.ts +8 -11
  90. package/src/main.ts +3 -0
  91. package/src/mcp/tool-bridge.ts +3 -3
  92. package/src/modes/acp/acp-agent.ts +88 -25
  93. package/src/modes/components/bash-execution.ts +1 -1
  94. package/src/modes/components/diff.ts +1 -2
  95. package/src/modes/components/eval-execution.ts +1 -1
  96. package/src/modes/components/oauth-selector.ts +38 -2
  97. package/src/modes/components/tool-execution.ts +1 -2
  98. package/src/modes/controllers/command-controller.ts +95 -34
  99. package/src/modes/controllers/input-controller.ts +4 -3
  100. package/src/modes/data/emojis.json +1 -0
  101. package/src/modes/emoji-autocomplete.ts +285 -0
  102. package/src/modes/interactive-mode.ts +92 -19
  103. package/src/modes/print-mode.ts +3 -3
  104. package/src/modes/prompt-action-autocomplete.ts +14 -0
  105. package/src/plan-mode/approved-plan.ts +9 -0
  106. package/src/prompts/system/system-prompt.md +1 -1
  107. package/src/prompts/system/ttsr-tool-reminder.md +5 -0
  108. package/src/prompts/tools/eval.md +25 -26
  109. package/src/prompts/tools/read.md +1 -1
  110. package/src/prompts/tools/resolve.md +1 -1
  111. package/src/prompts/tools/search.md +1 -1
  112. package/src/prompts/tools/web-search.md +1 -1
  113. package/src/sdk.ts +78 -7
  114. package/src/session/agent-session.ts +176 -77
  115. package/src/session/agent-storage.ts +7 -2
  116. package/src/session/auth-broker-config.ts +102 -0
  117. package/src/session/auth-storage.ts +7 -1
  118. package/src/session/streaming-output.ts +1 -1
  119. package/src/task/types.ts +10 -35
  120. package/src/tools/bash-interactive.ts +4 -1
  121. package/src/tools/bash-pty-selection.ts +2 -2
  122. package/src/tools/browser.ts +12 -20
  123. package/src/tools/eval.ts +77 -100
  124. package/src/tools/gh.ts +21 -45
  125. package/src/tools/hindsight-recall.ts +1 -1
  126. package/src/tools/hindsight-reflect.ts +2 -2
  127. package/src/tools/hindsight-retain.ts +3 -7
  128. package/src/tools/index.ts +8 -1
  129. package/src/tools/inspect-image.ts +4 -1
  130. package/src/tools/irc.ts +4 -12
  131. package/src/tools/job.ts +3 -11
  132. package/src/tools/report-tool-issue.ts +462 -17
  133. package/src/tools/resolve.ts +2 -7
  134. package/src/tools/todo-write.ts +8 -15
  135. package/src/utils/title-generator.ts +3 -0
  136. package/src/web/search/index.ts +6 -6
  137. package/dist/types/eval/parse.d.ts +0 -28
  138. package/dist/types/eval/sniff.d.ts +0 -11
  139. package/src/eval/eval.lark +0 -36
  140. package/src/eval/parse.ts +0 -407
  141. package/src/eval/sniff.ts +0 -28
@@ -6,8 +6,32 @@ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
6
6
  import { type Theme } from "../modes/theme/theme";
7
7
  import { type ToolSession } from ".";
8
8
  export declare const EVAL_DEFAULT_PREVIEW_LINES = 10;
9
+ /**
10
+ * Per-cell input. Each cell runs in order; state persists within a language
11
+ * across cells and across tool calls.
12
+ */
13
+ declare const evalCellSchema: z.ZodObject<{
14
+ language: z.ZodEnum<{
15
+ js: "js";
16
+ py: "py";
17
+ }>;
18
+ code: z.ZodString;
19
+ title: z.ZodOptional<z.ZodString>;
20
+ timeout: z.ZodOptional<z.ZodNumber>;
21
+ reset: z.ZodOptional<z.ZodBoolean>;
22
+ }, z.core.$strip>;
23
+ export type EvalCellInput = z.infer<typeof evalCellSchema>;
9
24
  export declare const evalSchema: z.ZodObject<{
10
- input: z.ZodString;
25
+ cells: z.ZodArray<z.ZodObject<{
26
+ language: z.ZodEnum<{
27
+ js: "js";
28
+ py: "py";
29
+ }>;
30
+ code: z.ZodString;
31
+ title: z.ZodOptional<z.ZodString>;
32
+ timeout: z.ZodOptional<z.ZodNumber>;
33
+ reset: z.ZodOptional<z.ZodBoolean>;
34
+ }, z.core.$strip>>;
11
35
  }, z.core.$strip>;
12
36
  export type EvalToolParams = z.infer<typeof evalSchema>;
13
37
  export type EvalToolResult = {
@@ -35,20 +59,30 @@ export declare class EvalTool implements AgentTool<typeof evalSchema> {
35
59
  readonly label = "Eval";
36
60
  get description(): string;
37
61
  readonly parameters: z.ZodObject<{
38
- input: z.ZodString;
62
+ cells: z.ZodArray<z.ZodObject<{
63
+ language: z.ZodEnum<{
64
+ js: "js";
65
+ py: "py";
66
+ }>;
67
+ code: z.ZodString;
68
+ title: z.ZodOptional<z.ZodString>;
69
+ timeout: z.ZodOptional<z.ZodNumber>;
70
+ reset: z.ZodOptional<z.ZodBoolean>;
71
+ }, z.core.$strip>>;
39
72
  }, z.core.$strip>;
40
73
  readonly concurrency = "exclusive";
41
74
  readonly strict = true;
42
75
  readonly intent: (args: Partial<z.infer<typeof evalSchema>>) => string | undefined;
43
- get customFormat(): {
44
- syntax: "lark";
45
- definition: string;
46
- };
47
76
  constructor(session: ToolSession | null, options?: EvalToolOptions);
48
77
  execute(_toolCallId: string, params: z.infer<typeof evalSchema>, signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback, _ctx?: AgentToolContext): Promise<AgentToolResult<EvalToolDetails | undefined>>;
49
78
  }
79
+ interface EvalRenderCellArg {
80
+ language?: string;
81
+ code?: string;
82
+ title?: string;
83
+ }
50
84
  interface EvalRenderArgs {
51
- input?: string;
85
+ cells?: EvalRenderCellArg[];
52
86
  __partialJson?: string;
53
87
  }
54
88
  interface EvalRenderContext {
@@ -21,7 +21,10 @@ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallb
21
21
  import * as z from "zod/v4";
22
22
  import type { ToolSession } from ".";
23
23
  declare const ircSchema: z.ZodObject<{
24
- op: z.ZodUnion<readonly [z.ZodLiteral<"send">, z.ZodLiteral<"list">]>;
24
+ op: z.ZodEnum<{
25
+ list: "list";
26
+ send: "send";
27
+ }>;
25
28
  to: z.ZodOptional<z.ZodString>;
26
29
  message: z.ZodOptional<z.ZodString>;
27
30
  awaitReply: z.ZodOptional<z.ZodBoolean>;
@@ -59,7 +62,10 @@ export declare class IrcTool implements AgentTool<typeof ircSchema, IrcDetails>
59
62
  readonly summary = "Send and receive messages between agents over IRC-like channels";
60
63
  readonly description: string;
61
64
  readonly parameters: z.ZodObject<{
62
- op: z.ZodUnion<readonly [z.ZodLiteral<"send">, z.ZodLiteral<"list">]>;
65
+ op: z.ZodEnum<{
66
+ list: "list";
67
+ send: "send";
68
+ }>;
63
69
  to: z.ZodOptional<z.ZodString>;
64
70
  message: z.ZodOptional<z.ZodString>;
65
71
  awaitReply: z.ZodOptional<z.ZodBoolean>;
@@ -1,6 +1,123 @@
1
+ /**
2
+ * report_tool_issue — automated QA tool for tracking unexpected tool behavior.
3
+ *
4
+ * Enabled by default; gated behind PI_AUTO_QA=1 / `dev.autoqa` so a user
5
+ * who flips the setting off short-circuits injection entirely.
6
+ * Always injected into every agent (including subagents) regardless of tool selection.
7
+ * Records grievances to a local SQLite database; never throws.
8
+ *
9
+ * Before the first record lands, the user's consent is checked. If they've
10
+ * never been asked (`dev.autoqa.consent === "unset"`) the process-global
11
+ * consent handler — wired by `InteractiveMode` to a Yes/No popup — is
12
+ * invoked exactly once and the decision is persisted. Subsequent calls
13
+ * (including from subagents) read the cached decision without prompting.
14
+ *
15
+ * When the user grants consent, push is automatically active against the
16
+ * bundled endpoint (`dev.autoqaPush.endpoint`, default `qa.omp.sh`). Each
17
+ * insert schedules a background flush that POSTs pending rows and deletes
18
+ * them on HTTP 2xx. `PI_AUTO_QA_PUSH=1` forces push in non-interactive
19
+ * environments where the consent dialog never fires. Tool execution is
20
+ * never blocked on the network and never throws.
21
+ */
22
+ import { Database } from "bun:sqlite";
1
23
  import type { AgentTool } from "@oh-my-pi/pi-agent-core";
2
24
  import type { Settings } from "..";
3
25
  import type { ToolSession } from "./index";
4
26
  export declare function isAutoQaEnabled(settings?: Settings): boolean;
27
+ /**
28
+ * Resolver for the user's "share grievances?" consent.
29
+ *
30
+ * Return values:
31
+ * - `true` — user agreed; record + ship for this run and persist.
32
+ * - `false` — user declined; suppress for this run and persist.
33
+ * - `null` — user dismissed the dialog (ESC, click-away, …) without
34
+ * picking an option. The decision is NOT cached or persisted,
35
+ * so the next `report_tool_issue` invocation re-prompts.
36
+ *
37
+ * Persistence is the tool's job (so subagent invocations can persist into
38
+ * the disk-backed `Settings` instance the host registered alongside the
39
+ * handler), not the handler's. Implementations live in hosts that have UI
40
+ * affordances — today only `InteractiveMode`. When no handler is
41
+ * registered (CLI subcommands, tests, non-interactive runs) consent
42
+ * defaults to `false` — the explicit "don't collect by default" stance.
43
+ */
44
+ export type AutoQaConsentHandler = () => Promise<boolean | null>;
45
+ /**
46
+ * Register the consent handler and the persistent {@link Settings} instance
47
+ * the decision should be written to. Passing `null` clears the handler
48
+ * (e.g. on `InteractiveMode` teardown). Re-registration is authoritative.
49
+ */
50
+ export declare function setAutoQaConsentHandler(handler: AutoQaConsentHandler | null, persistentSettings?: Settings | null): void;
51
+ /** Test-only: clear consent cache + handler. Never call from production code. */
52
+ export declare function __resetAutoQaConsentForTests(): void;
53
+ /**
54
+ * Resolve the user's consent for `report_tool_issue` grievances.
55
+ *
56
+ * Precedence (highest first):
57
+ * 1. Process-global cache (set on first successful resolution).
58
+ * 2. Persistent setting (`dev.autoqa.consent` on the supplied `Settings`).
59
+ * 3. Persistent setting on the registered host `Settings`.
60
+ * 4. Consent handler popup (single-flight; persists the answer).
61
+ * 5. Default-deny when no handler is registered.
62
+ *
63
+ * Never throws — handler errors degrade to "denied for this call" without
64
+ * caching, so a subsequent invocation can re-prompt instead of being
65
+ * permanently locked into the false branch.
66
+ */
67
+ export declare function resolveAutoQaConsent(settings: Settings | undefined): Promise<boolean>;
5
68
  export declare function getAutoQaDbPath(): string;
6
- export declare function createReportToolIssueTool(session: ToolSession): AgentTool;
69
+ /**
70
+ * Open (or return the cached handle for) the auto-QA SQLite database at
71
+ * `~/.omp/agent/autoqa.db`. Idempotently runs schema creation, the
72
+ * `pushed`-column migration, and index setup so every consumer — tool
73
+ * execute path, manual `omp grievances push`, future debug scripts —
74
+ * sees the same prepared schema. Returns `null` only on a hard open
75
+ * failure (filesystem permissions, etc.); a missing file is created.
76
+ *
77
+ * Exported because the `omp grievances` CLI handlers need the migrated
78
+ * handle too — having a second `openDb` in the CLI led to the column
79
+ * never being added on the manual-push path.
80
+ */
81
+ export declare function openAutoQaDb(): Database | null;
82
+ export interface FlushResult {
83
+ pushed: number;
84
+ ok: boolean;
85
+ skipped?: boolean;
86
+ }
87
+ /**
88
+ * Optional per-flush controls. Used by `omp grievances push` to surface
89
+ * progress to a TTY and to skip the user-facing consent gate (manual
90
+ * pushes are the user's explicit intent, not a side effect of a tool call).
91
+ */
92
+ export interface FlushOptions {
93
+ /**
94
+ * Skip the `dev.autoqa.consent === "granted"` gate in
95
+ * {@link resolvePushConfig}. Endpoint configuration is still required.
96
+ * Reserved for explicit user-driven pushes (CLI `grievances push`,
97
+ * future debug recipes); never set from the tool's auto-flush path.
98
+ */
99
+ bypassConsent?: boolean;
100
+ /**
101
+ * Fires once at the start of the loop with the snapshot count of
102
+ * unpushed rows. Subsequent inserts won't be reflected (the count is
103
+ * a planning hint for progress reporters, not a live total).
104
+ */
105
+ onStart?: (totalUnpushed: number) => void;
106
+ /**
107
+ * Fires after every successfully shipped batch with the running pushed
108
+ * count. Reporters compare against the `totalUnpushed` they saw in
109
+ * `onStart` to advance their bar.
110
+ */
111
+ onProgress?: (pushedSoFar: number) => void;
112
+ }
113
+ /** Test-only: clear single-flight + cooldown state. Never call from production code. */
114
+ export declare function __resetAutoQaFlushStateForTests(): void;
115
+ /**
116
+ * Flush queued grievances to the configured backend.
117
+ *
118
+ * Single-flight: concurrent callers share the in-flight promise. After a
119
+ * failed push, retries are skipped for {@link FAILURE_COOLDOWN_MS} ms.
120
+ * Never throws — all errors are caught and routed to the logger.
121
+ */
122
+ export declare function flushGrievances(db?: Database, settings?: Settings, options?: FlushOptions): Promise<FlushResult>;
123
+ export declare function createReportToolIssueTool(session: ToolSession, activeBuiltinNames?: readonly string[]): AgentTool;
@@ -5,7 +5,10 @@ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
5
5
  import type { Theme } from "../modes/theme/theme";
6
6
  import type { ToolSession } from ".";
7
7
  declare const resolveSchema: z.ZodObject<{
8
- action: z.ZodUnion<readonly [z.ZodLiteral<"apply">, z.ZodLiteral<"discard">]>;
8
+ action: z.ZodEnum<{
9
+ apply: "apply";
10
+ discard: "discard";
11
+ }>;
9
12
  reason: z.ZodString;
10
13
  extra: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
11
14
  }, z.core.$strip>;
@@ -54,7 +57,10 @@ export declare class ResolveTool implements AgentTool<typeof resolveSchema, Reso
54
57
  readonly hidden = true;
55
58
  readonly description: string;
56
59
  readonly parameters: z.ZodObject<{
57
- action: z.ZodUnion<readonly [z.ZodLiteral<"apply">, z.ZodLiteral<"discard">]>;
60
+ action: z.ZodEnum<{
61
+ apply: "apply";
62
+ discard: "discard";
63
+ }>;
58
64
  reason: z.ZodString;
59
65
  extra: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
60
66
  }, z.core.$strip>;
@@ -47,7 +47,6 @@ See [docs/custom-tools.md](../../docs/custom-tools.md) for full documentation.
47
47
  **Factory pattern:**
48
48
 
49
49
  ```typescript
50
- import { StringEnum } from "@oh-my-pi/pi-ai";
51
50
  import { Text } from "@oh-my-pi/pi-tui";
52
51
  import type { CustomToolFactory } from "@oh-my-pi/pi-coding-agent";
53
52
 
@@ -56,7 +55,7 @@ const factory: CustomToolFactory = (pi) => ({
56
55
  label: "My Tool",
57
56
  description: "Tool description for LLM",
58
57
  parameters: pi.zod.object({
59
- action: StringEnum(["list", "add"] as const),
58
+ action: pi.zod.enum(["list", "add"]),
60
59
  }),
61
60
 
62
61
  // Called on session start/switch/branch/clear
@@ -76,9 +75,6 @@ const factory: CustomToolFactory = (pi) => ({
76
75
 
77
76
  export default factory;
78
77
  ```
79
-
80
- **Legacy:** `parameters: pi.typebox.Type.Object({ ... })` still works; the injected `typebox` is a small Zod-backed shim, and schemas flow through the same Zod pipeline as `pi.zod` schemas.
81
-
82
78
  **Custom rendering:**
83
79
 
84
80
  ```typescript
@@ -97,17 +93,12 @@ renderResult(result, { expanded, isPartial }, theme) {
97
93
  },
98
94
  ```
99
95
 
100
- **Use `StringEnum` for discriminated string tool args** (required for Google API compatibility):
96
+ **Use `z.enum` for discriminated string tool args:**
101
97
 
102
98
  ```typescript
103
- import { StringEnum } from "@oh-my-pi/pi-ai";
104
-
105
99
  const { z } = pi.zod;
106
100
 
107
- // Good — Google-safe enum wiring
108
101
  parameters: z.object({
109
- action: StringEnum(["list", "add"] as const),
102
+ action: z.enum(["list", "add"]),
110
103
  });
111
-
112
- // Avoid raw union-of-literals patterns that don't degrade well for strict JSON Schema providers
113
104
  ```
@@ -108,29 +108,16 @@ export default function (pi: ExtensionAPI) {
108
108
  });
109
109
  }
110
110
  ```
111
-
112
- **Legacy TypeBox-style schemas** (`pi.typebox`) remain available for older extensions and are backed by a tiny Zod-shim — prefer `pi.zod` directly for new code.
113
-
114
- ```typescript
115
- const { Type } = pi.typebox;
116
- parameters: Type.Object({ name: Type.String() });
117
- ```
118
-
119
111
  ## Key Patterns
120
112
 
121
- **Use `StringEnum` for discriminated string tool args** (required for Google API compatibility):
113
+ **Use `z.enum` for discriminated string tool args:**
122
114
 
123
115
  ```typescript
124
- import { StringEnum } from "@oh-my-pi/pi-ai";
125
-
126
116
  const { z } = pi.zod;
127
117
 
128
- // Good — Google-safe enum wiring
129
118
  parameters: z.object({
130
- action: StringEnum(["list", "add"] as const),
119
+ action: z.enum(["list", "add"]),
131
120
  });
132
-
133
- // Avoid raw union-of-literals patterns that don't degrade well for strict JSON Schema providers
134
121
  ```
135
122
 
136
123
  **State persistence via details:**
@@ -10,9 +10,6 @@ import type { ExtensionAPI } from "@oh-my-pi/pi-coding-agent";
10
10
  export default function (pi: ExtensionAPI) {
11
11
  const { z } = pi.zod;
12
12
 
13
- // Access shared schema helpers from package exports (e.g. StringEnum for Google-safe enums)
14
- const { StringEnum } = pi.pi;
15
-
16
13
  // Access the logger for debugging
17
14
  pi.logger.debug("API demo extension loaded");
18
15
 
@@ -22,10 +19,7 @@ export default function (pi: ExtensionAPI) {
22
19
  description: "Demonstrates ExtensionAPI capabilities: logger, zod, and pi module access",
23
20
  parameters: z.object({
24
21
  message: z.string().describe("Test message"),
25
- logLevel: StringEnum(["error", "warn", "debug"], {
26
- description: "Log level to use",
27
- default: "debug",
28
- }),
22
+ logLevel: z.enum(["error", "warn", "debug"]).default("debug").describe("Log level to use"),
29
23
  }),
30
24
 
31
25
  async execute(_toolCallId, params, _onUpdate, ctx, _signal) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-coding-agent",
4
- "version": "15.1.2",
4
+ "version": "15.1.3",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -47,12 +47,12 @@
47
47
  "@agentclientprotocol/sdk": "0.21.0",
48
48
  "@babel/parser": "^7.29.3",
49
49
  "@mozilla/readability": "^0.6.0",
50
- "@oh-my-pi/omp-stats": "15.1.2",
51
- "@oh-my-pi/pi-agent-core": "15.1.2",
52
- "@oh-my-pi/pi-ai": "15.1.2",
53
- "@oh-my-pi/pi-natives": "15.1.2",
54
- "@oh-my-pi/pi-tui": "15.1.2",
55
- "@oh-my-pi/pi-utils": "15.1.2",
50
+ "@oh-my-pi/omp-stats": "15.1.3",
51
+ "@oh-my-pi/pi-agent-core": "15.1.3",
52
+ "@oh-my-pi/pi-ai": "15.1.3",
53
+ "@oh-my-pi/pi-natives": "15.1.3",
54
+ "@oh-my-pi/pi-tui": "15.1.3",
55
+ "@oh-my-pi/pi-utils": "15.1.3",
56
56
  "@puppeteer/browsers": "^2.13.0",
57
57
  "@types/turndown": "5.0.6",
58
58
  "@xterm/headless": "^6.0.0",
@@ -17,42 +17,20 @@ export const DEFAULT_HARNESS_COMMAND = `bash ${HARNESS_FILENAME}`;
17
17
  const HARNESS_COMMIT_TITLE = "autoresearch: harness setup";
18
18
 
19
19
  const initExperimentSchema = z.object({
20
- name: z.string().describe("Human-readable experiment name."),
21
- goal: z.string().describe("Free-form description of what this session optimizes.").optional(),
22
- primary_metric: z
23
- .string()
24
- .describe(
25
- "Primary metric name shown in the dashboard. Match the `METRIC <name>=<value>` lines printed by the benchmark.",
26
- ),
27
- metric_unit: z.string().describe("Unit for the primary metric (e.g. ms, µs, mb). Empty when unitless.").optional(),
20
+ name: z.string().describe("experiment name"),
21
+ goal: z.string().describe("session goal").optional(),
22
+ primary_metric: z.string().describe("primary metric name"),
23
+ metric_unit: z.string().describe("metric unit (e.g. ms, µs, mb)").optional(),
28
24
  direction: z
29
25
  .enum(["lower", "higher"] as const)
30
- .describe("Whether lower or higher values are better. Defaults to lower.")
31
- .optional(),
32
- secondary_metrics: z
33
- .array(z.string())
34
- .describe("Names of secondary metrics tracked alongside the primary metric.")
35
- .optional(),
36
- scope_paths: z
37
- .array(z.string())
38
- .describe(
39
- "Files or directories the agent expects to modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
40
- )
41
- .optional(),
42
- off_limits: z
43
- .array(z.string())
44
- .describe(
45
- "Paths the agent SHOULD NOT modify. Used post-hoc to flag scope deviations on log_experiment; never used to block edits.",
46
- )
47
- .optional(),
48
- constraints: z.array(z.string()).describe("Free-form constraints (e.g. 'no api break').").optional(),
49
- max_iterations: z.number().describe("Soft cap on iterations per segment. Optional.").optional(),
50
- new_segment: z
51
- .boolean()
52
- .describe(
53
- "When true, bump to a new segment even when an active session exists. New baselines and best-metric reset.",
54
- )
26
+ .describe("better direction (default lower)")
55
27
  .optional(),
28
+ secondary_metrics: z.array(z.string()).describe("secondary metric names").optional(),
29
+ scope_paths: z.array(z.string()).describe("expected-to-modify paths").optional(),
30
+ off_limits: z.array(z.string()).describe("off-limits paths").optional(),
31
+ constraints: z.array(z.string()).describe("free-form constraints").optional(),
32
+ max_iterations: z.number().describe("soft iteration cap per segment").optional(),
33
+ new_segment: z.boolean().describe("bump to a new segment in existing session").optional(),
56
34
  });
57
35
 
58
36
  interface InitExperimentDetails {
@@ -37,35 +37,21 @@ import type {
37
37
  const EXPERIMENT_TOOL_NAMES = ["init_experiment", "run_experiment", "log_experiment", "update_notes"];
38
38
 
39
39
  const logExperimentSchema = z.object({
40
- metric: z
41
- .number()
42
- .describe("Primary metric value for this run. May differ from the parsed value; deviation is recorded."),
43
- status: z.enum(["keep", "discard", "crash", "checks_failed"] as const).describe("Outcome for this run."),
44
- description: z.string().describe("Short description of the experiment."),
45
- metrics: z.record(z.string(), z.number()).describe("Secondary metrics for this run.").optional(),
46
- asi: z
47
- .object({})
48
- .passthrough()
49
- .describe("Free-form structured metadata captured for this run (hypothesis, learnings, etc.).")
50
- .optional(),
51
- commit: z
52
- .string()
53
- .describe("Override the commit hash recorded for this run. Defaults to the current HEAD.")
54
- .optional(),
55
- justification: z
56
- .string()
57
- .describe(
58
- "Required when the run modifies paths outside scope or inside off-limits and you still want it kept. Free-form explanation.",
59
- )
60
- .optional(),
40
+ metric: z.number().describe("primary metric value"),
41
+ status: z.enum(["keep", "discard", "crash", "checks_failed"] as const).describe("run outcome"),
42
+ description: z.string().describe("short run description"),
43
+ metrics: z.record(z.string(), z.number()).describe("secondary metrics").optional(),
44
+ asi: z.object({}).passthrough().describe("free-form structured metadata").optional(),
45
+ commit: z.string().describe("override recorded commit hash").optional(),
46
+ justification: z.string().describe("required when keeping a scope-deviating run").optional(),
61
47
  flag_runs: z
62
48
  .array(
63
49
  z.object({
64
- run_id: z.number().describe("Run id (#) of a previously logged run to flag as suspect."),
65
- reason: z.string().describe("Why this earlier run is suspect (e.g. reward-hacked, broken metric)."),
50
+ run_id: z.number().describe("run id to flag"),
51
+ reason: z.string().describe("why this run is suspect"),
66
52
  }),
67
53
  )
68
- .describe("Mark earlier runs as flagged. Flagged runs are excluded from baseline and best-metric math.")
54
+ .describe("flag earlier runs as suspect")
69
55
  .optional(),
70
56
  });
71
57
 
@@ -27,7 +27,7 @@ import type { AutoresearchToolFactoryOptions, RunDetails, RunExperimentProgressD
27
27
  import { DEFAULT_HARNESS_COMMAND } from "./init-experiment";
28
28
 
29
29
  const runExperimentSchema = z.object({
30
- timeout_seconds: z.number().describe("Timeout in seconds. Defaults to 600.").optional(),
30
+ timeout_seconds: z.number().describe("timeout in seconds (default 600)").optional(),
31
31
  });
32
32
 
33
33
  interface ProcessExecutionResult {
@@ -9,15 +9,8 @@ import { openAutoresearchStorageIfExists } from "../storage";
9
9
  import type { AutoresearchToolFactoryOptions } from "../types";
10
10
 
11
11
  const updateNotesSchema = z.object({
12
- body: z
13
- .string()
14
- .describe("Replacement markdown body for the active autoresearch session's notes (your durable playbook)."),
15
- append_idea: z
16
- .string()
17
- .describe(
18
- "When set, append this string as a new bullet under an Ideas section instead of replacing the body. `body` is ignored.",
19
- )
20
- .optional(),
12
+ body: z.string().describe("replacement notes body"),
13
+ append_idea: z.string().describe("append as bullet under Ideas instead of replacing body").optional(),
21
14
  });
22
15
 
23
16
  interface UpdateNotesDetails {