@oh-my-pi/pi-coding-agent 16.0.3 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +49 -0
  2. package/dist/cli.js +697 -337
  3. package/dist/types/advisor/advise-tool.d.ts +9 -0
  4. package/dist/types/cli/args.d.ts +2 -0
  5. package/dist/types/cli/bench-cli.d.ts +6 -0
  6. package/dist/types/commands/launch.d.ts +6 -0
  7. package/dist/types/config/settings-schema.d.ts +92 -3
  8. package/dist/types/edit/file-snapshot-store.d.ts +2 -0
  9. package/dist/types/extensibility/extensions/runner.d.ts +5 -2
  10. package/dist/types/extensibility/extensions/types.d.ts +8 -7
  11. package/dist/types/extensibility/shared-events.d.ts +22 -1
  12. package/dist/types/main.d.ts +1 -0
  13. package/dist/types/modes/components/status-line/component.d.ts +1 -1
  14. package/dist/types/modes/components/status-line/context-thresholds.d.ts +0 -1
  15. package/dist/types/modes/rpc/rpc-types.d.ts +1 -1
  16. package/dist/types/modes/utils/context-usage.d.ts +12 -0
  17. package/dist/types/sdk.d.ts +3 -1
  18. package/dist/types/session/agent-session.d.ts +20 -0
  19. package/dist/types/session/session-persistence.d.ts +4 -0
  20. package/dist/types/tools/read.d.ts +1 -0
  21. package/dist/types/tui/code-cell.d.ts +2 -0
  22. package/dist/types/utils/image-vision-fallback.d.ts +28 -0
  23. package/dist/types/web/search/providers/base.d.ts +1 -0
  24. package/dist/types/web/search/providers/gemini.d.ts +1 -0
  25. package/package.json +12 -12
  26. package/src/advisor/__tests__/advisor.test.ts +59 -0
  27. package/src/advisor/advise-tool.ts +13 -0
  28. package/src/cli/args.ts +4 -0
  29. package/src/cli/bench-cli.ts +30 -7
  30. package/src/cli/flag-tables.ts +9 -0
  31. package/src/collab/host.ts +2 -2
  32. package/src/commands/launch.ts +6 -0
  33. package/src/config/settings-schema.ts +85 -3
  34. package/src/edit/file-snapshot-store.ts +12 -3
  35. package/src/eval/py/runner.py +44 -0
  36. package/src/extensibility/extensions/runner.ts +20 -2
  37. package/src/extensibility/extensions/types.ts +16 -5
  38. package/src/extensibility/shared-events.ts +24 -0
  39. package/src/internal-urls/docs-index.generated.ts +81 -81
  40. package/src/main.ts +18 -9
  41. package/src/modes/components/branch-summary-message.ts +1 -0
  42. package/src/modes/components/collab-prompt-message.ts +9 -7
  43. package/src/modes/components/compaction-summary-message.ts +1 -0
  44. package/src/modes/components/custom-message.ts +1 -0
  45. package/src/modes/components/footer.ts +6 -5
  46. package/src/modes/components/hook-message.ts +1 -0
  47. package/src/modes/components/read-tool-group.ts +9 -3
  48. package/src/modes/components/skill-message.ts +1 -0
  49. package/src/modes/components/status-line/component.ts +131 -14
  50. package/src/modes/components/status-line/context-thresholds.ts +0 -1
  51. package/src/modes/components/tips.txt +2 -1
  52. package/src/modes/components/todo-reminder.ts +1 -0
  53. package/src/modes/components/ttsr-notification.ts +1 -0
  54. package/src/modes/components/user-message.ts +6 -6
  55. package/src/modes/controllers/event-controller.ts +2 -7
  56. package/src/modes/controllers/selector-controller.ts +10 -3
  57. package/src/modes/interactive-mode.ts +4 -2
  58. package/src/modes/rpc/rpc-types.ts +1 -1
  59. package/src/modes/utils/context-usage.ts +28 -15
  60. package/src/prompts/system/system-prompt.md +2 -0
  61. package/src/prompts/tools/image-attachment-describe-system.md +8 -0
  62. package/src/prompts/tools/image-attachment-describe.md +10 -0
  63. package/src/sdk.ts +14 -18
  64. package/src/session/agent-session.ts +571 -235
  65. package/src/session/session-loader.ts +19 -32
  66. package/src/session/session-persistence.ts +27 -11
  67. package/src/ssh/connection-manager.ts +3 -2
  68. package/src/task/executor.ts +1 -1
  69. package/src/tools/image-gen.ts +67 -25
  70. package/src/tools/read.ts +54 -6
  71. package/src/tui/code-cell.ts +44 -3
  72. package/src/utils/image-vision-fallback.ts +197 -0
  73. package/src/web/search/index.ts +12 -0
  74. package/src/web/search/providers/base.ts +1 -0
  75. package/src/web/search/providers/gemini.ts +56 -18
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Vision fallback for text-only models. When a user attaches an image to a model
3
+ * that cannot accept image input, this:
4
+ * 1. saves each image under the session `local://` root (for later analysis), and
5
+ * 2. asks a vision-capable model to describe it and injects that description as
6
+ * a text block in place of the image:
7
+ *
8
+ * <image path="local://image-<hash>.png">
9
+ * <description>
10
+ * </image>
11
+ *
12
+ * Without this the provider layer drops the image entirely (NON_VISION_IMAGE_PLACEHOLDER).
13
+ */
14
+ import * as path from "node:path";
15
+ import {
16
+ type AgentTelemetry,
17
+ type AgentTelemetryConfig,
18
+ instrumentedCompleteSimple,
19
+ resolveTelemetry,
20
+ } from "@oh-my-pi/pi-agent-core";
21
+ import type { Api, completeSimple, ImageContent, Model, TextContent } from "@oh-my-pi/pi-ai";
22
+ import { logger, prompt, toError } from "@oh-my-pi/pi-utils";
23
+ import { extractTextContent } from "../commit/utils";
24
+ import type { ModelRegistry } from "../config/model-registry";
25
+ import { expandRoleAlias, getModelMatchPreferences, resolveModelFromString } from "../config/model-resolver";
26
+ import type { Settings } from "../config/settings";
27
+ import { type LocalProtocolOptions, resolveLocalRoot } from "../internal-urls";
28
+ import describeUserPrompt from "../prompts/tools/image-attachment-describe.md" with { type: "text" };
29
+ import describeSystemPrompt from "../prompts/tools/image-attachment-describe-system.md" with { type: "text" };
30
+
31
/** Tag attached to telemetry for the one-shot image-description requests. */
const ONESHOT_KIND = "image_attachment_describe";

/** Stand-in description used when no vision model (with a credential) could be resolved. */
const NO_VISION_MODEL_NOTE = [
	"[No vision-capable model is configured, so this image could not be described automatically. ",
	"The image was saved; configure a vision model role (modelRoles.vision) and use the inspect_image tool to analyze it.]",
].join("");

/** Stand-in description used when the vision request failed or produced no text. */
const DESCRIPTION_UNAVAILABLE_NOTE =
	"[Image description unavailable: the vision model returned no usable text. The image was saved for further analysis.]";
40
+
41
/**
 * Subset of {@link ModelRegistry} used by the vision fallback. `getAvailable`,
 * `getApiKey` and `resolver` are required; the canonical-model helpers are optional.
 */
export type VisionFallbackRegistry = Partial<
	Pick<ModelRegistry, "resolveCanonicalModel" | "getCanonicalVariants" | "getCanonicalId">
> &
	Pick<ModelRegistry, "getAvailable" | "getApiKey" | "resolver">;

/** Everything {@link describeAttachedImagesForTextModel} needs from its caller. */
export interface DescribeAttachedImagesDeps {
	/** The active, text-only model that the prompt will be sent to. */
	activeModel: Model<Api>;
	/**
	 * `provider/id` string of the active model. Tried as the final named candidate
	 * when picking a vision model, and only accepted if it is image-capable.
	 */
	activeModelString?: string;
	modelRegistry: VisionFallbackRegistry;
	settings: Settings;
	/** Options from which the session-scoped `local://` root is resolved. */
	localProtocolOptions: LocalProtocolOptions;
	telemetryConfig?: AgentTelemetryConfig;
	sessionId?: string;
	/** Test seam: replaces the underlying `completeSimple` call. */
	completeImpl?: typeof completeSimple;
}
59
+
60
/**
 * Map an image MIME type to the file extension used for the saved artifact.
 *
 * Only the bare subtype is considered: parameters (`; charset=…`) and a
 * structured-syntax suffix (`+xml`) are stripped before matching, so
 * `image/png; charset=binary` yields `png` and `image/svg+xml` yields `svg`
 * rather than leaking parameter text into the file name.
 *
 * @param mimeType MIME type string such as `image/jpeg`; matched case-insensitively.
 * @returns A lowercase extension without the leading dot. Unrecognized subtypes
 * are reduced to `[a-z0-9]`; when nothing usable remains the result is `png`.
 */
function extensionForMime(mimeType: string): string {
	// "image/svg+xml; charset=utf-8" -> "image/svg+xml" -> "svg+xml" -> "svg"
	const essence = mimeType.split(";")[0]?.trim().toLowerCase() ?? "";
	const subtype = essence.split("/")[1]?.split("+")[0]?.trim() ?? "";
	switch (subtype) {
		case "jpeg":
		case "jpg":
			return "jpg";
		case "png":
			return "png";
		case "gif":
			return "gif";
		case "webp":
			return "webp";
		default: {
			// Keep the extension file-system safe for subtypes such as "x-icon".
			const sanitized = subtype.replace(/[^a-z0-9]/g, "");
			return sanitized || "png";
		}
	}
}
79
+
80
/**
 * Build a content-addressed file name from the image payload, so pasting the
 * same image again maps onto the same artifact.
 */
function imageFileName(image: ImageContent): string {
	const extension = extensionForMime(image.mimeType);
	const digest = Bun.hash(image.data).toString(16);
	return `image-${digest}.${extension}`;
}
85
+
86
/**
 * Write the decoded image into `localRoot` and return the `local://` URL that
 * refers to it.
 */
async function saveImage(image: ImageContent, localRoot: string): Promise<string> {
	const name = imageFileName(image);
	const bytes = Buffer.from(image.data, "base64");
	// The name is derived from the content, so rewriting an identical image is harmless.
	// Bun.write creates parent dirs.
	await Bun.write(path.join(localRoot, name), bytes);
	return `local://${name}`;
}
94
+
95
/** Wrap a description in the `<image path="…">` text block that stands in for the image. */
function formatImageBlock(localUrl: string, description: string): string {
	const openingTag = `<image path="${localUrl}">`;
	return [openingTag, description, "</image>"].join("\n");
}
98
+
99
/**
 * Pick an image-capable model for describing attachments.
 *
 * Candidates are tried in order: the `pi/vision` role, the `pi/default` role,
 * the active model string, and finally the first available model that accepts
 * image input. A candidate that resolves to a model without image input is
 * skipped, so a text-only model is never returned.
 *
 * @returns The chosen model, or `undefined` when no model is available or none accepts images.
 */
function resolveVisionModel(deps: DescribeAttachedImagesDeps): Model<Api> | undefined {
	const { modelRegistry, settings, activeModelString } = deps;
	const candidates = modelRegistry.getAvailable();
	if (candidates.length === 0) return undefined;

	const matchPreferences = getModelMatchPreferences(settings);
	const acceptsImages = (model: Model<Api>): boolean => model.input.includes("image");

	for (const pattern of ["pi/vision", "pi/default", activeModelString]) {
		// `activeModelString` is optional; an absent or empty pattern is simply skipped.
		if (!pattern) continue;
		const resolved = resolveModelFromString(
			expandRoleAlias(pattern, settings),
			candidates,
			matchPreferences,
			modelRegistry,
		);
		if (resolved && acceptsImages(resolved)) return resolved;
	}

	return candidates.find(acceptsImages);
}
121
+
122
/**
 * Ask `visionModel` to describe a single image.
 *
 * @returns The trimmed description, or `null` when the request throws, stops
 * with `error`/`aborted`, or yields no text. Failures are logged as warnings
 * and never propagate to the caller.
 */
async function describeImage(
	image: ImageContent,
	visionModel: Model<Api>,
	deps: DescribeAttachedImagesDeps,
	telemetry: AgentTelemetry | undefined,
	signal: AbortSignal | undefined,
): Promise<string | null> {
	const modelLabel = (): string => `${visionModel.provider}/${visionModel.id}`;

	try {
		const systemPrompt = [prompt.render(describeSystemPrompt)];
		const response = await instrumentedCompleteSimple(
			visionModel,
			{
				systemPrompt,
				messages: [
					{
						role: "user",
						content: [
							{ type: "image", data: image.data, mimeType: image.mimeType },
							{ type: "text", text: prompt.render(describeUserPrompt) },
						],
						timestamp: Date.now(),
					},
				],
			},
			{ apiKey: deps.modelRegistry.resolver(visionModel, deps.sessionId), signal },
			{ telemetry, oneshotKind: ONESHOT_KIND, completeImpl: deps.completeImpl },
		);

		const { stopReason } = response;
		if (stopReason === "error" || stopReason === "aborted") {
			logger.warn("image attachment description did not complete", {
				stopReason,
				model: modelLabel(),
			});
			return null;
		}

		const description = extractTextContent(response).trim();
		return description.length === 0 ? null : description;
	} catch (err) {
		logger.warn("image attachment description failed", {
			error: toError(err).message,
			model: modelLabel(),
		});
		return null;
	}
}
166
+
167
/**
 * Replace attached images with text blocks for a model that cannot accept images.
 *
 * Every image is saved under the `local://` root and turned into an
 * `<image path="…">` block containing either a vision-model description or a
 * fallback note. The result has one {@link TextContent} per input image, in
 * input order. A description that fails does not throw: the block is still
 * emitted with a note in place of the description.
 *
 * @param images Images attached to the prompt.
 * @param deps Model registry, settings and session context used to resolve and call the vision model.
 * @param signal Optional abort signal forwarded to each description request.
 */
export async function describeAttachedImagesForTextModel(
	images: readonly ImageContent[],
	deps: DescribeAttachedImagesDeps,
	signal?: AbortSignal,
): Promise<TextContent[]> {
	const localRoot = resolveLocalRoot(deps.localProtocolOptions);
	const candidate = resolveVisionModel(deps);
	const apiKey = candidate ? await deps.modelRegistry.getApiKey(candidate, deps.sessionId) : undefined;
	// Describing needs both a vision model and a credential for it.
	const describer = candidate && apiKey ? candidate : undefined;
	const telemetry = resolveTelemetry(deps.telemetryConfig, deps.sessionId);

	const toTextBlock = async (image: ImageContent): Promise<TextContent> => {
		// The image is saved first, so the block carries a path even when no description is produced.
		const localUrl = await saveImage(image, localRoot);
		const description = describer
			? ((await describeImage(image, describer, deps, telemetry, signal)) ?? DESCRIPTION_UNAVAILABLE_NOTE)
			: NO_VISION_MODEL_NOTE;
		return { type: "text", text: formatImageBlock(localUrl, description) };
	};

	return Promise.all(images.map(image => toTextBlock(image)));
}
@@ -8,6 +8,7 @@ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallb
8
8
  import type { AuthStorage } from "@oh-my-pi/pi-ai";
9
9
  import { prompt } from "@oh-my-pi/pi-utils";
10
10
  import { z } from "zod/v4";
11
+ import { settings } from "../../config/settings";
11
12
  import type { CustomTool, CustomToolContext, RenderResultOptions } from "../../extensibility/custom-tools/types";
12
13
  import type { Theme } from "../../modes/theme/theme";
13
14
  import webSearchSystemPrompt from "../../prompts/system/web-search.md" with { type: "text" };
@@ -153,6 +154,16 @@ async function executeSearch(
153
154
  };
154
155
  }
155
156
 
157
+ // Invariant across providers; read once and tolerate an uninitialized
158
+ // Settings singleton (e.g. `omp q ...` CLI path, unit tests) so the
159
+ // provider-fallback loop never aborts before any provider runs.
160
+ let antigravityEndpointMode: "auto" | "production" | "sandbox" | undefined;
161
+ try {
162
+ antigravityEndpointMode = settings.get("providers.antigravityEndpoint");
163
+ } catch {
164
+ antigravityEndpointMode = undefined;
165
+ }
166
+
156
167
  const failures: Array<{ provider: SearchProvider; error: unknown }> = [];
157
168
  let lastProvider = providers[0];
158
169
  for (const provider of providers) {
@@ -169,6 +180,7 @@ async function executeSearch(
169
180
  signal,
170
181
  authStorage,
171
182
  sessionId,
183
+ antigravityEndpointMode,
172
184
  });
173
185
 
174
186
  if (!hasRenderableSearchContent(response)) {
@@ -51,6 +51,7 @@ export interface SearchParams {
51
51
  * caller's agent session when available; otherwise omit.
52
52
  */
53
53
  sessionId?: string;
54
+ antigravityEndpointMode?: "auto" | "production" | "sandbox";
54
55
  }
55
56
 
56
57
  /** Base class for web search providers. */
@@ -52,6 +52,7 @@ export interface GeminiSearchParams extends GeminiToolParams {
52
52
  authStorage: AuthStorage;
53
53
  sessionId?: string;
54
54
  fetch?: FetchImpl;
55
+ antigravityEndpointMode?: "auto" | "production" | "sandbox";
55
56
  }
56
57
 
57
58
  export function buildGeminiRequestTools(params: GeminiToolParams): Array<Record<string, Record<string, unknown>>> {
@@ -163,6 +164,7 @@ async function callGeminiSearch(
163
164
  toolParams: GeminiToolParams,
164
165
  fetchImpl: FetchImpl | undefined,
165
166
  signal: AbortSignal | undefined,
167
+ mode?: "auto" | "production" | "sandbox",
166
168
  ): Promise<{
167
169
  answer: string;
168
170
  sources: SearchSource[];
@@ -171,7 +173,19 @@ async function callGeminiSearch(
171
173
  model: string;
172
174
  usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
173
175
  }> {
174
- const endpoints = auth.isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
176
+ let endpoints: string[];
177
+ if (auth.isAntigravity) {
178
+ const m = mode ?? "auto";
179
+ if (m === "sandbox") {
180
+ endpoints = [ANTIGRAVITY_SANDBOX_ENDPOINT];
181
+ } else if (m === "production") {
182
+ endpoints = [ANTIGRAVITY_DAILY_ENDPOINT];
183
+ } else {
184
+ endpoints = [...ANTIGRAVITY_ENDPOINT_FALLBACKS];
185
+ }
186
+ } else {
187
+ endpoints = [DEFAULT_ENDPOINT];
188
+ }
175
189
  const headers = auth.isAntigravity ? { "User-Agent": getAntigravityUserAgent() } : getGeminiCliHeaders();
176
190
 
177
191
  const requestMetadata = auth.isAntigravity
@@ -187,12 +201,7 @@ async function callGeminiSearch(
187
201
 
188
202
  const normalizedSystemPrompt = systemPrompt?.toWellFormed();
189
203
  const systemInstructionParts: Array<{ text: string }> = [
190
- ...(auth.isAntigravity
191
- ? [
192
- { text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
193
- { text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
194
- ]
195
- : []),
204
+ ...(auth.isAntigravity ? [{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION }] : []),
196
205
  ...(normalizedSystemPrompt ? [{ text: normalizedSystemPrompt }] : []),
197
206
  ];
198
207
 
@@ -238,16 +247,45 @@ async function callGeminiSearch(
238
247
  body: JSON.stringify(requestBody),
239
248
  signal: withHardTimeout(signal),
240
249
  });
241
- const urlFor = (attempt: number) =>
242
- `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`;
243
-
244
- const response = await fetchWithRetry(urlFor, {
245
- ...buildInit(),
246
- fetch: fetchImpl,
247
- maxAttempts: MAX_RETRIES + 1,
248
- defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
249
- maxDelayMs: RATE_LIMIT_BUDGET_MS,
250
- });
250
+
251
+ let response: Response | undefined;
252
+
253
+ for (let i = 0; i < endpoints.length; i++) {
254
+ const endpoint = endpoints[i];
255
+ const isLastEndpoint = i === endpoints.length - 1;
256
+ try {
257
+ response = await fetchWithRetry(() => `${endpoint}/v1internal:streamGenerateContent?alt=sse`, {
258
+ ...buildInit(),
259
+ fetch: fetchImpl,
260
+ maxAttempts: isLastEndpoint ? MAX_RETRIES + 1 : 1,
261
+ defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
262
+ maxDelayMs: RATE_LIMIT_BUDGET_MS,
263
+ });
264
+
265
+ if (response.ok) {
266
+ break;
267
+ }
268
+
269
+ if (response.status === 429 || (response.status >= 500 && response.status < 600)) {
270
+ if (!isLastEndpoint) {
271
+ continue;
272
+ }
273
+ }
274
+ break;
275
+ } catch (error) {
276
+ if (isLastEndpoint) {
277
+ throw error;
278
+ }
279
+ }
280
+ }
281
+
282
+ if (!response?.ok) {
283
+ const errorText = response ? await response.text() : "Network error";
284
+ const status = response?.status ?? 502;
285
+ const classified = classifyProviderHttpError("gemini", status, errorText);
286
+ if (classified) throw classified;
287
+ throw new SearchProviderError("gemini", `Gemini Cloud Code API error (${status}): ${errorText}`, status);
288
+ }
251
289
 
252
290
  if (!response.ok) {
253
291
  const errorText = await response.text();
@@ -410,7 +448,6 @@ export async function searchGemini(params: GeminiSearchParams): Promise<SearchRe
410
448
  // re-resolved access may omit projectId, in which case the seed's
411
449
  // project is still the right tenant for the credential. The
412
450
  // `fetchWithRetry` transport backoff stays INSIDE this attempt — auth
413
- // retry wraps transport retry.
414
451
  callGeminiSearch(
415
452
  {
416
453
  accessToken: access.accessToken,
@@ -428,6 +465,7 @@ export async function searchGemini(params: GeminiSearchParams): Promise<SearchRe
428
465
  },
429
466
  params.fetch,
430
467
  params.signal,
468
+ params.antigravityEndpointMode,
431
469
  ),
432
470
  { sessionId: params.sessionId, signal: params.signal, seed: seed.access },
433
471
  );