@oh-my-pi/pi-coding-agent 15.5.4 → 15.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +48 -2
  2. package/dist/types/config/settings-schema.d.ts +50 -2
  3. package/dist/types/edit/hashline/diff.d.ts +6 -1
  4. package/dist/types/edit/hashline/execute.d.ts +1 -2
  5. package/dist/types/edit/hashline/params.d.ts +4 -5
  6. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +23 -0
  7. package/dist/types/lib/xai-http.d.ts +40 -0
  8. package/dist/types/session/agent-session.d.ts +1 -0
  9. package/dist/types/tools/fetch.d.ts +19 -0
  10. package/dist/types/tools/find.d.ts +7 -0
  11. package/dist/types/tools/image-gen.d.ts +6 -2
  12. package/dist/types/tools/index.d.ts +1 -0
  13. package/dist/types/tools/plan-mode-guard.d.ts +5 -6
  14. package/dist/types/tools/tts.d.ts +18 -0
  15. package/package.json +8 -8
  16. package/scripts/build-binary.ts +11 -0
  17. package/src/config/model-registry.ts +41 -9
  18. package/src/config/settings-schema.ts +43 -2
  19. package/src/edit/diff.ts +5 -3
  20. package/src/edit/hashline/diff.ts +11 -4
  21. package/src/edit/hashline/execute.ts +3 -10
  22. package/src/edit/hashline/params.ts +10 -3
  23. package/src/edit/index.ts +9 -12
  24. package/src/edit/renderer.ts +14 -7
  25. package/src/edit/streaming.ts +15 -128
  26. package/src/extensibility/legacy-pi-ai-shim.ts +24 -0
  27. package/src/extensibility/plugins/legacy-pi-compat.ts +47 -3
  28. package/src/lib/xai-http.ts +124 -0
  29. package/src/main.ts +2 -1
  30. package/src/modes/controllers/selector-controller.ts +7 -2
  31. package/src/modes/interactive-mode.ts +1 -1
  32. package/src/modes/rpc/rpc-client.ts +3 -1
  33. package/src/prompts/tools/find.md +3 -2
  34. package/src/sdk.ts +15 -9
  35. package/src/session/agent-session.ts +48 -5
  36. package/src/tools/fetch.ts +145 -74
  37. package/src/tools/find.ts +38 -6
  38. package/src/tools/image-gen.ts +205 -7
  39. package/src/tools/index.ts +1 -0
  40. package/src/tools/plan-mode-guard.ts +14 -6
  41. package/src/tools/read.ts +57 -3
  42. package/src/tools/search.ts +2 -2
  43. package/src/tools/tts.ts +133 -0
@@ -22,12 +22,14 @@ import * as z from "zod/v4";
22
22
  import packageJson from "../../package.json" with { type: "json" };
23
23
  import { isAuthenticated, type ModelRegistry } from "../config/model-registry";
24
24
  import type { CustomTool } from "../extensibility/custom-tools/types";
25
+ import { ohMyPiXAIUserAgent, resolveXAIHttpCredentials } from "../lib/xai-http";
25
26
  import imageGenDescription from "../prompts/tools/image-gen.md" with { type: "text" };
26
27
  import { resolveReadPath } from "./path-utils";
27
28
 
28
29
  const DEFAULT_MODEL = "gemini-3-pro-image-preview";
29
30
  const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
30
31
  const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
32
+ const DEFAULT_XAI_IMAGE_MODEL = "grok-imagine-image";
31
33
  const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
32
34
  const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
33
35
  const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
@@ -38,7 +40,9 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
38
40
  const IMAGE_SYSTEM_INSTRUCTION =
39
41
  "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
40
42
 
41
- type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter";
43
+ export type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter" | "xai";
44
+ export type ImageProviderPreference = Exclude<ImageProvider, "openai-codex"> | "auto";
45
+
42
46
  interface ImageApiKey {
43
47
  provider: ImageProvider;
44
48
  apiKey: string;
@@ -46,8 +50,13 @@ interface ImageApiKey {
46
50
  model?: Model;
47
51
  }
48
52
 
53
+ const COMMON_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"] as const;
54
+ const XAI_IMAGE_ASPECT_RATIOS = [...COMMON_IMAGE_ASPECT_RATIOS, "3:2", "2:3"] as const;
55
+ const COMMON_IMAGE_ASPECT_RATIO_SET = new Set<string>(COMMON_IMAGE_ASPECT_RATIOS);
56
+ const IMAGE_PROVIDER_PREFERENCES = new Set<string>(["auto", "antigravity", "gemini", "openai", "openrouter", "xai"]);
57
+
49
58
  const responseModalitySchema = z.enum(["IMAGE", "TEXT"] as const);
50
- const aspectRatioSchema = z.enum(["1:1", "3:4", "4:3", "9:16", "16:9"] as const).describe("aspect ratio");
59
+ const aspectRatioSchema = z.enum(XAI_IMAGE_ASPECT_RATIOS).describe("aspect ratio");
51
60
  const imageSizeSchema = z.enum(["1024x1024", "1536x1024", "1024x1536"] as const).describe("image size");
52
61
 
53
62
  const inputImageSchema = z
@@ -274,6 +283,36 @@ interface AntigravityRequest {
274
283
  requestId?: string;
275
284
  }
276
285
 
286
+ interface XAIImageReference {
287
+ // OpenAI-compat discriminator. Every code example at
288
+ // docs.x.ai/developers/rest-api-reference/inference/images sends this
289
+ // alongside `url`; the schema text doesn't strictly require it, but
290
+ // matching the documented wire format avoids relying on schema-vs-example.
291
+ readonly type: "image_url";
292
+ readonly url: string;
293
+ }
294
+
295
+ interface XAIImageRequestBase {
296
+ readonly model: string;
297
+ readonly prompt: string;
298
+ readonly aspect_ratio: string;
299
+ readonly resolution: "1k" | "2k";
300
+ readonly n: number;
301
+ readonly response_format: "b64_json" | "url";
302
+ }
303
+
304
+ // xAI image request body. Three shapes:
305
+ // 1. text-only generation → POST /v1/images/generations
306
+ // 2. single-source edit (image field) → POST /v1/images/edits
307
+ // 3. multi-reference edit (images field) → POST /v1/images/edits
308
+ // `image` and `images` are mutually exclusive per docs.x.ai; the discriminated
309
+ // union enforces that statically. The runtime cap (XAI_MAX_EDIT_IMAGES) bounds
310
+ // the array length, which TypeScript cannot encode without lossy tuple unions.
311
+ type XAIImageRequestBody =
312
+ | (XAIImageRequestBase & { readonly image?: never; readonly images?: never })
313
+ | (XAIImageRequestBase & { readonly image: XAIImageReference; readonly images?: never })
314
+ | (XAIImageRequestBase & { readonly images: readonly XAIImageReference[]; readonly image?: never });
315
+
277
316
  interface AntigravityResponseChunk {
278
317
  response?: {
279
318
  candidates?: Array<{
@@ -391,12 +430,24 @@ function extractOpenRouterImageUrls(message: OpenRouterMessage | undefined): str
391
430
  }
392
431
 
393
432
  /** Preferred provider set via settings (default: auto) */
394
- let preferredImageProvider: ImageProvider | "auto" = "auto";
433
+ let preferredImageProvider: ImageProviderPreference = "auto";
434
+
435
+ export function isImageProviderPreference(value: unknown): value is ImageProviderPreference {
436
+ return typeof value === "string" && IMAGE_PROVIDER_PREFERENCES.has(value);
437
+ }
395
438
 
396
439
  /** Set the preferred image provider from settings */
397
- export function setPreferredImageProvider(provider: ImageProvider | "auto"): void {
440
+ export function setPreferredImageProvider(provider: ImageProviderPreference): void {
398
441
  preferredImageProvider = provider;
399
442
  }
443
+ function assertImageAspectRatioSupported(provider: ImageProvider, aspectRatio: ImageGenParams["aspect_ratio"]): void {
444
+ if (!aspectRatio || provider === "xai" || COMMON_IMAGE_ASPECT_RATIO_SET.has(aspectRatio)) {
445
+ return;
446
+ }
447
+ throw new Error(
448
+ `Aspect ratio ${aspectRatio} is only supported by xAI image generation. Set providers.image to xai or use one of ${COMMON_IMAGE_ASPECT_RATIOS.join(", ")}.`,
449
+ );
450
+ }
400
451
 
401
452
  interface ParsedAntigravityCredentials {
402
453
  accessToken: string;
@@ -429,6 +480,17 @@ async function findAntigravityCredentials(modelRegistry: ModelRegistry): Promise
429
480
  };
430
481
  }
431
482
 
483
+ async function findXAIImageCredentials(modelRegistry?: ModelRegistry): Promise<ImageApiKey | null> {
484
+ if (modelRegistry) {
485
+ const creds = await resolveXAIHttpCredentials(modelRegistry);
486
+ if (creds) return { provider: "xai", apiKey: creds.apiKey };
487
+ return null;
488
+ }
489
+ const apiKey = $env.XAI_API_KEY;
490
+ if (apiKey) return { provider: "xai", apiKey };
491
+ return null;
492
+ }
493
+
432
494
  async function findOpenAIHostedImageCredentials(
433
495
  modelRegistry: ModelRegistry | undefined,
434
496
  activeModel: Model | undefined,
@@ -468,9 +530,13 @@ async function findImageApiKey(
468
530
  const openRouterKey = getEnvApiKey("openrouter");
469
531
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
470
532
  // Fall through to auto-detect if preferred provider key not found.
533
+ } else if (preferredImageProvider === "xai") {
534
+ const xai = await findXAIImageCredentials(modelRegistry);
535
+ if (xai) return xai;
536
+ // Fall through to auto-detect if preferred provider key not found.
471
537
  }
472
538
 
473
- // Auto-detect: GPT hosted image generation, then Antigravity, OpenRouter, Gemini.
539
+ // Auto-detect: GPT hosted image generation, then Antigravity, xAI, OpenRouter, Gemini.
474
540
  const openAI = await findOpenAIHostedImageCredentials(modelRegistry, activeModel, sessionId);
475
541
  if (openAI) return openAI;
476
542
 
@@ -479,6 +545,9 @@ async function findImageApiKey(
479
545
  if (antigravity) return antigravity;
480
546
  }
481
547
 
548
+ const xai = await findXAIImageCredentials(modelRegistry);
549
+ if (xai) return xai;
550
+
482
551
  const openRouterKey = getEnvApiKey("openrouter");
483
552
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
484
553
 
@@ -857,6 +926,31 @@ function buildAntigravityRequest(
857
926
  };
858
927
  }
859
928
 
929
+ // xAI image-edit cap per docs.x.ai (POST /v1/images/edits supports up to 3
930
+ // source images for multi-reference editing).
931
+ const XAI_MAX_EDIT_IMAGES = 3;
932
+
933
+ // Map the OpenAI-style pixel-size enum (image_size) to xAI's discrete tier.
934
+ // "1024x1024" → "1k"; anything wider (1536x... or ...x1536) → "2k". Absent
935
+ // image_size defaults to "1k", matching hermes-agent's DEFAULT_RESOLUTION
936
+ // (plugins/image_gen/xai/__init__.py:71).
937
+ function resolveXAIResolution(imageSize: string | undefined): "1k" | "2k" {
938
+ if (!imageSize || imageSize === "1024x1024") return "1k";
939
+ return "2k";
940
+ }
941
+
942
+ // Build the discriminated edit body. Caller must ensure images.length is in
943
+ // [1, XAI_MAX_EDIT_IMAGES]; the bound check fires earlier in execute().
944
+ function buildXAIEditPayload(base: XAIImageRequestBase, images: readonly InlineImageData[]): XAIImageRequestBody {
945
+ const refs: readonly XAIImageReference[] = images.map(img => ({
946
+ type: "image_url",
947
+ url: toDataUrl(img),
948
+ }));
949
+ const [first, ...rest] = refs;
950
+ if (first === undefined) return base; // unreachable: caller checked images.length > 0
951
+ return rest.length === 0 ? { ...base, image: first } : { ...base, images: refs };
952
+ }
953
+
860
954
  interface AntigravitySseResult {
861
955
  images: InlineImageData[];
862
956
  text: string[];
@@ -910,7 +1004,7 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
910
1004
  const apiKey = await findImageApiKey(ctx.modelRegistry, ctx.model, sessionId);
911
1005
  if (!apiKey) {
912
1006
  throw new Error(
913
- "No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
1007
+ "No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity or xAI Grok OAuth, or set XAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
914
1008
  );
915
1009
  }
916
1010
 
@@ -922,8 +1016,11 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
922
1016
  ? DEFAULT_ANTIGRAVITY_MODEL
923
1017
  : provider === "openrouter"
924
1018
  ? DEFAULT_OPENROUTER_MODEL
925
- : DEFAULT_MODEL;
1019
+ : provider === "xai"
1020
+ ? DEFAULT_XAI_IMAGE_MODEL
1021
+ : DEFAULT_MODEL;
926
1022
  const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
1023
+ assertImageAspectRatioSupported(provider, params.aspect_ratio);
927
1024
  const cwd = ctx.sessionManager.getCwd();
928
1025
 
929
1026
  const resolvedImages: InlineImageData[] = [];
@@ -1059,6 +1156,107 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
1059
1156
  };
1060
1157
  }
1061
1158
 
1159
+ if (provider === "xai") {
1160
+ if (!ctx.modelRegistry) {
1161
+ throw new Error("Missing modelRegistry for xAI image generation");
1162
+ }
1163
+ const xaiCreds = await resolveXAIHttpCredentials(ctx.modelRegistry, resolvedModel);
1164
+ if (!xaiCreds) {
1165
+ throw new Error(
1166
+ "No xAI credentials. Run /login → xAI Grok OAuth (SuperGrok Subscription) or set XAI_API_KEY.",
1167
+ );
1168
+ }
1169
+
1170
+ const prompt = assemblePrompt(params);
1171
+ const aspectRatio = params.aspect_ratio ?? "1:1";
1172
+ const xaiResolution = resolveXAIResolution(params.image_size);
1173
+
1174
+ const isEdit = resolvedImages.length > 0;
1175
+ if (isEdit && resolvedImages.length > XAI_MAX_EDIT_IMAGES) {
1176
+ throw new Error(
1177
+ `xAI image edits accept up to ${XAI_MAX_EDIT_IMAGES} reference images; got ${resolvedImages.length}.`,
1178
+ );
1179
+ }
1180
+
1181
+ const xaiBaseBody: XAIImageRequestBase = {
1182
+ model: resolvedModel,
1183
+ prompt,
1184
+ aspect_ratio: aspectRatio,
1185
+ resolution: xaiResolution,
1186
+ n: 1,
1187
+ response_format: "b64_json",
1188
+ };
1189
+ const xaiBody: XAIImageRequestBody = isEdit
1190
+ ? buildXAIEditPayload(xaiBaseBody, resolvedImages)
1191
+ : xaiBaseBody;
1192
+ const xaiEndpoint = isEdit ? "/images/edits" : "/images/generations";
1193
+
1194
+ const xaiResponse = await fetch(`${xaiCreds.baseURL}${xaiEndpoint}`, {
1195
+ method: "POST",
1196
+ headers: {
1197
+ Authorization: `Bearer ${xaiCreds.apiKey}`,
1198
+ "Content-Type": "application/json",
1199
+ "User-Agent": ohMyPiXAIUserAgent(),
1200
+ },
1201
+ body: JSON.stringify(xaiBody),
1202
+ signal: requestSignal,
1203
+ });
1204
+
1205
+ const xaiRawText = await xaiResponse.text();
1206
+ if (!xaiResponse.ok) {
1207
+ let message = xaiRawText;
1208
+ try {
1209
+ const parsedErr = JSON.parse(xaiRawText) as { error?: { message?: string } };
1210
+ message = parsedErr.error?.message ?? message;
1211
+ } catch {
1212
+ // Keep raw text.
1213
+ }
1214
+ throw new Error(`xAI image request failed (${xaiResponse.status}): ${message}`);
1215
+ }
1216
+
1217
+ const xaiData = JSON.parse(xaiRawText) as {
1218
+ data?: Array<{ b64_json?: string; url?: string }>;
1219
+ };
1220
+ const xaiInlineImages: InlineImageData[] = [];
1221
+ for (const entry of xaiData.data ?? []) {
1222
+ if (entry.b64_json) {
1223
+ const bytes = Buffer.from(entry.b64_json, "base64");
1224
+ const mimeType = parseImageMetadata(bytes)?.mimeType ?? "image/png";
1225
+ xaiInlineImages.push({ data: entry.b64_json, mimeType });
1226
+ } else if (entry.url) {
1227
+ xaiInlineImages.push(await loadImageFromUrl(entry.url, requestSignal));
1228
+ }
1229
+ }
1230
+
1231
+ if (xaiInlineImages.length === 0) {
1232
+ return {
1233
+ content: [{ type: "text", text: "No image data returned." }],
1234
+ details: {
1235
+ provider,
1236
+ model: resolvedModel,
1237
+ imageCount: 0,
1238
+ imagePaths: [],
1239
+ images: [],
1240
+ },
1241
+ };
1242
+ }
1243
+
1244
+ const xaiImagePaths = await saveImagesToTemp(xaiInlineImages);
1245
+
1246
+ return {
1247
+ content: [
1248
+ { type: "text", text: buildResponseSummary(provider, resolvedModel, xaiImagePaths, undefined) },
1249
+ ],
1250
+ details: {
1251
+ provider,
1252
+ model: resolvedModel,
1253
+ imageCount: xaiInlineImages.length,
1254
+ imagePaths: xaiImagePaths,
1255
+ images: xaiInlineImages,
1256
+ },
1257
+ };
1258
+ }
1259
+
1062
1260
  if (provider === "openrouter") {
1063
1261
  const prompt = assemblePrompt(params);
1064
1262
  const contentParts: OpenRouterContentPart[] = [{ type: "text", text: prompt }];
@@ -92,6 +92,7 @@ export * from "./search";
92
92
  export * from "./search-tool-bm25";
93
93
  export * from "./ssh";
94
94
  export * from "./todo-write";
95
+ export * from "./tts";
95
96
  export * from "./write";
96
97
  export * from "./yield";
97
98
 
@@ -5,6 +5,8 @@ import { normalizeLocalScheme, resolveToCwd } from "./path-utils";
5
5
  import { ToolError } from "./tool-errors";
6
6
 
7
7
  const LOCAL_SCHEME_PREFIX = "local:";
8
+ const PLAN_ALIAS_FILE = "PLAN.md";
9
+ const LOCAL_PLAN_ALIAS = "local://PLAN.md";
8
10
 
9
11
  function resolveRawPath(session: ToolSession, targetPath: string): string {
10
12
  const normalized = normalizeLocalScheme(targetPath);
@@ -18,15 +20,20 @@ function resolveRawPath(session: ToolSession, targetPath: string): string {
18
20
  return resolveToCwd(normalized, session.cwd);
19
21
  }
20
22
 
23
+ function isPlanAliasTarget(session: ToolSession, targetPath: string, resolved: string): boolean {
24
+ const normalized = normalizeLocalScheme(targetPath);
25
+ if (normalized === LOCAL_PLAN_ALIAS) return true;
26
+ return resolved === resolveToCwd(PLAN_ALIAS_FILE, session.cwd);
27
+ }
28
+
21
29
  /**
22
30
  * Resolve a write/edit target to its absolute filesystem path.
23
31
  *
24
- * In plan mode, transparently redirects targets whose basename matches the
25
- * plan file's basename (e.g. a bare `PLAN.md` or `./PLAN.md`) to the canonical
26
- * plan file location at `state.planFilePath`. This lets `write` and `edit`
27
- * accept the unqualified plan filename and have the change land at the
28
- * session-scoped `local://PLAN.md` artifact instead of a stray cwd-relative
29
- * file the plan-mode guard would otherwise reject.
32
+ * In plan mode, transparently redirects `PLAN.md` aliases and targets whose
33
+ * basename matches the plan file's basename to the canonical plan file
34
+ * location at `state.planFilePath`. This lets `write` and `edit` accept the
35
+ * habitual plan filename after approval even when the active artifact has a
36
+ * titled path such as `local://APPROVED.md`.
30
37
  *
31
38
  * Outside plan mode (or when the basename does not match) this is a no-op.
32
39
  */
@@ -38,6 +45,7 @@ export function resolvePlanPath(session: ToolSession, targetPath: string): strin
38
45
 
39
46
  const planResolved = resolveRawPath(session, state.planFilePath);
40
47
  if (resolved === planResolved) return resolved;
48
+ if (isPlanAliasTarget(session, targetPath, resolved)) return planResolved;
41
49
  if (path.basename(resolved) !== path.basename(planResolved)) return resolved;
42
50
 
43
51
  return planResolved;
package/src/tools/read.ts CHANGED
@@ -1488,6 +1488,21 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
1488
1488
  if (!this.session.settings.get("fetch.enabled")) {
1489
1489
  throw new ToolError("URL reads are disabled by settings.");
1490
1490
  }
1491
+ if (parsedUrlTarget.ranges !== undefined) {
1492
+ const cached = await loadReadUrlCacheEntry(
1493
+ this.session,
1494
+ { path: parsedUrlTarget.path, raw: parsedUrlTarget.raw },
1495
+ signal,
1496
+ { ensureArtifact: true, preferCached: true },
1497
+ );
1498
+ return this.#buildInMemoryMultiRangeResult(cached.output, parsedUrlTarget.ranges, {
1499
+ details: { ...cached.details },
1500
+ sourceUrl: cached.details.finalUrl,
1501
+ entityLabel: "URL output",
1502
+ raw: parsedUrlTarget.raw,
1503
+ immutable: true,
1504
+ });
1505
+ }
1491
1506
  if (parsedUrlTarget.offset !== undefined || parsedUrlTarget.limit !== undefined) {
1492
1507
  const cached = await loadReadUrlCacheEntry(
1493
1508
  this.session,
@@ -1502,6 +1517,7 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
1502
1517
  details: { ...cached.details },
1503
1518
  sourceUrl: cached.details.finalUrl,
1504
1519
  entityLabel: "URL output",
1520
+ raw: parsedUrlTarget.raw,
1505
1521
  immutable: true,
1506
1522
  });
1507
1523
  }
@@ -1578,7 +1594,8 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
1578
1594
  if (isMultiRange(parsed)) {
1579
1595
  throw new ToolError("Multi-range line selectors are not supported for directory listings.");
1580
1596
  }
1581
- const dirResult = await this.#readDirectory(absolutePath, selToOffsetLimit(parsed).limit, signal);
1597
+ const { offset, limit } = selToOffsetLimit(parsed);
1598
+ const dirResult = await this.#readDirectory(absolutePath, offset, limit, signal);
1582
1599
  if (suffixResolution) {
1583
1600
  dirResult.details ??= {};
1584
1601
  dirResult.details.suffixResolution = suffixResolution;
@@ -2136,6 +2153,7 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
2136
2153
  /** Read directory contents as a formatted listing */
2137
2154
  async #readDirectory(
2138
2155
  absolutePath: string,
2156
+ offset: number | undefined,
2139
2157
  limit: number | undefined,
2140
2158
  signal?: AbortSignal,
2141
2159
  ): Promise<AgentToolResult<ReadToolDetails>> {
@@ -2149,7 +2167,9 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
2149
2167
  maxDepth: READ_DIRECTORY_MAX_DEPTH,
2150
2168
  perDirLimit: READ_DIRECTORY_CHILD_LIMIT,
2151
2169
  rootLimit: null,
2152
- lineCap: limit ?? null,
2170
+ // `lineCap` truncates the rendered tree itself, so apply it only when the caller
2171
+ // did not request an offset — otherwise we'd cap the first N lines before slicing.
2172
+ lineCap: offset === undefined && limit !== undefined ? limit : null,
2153
2173
  });
2154
2174
  } catch (error) {
2155
2175
  const message = error instanceof Error ? error.message : String(error);
@@ -2158,12 +2178,46 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
2158
2178
  throwIfAborted(signal);
2159
2179
 
2160
2180
  const output = tree.totalLines <= 1 ? "(empty directory)" : tree.rendered;
2161
- const truncation = truncateHead(output, { maxLines: Number.MAX_SAFE_INTEGER });
2162
2181
  const details: ReadToolDetails = {
2163
2182
  isDirectory: true,
2164
2183
  resolvedPath: tree.rootPath,
2165
2184
  };
2166
2185
 
2186
+ // Slice the rendered listing when the caller passed an offset/limit. We do this
2187
+ // instead of passing the selector down to `buildDirectoryTree` because the tree
2188
+ // builder lays out entries hierarchically (per-dir caps, recent-then-elided
2189
+ // summaries); line-based slicing operates on the formatted text and matches what
2190
+ // users expect from `:N-M` on long listings.
2191
+ const wantsSlice = offset !== undefined || limit !== undefined;
2192
+ if (wantsSlice) {
2193
+ const allLines = output.split("\n");
2194
+ const start = offset ? Math.max(0, offset - 1) : 0;
2195
+ if (start >= allLines.length) {
2196
+ const suggestion =
2197
+ allLines.length === 0
2198
+ ? "The listing is empty."
2199
+ : `Use :1 to read from the start, or :${allLines.length} to read the last line.`;
2200
+ return toolResult(details)
2201
+ .text(`Line ${start + 1} is beyond end of listing (${allLines.length} lines total). ${suggestion}`)
2202
+ .sourcePath(tree.rootPath)
2203
+ .done();
2204
+ }
2205
+ const end = limit !== undefined ? Math.min(start + limit, allLines.length) : allLines.length;
2206
+ const sliced = allLines.slice(start, end).join("\n");
2207
+ const resultBuilder = toolResult(details).sourcePath(tree.rootPath);
2208
+ let text = sliced;
2209
+ if (end < allLines.length) {
2210
+ const remaining = allLines.length - end;
2211
+ text += `\n\n[${remaining} more lines in listing. Use :${end + 1} to continue]`;
2212
+ }
2213
+ resultBuilder.text(text);
2214
+ if (tree.truncated) {
2215
+ resultBuilder.limits({ resultLimit: 1 });
2216
+ }
2217
+ return resultBuilder.done();
2218
+ }
2219
+
2220
+ const truncation = truncateHead(output, { maxLines: Number.MAX_SAFE_INTEGER });
2167
2221
  const resultBuilder = toolResult(details).text(truncation.content).sourcePath(tree.rootPath);
2168
2222
  if (tree.truncated) {
2169
2223
  resultBuilder.limits({ resultLimit: 1 });
@@ -478,8 +478,8 @@ export class SearchTool implements AgentTool<typeof searchSchema, SearchToolDeta
478
478
  );
479
479
  }
480
480
  } catch (err) {
481
- if (err instanceof Error && err.message.startsWith("regex parse error")) {
482
- throw new ToolError(err.message);
481
+ if (err instanceof Error && /^regex(?: parse)? error/i.test(err.message)) {
482
+ throw new ToolError(err.message.replace(/^regex(?: parse)? error:?\s*/i, "Invalid regex: "));
483
483
  }
484
484
  throw err;
485
485
  }
@@ -0,0 +1,133 @@
1
+ // Ported from NousResearch/hermes-agent (MIT) — tools/tts_tool.py L167-171, L896-959.
2
+
3
+ import type { AgentToolResult } from "@oh-my-pi/pi-agent-core";
4
+ import * as z from "zod/v4";
5
+ import type { CustomTool, CustomToolContext } from "../extensibility/custom-tools/types";
6
+ import { ohMyPiXAIUserAgent, resolveXAIHttpCredentials } from "../lib/xai-http";
7
+ import { formatPathRelativeToCwd, resolveToCwd } from "./path-utils";
8
+
9
+ // Hermes tts_tool.py L167-171
10
+ const DEFAULT_XAI_VOICE_ID = "eve" as const;
11
+ const DEFAULT_XAI_LANGUAGE = "en" as const;
12
+ const DEFAULT_XAI_SAMPLE_RATE = 24_000;
13
+ const DEFAULT_XAI_BIT_RATE = 128_000;
14
+ const XAI_MAX_TEXT_LENGTH = 15_000;
15
+
16
+ // Built-in voices per xAI Tier-1 docs (2026-05-16). xAI also accepts custom voice IDs,
17
+ // so the schema does NOT enum-restrict voice_id; this constant only drives the description.
18
+ const XAI_BUILTIN_VOICES = ["ara", "eve", "leo", "rex", "sal"] as const;
19
+
20
+ const formatVoiceList = (): string =>
21
+ XAI_BUILTIN_VOICES.map(v => (v === DEFAULT_XAI_VOICE_ID ? `${v} (default)` : v)).join(", ");
22
+
23
+ type TtsCodec = "mp3" | "wav";
24
+
25
+ const ttsSchema = z.object({
26
+ text: z.string().min(1).max(XAI_MAX_TEXT_LENGTH),
27
+ voice_id: z.string().default(DEFAULT_XAI_VOICE_ID),
28
+ language: z.string().default(DEFAULT_XAI_LANGUAGE),
29
+ output_path: z.string(),
30
+ sample_rate: z.number().int().optional(),
31
+ bit_rate: z.number().int().optional(),
32
+ });
33
+
34
+ interface TtsToolDetails {
35
+ bytes: number;
36
+ voiceId: string;
37
+ codec: TtsCodec;
38
+ }
39
+
40
+ export const ttsTool: CustomTool<typeof ttsSchema, TtsToolDetails> = {
41
+ name: "tts",
42
+ label: "TextToSpeech",
43
+ strict: false,
44
+ approval: "write",
45
+ description:
46
+ `Synthesize speech from text using xAI Grok Voice. Built-in voices: ${formatVoiceList()}. ` +
47
+ "Custom voice IDs also accepted. Output codec inferred from output_path suffix (.wav → wav, else mp3). " +
48
+ `Max ${XAI_MAX_TEXT_LENGTH.toLocaleString("en-US")} characters.`,
49
+ parameters: ttsSchema,
50
+ async execute(
51
+ _toolCallId: string,
52
+ params: z.infer<typeof ttsSchema>,
53
+ _onUpdate,
54
+ ctx: CustomToolContext,
55
+ signal?: AbortSignal,
56
+ ): Promise<AgentToolResult<TtsToolDetails, typeof ttsSchema>> {
57
+ const creds = await resolveXAIHttpCredentials(ctx.modelRegistry);
58
+ if (!creds) {
59
+ return {
60
+ isError: true,
61
+ content: [
62
+ {
63
+ type: "text",
64
+ text: "No xAI credentials. Run /login → xAI Grok OAuth (SuperGrok Subscription) or set XAI_API_KEY.",
65
+ },
66
+ ],
67
+ };
68
+ }
69
+
70
+ const cwd = ctx.sessionManager.getCwd();
71
+ const outputPath = resolveToCwd(params.output_path, cwd);
72
+ const displayPath = formatPathRelativeToCwd(outputPath, cwd);
73
+ const codec: TtsCodec = outputPath.toLowerCase().endsWith(".wav") ? "wav" : "mp3";
74
+ const voiceId = params.voice_id;
75
+ const language = params.language;
76
+ const sampleRate = params.sample_rate ?? DEFAULT_XAI_SAMPLE_RATE;
77
+ const bitRate = params.bit_rate ?? DEFAULT_XAI_BIT_RATE;
78
+
79
+ const payload: Record<string, unknown> = {
80
+ text: params.text,
81
+ voice_id: voiceId,
82
+ language,
83
+ };
84
+ // Hermes tts_tool.py L926-940 — only send output_format when caller overrides a default.
85
+ const codecOverridden = codec !== "mp3";
86
+ const sampleRateOverridden = sampleRate !== DEFAULT_XAI_SAMPLE_RATE;
87
+ const bitRateOverridden = codec === "mp3" && bitRate !== DEFAULT_XAI_BIT_RATE;
88
+ if (codecOverridden || sampleRateOverridden || bitRateOverridden) {
89
+ const fmt: Record<string, unknown> = { codec };
90
+ if (sampleRate) fmt.sample_rate = sampleRate;
91
+ if (codec === "mp3" && bitRate) fmt.bit_rate = bitRate;
92
+ payload.output_format = fmt;
93
+ }
94
+
95
+ // Compose the caller signal with a 60 s timeout fence.
96
+ const timeoutSignal = AbortSignal.timeout(60_000);
97
+ const combinedSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
98
+
99
+ const response = await fetch(`${creds.baseURL}/tts`, {
100
+ method: "POST",
101
+ headers: {
102
+ Authorization: `Bearer ${creds.apiKey}`,
103
+ "Content-Type": "application/json",
104
+ "User-Agent": ohMyPiXAIUserAgent(),
105
+ },
106
+ body: JSON.stringify(payload),
107
+ signal: combinedSignal,
108
+ });
109
+ if (!response.ok) {
110
+ const detail = await response.text();
111
+ return {
112
+ isError: true,
113
+ content: [
114
+ {
115
+ type: "text",
116
+ text: `xAI TTS failed (${response.status}): ${detail.slice(0, 300)}`,
117
+ },
118
+ ],
119
+ };
120
+ }
121
+ const bytes = new Uint8Array(await response.arrayBuffer());
122
+ await Bun.write(outputPath, bytes);
123
+ return {
124
+ content: [
125
+ {
126
+ type: "text",
127
+ text: `Saved ${bytes.length} bytes to ${displayPath} (voice=${voiceId}, codec=${codec}).`,
128
+ },
129
+ ],
130
+ details: { bytes: bytes.length, voiceId, codec },
131
+ };
132
+ },
133
+ };