@oh-my-pi/pi-coding-agent 15.5.6 → 15.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -1
- package/dist/types/config/settings-schema.d.ts +50 -2
- package/dist/types/lib/xai-http.d.ts +40 -0
- package/dist/types/session/agent-session.d.ts +1 -0
- package/dist/types/tools/fetch.d.ts +16 -0
- package/dist/types/tools/image-gen.d.ts +6 -2
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/plan-mode-guard.d.ts +5 -6
- package/dist/types/tools/tts.d.ts +18 -0
- package/package.json +8 -8
- package/src/config/model-registry.ts +41 -9
- package/src/config/settings-schema.ts +43 -2
- package/src/lib/xai-http.ts +124 -0
- package/src/modes/controllers/selector-controller.ts +7 -2
- package/src/modes/interactive-mode.ts +1 -1
- package/src/sdk.ts +15 -9
- package/src/session/agent-session.ts +30 -3
- package/src/tools/fetch.ts +52 -24
- package/src/tools/image-gen.ts +205 -7
- package/src/tools/index.ts +1 -0
- package/src/tools/plan-mode-guard.ts +14 -6
- package/src/tools/search.ts +2 -2
- package/src/tools/tts.ts +133 -0
package/src/tools/search.ts
CHANGED
|
@@ -478,8 +478,8 @@ export class SearchTool implements AgentTool<typeof searchSchema, SearchToolDeta
|
|
|
478
478
|
);
|
|
479
479
|
}
|
|
480
480
|
} catch (err) {
|
|
481
|
-
if (err instanceof Error &&
|
|
482
|
-
throw new ToolError(err.message);
|
|
481
|
+
if (err instanceof Error && /^regex(?: parse)? error/i.test(err.message)) {
|
|
482
|
+
throw new ToolError(err.message.replace(/^regex(?: parse)? error:?\s*/i, "Invalid regex: "));
|
|
483
483
|
}
|
|
484
484
|
throw err;
|
|
485
485
|
}
|
package/src/tools/tts.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// Ported from NousResearch/hermes-agent (MIT) — tools/tts_tool.py L167-171, L896-959.
|
|
2
|
+
|
|
3
|
+
import type { AgentToolResult } from "@oh-my-pi/pi-agent-core";
|
|
4
|
+
import * as z from "zod/v4";
|
|
5
|
+
import type { CustomTool, CustomToolContext } from "../extensibility/custom-tools/types";
|
|
6
|
+
import { ohMyPiXAIUserAgent, resolveXAIHttpCredentials } from "../lib/xai-http";
|
|
7
|
+
import { formatPathRelativeToCwd, resolveToCwd } from "./path-utils";
|
|
8
|
+
|
|
9
|
+
// Defaults mirrored from Hermes tts_tool.py L167-171.

/** Voice used when the caller does not pass `voice_id`. */
const DEFAULT_XAI_VOICE_ID = "eve" as const;

/** Language code used when the caller does not pass `language`. */
const DEFAULT_XAI_LANGUAGE = "en" as const;

/** Sample rate assumed when `sample_rate` is omitted from the tool call. */
const DEFAULT_XAI_SAMPLE_RATE = 24_000;

/** Bit rate assumed when `bit_rate` is omitted from the tool call. */
const DEFAULT_XAI_BIT_RATE = 128_000;

/** Upper bound, in characters, on the `text` accepted by a single call. */
const XAI_MAX_TEXT_LENGTH = 15_000;

// Built-in voices per xAI Tier-1 docs (2026-05-16). Custom voice IDs are accepted by xAI
// as well, so `voice_id` is deliberately NOT enum-restricted in the schema; this list is
// only used to render the tool description.
const XAI_BUILTIN_VOICES = ["ara", "eve", "leo", "rex", "sal"] as const;
|
|
19
|
+
|
|
20
|
+
/**
 * Renders the built-in voice names as a single ", "-separated string for the tool
 * description, appending " (default)" to the entry equal to DEFAULT_XAI_VOICE_ID.
 */
const formatVoiceList = (): string => {
	const labels: string[] = [];
	for (const voice of XAI_BUILTIN_VOICES) {
		const isDefault = voice === DEFAULT_XAI_VOICE_ID;
		labels.push(isDefault ? `${voice} (default)` : voice);
	}
	return labels.join(", ");
};
|
|
22
|
+
|
|
23
|
+
/** Audio container/codec names this tool can request; chosen from the output file suffix. */
type TtsCodec = "mp3" | "wav";
|
|
24
|
+
|
|
25
|
+
/**
 * Input contract for the `tts` tool.
 *
 * Only `text` and `output_path` are required; the remaining fields either carry a
 * schema-level default or are resolved to a module default inside `execute`.
 */
const ttsSchema = z.object({
	// Text to synthesize: non-empty, capped at XAI_MAX_TEXT_LENGTH characters.
	text: z.string().min(1).max(XAI_MAX_TEXT_LENGTH),
	// Any string is accepted (custom voices are allowed); defaults to DEFAULT_XAI_VOICE_ID.
	voice_id: z.string().default(DEFAULT_XAI_VOICE_ID),
	// Language code forwarded verbatim to the API; defaults to DEFAULT_XAI_LANGUAGE.
	language: z.string().default(DEFAULT_XAI_LANGUAGE),
	// Destination file; resolved against the session cwd, and its suffix selects the codec.
	output_path: z.string(),
	// Optional integer override; DEFAULT_XAI_SAMPLE_RATE is used when absent.
	sample_rate: z.number().int().optional(),
	// Optional integer override; DEFAULT_XAI_BIT_RATE is used when absent (mp3 only).
	bit_rate: z.number().int().optional(),
});
|
|
33
|
+
|
|
34
|
+
/** Structured metadata attached to a successful `tts` tool result. */
interface TtsToolDetails {
	/** Number of audio bytes written to the output file. */
	bytes: number;
	/** Voice identifier that was sent to the API. */
	voiceId: string;
	/** Codec inferred from the output path suffix. */
	codec: TtsCodec;
}
|
|
39
|
+
|
|
40
|
+
/** Upper bound for one xAI TTS round-trip (request plus body download), in milliseconds. */
const XAI_TTS_TIMEOUT_MS = 60_000;

/** Wraps a message in the error-result shape used by this tool (`isError` + one text part). */
const ttsErrorResult = (text: string): AgentToolResult<TtsToolDetails, typeof ttsSchema> => ({
	isError: true,
	content: [{ type: "text", text }],
});

/**
 * `tts` custom tool: synthesizes speech through the xAI `/tts` endpoint and writes the
 * returned audio to `output_path`.
 *
 * Failure modes reported as `isError` results (no file is written):
 * - no xAI credentials could be resolved,
 * - the endpoint answered with a non-2xx status,
 * - the request exceeded the tool's own timeout,
 * - the endpoint answered 2xx with an empty body.
 *
 * Aborts triggered by the caller-supplied `signal`, and other transport errors, are
 * rethrown unchanged.
 */
export const ttsTool: CustomTool<typeof ttsSchema, TtsToolDetails> = {
	name: "tts",
	label: "TextToSpeech",
	strict: false,
	approval: "write",
	description:
		`Synthesize speech from text using xAI Grok Voice. Built-in voices: ${formatVoiceList()}. ` +
		"Custom voice IDs also accepted. Output codec inferred from output_path suffix (.wav → wav, else mp3). " +
		`Max ${XAI_MAX_TEXT_LENGTH.toLocaleString("en-US")} characters.`,
	parameters: ttsSchema,
	/**
	 * @param _toolCallId Unused.
	 * @param params Validated tool arguments (see `ttsSchema`).
	 * @param _onUpdate Unused; no progress updates are emitted.
	 * @param ctx Supplies the model registry (credentials) and the session cwd.
	 * @param signal Optional caller abort signal, combined with the internal timeout.
	 * @returns A success result carrying `TtsToolDetails`, or an `isError` result.
	 */
	async execute(
		_toolCallId: string,
		params: z.infer<typeof ttsSchema>,
		_onUpdate,
		ctx: CustomToolContext,
		signal?: AbortSignal,
	): Promise<AgentToolResult<TtsToolDetails, typeof ttsSchema>> {
		const creds = await resolveXAIHttpCredentials(ctx.modelRegistry);
		if (!creds) {
			return ttsErrorResult(
				"No xAI credentials. Run /login → xAI Grok OAuth (SuperGrok Subscription) or set XAI_API_KEY.",
			);
		}

		const cwd = ctx.sessionManager.getCwd();
		const outputPath = resolveToCwd(params.output_path, cwd);
		const displayPath = formatPathRelativeToCwd(outputPath, cwd);
		const codec: TtsCodec = outputPath.toLowerCase().endsWith(".wav") ? "wav" : "mp3";
		const voiceId = params.voice_id;
		const language = params.language;
		const sampleRate = params.sample_rate ?? DEFAULT_XAI_SAMPLE_RATE;
		const bitRate = params.bit_rate ?? DEFAULT_XAI_BIT_RATE;

		const payload: Record<string, unknown> = {
			text: params.text,
			voice_id: voiceId,
			language,
		};
		// Hermes tts_tool.py L926-940 — only send output_format when caller overrides a default.
		const codecOverridden = codec !== "mp3";
		const sampleRateOverridden = sampleRate !== DEFAULT_XAI_SAMPLE_RATE;
		const bitRateOverridden = codec === "mp3" && bitRate !== DEFAULT_XAI_BIT_RATE;
		if (codecOverridden || sampleRateOverridden || bitRateOverridden) {
			const fmt: Record<string, unknown> = { codec };
			// Truthiness checks are intentional: a value of 0 is treated as "not specified".
			if (sampleRate) fmt.sample_rate = sampleRate;
			// bit_rate is only attached for mp3 output.
			if (codec === "mp3" && bitRate) fmt.bit_rate = bitRate;
			payload.output_format = fmt;
		}

		// Compose the caller signal with the tool's own timeout fence.
		const timeoutSignal = AbortSignal.timeout(XAI_TTS_TIMEOUT_MS);
		const combinedSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;

		let bytes: Uint8Array;
		try {
			const response = await fetch(`${creds.baseURL}/tts`, {
				method: "POST",
				headers: {
					Authorization: `Bearer ${creds.apiKey}`,
					"Content-Type": "application/json",
					"User-Agent": ohMyPiXAIUserAgent(),
				},
				body: JSON.stringify(payload),
				signal: combinedSignal,
			});
			if (!response.ok) {
				const detail = await response.text();
				// Cap the echoed body so a large error page cannot flood the tool output.
				return ttsErrorResult(`xAI TTS failed (${response.status}): ${detail.slice(0, 300)}`);
			}
			bytes = new Uint8Array(await response.arrayBuffer());
		} catch (err) {
			// Translate only our own timeout into a tool error; a caller-initiated abort or
			// any other transport failure keeps propagating exactly as before.
			if (timeoutSignal.aborted && !signal?.aborted) {
				return ttsErrorResult(`xAI TTS timed out after ${XAI_TTS_TIMEOUT_MS / 1000} s.`);
			}
			throw err;
		}

		// A 2xx with no body would otherwise be saved as a 0-byte file and reported as success.
		if (bytes.length === 0) {
			return ttsErrorResult("xAI TTS returned an empty audio payload; nothing was written.");
		}

		await Bun.write(outputPath, bytes);
		return {
			content: [
				{
					type: "text",
					text: `Saved ${bytes.length} bytes to ${displayPath} (voice=${voiceId}, codec=${codec}).`,
				},
			],
			details: { bytes: bytes.length, voiceId, codec },
		};
	},
};
|