quantum-ai-sdk 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/account.d.ts +22 -0
- package/dist/account.js +47 -0
- package/dist/agent.d.ts +12 -0
- package/dist/agent.js +114 -0
- package/dist/audio.d.ts +82 -0
- package/dist/audio.js +140 -0
- package/dist/auth.d.ts +7 -0
- package/dist/auth.js +8 -0
- package/dist/batch.d.ts +22 -0
- package/dist/batch.js +32 -0
- package/dist/chat.d.ts +27 -0
- package/dist/chat.js +122 -0
- package/dist/client.d.ts +251 -0
- package/dist/client.js +479 -0
- package/dist/compute.d.ts +37 -0
- package/dist/compute.js +56 -0
- package/dist/contact.d.ts +12 -0
- package/dist/contact.js +26 -0
- package/dist/credits.d.ts +27 -0
- package/dist/credits.js +40 -0
- package/dist/documents.d.ts +17 -0
- package/dist/documents.js +42 -0
- package/dist/embeddings.d.ts +7 -0
- package/dist/embeddings.js +14 -0
- package/dist/errors.d.ts +29 -0
- package/dist/errors.js +70 -0
- package/dist/image.d.ts +12 -0
- package/dist/image.js +28 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +33 -0
- package/dist/jobs.d.ts +28 -0
- package/dist/jobs.js +56 -0
- package/dist/keys.d.ts +17 -0
- package/dist/keys.js +24 -0
- package/dist/models.d.ts +12 -0
- package/dist/models.js +16 -0
- package/dist/rag.d.ts +22 -0
- package/dist/rag.js +44 -0
- package/dist/realtime.d.ts +121 -0
- package/dist/realtime.js +259 -0
- package/dist/session.d.ts +7 -0
- package/dist/session.js +17 -0
- package/dist/types.d.ts +1008 -0
- package/dist/types.js +5 -0
- package/dist/video.d.ts +46 -0
- package/dist/video.js +74 -0
- package/dist/voices.d.ts +27 -0
- package/dist/voices.js +55 -0
- package/package.json +3 -3
package/dist/realtime.js
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime voice sessions via WebSocket.
|
|
3
|
+
*
|
|
4
|
+
* Connects to the QAI Realtime API for bidirectional audio streaming
|
|
5
|
+
* with voice activity detection, transcription, and tool calling.
|
|
6
|
+
*/
|
|
7
|
+
// ── RealtimeSender ─────────────────────────────────────────────────
|
|
8
|
+
/** Write half of a realtime session. */
|
|
9
|
+
export class RealtimeSender {
|
|
10
|
+
ws;
|
|
11
|
+
/** @internal */
|
|
12
|
+
constructor(ws) {
|
|
13
|
+
this.ws = ws;
|
|
14
|
+
}
|
|
15
|
+
/** Send a base64-encoded PCM audio chunk. */
|
|
16
|
+
sendAudio(base64Pcm) {
|
|
17
|
+
this.ws.send(JSON.stringify({
|
|
18
|
+
type: "input_audio_buffer.append",
|
|
19
|
+
audio: base64Pcm,
|
|
20
|
+
}));
|
|
21
|
+
}
|
|
22
|
+
/** Send a text message and request a response. */
|
|
23
|
+
sendText(text) {
|
|
24
|
+
this.ws.send(JSON.stringify({
|
|
25
|
+
type: "conversation.item.create",
|
|
26
|
+
item: {
|
|
27
|
+
type: "message",
|
|
28
|
+
role: "user",
|
|
29
|
+
content: [{ type: "input_text", text }],
|
|
30
|
+
},
|
|
31
|
+
}));
|
|
32
|
+
this.ws.send(JSON.stringify({
|
|
33
|
+
type: "response.create",
|
|
34
|
+
response: { modalities: ["text", "audio"] },
|
|
35
|
+
}));
|
|
36
|
+
}
|
|
37
|
+
/** Send a function/tool call result back to the model. */
|
|
38
|
+
sendFunctionResult(callId, output) {
|
|
39
|
+
this.ws.send(JSON.stringify({
|
|
40
|
+
type: "conversation.item.create",
|
|
41
|
+
item: {
|
|
42
|
+
type: "function_call_output",
|
|
43
|
+
call_id: callId,
|
|
44
|
+
output,
|
|
45
|
+
},
|
|
46
|
+
}));
|
|
47
|
+
this.ws.send(JSON.stringify({ type: "response.create" }));
|
|
48
|
+
}
|
|
49
|
+
/** Cancel the current response (interrupt). */
|
|
50
|
+
cancelResponse() {
|
|
51
|
+
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
52
|
+
}
|
|
53
|
+
/** Close the session gracefully. */
|
|
54
|
+
close() {
|
|
55
|
+
this.ws.close();
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
// ── RealtimeReceiver ───────────────────────────────────────────────
|
|
59
|
+
/** Read half of a realtime session. */
|
|
60
|
+
export class RealtimeReceiver {
|
|
61
|
+
queue = [];
|
|
62
|
+
resolve = null;
|
|
63
|
+
closed = false;
|
|
64
|
+
/** @internal */
|
|
65
|
+
constructor(ws) {
|
|
66
|
+
ws.addEventListener("message", (ev) => {
|
|
67
|
+
const event = parseEvent(typeof ev.data === "string" ? ev.data : "");
|
|
68
|
+
if (this.resolve) {
|
|
69
|
+
const r = this.resolve;
|
|
70
|
+
this.resolve = null;
|
|
71
|
+
r(event);
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
this.queue.push(event);
|
|
75
|
+
}
|
|
76
|
+
});
|
|
77
|
+
ws.addEventListener("close", () => {
|
|
78
|
+
this.closed = true;
|
|
79
|
+
if (this.resolve) {
|
|
80
|
+
const r = this.resolve;
|
|
81
|
+
this.resolve = null;
|
|
82
|
+
r(null);
|
|
83
|
+
}
|
|
84
|
+
});
|
|
85
|
+
ws.addEventListener("error", () => {
|
|
86
|
+
this.closed = true;
|
|
87
|
+
if (this.resolve) {
|
|
88
|
+
const r = this.resolve;
|
|
89
|
+
this.resolve = null;
|
|
90
|
+
r(null);
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
/** Receive the next event. Returns null when the connection closes. */
|
|
95
|
+
recv() {
|
|
96
|
+
if (this.queue.length > 0) {
|
|
97
|
+
return Promise.resolve(this.queue.shift());
|
|
98
|
+
}
|
|
99
|
+
if (this.closed) {
|
|
100
|
+
return Promise.resolve(null);
|
|
101
|
+
}
|
|
102
|
+
return new Promise((resolve) => {
|
|
103
|
+
this.resolve = resolve;
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
/** Async iterator support. */
|
|
107
|
+
async *[Symbol.asyncIterator]() {
|
|
108
|
+
while (true) {
|
|
109
|
+
const event = await this.recv();
|
|
110
|
+
if (event === null)
|
|
111
|
+
break;
|
|
112
|
+
yield event;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Request an ephemeral token from the QAI proxy for direct voice connection.
|
|
118
|
+
* Call this before `realtimeConnectDirect` to get a scoped token.
|
|
119
|
+
*
|
|
120
|
+
* @param provider - Optional provider ("xai" default, "elevenlabs"). When
|
|
121
|
+
* provider is "elevenlabs", the response contains a WebSocket proxy URL
|
|
122
|
+
* (signed_url) instead of an ephemeral token.
|
|
123
|
+
*/
|
|
124
|
+
export async function realtimeSession(client, provider) {
|
|
125
|
+
const body = {};
|
|
126
|
+
if (provider)
|
|
127
|
+
body.provider = provider;
|
|
128
|
+
const { data } = await client._doJSON("POST", "/qai/v1/realtime/session", body);
|
|
129
|
+
return data;
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* End a realtime session and finalize billing.
|
|
133
|
+
* Call after disconnecting from the direct xAI WebSocket.
|
|
134
|
+
*/
|
|
135
|
+
export async function realtimeEnd(client, sessionId, durationSeconds) {
|
|
136
|
+
await client._doJSON("POST", "/qai/v1/realtime/end", {
|
|
137
|
+
session_id: sessionId,
|
|
138
|
+
duration_seconds: durationSeconds,
|
|
139
|
+
});
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Refresh an ephemeral token for long sessions (>4 min).
|
|
143
|
+
* Returns a new ephemeral token string.
|
|
144
|
+
*/
|
|
145
|
+
export async function realtimeRefresh(client, sessionId) {
|
|
146
|
+
const { data } = await client._doJSON("POST", "/qai/v1/realtime/refresh", { session_id: sessionId });
|
|
147
|
+
return data.ephemeral_token;
|
|
148
|
+
}
|
|
149
|
+
/**
 * Connect directly to xAI's realtime API with an ephemeral token.
 * Use `realtimeSession()` first to get the token.
 *
 * The token is offered to the server as the WebSocket sub-protocol
 * `token-<ephemeralToken>` next to `realtime`. The call waits up to
 * 10 seconds for the socket to open, then sends the initial session update.
 *
 * @param ephemeralToken - Scoped token embedded in the sub-protocol list.
 * @param config - Optional session settings forwarded to `sendSessionUpdate`.
 * @param wsUrl - WebSocket endpoint; defaults to `wss://api.x.ai/v1/realtime`.
 * @returns `[sender, receiver]` wrapping the opened socket.
 * @throws Error when the socket reports an error before opening, or does not
 * open within 10 seconds (the socket is closed in the timeout case).
 */
export async function realtimeConnectDirect(ephemeralToken, config, wsUrl = "wss://api.x.ai/v1/realtime") {
    const socket = new WebSocket(wsUrl, ["realtime", `token-${ephemeralToken}`]);
    const opened = new Promise((resolve, reject) => {
        const giveUp = () => {
            socket.close();
            reject(new Error("Direct xAI WebSocket connection timed out (10s)"));
        };
        const timer = setTimeout(giveUp, 10_000);
        socket.addEventListener("open", () => {
            clearTimeout(timer);
            resolve();
        });
        socket.addEventListener("error", () => {
            clearTimeout(timer);
            reject(new Error("Direct xAI WebSocket connection failed"));
        });
    });
    await opened;
    sendSessionUpdate(socket, config);
    const sender = new RealtimeSender(socket);
    const receiver = new RealtimeReceiver(socket);
    return [sender, receiver];
}
|
|
167
|
+
// ── Connect (proxy path) ───────────────────────────────────────────
|
|
168
|
+
/**
 * Open a realtime voice session via the QAI proxy.
 *
 * The WebSocket URL is derived from the client's base URL (`https` becomes
 * `wss`, `http` becomes `ws`), with `/qai/v1/realtime` appended and the API
 * key passed as the `token` query parameter. The call waits up to 15 seconds
 * for the socket to open, then sends the initial session update.
 *
 * @param client - Client supplying `_baseUrl` and `_apiKey`.
 * @param config - Optional session settings forwarded to `sendSessionUpdate`.
 * @returns `[sender, receiver]` for bidirectional communication.
 * @throws Error when the socket reports an error before opening, or does not
 * open within 15 seconds (the socket is closed in the timeout case).
 */
export async function realtimeConnect(client, config) {
    const baseUrl = client._baseUrl;
    const apiKey = client._apiKey;
    // Convert https:// -> wss://, http:// -> ws://. URL schemes are
    // case-insensitive, hence the `i` flag. Trailing slashes are dropped so a
    // base URL such as "https://host/" does not produce "wss://host//qai/...".
    const wsBase = baseUrl
        .replace(/^https:\/\//i, "wss://")
        .replace(/^http:\/\//i, "ws://")
        .replace(/\/+$/, "");
    const url = `${wsBase}/qai/v1/realtime`;
    // The browser WebSocket constructor cannot set custom headers, so the API
    // key travels as the `token` query parameter, which the QAI proxy accepts.
    // NOTE(review): credentials in a URL can end up in proxy/server access
    // logs -- confirm this is acceptable for non-browser callers.
    const wsUrl = `${url}?token=${encodeURIComponent(apiKey)}`;
    const ws = new WebSocket(wsUrl);
    await new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
            ws.close();
            reject(new Error("WebSocket connection timed out (15s)"));
        }, 15_000);
        ws.addEventListener("open", () => {
            clearTimeout(timeout);
            resolve();
        });
        ws.addEventListener("error", () => {
            clearTimeout(timeout);
            reject(new Error("WebSocket connection failed"));
        });
    });
    sendSessionUpdate(ws, config);
    return [new RealtimeSender(ws), new RealtimeReceiver(ws)];
}
|
|
202
|
+
// ── Session update helper ──────────────────────────────────────────
|
|
203
|
+
function sendSessionUpdate(ws, config) {
|
|
204
|
+
ws.send(JSON.stringify({
|
|
205
|
+
type: "session.update",
|
|
206
|
+
session: {
|
|
207
|
+
voice: config?.voice ?? "Sal",
|
|
208
|
+
instructions: config?.instructions ?? "",
|
|
209
|
+
input_audio_format: "pcm16",
|
|
210
|
+
output_audio_format: "pcm16",
|
|
211
|
+
input_audio_transcription: { model: "grok-2-audio" },
|
|
212
|
+
turn_detection: { type: "server_vad" },
|
|
213
|
+
tools: config?.tools ?? [],
|
|
214
|
+
},
|
|
215
|
+
}));
|
|
216
|
+
}
|
|
217
|
+
// ── Event parsing ──────────────────────────────────────────────────
|
|
218
|
+
/**
 * Translate a raw server message into the SDK's event shape.
 *
 * Never throws: text that is not valid JSON, JSON that is not an object
 * (for example `null` or a number), and unrecognized event types all yield
 * `{ type: "unknown", raw }`.
 *
 * @param data - Message text as received from the WebSocket.
 * @returns The normalized event object.
 */
function parseEvent(data) {
    let v;
    try {
        v = JSON.parse(data);
    }
    catch {
        return { type: "unknown", raw: data };
    }
    // JSON.parse("null") returns null; reading `.type` on it would throw
    // inside the socket's message listener, so bail out early.
    if (v === null || typeof v !== "object") {
        return { type: "unknown", raw: v };
    }
    const t = v.type;
    switch (t) {
        case "session.updated":
            return { type: "session_ready" };
        // Two spellings are accepted for each audio/transcript event name.
        case "response.audio.delta":
        case "response.output_audio.delta":
            return { type: "audio_delta", delta: v.delta ?? "" };
        case "response.audio_transcript.delta":
        case "response.output_audio_transcript.delta":
            return { type: "transcript_delta", delta: v.delta ?? "", source: "output" };
        case "response.audio_transcript.done":
        case "response.output_audio_transcript.done":
            return { type: "transcript_done", transcript: v.transcript ?? "", source: "output" };
        case "conversation.item.input_audio_transcription.completed":
            return { type: "transcript_done", transcript: v.transcript ?? "", source: "input" };
        case "input_audio_buffer.speech_started":
            return { type: "speech_started" };
        case "input_audio_buffer.speech_stopped":
            return { type: "speech_stopped" };
        case "response.function_call_arguments.done":
            return {
                type: "function_call",
                name: v.name ?? "",
                callId: v.call_id ?? "",
                arguments: v.arguments ?? "",
            };
        case "response.done":
            return { type: "response_done" };
        case "error":
            // Prefer the nested error message, then a top-level one.
            return { type: "error", message: v.error?.message ?? v.message ?? "unknown error" };
        default:
            return { type: "unknown", raw: v };
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuantumClient } from "./client.js";
|
|
2
|
+
import type { SessionChatRequest, SessionChatResponse } from "./types.js";
|
|
3
|
+
/**
|
|
4
|
+
* Send a session-based chat request. The server manages conversation history.
|
|
5
|
+
* @internal — called by QuantumClient.chatSession()
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Send a session-based chat request. The server manages conversation history.
 *
 * @param client - Client instance used to issue the request.
 * @param req - Session chat request payload.
 * @returns A promise resolving to the session chat response.
 * @internal — called by QuantumClient.chatSession()
 */
export declare function chatSession(client: QuantumClient, req: SessionChatRequest): Promise<SessionChatResponse>;
|
package/dist/session.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Send a session-based chat request. The server manages conversation history.
|
|
3
|
+
* @internal — called by QuantumClient.chatSession()
|
|
4
|
+
*/
|
|
5
|
+
export async function chatSession(client, req) {
|
|
6
|
+
const body = { ...req, stream: false };
|
|
7
|
+
const { data, meta } = await client._doJSON("POST", "/qai/v1/chat/session", body);
|
|
8
|
+
if (data.response) {
|
|
9
|
+
if (!data.response.cost_ticks) {
|
|
10
|
+
data.response.cost_ticks = meta.costTicks;
|
|
11
|
+
}
|
|
12
|
+
if (!data.response.request_id) {
|
|
13
|
+
data.response.request_id = meta.requestId;
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
return data;
|
|
17
|
+
}
|