autopreso 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -0
- package/package.json +59 -0
- package/public/app.js +1232 -0
- package/public/index.html +28 -0
- package/public/starter-elements.js +37 -0
- package/public/style.css +535 -0
- package/public/transcript-panel.js +10 -0
- package/src/agent-provider.js +106 -0
- package/src/cli-options.js +29 -0
- package/src/cli.js +96 -0
- package/src/codex-auth.js +135 -0
- package/src/moonshine-transcription.js +146 -0
- package/src/openai-transcription.js +186 -0
- package/src/server.js +996 -0
- package/src/settings-store.js +137 -0
- package/src/simulator-agent-provider.js +24 -0
- package/src/simulator-options.js +76 -0
- package/src/transcript-chunker.js +22 -0
- package/src/transcript-turn-queue.js +78 -0
- package/src/whiteboard-elements.js +74 -0
- package/src/whiteboard-session.js +235 -0
- package/src/whiteboard-tools.js +48 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Parses the CLI argument list into a server options object.
 *
 * Recognized flags: `--no-open` (suppress auto-opening the browser) and
 * `--help`/`-h`. Any other argument throws. The listen port comes from
 * `env.PORT` (default 3210) and the host is always loopback.
 */
export function parseCliArgs(args, env = process.env) {
  const options = {
    host: "127.0.0.1",
    port: parsePort(env.PORT),
    openBrowser: true,
  };

  for (const arg of args) {
    switch (arg) {
      case "--no-open":
        options.openBrowser = false;
        break;
      case "--help":
      case "-h":
        options.help = true;
        break;
      default:
        throw new Error(`Unknown argument "${arg}".`);
    }
  }

  return options;
}

/**
 * Validates a PORT value, falling back to "3210" when unset or empty.
 * Throws unless the value is an integer in [1, 65535].
 */
function parsePort(value) {
  const raw = value || "3210";
  const parsed = Number(raw);
  const inRange = Number.isInteger(parsed) && parsed >= 1 && parsed <= 65535;
  if (!inRange) {
    throw new Error(`Invalid PORT "${raw}".`);
  }
  return parsed;
}
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import open from "open";
|
|
5
|
+
|
|
6
|
+
import { resolveAgentProviderFromSettings } from "./agent-provider.js";
|
|
7
|
+
import { parseCliArgs } from "./cli-options.js";
|
|
8
|
+
import { startServer } from "./server.js";
|
|
9
|
+
import { createSettingsStore } from "./settings-store.js";
|
|
10
|
+
|
|
11
|
+
// Per-user settings location (XDG-style config directory under the home dir).
const SETTINGS_PATH = path.join(os.homedir(), ".config", "autopreso", "settings.json");
|
|
12
|
+
|
|
13
|
+
/**
 * CLI entry point: parses arguments, loads persisted settings, validates
 * the agent and transcription configuration, starts the local server, and
 * optionally opens the browser. Sets `process.exitCode = 1` and returns
 * early on any configuration failure rather than throwing.
 */
async function main() {
  let cliOptions;
  try {
    cliOptions = parseCliArgs(process.argv.slice(2));
  } catch (parseError) {
    console.error(parseError.message);
    console.error("Run `autopreso --help` for usage.");
    process.exitCode = 1;
    return;
  }

  if (cliOptions.help) {
    printHelp();
    return;
  }

  const store = createSettingsStore({ filePath: SETTINGS_PATH });
  const settings = await store.load();

  // Resolve the whiteboard agent up front so misconfiguration fails fast
  // with actionable guidance instead of surfacing mid-session.
  let agentProvider;
  try {
    agentProvider = resolveAgentProviderFromSettings({ settings, env: process.env });
  } catch (agentError) {
    console.error(`Whiteboard agent is not configured: ${agentError.message}`);
    console.error("Open the app and configure the agent in the status panel, or set OPENAI_API_KEY / OLLAMA_MODEL in your shell.");
    console.error(`Settings file: ${SETTINGS_PATH}`);
    process.exitCode = 1;
    return;
  }

  const wantsOpenAiStt = settings.transcription.provider === "openai";
  const hasOpenAiKey = Boolean(settings.apiKeys?.openai || process.env.OPENAI_API_KEY);
  if (wantsOpenAiStt && !hasOpenAiKey) {
    console.error("OpenAI transcription is selected but no API key is configured.");
    console.error("Open the app and add the key in the STT engine row, or set OPENAI_API_KEY in your shell.");
    process.exitCode = 1;
    return;
  }

  const { url } = await startServer({
    ...cliOptions,
    settingsStore: store,
    onStatus: (message) => console.log(message),
  });

  console.log(`autopreso listening at ${url}`);
  console.log(`whiteboard agent: ${agentProvider.provider} ${agentProvider.requestedModel ?? agentProvider.model}`);
  console.log(`settings file: ${SETTINGS_PATH}`);

  if (cliOptions.openBrowser) {
    await open(url);
  }
}
|
|
64
|
+
|
|
65
|
+
// Prints CLI usage, option descriptions, and the first-run environment
// variables to stdout. The interpolated SETTINGS_PATH shows where settings
// persist on this machine.
function printHelp() {
  console.log(`autopreso

Usage:
  autopreso [options]

Options:
  --no-open   Do not open the browser automatically
  -h, --help  Show this help

The server binds to 127.0.0.1 only.

Environment:
  PORT                     Port to listen on. Default: 3210
  OPENAI_API_KEY           Seeds the OpenAI key on first run if no settings file exists
  OPENAI_MODEL             Seeds the OpenAI agent model on first run
  OPENAI_REASONING_EFFORT  Seeds reasoning effort on first run (none, low, medium, high, xhigh)
  CODEX_HOME               Codex CLI home directory. Default: ~/.codex
  CODEX_MODEL              Seeds the Codex model on first run
  CODEX_BASE_URL           Seeds the Codex backend URL on first run
  OLLAMA_MODEL             Seeds the Ollama model on first run
  OLLAMA_BASE_URL          Seeds the Ollama base URL on first run

Models and providers are configured in the UI after launch. Settings persist at:
  ${SETTINGS_PATH}
`);
}
|
|
92
|
+
|
|
93
|
+
// Top-level launch: surface any unhandled failure and exit non-zero without
// masking the original error (exitCode lets pending I/O flush before exit).
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { chmodSync, existsSync, readFileSync, renameSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
|
|
5
|
+
// Default ChatGPT-backed Codex endpoint, used when CODEX_BASE_URL is unset.
export const DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex";
// OAuth client id the Codex CLI uses against auth.openai.com.
const CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann";
const CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token";
// Refresh access tokens this long before their JWT `exp` to avoid expiry races.
const CODEX_ACCESS_TOKEN_REFRESH_SKEW_MS = 120_000;
|
|
9
|
+
|
|
10
|
+
/**
 * Best-effort synchronous read of the Codex CLI auth file.
 * Returns the parsed auth record, or undefined when the file is missing,
 * unreadable, or does not contain usable tokens.
 */
export function readCodexCliAuthSync(env = process.env) {
  const filePath = codexAuthPath(env);
  if (!existsSync(filePath)) return undefined;

  try {
    const raw = readFileSync(filePath, "utf8");
    return parseCodexAuthPayload(JSON.parse(raw));
  } catch {
    // A corrupt or unreadable auth file is treated the same as no auth.
    return undefined;
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Resolves usable Codex CLI credentials from auth.json, refreshing the
 * access token (and persisting the result) when it is near expiry.
 *
 * @param {object} env     Environment (CODEX_HOME / CODEX_BASE_URL lookups).
 * @param {object} options Optional `now` (ms timestamp) and `fetchFn` for tests.
 * @returns {Promise<object>} `{ provider, baseURL, apiKey, refreshToken, accountId }`.
 * @throws {Error} With actionable "run codex" guidance when the auth file is
 *                 missing, unparsable, or lacks usable tokens.
 */
export async function resolveCodexCliCredentials(env = process.env, options = {}) {
  const authPath = codexAuthPath(env);

  // Fix: previously a missing auth file surfaced as a raw ENOENT from
  // readFileSync and corrupt JSON as a bare SyntaxError; both now throw the
  // same actionable guidance the other failure paths use.
  if (!existsSync(authPath)) {
    throw new Error(`Codex CLI auth was not found at ${authPath}. Run \`codex\` and sign in with ChatGPT.`);
  }

  let payload;
  try {
    payload = JSON.parse(readFileSync(authPath, "utf8"));
  } catch (cause) {
    throw new Error(`Codex CLI auth at ${authPath} could not be read or parsed. Run \`codex\` and sign in with ChatGPT.`, { cause });
  }

  const auth = parseCodexAuthPayload(payload);
  if (!auth) {
    throw new Error(`Codex CLI auth is missing usable tokens at ${authPath}. Run \`codex\` and sign in with ChatGPT.`);
  }

  let tokens = { ...auth.tokens };
  if (codexAccessTokenIsExpiring(tokens.access_token, options.now ?? Date.now())) {
    tokens = await refreshCodexTokens(tokens, options.fetchFn ?? fetch);
    // Persist refreshed tokens so the Codex CLI and future runs reuse them.
    payload.tokens = { ...payload.tokens, ...tokens };
    writeCodexAuthPayload(authPath, payload);
  }

  return {
    provider: "codex",
    baseURL: cleanEnvValue(env.CODEX_BASE_URL) ?? DEFAULT_CODEX_BASE_URL,
    apiKey: tokens.access_token,
    refreshToken: tokens.refresh_token,
    accountId: auth.accountId,
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Returns a fetch-compatible function that injects Codex CLI credentials
 * (bearer token plus the optional ChatGPT account header) into every request.
 * Credentials are re-resolved per call so refreshed tokens are picked up.
 */
export function createCodexFetch(env = process.env) {
  return async (input, init = {}) => {
    const { apiKey, accountId } = await resolveCodexCliCredentials(env);
    const headers = new Headers(init.headers);
    headers.set("authorization", `Bearer ${apiKey}`);
    if (accountId) headers.set("ChatGPT-Account-Id", accountId);
    return fetch(input, { ...init, headers });
  };
}
|
|
57
|
+
|
|
58
|
+
// Location of the Codex CLI auth file: $CODEX_HOME/auth.json, defaulting
// to ~/.codex/auth.json.
function codexAuthPath(env) {
  const codexHome = cleanEnvValue(env.CODEX_HOME) ?? join(homedir(), ".codex");
  return join(codexHome, "auth.json");
}
|
|
61
|
+
|
|
62
|
+
/**
 * Validates a parsed auth.json payload and normalizes its tokens.
 * Returns undefined unless both access and refresh tokens are present and
 * non-blank after trimming.
 */
function parseCodexAuthPayload(payload) {
  if (typeof payload !== "object" || !payload) return undefined;
  const { tokens } = payload;
  if (typeof tokens !== "object" || !tokens) return undefined;

  const accessToken = cleanEnvValue(tokens.access_token);
  const refreshToken = cleanEnvValue(tokens.refresh_token);
  if (!accessToken || !refreshToken) return undefined;

  return {
    tokens: { ...tokens, access_token: accessToken, refresh_token: refreshToken },
    accessToken,
    refreshToken,
    // Prefer the explicit account id; fall back to the JWT claim.
    accountId: cleanEnvValue(payload.account_id) ?? extractAccountId(accessToken),
  };
}
|
|
76
|
+
|
|
77
|
+
// True when the JWT's `exp` claim falls inside the refresh skew window of
// nowMs. Tokens without a numeric exp claim are never considered expiring.
function codexAccessTokenIsExpiring(accessToken, nowMs) {
  const claims = decodeJwtPayload(accessToken);
  if (typeof claims?.exp !== "number") return false;
  return claims.exp * 1000 <= nowMs + CODEX_ACCESS_TOKEN_REFRESH_SKEW_MS;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Exchanges a refresh token for a fresh access token at the Codex OAuth
 * endpoint. Returns the token record with access_token (and refresh_token,
 * when the server rotates it) replaced. Throws when the HTTP exchange fails
 * or the response lacks an access token.
 */
async function refreshCodexTokens(tokens, fetchFn) {
  const form = new URLSearchParams({
    grant_type: "refresh_token",
    refresh_token: tokens.refresh_token,
    client_id: CODEX_OAUTH_CLIENT_ID,
  });

  const response = await fetchFn(CODEX_OAUTH_TOKEN_URL, {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded", Accept: "application/json" },
    body: form,
  });
  if (!response.ok) {
    throw new Error(`Codex token refresh failed with status ${response.status}. Run \`codex\` and sign in with ChatGPT.`);
  }

  const payload = await response.json();
  const accessToken = cleanEnvValue(payload.access_token);
  if (!accessToken) {
    throw new Error("Codex token refresh response was missing access_token.");
  }

  return {
    ...tokens,
    access_token: accessToken,
    // The endpoint may rotate the refresh token; keep the old one otherwise.
    refresh_token: cleanEnvValue(payload.refresh_token) ?? tokens.refresh_token,
  };
}
|
|
110
|
+
|
|
111
|
+
// Atomically replaces the auth file: write a sibling temp file, restrict it
// to owner read/write (0600), then rename over the destination.
function writeCodexAuthPayload(authPath, payload) {
  const stagingPath = `${authPath}.tmp`;
  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  writeFileSync(stagingPath, serialized);
  chmodSync(stagingPath, 0o600);
  renameSync(stagingPath, authPath);
}
|
|
117
|
+
|
|
118
|
+
// Pulls the ChatGPT account id out of the access token's JWT claims,
// checking the top-level claim first and then the namespaced auth claim.
function extractAccountId(accessToken) {
  const claims = decodeJwtPayload(accessToken);
  if (!claims) return undefined;
  return (
    cleanEnvValue(claims.chatgpt_account_id) ??
    cleanEnvValue(claims["https://api.openai.com/auth"]?.chatgpt_account_id)
  );
}
|
|
122
|
+
|
|
123
|
+
// Decodes the claims segment of a JWT without verifying its signature.
// Returns undefined for anything that is not a three-part token or whose
// payload segment is not valid base64url-encoded JSON.
function decodeJwtPayload(token) {
  if (typeof token !== "string") return undefined;
  const segments = token.split(".");
  if (segments.length !== 3) return undefined;
  try {
    const json = Buffer.from(segments[1], "base64url").toString("utf8");
    return JSON.parse(json);
  } catch {
    return undefined;
  }
}
|
|
131
|
+
|
|
132
|
+
// Trims a string-ish value and collapses blank results to undefined so
// callers can chain `??` fallbacks uniformly.
function cleanEnvValue(value) {
  const trimmed = value?.trim();
  return trimmed ? trimmed : undefined;
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
// CommonJS resolver so the optional platform package can be located from ESM.
const require = createRequire(import.meta.url);
// PCM sample rate advertised on every audio frame sent to the sidecar.
const SAMPLE_RATE = 24000;
// Maps `${platform}:${arch}` to the optional dependency that ships the
// prebuilt Moonshine sidecar binary for that target.
const SIDECAR_PACKAGE_BY_PLATFORM = new Map([
  ["darwin:arm64", "@autopreso/moonshine-darwin-arm64"],
  ["darwin:x64", "@autopreso/moonshine-darwin-x64"],
]);
|
|
11
|
+
|
|
12
|
+
/**
 * Resolves the sidecar package name for a platform/arch pair.
 * Throws for unsupported targets (anything other than macOS arm64/x64).
 */
export function moonshinePlatformPackageName(platform = process.platform, arch = process.arch) {
  const key = `${platform}:${arch}`;
  const packageName = SIDECAR_PACKAGE_BY_PLATFORM.get(key);
  if (packageName === undefined) {
    throw new Error("Moonshine local transcription is currently available for macOS arm64 and x64.");
  }
  return packageName;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Locates the Moonshine sidecar binary.
 * The AUTOPRESO_MOONSHINE_BIN environment variable overrides discovery;
 * otherwise the platform package is resolved and the bundled binary path
 * inside it is returned.
 */
export function resolveMoonshineSidecarPath({
  env = process.env,
  platform = process.platform,
  arch = process.arch,
  requireResolve = require.resolve,
} = {}) {
  const override = env.AUTOPRESO_MOONSHINE_BIN;
  if (override) return override;

  const packageName = moonshinePlatformPackageName(platform, arch);
  const packageRoot = path.dirname(requireResolve(`${packageName}/package.json`));
  return path.join(packageRoot, "bin", "autopreso-moonshine");
}
|
|
32
|
+
|
|
33
|
+
/**
 * Creates a transcription backend that streams audio to a local Moonshine
 * sidecar process over stdin and reads newline-delimited JSON results from
 * its stdout.
 *
 * sendTranscript receives partial/committed/error events; queueTranscript
 * receives committed text only. spawnProcess and resolveSidecarPath exist
 * as injection points for tests.
 */
export function createMoonshineTranscription({
  sendTranscript,
  queueTranscript,
  options,
  spawnProcess = spawn,
  resolveSidecarPath = () => resolveMoonshineSidecarPath(),
}) {
  // Lazily spawned sidecar process plus the promise that settles once it
  // reports "ready" on stdout.
  let child = null;
  let stdoutBuffer = "";
  let readyPromise = null;
  let resolveReady = null;
  let rejectReady = null;

  // Spawns the sidecar on first use and wires its stdio handlers; returns
  // the existing process on subsequent calls.
  function ensureChild() {
    if (child) return child;

    // May throw (e.g. unsupported platform); callers decide how to surface it.
    const binary = resolveSidecarPath();
    readyPromise = new Promise((resolve, reject) => {
      resolveReady = resolve;
      rejectReady = reject;
    });
    child = spawnProcess(binary, ["--model", options.moonshineModel, "--language", "en"], {
      stdio: ["pipe", "pipe", "pipe"],
    });

    child.stdout.on("data", (chunk) => {
      // Accumulate and split on newlines; any trailing partial line stays
      // buffered until the next chunk completes it.
      stdoutBuffer += chunk.toString("utf8");
      const lines = stdoutBuffer.split("\n");
      stdoutBuffer = lines.pop() ?? "";
      for (const line of lines) {
        handleSidecarLine(line, { sendTranscript, queueTranscript, onReady: resolveReady });
      }
    });

    child.stderr.on("data", (chunk) => {
      // Sidecar stderr output is surfaced to the UI as error events.
      const message = chunk.toString("utf8").trim();
      if (message) sendTranscript({ type: "error", message });
    });

    child.on("error", (error) => {
      sendTranscript({ type: "error", message: error.message });
      rejectReady?.(error);
    });

    child.on("close", (code) => {
      // Rejecting an already-resolved promise is a no-op, so this only fails
      // a ready() call that is still pending. State is reset so the next
      // ensureChild() respawns from scratch.
      rejectReady?.(new Error(`Moonshine sidecar exited before it was ready${code === null ? "" : ` (code ${code})`}.`));
      child = null;
      readyPromise = null;
      resolveReady = null;
      rejectReady = null;
    });

    return child;
  }

  return {
    // Spawns the sidecar if needed and waits for its "ready" message.
    ready: async () => {
      ensureChild();
      await readyPromise;
    },
    // Forwards one audio payload (tagged pcm16le at SAMPLE_RATE) to the
    // sidecar; spawn failures are reported via sendTranscript, not thrown.
    sendAudio: (audio) => {
      if (!audio) return;
      let process;
      try {
        process = ensureChild();
      } catch (error) {
        sendTranscript({ type: "error", message: error.message });
        return;
      }
      process.stdin.write(`${JSON.stringify({ type: "audio", encoding: "pcm16le", sampleRate: SAMPLE_RATE, audio })}\n`);
    },
    // Asks a running sidecar to finalize the current utterance; no-op otherwise.
    stop: () => {
      if (!child) return;
      child.stdin.write(`${JSON.stringify({ type: "stop" })}\n`);
    },
    // Tears the sidecar down; the "close" handler resets remaining state.
    close: () => {
      if (!child) return;
      child.stdin.end();
      child.kill();
      child = null;
    },
  };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Dispatches one newline-delimited JSON message from the Moonshine sidecar.
 * Blank lines are ignored; unparsable lines become error events. "ready"
 * fires onReady, partial/committed transcripts are forwarded (committed text
 * is additionally queued), and sidecar errors are relayed with a fallback
 * message.
 */
function handleSidecarLine(line, { sendTranscript, queueTranscript, onReady }) {
  if (!line.trim()) return;

  let message;
  try {
    message = JSON.parse(line);
  } catch {
    sendTranscript({ type: "error", message: `Invalid Moonshine sidecar message: ${line}` });
    return;
  }

  switch (message.type) {
    case "ready":
      onReady?.();
      break;
    case "transcript:partial":
      sendTranscript({ type: "transcript:partial", text: message.text ?? "" });
      break;
    case "transcript:committed": {
      const text = message.text ?? "";
      sendTranscript({ type: "transcript:committed", text });
      queueTranscript(text);
      break;
    }
    case "error":
      sendTranscript({ type: "error", message: message.message ?? "Moonshine transcription error" });
      break;
    default:
      break;
  }
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import { WebSocket } from "ws";
|
|
2
|
+
|
|
3
|
+
// OpenAI realtime websocket endpoint, opened with the transcription intent.
const REALTIME_URL = "wss://api.openai.com/v1/realtime?intent=transcription";
|
|
4
|
+
|
|
5
|
+
/**
 * Creates a transcription backend over the OpenAI realtime websocket.
 *
 * sendTranscript receives partial/committed/error events; queueTranscript
 * receives committed text only. env supplies OPENAI_API_KEY and
 * createWebSocket is an injection point for tests.
 */
export function createOpenAITranscription({
  sendTranscript,
  queueTranscript,
  options,
  env = process.env,
  createWebSocket = (url, protocols, init) => new WebSocket(url, protocols, init),
}) {
  // Lazily opened socket plus the promise that settles when the session is
  // acknowledged by the server.
  let socket = null;
  let readyPromise = null;
  let resolveReady = null;
  let rejectReady = null;
  // NOTE(review): `configured` flips on socket "open", before the
  // session.update below is acknowledged — audio sent in that window rides on
  // the server's default session config. Confirm this ordering is intended.
  let configured = false;
  // Audio chunks queued while the socket is still connecting.
  let pendingAudio = [];
  // Running partial transcript accumulated from delta events.
  let partialText = "";
  // True while uncommitted audio sits in the server buffer; guards stop().
  let bufferedSinceCommit = false;

  // Opens the socket on first use and wires its handlers; returns the
  // existing socket on subsequent calls. Throws if no API key is available.
  function ensureSocket() {
    if (socket) return socket;

    const apiKey = env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error("OPENAI_API_KEY is required for the OpenAI transcription provider.");
    }

    readyPromise = new Promise((resolve, reject) => {
      resolveReady = resolve;
      rejectReady = reject;
    });

    socket = createWebSocket(REALTIME_URL, undefined, {
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
    });

    socket.on("open", () => {
      configured = true;
      // Configure the session for PCM input and the selected STT model.
      socket.send(JSON.stringify({
        type: "session.update",
        session: {
          type: "transcription",
          audio: {
            input: {
              format: { type: "audio/pcm", rate: 24000 },
              transcription: { model: options.openaiTranscriptionModel },
            },
          },
        },
      }));
      // Flush audio that arrived while the socket was still connecting.
      for (const audio of pendingAudio) {
        socket.send(JSON.stringify({ type: "input_audio_buffer.append", audio }));
      }
      pendingAudio = [];
    });

    socket.on("message", (raw) => {
      handleSocketMessage(raw.toString("utf8"), {
        sendTranscript,
        queueTranscript,
        onReady: () => resolveReady?.(),
        getPartial: () => partialText,
        setPartial: (value) => { partialText = value; },
        onBufferDrained: () => { bufferedSinceCommit = false; },
      });
    });

    socket.on("error", (error) => {
      sendTranscript({ type: "error", message: error.message });
      rejectReady?.(error);
    });

    socket.on("close", () => {
      // Rejecting an already-resolved promise is a no-op, so this only fails
      // a ready() still pending. All state resets so the next call reconnects.
      rejectReady?.(new Error("OpenAI realtime socket closed before it was ready."));
      socket = null;
      readyPromise = null;
      resolveReady = null;
      rejectReady = null;
      configured = false;
      pendingAudio = [];
      partialText = "";
      bufferedSinceCommit = false;
    });

    return socket;
  }

  return {
    // Connects if needed and waits for the session to be acknowledged.
    // Connection-setup failures are reported via sendTranscript AND rethrown.
    ready: async () => {
      try {
        ensureSocket();
      } catch (error) {
        sendTranscript({ type: "error", message: error.message });
        throw error;
      }
      await readyPromise;
    },
    // Appends one audio chunk, buffering locally until the socket is open.
    sendAudio: (audio) => {
      if (!audio) return;
      let connection;
      try {
        connection = ensureSocket();
      } catch (error) {
        sendTranscript({ type: "error", message: error.message });
        return;
      }
      if (!configured) {
        pendingAudio.push(audio);
        bufferedSinceCommit = true;
        return;
      }
      connection.send(JSON.stringify({ type: "input_audio_buffer.append", audio }));
      bufferedSinceCommit = true;
    },
    // Manually commits the server-side audio buffer, if it has content.
    stop: () => {
      if (!socket || !configured) return;
      // If server-side VAD already auto-committed (or no audio was sent), skip the manual
      // commit - OpenAI rejects commits on empty buffers with "buffer too small".
      if (!bufferedSinceCommit) return;
      socket.send(JSON.stringify({ type: "input_audio_buffer.commit" }));
      bufferedSinceCommit = false;
    },
    // Closes the socket; the "close" handler resets remaining state.
    close: () => {
      if (!socket) return;
      socket.close();
      socket = null;
    },
  };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Routes one OpenAI realtime event to the transcription callbacks.
 *
 * Session created/updated acks fire onReady. Transcription deltas extend the
 * partial text; a completed transcript resets it, drains the buffer flag,
 * and is both sent and queued. Buffer commits from server VAD drain the flag.
 * Errors are relayed except the benign empty-commit race.
 */
function handleSocketMessage(line, { sendTranscript, queueTranscript, onReady, getPartial, setPartial, onBufferDrained }) {
  if (!line.trim()) return;

  let message;
  try {
    message = JSON.parse(line);
  } catch {
    sendTranscript({ type: "error", message: `Invalid OpenAI realtime message: ${line}` });
    return;
  }

  const kind = message.type;

  const readyEvents = [
    "transcription_session.created",
    "transcription_session.updated",
    "session.created",
    "session.updated",
  ];
  if (readyEvents.includes(kind)) {
    onReady?.();
    return;
  }

  if (kind === "conversation.item.input_audio_transcription.delta") {
    const updated = getPartial() + (message.delta ?? "");
    setPartial(updated);
    sendTranscript({ type: "transcript:partial", text: updated });
    return;
  }

  if (kind === "conversation.item.input_audio_transcription.completed") {
    const text = message.transcript ?? "";
    setPartial("");
    onBufferDrained?.();
    sendTranscript({ type: "transcript:committed", text });
    queueTranscript(text);
    return;
  }

  // Server VAD commits the buffer well before transcription completes, and
  // discards silent audio outright. Mirror the real buffer state from this
  // signal so a later stop() doesn't try to commit an empty buffer.
  if (kind === "input_audio_buffer.committed") {
    onBufferDrained?.();
    return;
  }

  if (kind === "error") {
    // input_audio_buffer_commit_empty is a benign race: server VAD already
    // drained (or discarded silent audio) before our manual commit landed, so
    // it is suppressed rather than surfaced to the UI.
    if (message.error?.code === "input_audio_buffer_commit_empty") return;
    sendTranscript({ type: "error", message: message.error?.message ?? "OpenAI realtime error" });
  }
}
|