@absolutejs/voice-gladia 0.0.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -0
- package/dist/gladia.d.ts +3 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +257 -0
- package/dist/types.d.ts +21 -0
- package/package.json +38 -0
package/README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# `@absolutejs/voice-gladia`
|
|
2
|
+
|
|
3
|
+
Gladia real-time speech-to-text adapter for `@absolutejs/voice`.
|
|
4
|
+
|
|
5
|
+
Implements Gladia's two-step v2 live handshake directly (no Gladia SDK dependency): POST `/v2/live` to create a session and receive a one-time WebSocket URL, connect to it, stream binary PCM/μ-law/A-law audio, and consume `transcript` / `end_of_utterance` JSON messages. Well suited to multilingual code-switching (Hindi-English, Catalan-Spanish, etc.) thanks to Gladia's language model.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
bun add @absolutejs/voice-gladia
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
`@absolutejs/voice` is a runtime dependency.
|
|
14
|
+
|
|
15
|
+
## Use
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { voice } from "@absolutejs/voice";
|
|
19
|
+
import { gladia } from "@absolutejs/voice-gladia";
|
|
20
|
+
|
|
21
|
+
const app = voice({
|
|
22
|
+
stt: gladia({
|
|
23
|
+
apiKey: process.env.GLADIA_API_KEY!,
|
|
24
|
+
// optional:
|
|
25
|
+
model: "solaria-1",
|
|
26
|
+
languages: ["en"], // language detection seed
|
|
27
|
+
codeSwitching: true, // for multilingual callers
|
|
28
|
+
realtimeProcessing: { sentences: true },
|
|
29
|
+
}),
|
|
30
|
+
// ... tts + other options ...
|
|
31
|
+
});
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
For mixed Hindi/English callers (CoSHE-style), pair with `languageStrategy`:
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
app.use({
|
|
38
|
+
stt: gladia({ apiKey, codeSwitching: true }),
|
|
39
|
+
// and at open() time:
|
|
40
|
+
// languageStrategy: { mode: 'allow-switching', primaryLanguage: 'en', secondaryLanguages: ['hi'] }
|
|
41
|
+
});
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Options
|
|
45
|
+
|
|
46
|
+
| Option | Required | Default | Notes |
|
|
47
|
+
| --- | --- | --- | --- |
|
|
48
|
+
| `apiKey` | yes | — | Gladia API key, sent as `X-Gladia-Key`. |
|
|
49
|
+
| `model` | no | `solaria-1` | Gladia model id. |
|
|
50
|
+
| `languages` | no | — | Default language list, overridden when `STTAdapterOpenOptions.languageStrategy` resolves a list. |
|
|
51
|
+
| `codeSwitching` | no | — | Enable mid-utterance language switching. |
|
|
52
|
+
| `realtimeProcessing` | no | — | Forwarded to Gladia's `realtime_processing` config (sentences, diarization, etc.). |
|
|
53
|
+
| `punctuationConfig` | no | — | Forwarded to `punctuation_config`. |
|
|
54
|
+
| `baseUrl` | no | `https://api.gladia.io` | Override for staging / enterprise endpoints. |
|
|
55
|
+
| `sessionPath` | no | `/v2/live` | Override if you proxy Gladia behind a gateway. |
|
|
56
|
+
| `connectTimeoutMs` | no | `8000` | Time to wait for the WebSocket `open` event. |
|
|
57
|
+
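| `fetch` | no | `globalThis.fetch` | Custom `fetch` implementation, e.g. a stub for tests. For `https://` session URLs the adapter also passes `protocol: "http2"` in the request options to opportunistically use HTTP/2 multiplexing. |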
|
|
58
|
+
| `webSocket.factory` | no | `new WebSocket(url)` | Inject a fake socket for tests. |
|
|
59
|
+
|
|
60
|
+
## Notes
|
|
61
|
+
|
|
62
|
+
- Audio sent before the WebSocket open completes is buffered and flushed once the socket is ready.
|
|
63
|
+
- `transcript` messages → `partial` when `data.is_final !== true`, `final` otherwise. Confidence + per-utterance `start` / `end` (converted to ms) are lifted onto the transcript.
|
|
64
|
+
- `end_of_utterance` / `speech_end` → `endOfTurn` with `reason: "vendor"`.
|
|
65
|
+
- `session.close(reason)` sends `{ "type": "stop_recording" }` and then closes the socket cleanly.
|
|
66
|
+
- Supported encodings on `STTAdapterOpenOptions.format.encoding`: `pcm_s16le` → `wav/pcm`, `mulaw` / `pcm_mulaw` → `wav/ulaw`, `alaw` / `pcm_alaw` → `wav/alaw`.
|
package/dist/gladia.d.ts
ADDED
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/gladia.ts
|
|
3
|
+
// Gladia API origin used by resolveBaseUrl when `config.baseUrl` is not provided.
var DEFAULT_BASE_URL = "https://api.gladia.io";
|
|
4
|
+
// Path of the session-creation endpoint used by resolveSessionUrl when `config.sessionPath` is not provided.
var DEFAULT_SESSION_PATH = "/v2/live";
|
|
5
|
+
// Language code sent in `language_config.languages` when resolveLanguages yields no list.
var DEFAULT_LANGUAGE = "en";
|
|
6
|
+
// How long (in milliseconds) open() waits for the WebSocket `open` event when `config.connectTimeoutMs` is not provided.
var DEFAULT_CONNECT_TIMEOUT_MS = 8000;
|
|
7
|
+
// Model id sent in the session-init body when `config.model` is not provided.
var DEFAULT_MODEL = "solaria-1";
|
|
8
|
+
/**
 * Reports whether `url` uses the HTTPS scheme.
 *
 * Strings are matched on a literal (case-sensitive) "https://" prefix; any
 * other value is expected to expose a URL-style `protocol` property.
 */
var isHttpsUrl = (url) => {
  if (typeof url === "string") {
    return url.startsWith("https://");
  }
  return url.protocol === "https:";
};
|
|
9
|
+
/**
 * Builds the extra request options spread into the session `fetch` call:
 * `{ protocol: "http2" }` for HTTPS targets, an empty object otherwise.
 */
var h2IfHttps = (url) => {
  if (!isHttpsUrl(url)) {
    return {};
  }
  return { protocol: "http2" };
};
|
|
10
|
+
var createListenerMap = () => ({
|
|
11
|
+
close: new Set,
|
|
12
|
+
endOfTurn: new Set,
|
|
13
|
+
error: new Set,
|
|
14
|
+
final: new Set,
|
|
15
|
+
partial: new Set
|
|
16
|
+
});
|
|
17
|
+
var emit = async (listeners, event, payload) => {
|
|
18
|
+
for (const listener of listeners[event]) {
|
|
19
|
+
await listener(payload);
|
|
20
|
+
}
|
|
21
|
+
};
|
|
22
|
+
/**
 * Returns a shallow copy of `value` without the own enumerable properties
 * whose value is exactly `undefined` (`null`, `0`, `""` and `false` are kept).
 */
var omitUndefined = (value) => {
  const definedEntries = Object.entries(value).filter((pair) => pair[1] !== undefined);
  return Object.fromEntries(definedEntries);
};
|
|
23
|
+
/**
 * Returns the API base URL (`config.baseUrl`, or DEFAULT_BASE_URL when unset)
 * with every trailing slash removed, so that a path starting with "/" can be
 * appended without producing "//" in the resulting URL.
 */
var resolveBaseUrl = (config) => (config.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
|
|
24
|
+
/**
 * Builds the URL of the session-creation endpoint by joining the resolved
 * base URL with `config.sessionPath` (DEFAULT_SESSION_PATH when unset).
 * A leading "/" is added to the path when it is missing.
 *
 * @returns A `URL` instance; `new URL` throws if the joined string is not a valid URL.
 */
var resolveSessionUrl = (config) => {
  const sessionPath = config.sessionPath ?? DEFAULT_SESSION_PATH;
  const origin = resolveBaseUrl(config);
  const normalizedPath = sessionPath.startsWith("/") ? sessionPath : `/${sessionPath}`;
  return new URL(origin + normalizedPath);
};
|
|
28
|
+
/**
 * Maps `format.encoding` to the encoding identifier sent to Gladia:
 * `pcm_s16le` → `wav/pcm`, `pcm_mulaw`/`mulaw` → `wav/ulaw`,
 * `pcm_alaw`/`alaw` → `wav/alaw`.
 *
 * @throws Error for any other encoding value.
 */
var resolveEncoding = (format) => {
  const requested = format.encoding;
  if (requested === "pcm_s16le") {
    return "wav/pcm";
  }
  if (requested === "pcm_mulaw" || requested === "mulaw") {
    return "wav/ulaw";
  }
  if (requested === "pcm_alaw" || requested === "alaw") {
    return "wav/alaw";
  }
  throw new Error(`Unsupported audio encoding "${String(requested)}" for @absolutejs/voice-gladia. ` + `Use pcm_s16le, mulaw, or alaw.`);
};
|
|
42
|
+
/**
 * Returns the `bit_depth` value for the session-init body: 8 for the μ-law
 * and A-law encodings, 16 for `pcm_s16le` and for any unrecognized encoding.
 */
var resolveBitDepth = (format) => {
  const eightBitEncodings = ["pcm_mulaw", "mulaw", "pcm_alaw", "alaw"];
  return eightBitEncodings.includes(format.encoding) ? 8 : 16;
};
|
|
55
|
+
/**
 * Picks the language list for the session from the open-time
 * `languageStrategy`, falling back to `config.languages`:
 *
 * - `fixed`: only the primary language, when one is given.
 * - `allow-switching`: the primary language followed by the secondary ones,
 *   when that list is non-empty.
 * - `auto-detect`: `allowedLanguages`, when non-empty.
 *
 * @returns The resolved list, or `config.languages` (possibly `undefined`).
 */
var resolveLanguages = (openOptions, config) => {
  const strategy = openOptions.languageStrategy;
  switch (strategy?.mode) {
    case "fixed":
      return strategy.primaryLanguage ? [strategy.primaryLanguage] : config.languages;
    case "allow-switching": {
      const requested = [...(strategy.secondaryLanguages ?? [])];
      if (strategy.primaryLanguage) {
        requested.unshift(strategy.primaryLanguage);
      }
      return requested.length > 0 ? requested : config.languages;
    }
    case "auto-detect":
      return strategy.allowedLanguages?.length ? strategy.allowedLanguages : config.languages;
    default:
      return config.languages;
  }
};
|
|
71
|
+
/**
 * Assembles the JSON body POSTed to the session endpoint from the adapter
 * config and the open-time audio format. Properties whose value is
 * `undefined` are dropped from the body and from `language_config`.
 *
 * Defaults applied here: 1 channel, 16000 Hz sample rate, DEFAULT_MODEL, and
 * `[DEFAULT_LANGUAGE]` when no language list can be resolved.
 *
 * @throws Error (from resolveEncoding) when the audio encoding is unsupported.
 */
var buildSessionInitBody = (config, openOptions) => {
  const audioFormat = openOptions.format;
  const body = {
    bit_depth: resolveBitDepth(audioFormat),
    channels: audioFormat.channels ?? 1,
    encoding: resolveEncoding(audioFormat),
    language_config: omitUndefined({
      code_switching: config.codeSwitching,
      languages: resolveLanguages(openOptions, config) ?? [DEFAULT_LANGUAGE]
    }),
    model: config.model ?? DEFAULT_MODEL,
    punctuation_config: config.punctuationConfig,
    realtime_processing: config.realtimeProcessing,
    sample_rate: audioFormat.sampleRateHz ?? 16000
  };
  return omitUndefined(body);
};
|
|
84
|
+
var toUint8Array = (chunk) => {
|
|
85
|
+
if (chunk instanceof Uint8Array)
|
|
86
|
+
return chunk;
|
|
87
|
+
if (ArrayBuffer.isView(chunk)) {
|
|
88
|
+
return new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
89
|
+
}
|
|
90
|
+
return new Uint8Array(chunk);
|
|
91
|
+
};
|
|
92
|
+
/**
 * Creates the Gladia real-time speech-to-text adapter.
 *
 * `open()` performs Gladia's two-step live handshake: it POSTs the session
 * configuration to the session URL, reads the WebSocket `url` from the JSON
 * response, connects to it, and resolves with a session object once the
 * socket has opened.
 *
 * @param config Adapter options. `apiKey` is required; `fetch` and
 *   `webSocket.factory` may be injected (for example in tests).
 * @returns An adapter object `{ kind: "stt", open }`.
 * @throws Error when `config.apiKey` is missing.
 */
var gladia = (config) => {
  if (!config.apiKey) {
    throw new Error("@absolutejs/voice-gladia requires an apiKey.");
  }
  // Result intentionally unused: the call only fails fast (TypeError) when
  // `baseUrl` is set to something that has no string `replace` method.
  resolveBaseUrl(config);
  const fetchImpl = config.fetch ?? globalThis.fetch;
  // readyState values defined by the WebSocket standard. Plain numbers are
  // used so sockets created by an injected factory work even where no global
  // `WebSocket` constructor exists.
  const SOCKET_CONNECTING = 0;
  const SOCKET_OPEN = 1;
  return {
    kind: "stt",
    /**
     * Opens a streaming session.
     *
     * Rejects when the session request returns a non-OK status, when the
     * response carries no websocket `url`, or when the socket errors, closes,
     * or times out before its `open` event.
     */
    open: async (openOptions) => {
      const listeners = createListenerMap();
      // Fire-and-forget dispatch that never leaves a rejected promise
      // unhandled. A failure inside a listener is reported to the "error"
      // listeners; a failure inside an "error" listener is dropped so that
      // reporting cannot recurse.
      const dispatch = (event, payload) => {
        void emit(listeners, event, payload).catch((cause) => {
          if (event === "error") {
            return;
          }
          // NOTE(review): `recoverable: true` because the socket itself is
          // unaffected by a listener failure — confirm how @absolutejs/voice
          // treats recoverable errors.
          void emit(listeners, "error", {
            error: cause instanceof Error ? cause : new Error(String(cause)),
            recoverable: true,
            type: "error"
          }).catch(() => {});
        });
      };
      const sessionTarget = resolveSessionUrl(config);
      const sessionResponse = await fetchImpl(sessionTarget, {
        ...h2IfHttps(sessionTarget),
        body: JSON.stringify(buildSessionInitBody(config, openOptions)),
        headers: {
          "Content-Type": "application/json",
          "X-Gladia-Key": config.apiKey
        },
        method: "POST"
      });
      if (!sessionResponse.ok) {
        const bodyText = await sessionResponse.text().catch(() => "");
        // Report the path that was actually requested (it may be overridden
        // through `sessionPath` / `baseUrl`); at most 200 characters of the
        // response body are included.
        throw new Error(`Gladia ${sessionTarget.pathname} returned ${String(sessionResponse.status)} ${sessionResponse.statusText}${bodyText ? `: ${bodyText.slice(0, 200)}` : ""}`);
      }
      const sessionBody = await sessionResponse.json();
      if (!sessionBody?.url) {
        throw new Error("Gladia session response did not include a websocket url.");
      }
      const factory = config.webSocket?.factory ?? ((target) => new WebSocket(target));
      const socket = factory(sessionBody.url);
      let opened = false;
      let closed = false;
      // Number of audio chunks sent so far; embedded in transcript ids.
      let seq = 0;
      const pendingAudio = [];
      const connectTimeoutMs = config.connectTimeoutMs ?? DEFAULT_CONNECT_TIMEOUT_MS;
      // Closes the socket, retrying without arguments when the implementation
      // rejects the code or reason. Standard WebSockets only accept 1000 or
      // 3000-4999 as a close code and throw otherwise, which would leave the
      // connection open if the failure were simply swallowed.
      const closeSocket = (code, reason) => {
        try {
          socket.close(code, reason);
        } catch {
          try {
            socket.close();
          } catch {}
        }
      };
      const sendAudio = (audio) => {
        socket.send(audio);
        seq += 1;
      };
      const flushPending = () => {
        for (const chunk of pendingAudio.splice(0, pendingAudio.length)) {
          sendAudio(chunk);
        }
      };
      const openPromise = new Promise((resolve, reject) => {
        const openTimeout = setTimeout(() => {
          if (opened) {
            return;
          }
          reject(new Error(`Gladia websocket open timeout after ${String(connectTimeoutMs)}ms`));
          closeSocket(1013, "open-timeout");
        }, connectTimeoutMs);
        socket.addEventListener("open", () => {
          opened = true;
          clearTimeout(openTimeout);
          flushPending();
          resolve();
        }, { once: true });
        socket.addEventListener("error", () => {
          clearTimeout(openTimeout);
          if (!opened) {
            reject(new Error("Gladia websocket failed to open."));
          }
        });
        // A socket that closes before opening (without an error event) would
        // otherwise keep open() pending until the timeout fires.
        socket.addEventListener("close", () => {
          if (opened) {
            return;
          }
          clearTimeout(openTimeout);
          reject(new Error("Gladia websocket closed before opening."));
        }, { once: true });
      });
      socket.addEventListener("message", (event) => {
        // Only text frames carry JSON messages; anything else is ignored.
        if (typeof event.data !== "string") {
          return;
        }
        let parsed;
        try {
          parsed = JSON.parse(event.data);
        } catch {
          return;
        }
        if (!parsed) {
          return;
        }
        const type = parsed.type;
        const data = parsed.data;
        if (type === "transcript" && data?.utterance) {
          const isFinal = data.is_final === true;
          const utterance = data.utterance;
          // Skip transcripts with no text.
          if (!utterance.text) {
            return;
          }
          const transcriptEvent = isFinal ? "final" : "partial";
          dispatch(transcriptEvent, {
            receivedAt: Date.now(),
            transcript: {
              confidence: utterance.confidence,
              // `start` / `end` are multiplied by 1000 to obtain milliseconds.
              endedAtMs: typeof utterance.end === "number" ? Math.round(utterance.end * 1000) : undefined,
              id: `gladia:${isFinal ? "final" : "partial"}:${String(seq)}`,
              isFinal,
              language: utterance.language,
              startedAtMs: typeof utterance.start === "number" ? Math.round(utterance.start * 1000) : undefined,
              text: utterance.text,
              vendor: "gladia"
            },
            type: transcriptEvent
          });
        }
        if (type === "end_of_utterance" || type === "speech_end") {
          dispatch("endOfTurn", {
            reason: "vendor",
            receivedAt: Date.now(),
            type: "endOfTurn"
          });
        }
        if (type === "error") {
          const message = data?.message ?? "Gladia error";
          dispatch("error", {
            error: new Error(message),
            recoverable: false,
            type: "error"
          });
        }
      });
      socket.addEventListener("close", (event) => {
        // Already closed through session.close(): "close" listeners are not
        // notified in that case.
        if (closed) {
          return;
        }
        closed = true;
        dispatch("close", {
          code: event.code,
          reason: event.reason || undefined,
          recoverable: false,
          type: "close"
        });
      });
      await openPromise;
      const session = {
        /** Sends `stop_recording` (when the socket is open) and closes the socket. Idempotent. */
        close: async (reason) => {
          if (closed) {
            return;
          }
          closed = true;
          const state = socket.readyState;
          if (state !== SOCKET_OPEN && state !== SOCKET_CONNECTING) {
            return;
          }
          if (state === SOCKET_OPEN) {
            // Best effort: a failed send must not prevent the close below.
            try {
              socket.send(JSON.stringify({ type: "stop_recording" }));
            } catch {}
          }
          closeSocket(1000, reason);
        },
        /** Registers `handler` for `event`; the returned function unregisters it. */
        on: (event, handler) => {
          listeners[event].add(handler);
          return () => {
            listeners[event].delete(handler);
          };
        },
        /** Sends one audio chunk. Empty chunks and chunks sent after close are dropped. */
        send: async (audio) => {
          if (closed) {
            return;
          }
          const bytes = toUint8Array(audio);
          if (bytes.byteLength === 0) {
            return;
          }
          if (!opened) {
            pendingAudio.push(bytes);
            return;
          }
          sendAudio(bytes);
        }
      };
      return session;
    }
  };
};
|
|
255
|
+
export {
|
|
256
|
+
gladia
|
|
257
|
+
};
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/** Gladia model id: the known `'solaria-1'` literal or any other string (`string & {}` keeps the literal visible to editor completion). */
export type GladiaModel = 'solaria-1' | (string & {});
|
|
2
|
+
/** Audio encoding identifiers sent in the session-init body's `encoding` field. */
export type GladiaEncoding = 'wav/alaw' | 'wav/pcm' | 'wav/ulaw';
|
|
3
|
+
/** Shape of the `language_config` object sent in the session-init body. */
export type GladiaLanguageConfig = {
  /** Enables mid-utterance language switching. */
  code_switching?: boolean;
  /** Language codes for the session. */
  languages?: readonly string[];
};
|
|
7
|
+
/** Options accepted by the `gladia()` adapter factory. */
export type GladiaSTTOptions = {
  /** Gladia API key, sent as the `X-Gladia-Key` header. Required. */
  apiKey: string;
  /** API base URL; defaults to `https://api.gladia.io`. */
  baseUrl?: string;
  /** Forwarded as `language_config.code_switching`. */
  codeSwitching?: boolean;
  /** Milliseconds to wait for the WebSocket `open` event; defaults to 8000. */
  connectTimeoutMs?: number;
  /** `fetch` implementation used for the session request; defaults to `globalThis.fetch`. */
  fetch?: typeof fetch;
  /** Default language list, used when the open-time language strategy does not resolve one. */
  languages?: readonly string[];
  /** Gladia model id; defaults to `solaria-1`. */
  model?: GladiaModel;
  /** Forwarded as `punctuation_config`. */
  punctuationConfig?: Record<string, unknown>;
  /** Forwarded as `realtime_processing`. */
  realtimeProcessing?: Record<string, unknown>;
  /** Path of the session-creation endpoint; defaults to `/v2/live`. */
  sessionPath?: string;
  /** WebSocket construction hooks. */
  webSocket?: {
    /** Creates the socket for the session URL; defaults to `new WebSocket(url)`. */
    factory?: (url: string) => WebSocket;
  };
};
|
package/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@absolutejs/voice-gladia",
|
|
3
|
+
"version": "0.0.1-beta.1",
|
|
4
|
+
"description": "Gladia real-time speech-to-text adapter for @absolutejs/voice",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "https://github.com/absolutejs/voice-adapters.git",
|
|
8
|
+
"directory": "gladia"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"main": "./dist/index.js",
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"import": "./dist/index.js",
|
|
19
|
+
"types": "./dist/index.d.ts"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"license": "CC-BY-NC-4.0",
|
|
23
|
+
"author": "Alex Kahn",
|
|
24
|
+
"scripts": {
|
|
25
|
+
"build": "rm -rf dist && bun build ./src/index.ts --outdir dist --target bun --external @absolutejs/voice && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
26
|
+
"format": "prettier --write \"./**/*.{js,ts,json,md}\"",
|
|
27
|
+
"release": "bun run format && bun run build && bun publish --access public",
|
|
28
|
+
"test": "bun test",
|
|
29
|
+
"typecheck": "bun run tsc --noEmit"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"@absolutejs/voice": "0.0.22-beta.471"
|
|
33
|
+
},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"@types/bun": "1.3.9",
|
|
36
|
+
"typescript": "^5.9.3"
|
|
37
|
+
}
|
|
38
|
+
}
|