@kithjs/server 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -0
- package/characters/example.json +16 -0
- package/docker-compose.yml +16 -0
- package/package.json +37 -0
- package/src/character-registry.ts +57 -0
- package/src/index.ts +24 -0
- package/src/server.ts +318 -0
- package/src/session.ts +190 -0
package/Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Build context is the monorepo root (docker-compose.yml sets `context: ../..`),
# which is why the COPY sources below start with `packages/...`.

# --- Stage 1: build the Python virtualenv used by the Pipecat runtime ---
FROM python:3.11-slim AS python-base
RUN pip install --no-cache-dir uv
COPY packages/runtime-pipecat/python /app/python
WORKDIR /app/python
RUN uv venv && . .venv/bin/activate && uv pip install -e .

# --- Stage 2: Bun image that serves HTTP/WS and spawns the Python runtime ---
FROM oven/bun:1.3
COPY --from=python-base /app/python /app/python
COPY --from=python-base /usr/local/bin/python3.11 /usr/local/bin/python3.11
COPY --from=python-base /usr/local/lib/python3.11 /usr/local/lib/python3.11
# The interpreter in the official python images is linked against a shared
# libpython, which lives beside (not inside) the stdlib directory copied above.
# Without it the copied binary fails with "error while loading shared libraries".
# NOTE(review): confirm with `ldd /usr/local/bin/python3.11` in the final image.
COPY --from=python-base /usr/local/lib/libpython3.11.so* /usr/local/lib/
# Refresh the linker cache and recreate the unversioned interpreter names, in
# case the venv's `python` symlink resolves through one of them.
RUN ldconfig \
 && ln -sf /usr/local/bin/python3.11 /usr/local/bin/python3 \
 && ln -sf /usr/local/bin/python3.11 /usr/local/bin/python
WORKDIR /app/server
COPY packages/server/package.json .
COPY packages/server/src ./src
COPY packages/server/characters ./characters
RUN bun install
# Consumed by KithServer as the defaults for `pythonPath` / `pythonCwd` / `port`.
ENV PIPECAT_PYTHON_PATH=/app/python/.venv/bin/python
ENV PIPECAT_PYTHON_CWD=/app/python
ENV PORT=3040
EXPOSE 3040
CMD ["bun", "src/index.ts"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"voice": {
|
|
3
|
+
"stability": 0.5,
|
|
4
|
+
"similarityBoost": 0.85,
|
|
5
|
+
"style": 0.4,
|
|
6
|
+
"useSpeakerBoost": true,
|
|
7
|
+
"speed": 1.0
|
|
8
|
+
},
|
|
9
|
+
"slang": {
|
|
10
|
+
"gg": "good game",
|
|
11
|
+
"fr": "for real",
|
|
12
|
+
"ngl": "not gonna lie"
|
|
13
|
+
},
|
|
14
|
+
"pronunciation": {},
|
|
15
|
+
"personaMode": "neutral"
|
|
16
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
version: "3.8"
|
|
2
|
+
services:
|
|
3
|
+
kith-voice:
|
|
4
|
+
build:
|
|
5
|
+
context: ../..
|
|
6
|
+
dockerfile: packages/server/Dockerfile
|
|
7
|
+
ports:
|
|
8
|
+
- "3040:3040"
|
|
9
|
+
environment:
|
|
10
|
+
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
|
11
|
+
- ELEVENLABS_VOICE_ID=${ELEVENLABS_VOICE_ID:-kPzsL2i3teMYv0FxEYQ6}
|
|
12
|
+
- ELEVENLABS_MODEL_ID=${ELEVENLABS_MODEL_ID:-eleven_v3}
|
|
13
|
+
- KITH_CHARACTER_DIR=/app/server/characters
|
|
14
|
+
- PORT=3040
|
|
15
|
+
volumes:
|
|
16
|
+
- ./characters:/app/server/characters:ro
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kithjs/server",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Standalone voice microservice for Kith — POST text, get streaming audio. Framework agnostic.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./src/index.ts",
|
|
7
|
+
"types": "./src/index.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": "./src/index.ts"
|
|
10
|
+
},
|
|
11
|
+
"files": ["src", "characters", "Dockerfile", "docker-compose.yml", "README.md"],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"dev": "bun --hot src/index.ts",
|
|
14
|
+
"start": "bun src/index.ts",
|
|
15
|
+
"typecheck": "tsc --noEmit",
|
|
16
|
+
"test": "bun test"
|
|
17
|
+
},
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "https://github.com/wbaxterh/kith",
|
|
22
|
+
"directory": "packages/server"
|
|
23
|
+
},
|
|
24
|
+
"keywords": ["kith", "voice", "tts", "microservice", "ai-companion", "streaming"],
|
|
25
|
+
"publishConfig": {
|
|
26
|
+
"access": "public"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@kithjs/core": "^0.2.0",
|
|
30
|
+
"@kithjs/runtime-pipecat": "^0.2.0",
|
|
31
|
+
"@kithjs/voice-router": "^0.2.0",
|
|
32
|
+
"@kithjs/observability": "^0.2.0"
|
|
33
|
+
},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"typescript": "^5.6.0"
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character registry — loads VoiceCharacter profiles from a directory.
|
|
3
|
+
*
|
|
4
|
+
* Characters are JSON files in a configurable directory. The filename
|
|
5
|
+
* (without .json) becomes the character ID.
|
|
6
|
+
*
|
|
7
|
+
* characters/
|
|
8
|
+
* kaori.json → characterId: "kaori"
|
|
9
|
+
* apollo.json → characterId: "apollo"
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import type { VoiceCharacter } from "@kithjs/voice-router";
|
|
14
|
+
|
|
15
|
+
/**
 * In-memory registry of voice character profiles.
 *
 * Profiles are read from `<dir>/<characterId>.json`; the filename without the
 * `.json` suffix is the character id.
 */
export class CharacterRegistry {
  private characters = new Map<string, VoiceCharacter>();
  private readonly dir: string;

  /**
   * @param dir Directory that holds the `*.json` character files.
   */
  constructor(dir: string) {
    this.dir = dir;
  }

  /**
   * (Re)load every `*.json` file from the configured directory.
   *
   * Never rejects: a directory that cannot be read is logged and leaves the
   * current registry untouched; a file that cannot be read, parsed, or that
   * does not contain a JSON object is logged and skipped. On success the
   * registry is replaced as a whole, so characters whose files were removed
   * since the previous call disappear.
   */
  async load(): Promise<void> {
    const fs = await import("node:fs/promises");

    let entries: string[];
    try {
      entries = await fs.readdir(this.dir);
    } catch (err) {
      const code = (err as { code?: unknown } | null)?.code;
      if (code === "ENOENT" || code === "ENOTDIR") {
        console.log(`[kith] no characters directory at ${this.dir}`);
      } else {
        // Permission problems etc. should not masquerade as "directory missing".
        console.warn(`[kith] could not read characters directory ${this.dir}:`, err);
      }
      return;
    }

    const loaded = new Map<string, VoiceCharacter>();
    // readdir order is platform dependent; sort so list() is deterministic.
    for (const file of [...entries].sort()) {
      if (!file.endsWith(".json")) continue;
      const id = file.slice(0, -".json".length);
      try {
        const raw = await fs.readFile(path.join(this.dir, file), "utf-8");
        const parsed: unknown = JSON.parse(raw);
        // `null`, arrays and primitives are valid JSON but not a profile; storing
        // them would make list() throw when it reads `personaMode`.
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
          throw new TypeError("expected a JSON object");
        }
        loaded.set(id, parsed as VoiceCharacter);
      } catch (err) {
        console.warn(`[kith] failed to load character ${file}:`, err);
      }
    }

    this.characters = loaded;
    console.log(`[kith] loaded ${loaded.size} character(s): ${[...loaded.keys()].join(", ")}`);
  }

  /** Return the profile registered under `id`, or `undefined` when unknown. */
  get(id: string): VoiceCharacter | undefined {
    return this.characters.get(id);
  }

  /** Summarise every loaded character as `{ id, personaMode }`. */
  list(): { id: string; personaMode?: string }[] {
    return [...this.characters.entries()].map(([id, c]) => ({
      id,
      personaMode: c.personaMode,
    }));
  }

  /** Whether a character with this id has been loaded. */
  has(id: string): boolean {
    return this.characters.has(id);
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @kithjs/server — Standalone voice microservice for Kith.
|
|
3
|
+
*
|
|
4
|
+
* POST text, get streaming audio. Framework agnostic.
|
|
5
|
+
*
|
|
6
|
+
* Quick start:
|
|
7
|
+
* ELEVENLABS_API_KEY=sk_... bun @kithjs/server
|
|
8
|
+
*
|
|
9
|
+
* Or programmatic:
|
|
10
|
+
* import { KithServer } from "@kithjs/server";
|
|
11
|
+
* const server = new KithServer({ port: 3040 });
|
|
12
|
+
* await server.start();
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
export { KithServer, type KithServerOptions } from "./server.ts";
|
|
16
|
+
export { SessionManager, type Session, type SessionConfig } from "./session.ts";
|
|
17
|
+
export { CharacterRegistry } from "./character-registry.ts";
|
|
18
|
+
|
|
19
|
+
// When this file is the program entry point (rather than being imported as a
// library), boot a server configured purely from environment variables.
if (import.meta.main) {
  const serverModule = await import("./server.ts");
  await new serverModule.KithServer().start();
}
|
package/src/server.ts
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KithServer — standalone voice microservice.
|
|
3
|
+
*
|
|
4
|
+
* Framework-agnostic HTTP/WebSocket server. POST text, get streaming audio.
|
|
5
|
+
*
|
|
6
|
+
* Endpoints:
|
|
7
|
+
* POST /sessions — create session { characterId? } → { sessionId, wsUrl }
|
|
8
|
+
* DELETE /sessions/:id — destroy session
|
|
9
|
+
* POST /sessions/:id/speak — send text { text } → audio via WS/SSE
|
|
10
|
+
* POST /sessions/:id/barge-in — stop current TTS
|
|
11
|
+
* GET /sessions/:id/events — SSE stream (fallback)
|
|
12
|
+
* WS /ws?sessionId=xxx — full duplex WebSocket
|
|
13
|
+
* GET /characters — list character profiles
|
|
14
|
+
* GET /health — { ok, sessions, uptime }
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import path from "node:path";
|
|
18
|
+
import type { ServerWebSocket } from "bun";
|
|
19
|
+
|
|
20
|
+
import { CharacterRegistry } from "./character-registry.ts";
|
|
21
|
+
import { SessionManager, type SessionConfig } from "./session.ts";
|
|
22
|
+
|
|
23
|
+
/**
 * Options accepted by the `KithServer` constructor. Every field is optional;
 * the constructor resolves a missing field from the environment variable
 * named below and then from the stated default.
 */
export interface KithServerOptions {
  /** Port to listen on. Env: `PORT`. Default: 3040. */
  port?: number;
  /** Directory of character JSON files. Env: `KITH_CHARACTER_DIR`. Default: `./characters`. */
  characterDir?: string;
  /**
   * Character used for sessions created implicitly on WebSocket open.
   * Env: `KITH_DEFAULT_CHARACTER`. Default: empty (no character).
   */
  defaultCharacterId?: string;
  /**
   * Python interpreter handed to the Pipecat runtime. Env: `PIPECAT_PYTHON_PATH`.
   * Default: the `.venv` interpreter inside the installed `@kithjs/runtime-pipecat` package.
   */
  pythonPath?: string;
  /**
   * Working directory for the Python runtime. Env: `PIPECAT_PYTHON_CWD`.
   * Default: the `python` directory of the installed `@kithjs/runtime-pipecat` package.
   */
  pythonCwd?: string;
  /** Value used for `Access-Control-Allow-Origin`. Env: `KITH_CORS`. Default: `*`. */
  corsOrigins?: string;
  /** Upper bound on concurrently open sessions. Default: 100. */
  maxSessions?: number;
  /** Pipeline name placed in the runtime config. Env: `KITH_PIPELINE`. Default: `elevenlabs`. */
  pipeline?: string;
  /**
   * ElevenLabs API key. Env: `ELEVENLABS_API_KEY`. Required: when it resolves
   * to an empty string the constructor logs an error and exits the process
   * with code 2.
   */
  apiKey?: string;
  /** ElevenLabs voice id. Env: `ELEVENLABS_VOICE_ID`. Default: empty. */
  voiceId?: string;
  /** ElevenLabs model id. Env: `ELEVENLABS_MODEL_ID`. Default: `eleven_v3`. */
  modelId?: string;
  /**
   * Extra keys merged into every session's pipeline config. Spread after
   * `pipeline`, so a `pipeline` key here takes precedence over the option above.
   */
  pipelineConfig?: Record<string, unknown>;
}
|
|
37
|
+
|
|
38
|
+
/** Per-connection payload attached to each WebSocket at upgrade time. */
interface WsData {
  /** Session this socket belongs to (taken from `?sessionId=` or generated). */
  sessionId: string;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Standalone voice microservice: a Bun HTTP/WebSocket server that owns a
 * {@link CharacterRegistry} and a {@link SessionManager} and exposes the
 * routes listed in this file's header comment.
 */
export class KithServer {
  private options: Required<KithServerOptions>;
  private characters: CharacterRegistry;
  private sessions: SessionManager;
  private server: ReturnType<typeof Bun.serve> | null = null;
  private startedAt = 0;

  /**
   * Resolve options (explicit option, then environment variable, then default)
   * and build the character registry and session manager. Nothing is bound
   * until {@link start} is called.
   *
   * NOTE(review): a missing API key terminates the whole process with exit
   * code 2 rather than throwing, which is surprising for programmatic use.
   * Kept as-is because changing it would alter the exit status that callers
   * of the CLI entry point observe.
   */
  constructor(opts: KithServerOptions = {}) {
    // Directory of this source file; the default Python paths are resolved
    // relative to it.
    const root = path.dirname(Bun.fileURLToPath(import.meta.url));

    this.options = {
      port: opts.port ?? Number(process.env.PORT ?? 3040),
      characterDir: opts.characterDir ?? process.env.KITH_CHARACTER_DIR ?? "./characters",
      defaultCharacterId: opts.defaultCharacterId ?? process.env.KITH_DEFAULT_CHARACTER ?? "",
      pythonPath:
        opts.pythonPath ??
        process.env.PIPECAT_PYTHON_PATH ??
        path.resolve(root, "../node_modules/@kithjs/runtime-pipecat/python/.venv/bin/python"),
      pythonCwd:
        opts.pythonCwd ??
        process.env.PIPECAT_PYTHON_CWD ??
        path.resolve(root, "../node_modules/@kithjs/runtime-pipecat/python"),
      corsOrigins: opts.corsOrigins ?? process.env.KITH_CORS ?? "*",
      maxSessions: opts.maxSessions ?? 100,
      pipeline: opts.pipeline ?? process.env.KITH_PIPELINE ?? "elevenlabs",
      apiKey: opts.apiKey ?? process.env.ELEVENLABS_API_KEY ?? "",
      voiceId: opts.voiceId ?? process.env.ELEVENLABS_VOICE_ID ?? "",
      modelId: opts.modelId ?? process.env.ELEVENLABS_MODEL_ID ?? "eleven_v3",
      pipelineConfig: opts.pipelineConfig ?? {},
    };

    if (!this.options.apiKey) {
      console.error("[kith] ELEVENLABS_API_KEY is required. Set it via env or options.");
      process.exit(2);
    }

    this.characters = new CharacterRegistry(this.options.characterDir);

    const sessionConfig: SessionConfig = {
      pythonPath: this.options.pythonPath,
      pythonCwd: this.options.pythonCwd,
      apiKey: this.options.apiKey,
      voiceId: this.options.voiceId,
      modelId: this.options.modelId,
      pipelineConfig: {
        pipeline: this.options.pipeline,
        ...this.options.pipelineConfig,
      },
    };

    this.sessions = new SessionManager(sessionConfig, this.options.maxSessions);
  }

  /** Load character profiles, then start listening on the configured port. */
  async start(): Promise<void> {
    await this.characters.load();
    this.startedAt = Date.now();

    this.server = Bun.serve<WsData>({
      port: this.options.port,
      fetch: (req, server) => this.handleRequest(req, server),
      websocket: {
        open: (ws: ServerWebSocket<WsData>) => this.onWsOpen(ws),
        message: (ws: ServerWebSocket<WsData>, raw) => this.onWsMessage(ws, String(raw)),
        close: (ws: ServerWebSocket<WsData>) => this.onWsClose(ws),
      },
    });

    console.log(`[kith] server listening on http://localhost:${this.server.port}`);
  }

  /**
   * Stop accepting connections and tear down every live session so their
   * runtimes are disconnected instead of being left running.
   */
  async stop(): Promise<void> {
    this.server?.stop();
    this.server = null;

    const { ids } = this.sessions.stats();
    await Promise.allSettled(
      ids.map(async (id) => {
        try {
          // 1001 = "going away"; lets clients distinguish shutdown from failure.
          this.sessions.get(id)?.ws?.close(1001, "server shutting down");
        } catch {
          // socket already closed
        }
        await this.sessions.destroy(id);
      }),
    );
  }

  /** WebSocket `open`: attach to an existing session or create one on the fly. */
  private async onWsOpen(ws: ServerWebSocket<WsData>): Promise<void> {
    const { sessionId } = ws.data;
    console.log(`[kith] ws open session=${sessionId}`);

    if (this.sessions.has(sessionId)) {
      this.sessions.attachWs(sessionId, ws);
      ws.send(JSON.stringify({ type: "_ready", sessionId }));
      return;
    }

    try {
      const charId = this.options.defaultCharacterId;
      const character = charId ? this.characters.get(charId) : undefined;
      await this.sessions.create(sessionId, character, ws);

      // The client may have disconnected while the session was being created.
      // Its close handler ran before the session existed, so clean up here or
      // the session would never be destroyed. readyState 1 === OPEN.
      if (ws.readyState !== 1) {
        await this.sessions.destroy(sessionId);
        return;
      }
      ws.send(JSON.stringify({ type: "_ready", sessionId }));
    } catch (err) {
      console.error(`[kith] session create failed:`, err);
      ws.close(1011, "session create failed");
    }
  }

  /**
   * WebSocket `message`: handles `{ type: "speak", text }` and
   * `{ type: "barge-in" }`. Anything else, including non-JSON or non-object
   * payloads, is ignored.
   */
  private async onWsMessage(ws: ServerWebSocket<WsData>, raw: string): Promise<void> {
    const { sessionId } = ws.data;
    const session = this.sessions.get(sessionId);
    if (!session) return;

    let msg: { type?: unknown; text?: unknown } | null;
    try {
      msg = JSON.parse(raw);
    } catch {
      return;
    }
    // JSON such as `null` or `42` parses fine but has no `type` to dispatch on.
    if (typeof msg !== "object" || msg === null) return;

    if (msg.type === "speak" && typeof msg.text === "string") {
      try {
        await session.voice.speak(msg.text);
      } catch (err) {
        console.error(`[kith] speak failed session=${sessionId}:`, err);
      }
    } else if (msg.type === "barge-in") {
      try {
        await session.runtime.bargeIn();
      } catch (err) {
        console.error(`[kith] barge-in failed session=${sessionId}:`, err);
      }
    }
  }

  /** WebSocket `close`: the session's lifetime is tied to its socket. */
  private async onWsClose(ws: ServerWebSocket<WsData>): Promise<void> {
    const { sessionId } = ws.data;
    console.log(`[kith] ws close session=${sessionId}`);
    await this.sessions.destroy(sessionId);
  }

  /**
   * Route one HTTP request.
   *
   * @returns the response, or `undefined` when the request was upgraded to a
   *          WebSocket (Bun then takes over the connection).
   */
  private async handleRequest(
    req: Request,
    server: { upgrade: (req: Request, opts: any) => boolean },
  ): Promise<Response | undefined> {
    const url = new URL(req.url);
    const cors = this.corsHeaders(req.headers.get("origin"));
    const { pathname } = url;
    const { method } = req;

    // CORS preflight
    if (method === "OPTIONS") {
      return new Response(null, { status: 204, headers: cors });
    }

    // WebSocket upgrade
    if (pathname === "/ws") {
      // `||` (not `??`) so an empty `?sessionId=` also gets a fresh id instead
      // of every such client sharing the "" session.
      const sessionId = url.searchParams.get("sessionId") || crypto.randomUUID();
      const data: WsData = { sessionId };
      if (server.upgrade(req, { data })) return undefined;
      return this.json({ error: "WebSocket upgrade failed" }, 500, cors);
    }

    // POST /sessions
    if (pathname === "/sessions" && method === "POST") {
      return this.routeCreateSession(req, url, cors);
    }

    // /sessions/:id and /sessions/:id/{speak,barge-in,events}
    const sessionRoute = /^\/sessions\/([^/]+)(?:\/(speak|barge-in|events))?$/.exec(pathname);
    if (sessionRoute) {
      const id = sessionRoute[1] ?? "";
      const action = sessionRoute[2];

      if (action === undefined && method === "DELETE") {
        await this.sessions.destroy(id);
        return this.json({ ok: true }, 200, cors);
      }
      if (action === "speak" && method === "POST") return this.routeSpeak(req, id, cors);
      if (action === "barge-in" && method === "POST") return this.routeBargeIn(id, cors);
      if (action === "events" && method === "GET") return this.routeEvents(id, cors);
    }

    // GET /characters
    if (pathname === "/characters" && method === "GET") {
      return this.json(this.characters.list(), 200, cors);
    }

    // /health (any method)
    if (pathname === "/health") {
      return this.json(
        {
          ok: true,
          sessions: this.sessions.stats().count,
          uptime: (Date.now() - this.startedAt) / 1000,
        },
        200,
        cors,
      );
    }

    // Anything else gets a small identification document.
    return this.json({ name: "@kithjs/server", docs: "https://kith.weshuber.com" }, 200, cors);
  }

  /** `POST /sessions` — create a session, optionally bound to `characterId`. */
  private async routeCreateSession(
    req: Request,
    url: URL,
    cors: Record<string, string>,
  ): Promise<Response> {
    try {
      // A missing or malformed body simply means "no character requested".
      const body = (await req.json().catch(() => ({}))) as { characterId?: string } | null;
      const characterId = body?.characterId;
      const sessionId = crypto.randomUUID();
      const character = characterId ? this.characters.get(characterId) : undefined;
      await this.sessions.create(sessionId, character);

      // Build the socket URL from the address the client actually used, so it
      // is still correct behind port mappings, remote hosts and TLS.
      const wsProtocol = url.protocol === "https:" ? "wss:" : "ws:";
      return this.json(
        {
          sessionId,
          wsUrl: `${wsProtocol}//${url.host}/ws?sessionId=${sessionId}`,
        },
        201,
        cors,
      );
    } catch (err) {
      return this.json({ error: this.errorMessage(err) }, 500, cors);
    }
  }

  /**
   * `POST /sessions/:id/speak` — queue text for synthesis. Responds as soon as
   * the request is accepted; audio and events arrive over WS/SSE.
   */
  private async routeSpeak(
    req: Request,
    id: string,
    cors: Record<string, string>,
  ): Promise<Response> {
    const session = this.sessions.get(id);
    if (!session) return this.json({ error: "session not found" }, 404, cors);

    let body: unknown;
    try {
      body = await req.json();
    } catch {
      // A malformed body is the client's fault, not a server error.
      return this.json({ error: "invalid JSON body" }, 400, cors);
    }

    const text = (body as { text?: unknown } | null)?.text;
    if (typeof text !== "string" || text.length === 0) {
      return this.json({ error: "text is required" }, 400, cors);
    }

    try {
      // Fire-and-forget: synthesis failures are logged, not returned.
      void session.voice.speak(text).catch((err) => {
        console.error(`[kith] speak failed:`, err);
      });
      return this.json({ ok: true, sessionId: id }, 200, cors);
    } catch (err) {
      return this.json({ error: this.errorMessage(err) }, 500, cors);
    }
  }

  /** `POST /sessions/:id/barge-in` — interrupt the session's current speech. */
  private async routeBargeIn(id: string, cors: Record<string, string>): Promise<Response> {
    const session = this.sessions.get(id);
    if (!session) return this.json({ error: "session not found" }, 404, cors);
    try {
      await session.runtime.bargeIn();
      return this.json({ ok: true }, 200, cors);
    } catch (err) {
      return this.json({ error: this.errorMessage(err) }, 500, cors);
    }
  }

  /**
   * `GET /sessions/:id/events` — server-sent-events stream of the session's
   * voice events. Buffered events are flushed first.
   */
  private routeEvents(id: string, cors: Record<string, string>): Response {
    const session = this.sessions.get(id);
    if (!session) return this.json({ error: "session not found" }, 404, cors);

    const encoder = new TextEncoder();
    const frame = (event: unknown): Uint8Array =>
      encoder.encode(`data: ${JSON.stringify(event)}\n\n`);
    let unsubscribe: (() => void) | undefined;

    const stream = new ReadableStream<Uint8Array>({
      start(controller) {
        unsubscribe = session.voice.on((event) => {
          try {
            controller.enqueue(frame(event));
          } catch {
            // The stream was closed underneath us; stop listening.
            unsubscribe?.();
          }
        });

        // Flush buffered events
        for (const event of session.eventBuffer) {
          controller.enqueue(frame(event));
        }
        session.eventBuffer.length = 0;
      },
      cancel() {
        // Client went away: drop the listener now instead of waiting for a
        // later enqueue to fail.
        unsubscribe?.();
      },
    });

    return new Response(stream, {
      headers: {
        ...cors,
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive",
      },
    });
  }

  /** Serialise `data` as a JSON response carrying the given extra headers. */
  private json(data: unknown, status: number, headers: Record<string, string> = {}): Response {
    return Response.json(data, {
      status,
      headers: { ...headers, "Content-Type": "application/json" },
    });
  }

  /**
   * CORS headers for a response.
   *
   * A single configured value (including `*`) is sent verbatim. A
   * comma-separated list is not a valid `Access-Control-Allow-Origin` value,
   * so the request's `Origin` is echoed only when it appears in the list, and
   * `Vary: Origin` is added.
   *
   * @param requestOrigin the request's `Origin` header, if any.
   */
  private corsHeaders(requestOrigin?: string | null): Record<string, string> {
    const headers: Record<string, string> = {
      "Access-Control-Allow-Methods": "GET, POST, DELETE, OPTIONS",
      "Access-Control-Allow-Headers": "Content-Type, Authorization, x-kith-session",
    };

    const configured = this.options.corsOrigins;
    if (!configured.includes(",")) {
      headers["Access-Control-Allow-Origin"] = configured;
      return headers;
    }

    const allowed = configured
      .split(",")
      .map((origin) => origin.trim())
      .filter((origin) => origin.length > 0);
    headers["Vary"] = "Origin";
    if (allowed.includes("*")) {
      headers["Access-Control-Allow-Origin"] = "*";
    } else if (requestOrigin && allowed.includes(requestOrigin)) {
      headers["Access-Control-Allow-Origin"] = requestOrigin;
    }
    return headers;
  }

  /** Best-effort human-readable message for a caught value of unknown type. */
  private errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
|
package/src/session.ts
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session manager — tracks per-client voice sessions.
|
|
3
|
+
*
|
|
4
|
+
* Each session owns a PipecatRuntime + VoiceRouter pair. Sessions can be
|
|
5
|
+
* created via HTTP (before a WebSocket connects) or on WS open.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { KithEvent } from "@kithjs/core";
|
|
9
|
+
import { PipecatRuntime } from "@kithjs/runtime-pipecat";
|
|
10
|
+
import { InMemoryObservability, consoleExporter } from "@kithjs/observability";
|
|
11
|
+
import {
|
|
12
|
+
DEFAULT_BOARD_SPORTS_SLANG,
|
|
13
|
+
DEFAULT_ENGLISH_SLANG,
|
|
14
|
+
DEFAULT_GENZ_SLANG,
|
|
15
|
+
DEFAULT_LAUGH_TAGS,
|
|
16
|
+
VoiceRouter,
|
|
17
|
+
voiceCharacterToRuntimeConfig,
|
|
18
|
+
type VoiceCharacter,
|
|
19
|
+
} from "@kithjs/voice-router";
|
|
20
|
+
|
|
21
|
+
import type { ServerWebSocket } from "bun";
|
|
22
|
+
|
|
23
|
+
/** Settings shared by every session a `SessionManager` creates. */
export interface SessionConfig {
  /** Python interpreter passed to the Pipecat runtime as `pythonPath`. */
  pythonPath: string;
  /** Working directory passed to the Pipecat runtime as `cwd`. */
  pythonCwd: string;
  /** API key placed in the runtime config. */
  apiKey: string;
  /** Voice id placed in the runtime config. */
  voiceId: string;
  /** Model id placed in the runtime config. */
  modelId: string;
  /**
   * Extra runtime config keys, spread over the fields above. Its `pipeline`
   * key selects the pipeline (default `"elevenlabs"`).
   */
  pipelineConfig?: Record<string, unknown>;
}
|
|
31
|
+
|
|
32
|
+
/** Everything owned by one client voice session. */
export interface Session {
  /** Session identifier, also the key in the manager's map. */
  id: string;
  /** Pipecat runtime connected for this session. */
  runtime: PipecatRuntime;
  /** Voice router wrapping `runtime`. */
  voice: VoiceRouter;
  /** Per-session observability sink. */
  obs: InMemoryObservability;
  /** Removes the event listener registered when the session was created. */
  unsubscribe: () => void;
  /** Attached WebSocket, or `null` while none is connected. */
  ws: ServerWebSocket<{ sessionId: string }> | null;
  /** Character profile the session was created with, if any. */
  character: VoiceCharacter | undefined;
  /**
   * Events queued for later delivery; drained when a WebSocket is attached or
   * an SSE stream is opened.
   */
  eventBuffer: KithEvent[];
  /** Creation time as a `Date.now()` timestamp (milliseconds). */
  createdAt: number;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Clean AI-generated text for natural TTS output.
 *
 * Applied in order:
 *  1. strip Markdown emphasis markers (`*x*`, `**x**`, `***x***`), keeping the text;
 *  2. replace Markdown links `[label](url)` with their label;
 *  3. collapse runs of `!`, `?`, `.` to the last character of the run;
 *  4. shorten any letter repeated four or more times to two (`soooo` → `soo`);
 *  5. drop emoji shortcodes such as `:smile:`, `:+1:`, `:8ball:`,
 *     `:thumbsup_tone2:`.
 *
 * @param text raw text from the model.
 * @returns the cleaned text; whitespace around removed tokens is left as is.
 */
function cleanForTTS(text: string): string {
  return (
    text
      .replace(/\*{1,3}([^*]+)\*{1,3}/g, "$1")
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
      .replace(/([!?.]){2,}/g, "$1")
      .replace(/([a-z])\1{3,}/gi, "$1$1")
      // Shortcodes may contain digits, `+` and `-`. The lookahead rejects
      // purely numeric bodies so clock times like `10:30:45` stay intact.
      .replace(/:(?!\d+:)[a-z0-9_+-]+:/g, "")
  );
}
|
|
54
|
+
|
|
55
|
+
/**
 * Tracks per-client voice sessions. Each session owns a PipecatRuntime and a
 * VoiceRouter; sessions can be created before a WebSocket exists and have one
 * attached later via {@link SessionManager.attachWs}.
 */
export class SessionManager {
  /** Cap on events retained per session while no WebSocket is attached. */
  private static readonly maxBufferedEvents = 200;

  private sessions = new Map<string, Session>();
  /** Ids whose `create()` call is still awaiting the runtime connection. */
  private pending = new Set<string>();
  private config: SessionConfig;
  private maxSessions: number;
  // NOTE(review): not read anywhere in this class. Presumably intended as a
  // time-to-live for buffered events; confirm before relying on it.
  private bufferTtlMs = 30_000;

  /**
   * @param config      settings applied to every session.
   * @param maxSessions maximum number of sessions alive at the same time.
   */
  constructor(config: SessionConfig, maxSessions = 100) {
    this.config = config;
    this.maxSessions = maxSessions;
  }

  /**
   * Create and register a session.
   *
   * @param sessionId unique id for the new session.
   * @param character optional character profile (voice settings and slang).
   * @param ws        WebSocket to deliver events to, if one is already open.
   * @throws Error when `sessionId` is already in use (replacing the entry
   *         would leave the previous runtime connected with nothing left to
   *         disconnect it) or when the session limit is reached.
   */
  async create(
    sessionId: string,
    character?: VoiceCharacter,
    ws?: ServerWebSocket<{ sessionId: string }> | null,
  ): Promise<Session> {
    if (this.sessions.has(sessionId) || this.pending.has(sessionId)) {
      throw new Error(`session already exists: ${sessionId}`);
    }
    // Count in-flight creations too; otherwise concurrent calls could all pass
    // the check while awaiting connect() and overshoot the limit.
    if (this.sessions.size + this.pending.size >= this.maxSessions) {
      throw new Error(`max sessions reached (${this.maxSessions})`);
    }

    this.pending.add(sessionId);
    try {
      const session = await this.build(sessionId, character, ws ?? null);
      this.sessions.set(sessionId, session);
      return session;
    } finally {
      this.pending.delete(sessionId);
    }
  }

  /** Connect a runtime and wire up the router and event delivery for one session. */
  private async build(
    sessionId: string,
    character: VoiceCharacter | undefined,
    ws: ServerWebSocket<{ sessionId: string }> | null,
  ): Promise<Session> {
    const obs = new InMemoryObservability();
    obs.onRecord(consoleExporter);

    // Later spreads win: pipelineConfig over the base fields, character
    // settings over both, and the output format is always forced last.
    const runtimeConfig: Record<string, unknown> = {
      pipeline: this.config.pipelineConfig?.pipeline ?? "elevenlabs",
      apiKey: this.config.apiKey,
      voiceId: this.config.voiceId,
      modelId: this.config.modelId,
      ...(this.config.pipelineConfig ?? {}),
      ...(character ? voiceCharacterToRuntimeConfig(character) : {}),
      outputFormat: "mp3_44100_128",
    };

    const runtime = new PipecatRuntime({
      pythonPath: this.config.pythonPath,
      cwd: this.config.pythonCwd,
      observability: obs,
      config: runtimeConfig,
    });

    await runtime.connect({ sessionId });

    try {
      // Built-in dictionaries first so character-specific slang can override them.
      const slang = {
        ...DEFAULT_ENGLISH_SLANG,
        ...DEFAULT_GENZ_SLANG,
        ...DEFAULT_BOARD_SPORTS_SLANG,
        ...DEFAULT_LAUGH_TAGS,
        ...(character?.slang ?? {}),
      };

      const voice = new VoiceRouter({
        runtime,
        character,
        slang,
        transforms: [cleanForTTS],
      });

      const session: Session = {
        id: sessionId,
        runtime,
        voice,
        obs,
        unsubscribe: () => {},
        ws,
        character,
        eventBuffer: [],
        createdAt: Date.now(),
      };

      session.unsubscribe = voice.on((event: KithEvent) => this.deliver(session, event));
      return session;
    } catch (err) {
      // Do not leave a connected runtime behind if wiring fails.
      try {
        await runtime.disconnect();
      } catch {
        // best effort
      }
      throw err;
    }
  }

  /**
   * Send one event to the session's socket, or buffer it when none is attached.
   *
   * Reads `session.ws` at delivery time, so a socket attached after creation
   * (see {@link SessionManager.attachWs}) receives live events.
   */
  private deliver(session: Session, event: KithEvent): void {
    const socket = session.ws;
    if (socket) {
      try {
        socket.send(JSON.stringify(event));
      } catch {
        // ws closed
      }
      return;
    }

    // Buffer events until a WS/SSE client connects, dropping the oldest
    // once the cap is exceeded.
    session.eventBuffer.push(event);
    if (session.eventBuffer.length > SessionManager.maxBufferedEvents) {
      session.eventBuffer.shift();
    }
  }

  /** Look up a session by id. */
  get(id: string): Session | undefined {
    return this.sessions.get(id);
  }

  /** Whether a session with this id is registered. */
  has(id: string): boolean {
    return this.sessions.has(id);
  }

  /**
   * Attach a WebSocket to an existing session and flush its buffered events.
   *
   * @returns `false` when the session does not exist, `true` otherwise.
   */
  attachWs(sessionId: string, ws: ServerWebSocket<{ sessionId: string }>): boolean {
    const session = this.sessions.get(sessionId);
    if (!session) return false;
    session.ws = ws;

    // Flush buffered events; stop at the first failed send (socket is gone).
    for (const event of session.eventBuffer) {
      try {
        ws.send(JSON.stringify(event));
      } catch {
        break;
      }
    }
    session.eventBuffer.length = 0;

    return true;
  }

  /**
   * Unregister a session and release its resources. Unknown ids are a no-op.
   * Never rejects: teardown failures are logged.
   */
  async destroy(sessionId: string): Promise<void> {
    const session = this.sessions.get(sessionId);
    if (!session) return;
    // Remove first so a concurrent destroy() for the same id becomes a no-op.
    this.sessions.delete(sessionId);

    try {
      session.unsubscribe();
      session.voice.destroy();
    } catch (err) {
      // Keep going: the runtime must still be disconnected below.
      console.error(`[kith] listener teardown failed session=${sessionId}:`, err);
    }

    try {
      await session.runtime.disconnect();
    } catch (err) {
      console.error(`[kith] disconnect failed session=${sessionId}:`, err);
    }
    console.log(`[kith] session torn down: ${sessionId}`);
  }

  /** Number of registered sessions and their ids. */
  stats(): { count: number; ids: string[] } {
    return {
      count: this.sessions.size,
      ids: [...this.sessions.keys()],
    };
  }
}
|