@crewhaus/target-voice 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +43 -0
- package/src/index.test.ts +88 -0
- package/src/index.ts +195 -0
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@crewhaus/target-voice",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Codegen for the VOICE target — emits a realtime daemon (Section 24 VOICE)",
|
|
6
|
+
"main": "src/index.ts",
|
|
7
|
+
"types": "src/index.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": "./src/index.ts"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"test": "bun test src"
|
|
13
|
+
},
|
|
14
|
+
"dependencies": {
|
|
15
|
+
"@crewhaus/errors": "0.0.0",
|
|
16
|
+
"@crewhaus/infra-utils": "0.0.0",
|
|
17
|
+
"@crewhaus/ir": "0.0.0"
|
|
18
|
+
},
|
|
19
|
+
"license": "Apache-2.0",
|
|
20
|
+
"author": {
|
|
21
|
+
"name": "Max Meier",
|
|
22
|
+
"email": "max@studiomax.io",
|
|
23
|
+
"url": "https://studiomax.io"
|
|
24
|
+
},
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "git+https://github.com/crewhaus/factory.git",
|
|
28
|
+
"directory": "packages/target-voice"
|
|
29
|
+
},
|
|
30
|
+
"homepage": "https://github.com/crewhaus/factory/tree/main/packages/target-voice#readme",
|
|
31
|
+
"bugs": {
|
|
32
|
+
"url": "https://github.com/crewhaus/factory/issues"
|
|
33
|
+
},
|
|
34
|
+
"publishConfig": {
|
|
35
|
+
"access": "restricted"
|
|
36
|
+
},
|
|
37
|
+
"files": [
|
|
38
|
+
"src",
|
|
39
|
+
"README.md",
|
|
40
|
+
"LICENSE",
|
|
41
|
+
"NOTICE"
|
|
42
|
+
]
|
|
43
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import type { IrVoiceV0 } from "@crewhaus/ir";
|
|
3
|
+
import { TargetEmitError, emitVoice } from "./index.js";
|
|
4
|
+
|
|
5
|
+
// Minimal VOICE IR fixture shared by every case below; individual tests
// spread it and override only the field under test.
const baseIr: IrVoiceV0 = {
  version: 0,
  name: "hello-voice",
  target: "voice",
  agent: { model: "gpt-4o-realtime-preview", instructions: "be brief" },
  voice: {
    provider: "openai",
    voiceId: "alloy",
    vad: "server",
    bargeInTriggerFrames: 4,
    bargeInWindowMs: 200,
  },
  tools: [],
  toolConfigs: Object.freeze({}),
  mcp_servers: Object.freeze({}),
  permissions: { rules: [] },
  compaction: {},
};

/** Emits the bundle for `ir` and returns the content of `path` ("" when the file is absent). */
function emittedSource(ir: IrVoiceV0, path: string): string {
  const match = emitVoice(ir).files.find((file) => file.path === path);
  return match?.content ?? "";
}

/** Asserts, in order, that `code` contains every entry of `needles`. */
function expectContainsAll(code: string, needles: readonly string[]): void {
  for (const needle of needles) {
    expect(code).toContain(needle);
  }
}

describe("emitVoice", () => {
  test("emits agent.ts + voice-loop.ts + daemon.ts (T1 bundle structure)", () => {
    const paths = emitVoice(baseIr).files.map(({ path }) => path);
    paths.sort();
    expect(paths).toEqual(["agent.ts", "daemon.ts", "voice-loop.ts"]);
  });

  test("agent.ts exports AGENT_CONFIG with the spec values inlined", () => {
    expectContainsAll(emittedSource(baseIr, "agent.ts"), [
      "AGENT_CONFIG",
      '"hello-voice"',
      '"gpt-4o-realtime-preview"',
      '"alloy"',
      "bargeInTriggerFrames: 4",
    ]);
  });

  test("voice-loop.ts wires barge-in-controller + vad-engine + voice-runtime", () => {
    expectContainsAll(emittedSource(baseIr, "voice-loop.ts"), [
      "@crewhaus/barge-in-controller",
      "@crewhaus/voice-runtime",
      "@crewhaus/vad-engine",
      "createBargeInController",
      "createRealtimeAdapter",
      "frames30ms",
    ]);
  });

  test("daemon.ts has a --smoke <pcm-path> path that emits voice_event JSON lines", () => {
    expectContainsAll(emittedSource(baseIr, "daemon.ts"), [
      '"--smoke"',
      '"voice_event"',
      '"smoke_start"',
      '"smoke_done"',
      "loadPcm",
    ]);
  });

  test("vapi provider is forwarded to the loop verbatim", () => {
    const vapiIr: IrVoiceV0 = {
      ...baseIr,
      voice: { ...baseIr.voice, provider: "vapi" },
    };
    expect(emittedSource(vapiIr, "agent.ts")).toContain('provider: "vapi"');
  });

  test("telephony block (when present) is wired into AGENT_CONFIG", () => {
    const telephonyIr: IrVoiceV0 = {
      ...baseIr,
      telephony: { provider: "twilio" },
    };
    expect(emittedSource(telephonyIr, "agent.ts")).toContain('telephony: { provider: "twilio" }');
  });

  test("absent telephony block yields telephony: undefined", () => {
    expect(emittedSource(baseIr, "agent.ts")).toContain("telephony: undefined");
  });

  test("TargetEmitError type is exported", () => {
    expect(TargetEmitError).toBeDefined();
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Catalog F2 `target-voice` — Section 24 VOICE.
|
|
3
|
+
*
|
|
4
|
+
* Codegen for the realtime voice target. Emits three files:
|
|
5
|
+
*
|
|
6
|
+
* - `voice-loop.ts` — the per-call audio I/O loop. Wires the
|
|
7
|
+
* RealtimeAdapter, vad-engine + barge-in-controller, and an event
|
|
8
|
+
* emitter that forwards transcripts/audio/tool-uses to the daemon.
|
|
9
|
+
* - `agent.ts` — config wrapper exporting the spec's model +
|
|
10
|
+
* instructions + voice block + optional telephony (tool list not emitted).
|
|
11
|
+
* - `daemon.ts` — entrypoint. Either runs a one-shot smoke handler
|
|
12
|
+
* (when invoked with `--smoke <pcm-path>` it pumps a PCM file in
|
|
13
|
+
* and emits one JSON event per stdout line; useful for the smoke
|
|
14
|
+
* test) or boots a long-running WebSocket bridge (placeholder; v0
|
|
15
|
+
* ships the smoke path, full bridge lands in a follow-up).
|
|
16
|
+
*
|
|
17
|
+
* The emitted bundle is callable headless (no microphone needed) — the
|
|
18
|
+
* daemon reads PCM from a file path (`--smoke <pcm-path>`), so the VOICE smoke
|
|
19
|
+
* doesn't require a browser.
|
|
20
|
+
*/
|
|
21
|
+
import { CrewhausError } from "@crewhaus/errors";
|
|
22
|
+
import { escapeJsonString } from "@crewhaus/infra-utils";
|
|
23
|
+
import type { Bundle, IrVoiceV0 } from "@crewhaus/ir";
|
|
24
|
+
|
|
25
|
+
/**
 * Error type exported by the VOICE target emitter.
 *
 * The constructor forwards the fixed string "compiler" together with the
 * caller's message and optional cause to the `CrewhausError` constructor.
 * NOTE(review): "compiler" is presumably an error category/source tag —
 * confirm against `@crewhaus/errors`.
 */
export class TargetEmitError extends CrewhausError {
  /** Stable error name; overrides the base class `name`. */
  override readonly name = "TargetEmitError" as const;

  /**
   * @param message Human-readable description of the failure.
   * @param cause Optional underlying value that triggered this error.
   */
  constructor(message: string, cause?: unknown) {
    super("compiler", message, cause);
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds the generated-source bundle for a VOICE spec.
 *
 * Renders three files from `ir`, in this order: `agent.ts`,
 * `voice-loop.ts`, `daemon.ts`.
 *
 * @param ir The VOICE intermediate representation to generate code from.
 * @returns A bundle whose `files` array holds one `{ path, content }` entry per rendered file.
 */
export function emitVoice(ir: IrVoiceV0): Bundle {
  const agentSource = renderAgent(ir);
  const voiceLoopSource = renderVoiceLoop(ir);
  const daemonSource = renderDaemon(ir);

  const bundle: Bundle = {
    files: [
      { path: "agent.ts", content: agentSource },
      { path: "voice-loop.ts", content: voiceLoopSource },
      { path: "daemon.ts", content: daemonSource },
    ],
  };
  return bundle;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Renders `agent.ts`: a module exporting `AGENT_CONFIG`, an `as const`
 * object holding the spec's name, model, instructions, voice block and
 * (when present) telephony provider.
 *
 * String values are embedded through `escapeJsonString`; the two barge-in
 * numbers are interpolated as-is.
 *
 * @param ir The VOICE intermediate representation.
 * @returns The TypeScript source text of the generated `agent.ts`.
 */
function renderAgent(ir: IrVoiceV0): string {
  // The spec name lands inside a `//` line comment. Any ECMAScript line
  // terminator in it would end the comment and turn the remainder of the
  // name into live code in the generated file, so collapse them to spaces.
  const headerName = String(ir.name).replace(/[\r\n\u2028\u2029]+/g, " ");

  // Absent telephony is emitted as the literal `undefined` so the property
  // always exists on AGENT_CONFIG.
  const telephonyLiteral =
    ir.telephony !== undefined
      ? `{ provider: ${escapeJsonString(ir.telephony.provider)} }`
      : "undefined";

  return `// Generated by crewhaus. DO NOT EDIT.
// Source spec: ${headerName} (target: voice, ir version: ${ir.version}, file: agent.ts)

export const AGENT_CONFIG = {
  name: ${escapeJsonString(ir.name)},
  model: ${escapeJsonString(ir.agent.model)},
  instructions: ${escapeJsonString(ir.agent.instructions)},
  voice: {
    provider: ${escapeJsonString(ir.voice.provider)},
    voiceId: ${escapeJsonString(ir.voice.voiceId)},
    vad: ${escapeJsonString(ir.voice.vad)},
    bargeInTriggerFrames: ${ir.voice.bargeInTriggerFrames},
    bargeInWindowMs: ${ir.voice.bargeInWindowMs},
  },
  telephony: ${telephonyLiteral},
} as const;
`;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Renders `voice-loop.ts`: the per-call audio loop of the generated bundle.
 *
 * The emitted module exports `runOneCall`, which creates a realtime
 * adapter, forwards every adapter event to the caller's listener, frames
 * the inbound PCM, feeds each frame to the barge-in controller and the
 * adapter, commits the input, then waits for a `transcript_final` or
 * `disconnect` event (or the time budget) before tearing down.
 *
 * Compared with a straight-line implementation, the emitted code:
 *  - runs teardown in `finally`, so a throw while connecting or pumping
 *    audio no longer leaks the listener, the controller or the connection;
 *  - unsubscribes the completion listener on the timeout path as well.
 *
 * Only `ir.name` and `ir.version` are read here (for the header comment);
 * all runtime values come from the generated `./agent.js`.
 *
 * @param ir The VOICE intermediate representation.
 * @returns The TypeScript source text of the generated `voice-loop.ts`.
 */
function renderVoiceLoop(ir: IrVoiceV0): string {
  // The spec name lands inside a `//` line comment. Any ECMAScript line
  // terminator in it would end the comment and turn the remainder of the
  // name into live code in the generated file, so collapse them to spaces.
  const headerName = String(ir.name).replace(/[\r\n\u2028\u2029]+/g, " ");

  return `// Generated by crewhaus. DO NOT EDIT.
// Source spec: ${headerName} (target: voice, ir version: ${ir.version}, file: voice-loop.ts)
import { createBargeInController } from "@crewhaus/barge-in-controller";
import { createRealtimeAdapter, type RealtimeEvent } from "@crewhaus/voice-runtime";
import { createVadDetector, frames30ms } from "@crewhaus/vad-engine";
import { AGENT_CONFIG } from "./agent.js";

export type VoiceEventListener = (event: RealtimeEvent | { kind: "barge_in"; speechFrames: number }) => void;

export type VoiceCallOptions = {
  /** PCM 16-bit signed mono buffer at 24kHz; the loop frames + streams it. */
  readonly inboundPcm: Int16Array;
  readonly listener: VoiceEventListener;
  /** Time budget for awaiting the final transcript after the input is committed. Defaults to 60s. */
  readonly maxDurationMs?: number;
};

const DEFAULT_MAX_DURATION_MS = 60_000;
const SAMPLE_RATE_HZ = 24_000;

export async function runOneCall(opts: VoiceCallOptions): Promise<void> {
  const adapter = createRealtimeAdapter(AGENT_CONFIG.voice.provider);
  // Attach the listener BEFORE connect() so the first session.created
  // event (and any error events during the upgrade) are captured.
  const off = adapter.on((ev) => opts.listener(ev));
  let connected = false;
  let bargeIn: ReturnType<typeof createBargeInController> | undefined;

  try {
    await adapter.connect({
      model: AGENT_CONFIG.model,
      instructions: AGENT_CONFIG.instructions,
      voice: AGENT_CONFIG.voice.voiceId,
      vad: AGENT_CONFIG.voice.vad,
    });
    connected = true;

    const detector = createVadDetector({ aggressiveness: 1, sampleRate: SAMPLE_RATE_HZ });
    const controller = createBargeInController({
      adapter,
      detector,
      triggerFrames: AGENT_CONFIG.voice.bargeInTriggerFrames,
      windowMs: AGENT_CONFIG.voice.bargeInWindowMs,
      onBargeIn: ({ speechFrames }) => opts.listener({ kind: "barge_in", speechFrames }),
    });
    bargeIn = controller;

    for (const f of frames30ms(opts.inboundPcm, SAMPLE_RATE_HZ)) {
      controller.feedAudioFrame(f);
      adapter.sendAudio(f);
    }
    // Tell the server "this is end-of-utterance — process it now". With
    // server VAD this would happen automatically after silence, but the
    // smoke pumps a single bounded clip and needs a deterministic kick.
    adapter.commitInput();

    // Wait for transcript_final / disconnect, or budget expiry.
    const budget = opts.maxDurationMs ?? DEFAULT_MAX_DURATION_MS;
    await new Promise<void>((resolve) => {
      let settled = false;
      // Single exit path: whichever of (event, timeout) fires first clears
      // the timer AND removes the completion listener, so neither outlives
      // the call.
      const settle = (): void => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        unsubscribe();
        resolve();
      };
      const timer = setTimeout(settle, budget);
      const unsubscribe = adapter.on((ev) => {
        if (ev.kind === "transcript_final" || ev.kind === "disconnect") settle();
      });
    });
  } finally {
    // Teardown runs even when connect/pump/commit throws.
    bargeIn?.stop();
    off();
    // Only disconnect a session that was actually established.
    if (connected) await adapter.disconnect();
  }
}
`;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Renders `daemon.ts`: the executable entrypoint of the generated bundle.
 *
 * The emitted script supports one mode, `--smoke <pcm-path>`: it loads a
 * raw PCM file, runs a single call through `runOneCall`, and writes one
 * JSON object per stdout line (`smoke_start`, `smoke_pcm_loaded`,
 * `voice_event`..., `smoke_done`). Without that flag it prints a notice to
 * stderr and exits with status 2; an uncaught failure exits with status 1.
 *
 * Compared with a naive implementation, the emitted code:
 *  - drops a trailing odd byte in `loadPcm` instead of letting the
 *    `Int16Array` constructor throw a `RangeError` on an odd byte length;
 *  - formats non-`Error` rejections with `String(err)` instead of printing
 *    `undefined`.
 *
 * Only `ir.name` and `ir.version` are read here (for the header comment).
 *
 * @param ir The VOICE intermediate representation.
 * @returns The TypeScript source text of the generated `daemon.ts`.
 */
function renderDaemon(ir: IrVoiceV0): string {
  // The spec name lands inside a `//` line comment. Any ECMAScript line
  // terminator in it would end the comment and turn the remainder of the
  // name into live code in the generated file, so collapse them to spaces.
  const headerName = String(ir.name).replace(/[\r\n\u2028\u2029]+/g, " ");

  return `#!/usr/bin/env bun
// Generated by crewhaus. DO NOT EDIT.
// Source spec: ${headerName} (target: voice, ir version: ${ir.version}, file: daemon.ts)
import { existsSync, readFileSync } from "node:fs";
import { runOneCall } from "./voice-loop.js";

function emit(event: Record<string, unknown>): void {
  process.stdout.write(\`\${JSON.stringify(event)}\\n\`);
}

function loadPcm(path: string): Int16Array {
  if (!existsSync(path)) {
    throw new Error("[voice-daemon] PCM file not found: " + path);
  }
  const buf = readFileSync(path);
  // Samples are 2 bytes each. A file with an odd byte length ends in half
  // a sample; drop that byte, because new Int16Array(buffer) throws a
  // RangeError when the byte length is not a multiple of 2.
  const usableBytes = buf.byteLength - (buf.byteLength % 2);
  // raw PCM 16-bit little-endian mono. Align onto a fresh ArrayBuffer
  // because Buffer's underlying buffer can have an unaligned offset.
  const aligned = new ArrayBuffer(usableBytes);
  new Uint8Array(aligned).set(buf.subarray(0, usableBytes));
  return new Int16Array(aligned);
}

async function smokeMode(pcmPath: string): Promise<void> {
  emit({ kind: "smoke_start", pcmPath });
  const pcm = loadPcm(pcmPath);
  emit({ kind: "smoke_pcm_loaded", samples: pcm.length });
  await runOneCall({
    inboundPcm: pcm,
    listener: (ev) => emit({ kind: "voice_event", event: ev }),
  });
  emit({ kind: "smoke_done" });
}

async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const smokeIdx = args.indexOf("--smoke");
  const pcmPath = smokeIdx === -1 ? undefined : args[smokeIdx + 1];
  if (pcmPath !== undefined) {
    await smokeMode(pcmPath);
    return;
  }
  process.stderr.write(
    "[voice-daemon] no --smoke <pcm-path> provided. v0 ships the headless smoke path; the WebRTC + telephony bridge lands in a follow-up.\\n",
  );
  process.exit(2);
}

main().catch((err: unknown) => {
  const reason = err instanceof Error ? err.message : String(err);
  process.stderr.write(\`[voice-daemon] fatal: \${reason}\\n\`);
  process.exit(1);
});
`;
}
|