@kognitivedev/voice-recording 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+
2
+ $ tsc
@@ -0,0 +1,11 @@
1
+ $ vitest run
2
+
3
+ RUN v3.2.4 /Users/vserifsaglam/work/memory-experiment/packages/voice-recording
4
+
5
+ ✓ src/__tests__/voice-recording.test.ts (1 test) 3ms
6
+
7
+ Test Files 1 passed (1)
8
+ Tests 1 passed (1)
9
+ Start at 17:29:58
10
+ Duration 886ms (transform 58ms, setup 0ms, collect 61ms, tests 3ms, environment 0ms, prepare 342ms)
11
+
package/CHANGELOG.md ADDED
@@ -0,0 +1,10 @@
1
+ # @kognitivedev/voice-recording
2
+
3
+ ## 0.2.29
4
+
5
+ ### Patch Changes
6
+
7
+ - release
8
+
9
+ - Updated dependencies []:
10
+ - @kognitivedev/shared@0.2.29
@@ -0,0 +1,12 @@
1
+ import type { VoiceRecordingAdapter } from "./types";
2
+ export declare function createVoiceRecordingBackendAdapter(config: {
3
+ baseUrl: string;
4
+ fetch?: typeof fetch;
5
+ headers?: Record<string, string>;
6
+ endpoints?: {
7
+ init?: string;
8
+ complete?: (recordingSessionId: string) => string;
9
+ createAsset?: (recordingSessionId: string) => string;
10
+ manifest?: (sessionId: string) => string;
11
+ };
12
+ }): VoiceRecordingAdapter;
@@ -0,0 +1,70 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createVoiceRecordingBackendAdapter = createVoiceRecordingBackendAdapter;
4
/**
 * Returns `value` without any "/" characters at its end, so endpoint paths can be
 * appended to a base URL without producing a double slash.
 */
function trimTrailingSlash(value) {
    let end = value.length;
    while (end > 0 && value.charAt(end - 1) === "/") {
        end -= 1;
    }
    return value.slice(0, end);
}
7
+ function createVoiceRecordingBackendAdapter(config) {
8
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j;
9
+ const baseUrl = trimTrailingSlash(config.baseUrl);
10
+ const fetchImpl = (_a = config.fetch) !== null && _a !== void 0 ? _a : fetch;
11
+ const endpoint = {
12
+ init: (_c = (_b = config.endpoints) === null || _b === void 0 ? void 0 : _b.init) !== null && _c !== void 0 ? _c : "/api/kognitive/cognitive/voice/recordings/sessions/init",
13
+ complete: (_e = (_d = config.endpoints) === null || _d === void 0 ? void 0 : _d.complete) !== null && _e !== void 0 ? _e : ((recordingSessionId) => `/api/kognitive/cognitive/voice/recordings/sessions/${encodeURIComponent(recordingSessionId)}/complete`),
14
+ createAsset: (_g = (_f = config.endpoints) === null || _f === void 0 ? void 0 : _f.createAsset) !== null && _g !== void 0 ? _g : ((recordingSessionId) => `/api/kognitive/cognitive/voice/recordings/sessions/${encodeURIComponent(recordingSessionId)}/assets`),
15
+ manifest: (_j = (_h = config.endpoints) === null || _h === void 0 ? void 0 : _h.manifest) !== null && _j !== void 0 ? _j : ((sessionId) => `/api/kognitive/cognitive/voice/recordings/sessions/${encodeURIComponent(sessionId)}`),
16
+ };
17
+ const requestJson = async (url, init) => {
18
+ var _a, _b, _c;
19
+ const response = await fetchImpl(`${baseUrl}${url}`, Object.assign(Object.assign({}, init), { headers: Object.assign(Object.assign({ "Content-Type": "application/json" }, ((_a = config.headers) !== null && _a !== void 0 ? _a : {})), ((_b = init === null || init === void 0 ? void 0 : init.headers) !== null && _b !== void 0 ? _b : {})) }));
20
+ const payload = await response.json().catch(() => ({}));
21
+ if (!response.ok) {
22
+ throw new Error((_c = payload.error) !== null && _c !== void 0 ? _c : `Voice recording request failed (${response.status})`);
23
+ }
24
+ return payload;
25
+ };
26
+ return {
27
+ async initSession(input) {
28
+ const payload = await requestJson(endpoint.init, {
29
+ method: "POST",
30
+ body: JSON.stringify(input),
31
+ });
32
+ return payload;
33
+ },
34
+ async createAssetUpload(input) {
35
+ return requestJson(endpoint.createAsset(input.recordingSessionId), {
36
+ method: "POST",
37
+ body: JSON.stringify(input),
38
+ });
39
+ },
40
+ async uploadBinary(target, blob) {
41
+ const response = await fetchImpl(target.uploadUrl, {
42
+ method: "PUT",
43
+ headers: target.uploadHeaders,
44
+ body: blob,
45
+ });
46
+ if (!response.ok) {
47
+ throw new Error(`Voice recording upload failed (${response.status})`);
48
+ }
49
+ },
50
+ async completeSession(input) {
51
+ return requestJson(endpoint.complete(input.recordingSessionId), {
52
+ method: "POST",
53
+ body: JSON.stringify(input),
54
+ });
55
+ },
56
+ async fetchManifest(sessionId) {
57
+ var _a;
58
+ const response = await fetchImpl(`${baseUrl}${endpoint.manifest(sessionId)}`, {
59
+ headers: config.headers,
60
+ });
61
+ if (response.status === 404)
62
+ return null;
63
+ const payload = await response.json().catch(() => ({}));
64
+ if (!response.ok) {
65
+ throw new Error((_a = payload.error) !== null && _a !== void 0 ? _a : `Voice recording manifest request failed (${response.status})`);
66
+ }
67
+ return payload;
68
+ },
69
+ };
70
+ }
@@ -0,0 +1,4 @@
1
+ import type { VoiceRecordingAdapter, VoiceRecordingController } from "./types";
2
+ export declare function createVoiceRecordingController(config: {
3
+ adapter: VoiceRecordingAdapter;
4
+ }): VoiceRecordingController;
@@ -0,0 +1,255 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createVoiceRecordingController = createVoiceRecordingController;
4
+ const wav_1 = require("./wav");
5
+ function detectMixedMimeType() {
6
+ const candidates = [
7
+ "audio/webm;codecs=opus",
8
+ "audio/webm",
9
+ "audio/ogg;codecs=opus",
10
+ ];
11
+ for (const candidate of candidates) {
12
+ if (typeof MediaRecorder !== "undefined" && MediaRecorder.isTypeSupported(candidate)) {
13
+ return candidate;
14
+ }
15
+ }
16
+ return "";
17
+ }
18
+ async function computeChecksum(blob) {
19
+ const buffer = await blob.arrayBuffer();
20
+ const digest = await crypto.subtle.digest("SHA-256", buffer);
21
+ return Array.from(new Uint8Array(digest)).map((byte) => byte.toString(16).padStart(2, "0")).join("");
22
+ }
23
+ function buildMixedStream(inputStream, outputStream) {
24
+ if (!inputStream && !outputStream)
25
+ return null;
26
+ const context = new AudioContext();
27
+ const destination = context.createMediaStreamDestination();
28
+ if (inputStream) {
29
+ const source = context.createMediaStreamSource(inputStream);
30
+ source.connect(destination);
31
+ }
32
+ if (outputStream) {
33
+ const source = context.createMediaStreamSource(outputStream);
34
+ source.connect(destination);
35
+ }
36
+ return {
37
+ context,
38
+ stream: destination.stream,
39
+ };
40
+ }
41
+ function createMixedRecorder(stream, mimeType) {
42
+ const chunks = [];
43
+ const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
44
+ const done = new Promise((resolve) => {
45
+ recorder.addEventListener("dataavailable", (event) => {
46
+ if (event.data && event.data.size > 0) {
47
+ chunks.push(event.data);
48
+ }
49
+ });
50
+ recorder.addEventListener("stop", () => {
51
+ resolve(new Blob(chunks, { type: recorder.mimeType || mimeType || "audio/webm" }));
52
+ });
53
+ });
54
+ recorder.start(1000);
55
+ return {
56
+ stop: async () => {
57
+ recorder.stop();
58
+ return done;
59
+ },
60
+ };
61
+ }
62
+ function createPcmCapture(stream) {
63
+ const context = new AudioContext();
64
+ const source = context.createMediaStreamSource(stream);
65
+ const processor = context.createScriptProcessor(4096, 1, 1);
66
+ const chunks = [];
67
+ let sampleCount = 0;
68
+ processor.onaudioprocess = (event) => {
69
+ const input = event.inputBuffer.getChannelData(0);
70
+ const copy = new Float32Array(input.length);
71
+ copy.set(input);
72
+ chunks.push(copy);
73
+ sampleCount += copy.length;
74
+ };
75
+ source.connect(processor);
76
+ processor.connect(context.destination);
77
+ return {
78
+ stop: async () => {
79
+ processor.disconnect();
80
+ source.disconnect();
81
+ await context.close().catch(() => { });
82
+ const samples = new Float32Array(sampleCount);
83
+ let offset = 0;
84
+ for (const chunk of chunks) {
85
+ samples.set(chunk, offset);
86
+ offset += chunk.length;
87
+ }
88
+ const bytes = (0, wav_1.encodePcm16Wav)(samples, context.sampleRate, 1);
89
+ const wavBuffer = new Uint8Array(bytes.byteLength);
90
+ wavBuffer.set(bytes);
91
+ return {
92
+ blob: new Blob([wavBuffer.buffer], { type: "audio/wav" }),
93
+ sampleRate: context.sampleRate,
94
+ durationMs: sampleCount > 0 ? Math.round((sampleCount / context.sampleRate) * 1000) : 0,
95
+ channels: 1,
96
+ };
97
+ },
98
+ };
99
+ }
100
+ function createVoiceRecordingController(config) {
101
+ let sources = {};
102
+ let metadata = null;
103
+ let recordingSessionId = null;
104
+ let state = { status: "idle", manifest: null, lastError: null };
105
+ let startedAt = 0;
106
+ let inputCapture = null;
107
+ let outputCapture = null;
108
+ let mixedRecorder = null;
109
+ let mixedContext = null;
110
+ const setState = (patch) => {
111
+ state = Object.assign(Object.assign({}, state), patch);
112
+ };
113
+ const uploadAsset = async (kind, blob, extra = {}) => {
114
+ var _a, _b, _c, _d, _e, _f;
115
+ if (!recordingSessionId || !metadata || blob.size === 0)
116
+ return null;
117
+ const checksum = await computeChecksum(blob);
118
+ const target = await config.adapter.createAssetUpload({
119
+ recordingSessionId,
120
+ sessionId: metadata.sessionId,
121
+ kind,
122
+ mimeType: blob.type || (kind === "mixed" ? "audio/webm" : "audio/wav"),
123
+ byteSize: blob.size,
124
+ durationMs: (_a = extra.durationMs) !== null && _a !== void 0 ? _a : null,
125
+ sampleRate: (_b = extra.sampleRate) !== null && _b !== void 0 ? _b : null,
126
+ channels: (_c = extra.channels) !== null && _c !== void 0 ? _c : null,
127
+ checksum,
128
+ });
129
+ await config.adapter.uploadBinary(target, blob);
130
+ return {
131
+ id: target.assetId,
132
+ kind,
133
+ mimeType: blob.type || (kind === "mixed" ? "audio/webm" : "audio/wav"),
134
+ byteSize: blob.size,
135
+ durationMs: (_d = extra.durationMs) !== null && _d !== void 0 ? _d : null,
136
+ sampleRate: (_e = extra.sampleRate) !== null && _e !== void 0 ? _e : null,
137
+ channels: (_f = extra.channels) !== null && _f !== void 0 ? _f : null,
138
+ checksum,
139
+ storageKey: target.storageKey,
140
+ };
141
+ };
142
+ return {
143
+ attachSources(nextSources) {
144
+ sources = nextSources;
145
+ },
146
+ async start(nextMetadata) {
147
+ var _a;
148
+ if (state.status === "recording")
149
+ return;
150
+ metadata = nextMetadata;
151
+ setState({ status: "initializing", lastError: null });
152
+ try {
153
+ const initialized = await config.adapter.initSession(nextMetadata);
154
+ recordingSessionId = initialized.recordingSessionId;
155
+ startedAt = Date.now();
156
+ inputCapture = sources.inputStream ? createPcmCapture(sources.inputStream) : null;
157
+ outputCapture = sources.outputStream ? createPcmCapture(sources.outputStream) : null;
158
+ const mixedStream = (_a = sources.mixedStream) !== null && _a !== void 0 ? _a : (() => {
159
+ var _a;
160
+ mixedContext = buildMixedStream(sources.inputStream, sources.outputStream);
161
+ return (_a = mixedContext === null || mixedContext === void 0 ? void 0 : mixedContext.stream) !== null && _a !== void 0 ? _a : null;
162
+ })();
163
+ const mimeType = detectMixedMimeType();
164
+ mixedRecorder = mixedStream && typeof MediaRecorder !== "undefined"
165
+ ? createMixedRecorder(mixedStream, mimeType)
166
+ : null;
167
+ setState({ status: "recording" });
168
+ }
169
+ catch (error) {
170
+ const message = error instanceof Error ? error.message : String(error);
171
+ setState({ status: "failed", lastError: message });
172
+ throw error;
173
+ }
174
+ },
175
+ async stop(reason) {
176
+ var _a, _b;
177
+ if (!metadata || !recordingSessionId)
178
+ return (_a = state.manifest) !== null && _a !== void 0 ? _a : null;
179
+ if (state.status !== "recording" && state.status !== "failed")
180
+ return (_b = state.manifest) !== null && _b !== void 0 ? _b : null;
181
+ setState({ status: "stopping" });
182
+ try {
183
+ const assets = (await Promise.all([
184
+ mixedRecorder
185
+ ? mixedRecorder.stop().then((blob) => uploadAsset("mixed", blob, {
186
+ durationMs: Date.now() - startedAt,
187
+ channels: 2,
188
+ }))
189
+ : Promise.resolve(null),
190
+ inputCapture
191
+ ? inputCapture.stop().then((result) => uploadAsset("user_input", result.blob, result))
192
+ : Promise.resolve(null),
193
+ outputCapture
194
+ ? outputCapture.stop().then((result) => uploadAsset("assistant_output", result.blob, result))
195
+ : Promise.resolve(null),
196
+ ])).filter(Boolean);
197
+ await (mixedContext === null || mixedContext === void 0 ? void 0 : mixedContext.context.close().catch(() => { }));
198
+ mixedContext = null;
199
+ const manifest = await config.adapter.completeSession({
200
+ recordingSessionId,
201
+ sessionId: metadata.sessionId,
202
+ status: reason === "abort" ? "aborted" : "completed",
203
+ totalDurationMs: Date.now() - startedAt,
204
+ assets,
205
+ });
206
+ setState({
207
+ status: manifest.status === "completed" ? "completed" : manifest.status === "aborted" ? "aborted" : "failed",
208
+ manifest,
209
+ });
210
+ return manifest;
211
+ }
212
+ catch (error) {
213
+ const message = error instanceof Error ? error.message : String(error);
214
+ setState({ status: "failed", lastError: message });
215
+ await config.adapter.completeSession({
216
+ recordingSessionId,
217
+ sessionId: metadata.sessionId,
218
+ status: "failed",
219
+ totalDurationMs: Date.now() - startedAt,
220
+ assets: [],
221
+ errorMessage: message,
222
+ }).catch(() => { });
223
+ throw error;
224
+ }
225
+ },
226
+ async flush() {
227
+ return this.stop("flush");
228
+ },
229
+ async abort() {
230
+ if (!metadata || !recordingSessionId) {
231
+ setState({ status: "aborted" });
232
+ return;
233
+ }
234
+ await Promise.allSettled([
235
+ inputCapture === null || inputCapture === void 0 ? void 0 : inputCapture.stop(),
236
+ outputCapture === null || outputCapture === void 0 ? void 0 : outputCapture.stop(),
237
+ mixedRecorder === null || mixedRecorder === void 0 ? void 0 : mixedRecorder.stop(),
238
+ ]);
239
+ await (mixedContext === null || mixedContext === void 0 ? void 0 : mixedContext.context.close().catch(() => { }));
240
+ mixedContext = null;
241
+ const manifest = await config.adapter.completeSession({
242
+ recordingSessionId,
243
+ sessionId: metadata.sessionId,
244
+ status: "aborted",
245
+ totalDurationMs: Date.now() - startedAt,
246
+ assets: [],
247
+ errorMessage: "aborted",
248
+ }).catch(() => null);
249
+ setState({ status: "aborted", manifest });
250
+ },
251
+ getState() {
252
+ return state;
253
+ },
254
+ };
255
+ }
@@ -0,0 +1,4 @@
1
+ export * from "./types";
2
+ export * from "./tokens";
3
+ export { createVoiceRecordingBackendAdapter } from "./backend-adapter";
4
+ export { createVoiceRecordingController } from "./controller";
package/dist/index.js ADDED
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.createVoiceRecordingController = exports.createVoiceRecordingBackendAdapter = void 0;
18
+ __exportStar(require("./types"), exports);
19
+ __exportStar(require("./tokens"), exports);
20
+ var backend_adapter_1 = require("./backend-adapter");
21
+ Object.defineProperty(exports, "createVoiceRecordingBackendAdapter", { enumerable: true, get: function () { return backend_adapter_1.createVoiceRecordingBackendAdapter; } });
22
+ var controller_1 = require("./controller");
23
+ Object.defineProperty(exports, "createVoiceRecordingController", { enumerable: true, get: function () { return controller_1.createVoiceRecordingController; } });
@@ -0,0 +1,9 @@
1
+ export interface VoiceRecordingSignedTokenPayload {
2
+ assetId: string;
3
+ storageKey: string;
4
+ projectId: string;
5
+ operation: "upload" | "download";
6
+ exp: number;
7
+ }
8
+ export declare function createVoiceRecordingSignedToken(payload: VoiceRecordingSignedTokenPayload, secret?: string): Promise<string>;
9
+ export declare function verifyVoiceRecordingSignedToken(token: string | null | undefined, secret?: string): Promise<VoiceRecordingSignedTokenPayload | null>;
package/dist/tokens.js ADDED
@@ -0,0 +1,45 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createVoiceRecordingSignedToken = createVoiceRecordingSignedToken;
4
+ exports.verifyVoiceRecordingSignedToken = verifyVoiceRecordingSignedToken;
5
+ function getSecret(secret) {
6
+ return secret || process.env.VOICE_RECORDING_SIGNING_SECRET || "kognitive-voice-recording-secret";
7
+ }
8
+ function encodeBase64Url(value) {
9
+ return Buffer.from(value, "utf8").toString("base64url");
10
+ }
11
+ function decodeBase64Url(value) {
12
+ return Buffer.from(value, "base64url").toString("utf8");
13
+ }
14
+ async function sign(value, secret) {
15
+ const key = await crypto.subtle.importKey("raw", new TextEncoder().encode(getSecret(secret)), { name: "HMAC", hash: "SHA-256" }, false, ["sign"]);
16
+ const signature = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(value));
17
+ return Buffer.from(signature).toString("base64url");
18
+ }
19
+ async function createVoiceRecordingSignedToken(payload, secret) {
20
+ const encodedPayload = encodeBase64Url(JSON.stringify(payload));
21
+ const signature = await sign(encodedPayload, secret);
22
+ return `${encodedPayload}.${signature}`;
23
+ }
24
+ async function verifyVoiceRecordingSignedToken(token, secret) {
25
+ if (!token)
26
+ return null;
27
+ const [encodedPayload, signature] = token.split(".");
28
+ if (!encodedPayload || !signature)
29
+ return null;
30
+ const expected = await sign(encodedPayload, secret);
31
+ if (expected !== signature)
32
+ return null;
33
+ try {
34
+ const parsed = JSON.parse(decodeBase64Url(encodedPayload));
35
+ if (!parsed.assetId || !parsed.storageKey || !parsed.projectId || !parsed.operation || !parsed.exp) {
36
+ return null;
37
+ }
38
+ if (parsed.exp < Date.now())
39
+ return null;
40
+ return parsed;
41
+ }
42
+ catch (_a) {
43
+ return null;
44
+ }
45
+ }
@@ -0,0 +1,98 @@
1
+ export type VoiceRecordingAssetKind = "mixed" | "user_input" | "assistant_output" | "manager_input";
2
+ export interface VoiceRecordingSessionMetadata {
3
+ sessionId: string;
4
+ userId: string;
5
+ provider: string;
6
+ modelId: string;
7
+ voice?: string | null;
8
+ transport?: string | null;
9
+ metadata?: Record<string, unknown>;
10
+ consent?: {
11
+ recordingEnabled?: boolean;
12
+ consentGranted?: boolean;
13
+ };
14
+ }
15
+ export interface VoiceRecordingAsset {
16
+ id: string;
17
+ kind: VoiceRecordingAssetKind;
18
+ mimeType: string;
19
+ byteSize: number;
20
+ durationMs?: number | null;
21
+ sampleRate?: number | null;
22
+ channels?: number | null;
23
+ checksum?: string | null;
24
+ storageKey: string;
25
+ playbackUrl?: string | null;
26
+ }
27
+ export interface VoiceRecordingManifest {
28
+ recordingSessionId: string;
29
+ sessionId: string;
30
+ sessionDbId?: string | null;
31
+ status: "pending" | "recording" | "uploading" | "completed" | "failed" | "aborted";
32
+ provider: string;
33
+ modelId: string;
34
+ voice?: string | null;
35
+ transport?: string | null;
36
+ totalDurationMs?: number | null;
37
+ consent?: {
38
+ recordingEnabled?: boolean;
39
+ consentGranted?: boolean;
40
+ } | null;
41
+ summary?: {
42
+ hasAssets: boolean;
43
+ assetKinds: VoiceRecordingAssetKind[];
44
+ };
45
+ assets: VoiceRecordingAsset[];
46
+ }
47
+ export interface VoiceRecordingUploadTarget {
48
+ assetId: string;
49
+ storageKey: string;
50
+ uploadUrl: string;
51
+ uploadHeaders?: Record<string, string>;
52
+ }
53
+ export interface VoiceRecordingUploadAssetInput {
54
+ recordingSessionId: string;
55
+ sessionId: string;
56
+ kind: VoiceRecordingAssetKind;
57
+ blob: Blob;
58
+ mimeType: string;
59
+ byteSize: number;
60
+ durationMs?: number | null;
61
+ sampleRate?: number | null;
62
+ channels?: number | null;
63
+ checksum?: string | null;
64
+ }
65
+ export interface VoiceRecordingAdapter {
66
+ initSession(input: VoiceRecordingSessionMetadata): Promise<{
67
+ recordingSessionId: string;
68
+ sessionDbId?: string | null;
69
+ }>;
70
+ createAssetUpload(input: Omit<VoiceRecordingUploadAssetInput, "blob">): Promise<VoiceRecordingUploadTarget>;
71
+ uploadBinary(target: VoiceRecordingUploadTarget, blob: Blob): Promise<void>;
72
+ completeSession(input: {
73
+ recordingSessionId: string;
74
+ sessionId: string;
75
+ status: "completed" | "failed" | "aborted";
76
+ totalDurationMs?: number | null;
77
+ assets: Array<Omit<VoiceRecordingAsset, "playbackUrl">>;
78
+ errorMessage?: string | null;
79
+ }): Promise<VoiceRecordingManifest>;
80
+ fetchManifest(sessionId: string): Promise<VoiceRecordingManifest | null>;
81
+ }
82
+ export interface VoiceRecordingControllerState {
83
+ status: "idle" | "initializing" | "recording" | "stopping" | "completed" | "failed" | "aborted";
84
+ manifest?: VoiceRecordingManifest | null;
85
+ lastError?: string | null;
86
+ }
87
+ export interface VoiceRecordingController {
88
+ attachSources(sources: {
89
+ inputStream?: MediaStream | null;
90
+ outputStream?: MediaStream | null;
91
+ mixedStream?: MediaStream | null;
92
+ }): void;
93
+ start(metadata: VoiceRecordingSessionMetadata): Promise<void>;
94
+ stop(reason?: string): Promise<VoiceRecordingManifest | null>;
95
+ flush(): Promise<VoiceRecordingManifest | null>;
96
+ abort(reason?: string): Promise<void>;
97
+ getState(): VoiceRecordingControllerState;
98
+ }
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/dist/wav.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function encodePcm16Wav(samples: Float32Array, sampleRate: number, channels?: number): Uint8Array;
package/dist/wav.js ADDED
@@ -0,0 +1,40 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.encodePcm16Wav = encodePcm16Wav;
4
/** Limits a sample to the range [-1, 1]; values already inside the range pass through. */
function clamp(sample) {
    const value = +sample;
    if (value > 1) {
        return 1;
    }
    if (value < -1) {
        return -1;
    }
    return value;
}
7
+ function encodePcm16Wav(samples, sampleRate, channels = 1) {
8
+ var _a;
9
+ const bytesPerSample = 2;
10
+ const blockAlign = channels * bytesPerSample;
11
+ const byteRate = sampleRate * blockAlign;
12
+ const dataSize = samples.length * bytesPerSample;
13
+ const buffer = new ArrayBuffer(44 + dataSize);
14
+ const view = new DataView(buffer);
15
+ const writeString = (offset, value) => {
16
+ for (let index = 0; index < value.length; index += 1) {
17
+ view.setUint8(offset + index, value.charCodeAt(index));
18
+ }
19
+ };
20
+ writeString(0, "RIFF");
21
+ view.setUint32(4, 36 + dataSize, true);
22
+ writeString(8, "WAVE");
23
+ writeString(12, "fmt ");
24
+ view.setUint32(16, 16, true);
25
+ view.setUint16(20, 1, true);
26
+ view.setUint16(22, channels, true);
27
+ view.setUint32(24, sampleRate, true);
28
+ view.setUint32(28, byteRate, true);
29
+ view.setUint16(32, blockAlign, true);
30
+ view.setUint16(34, 16, true);
31
+ writeString(36, "data");
32
+ view.setUint32(40, dataSize, true);
33
+ let offset = 44;
34
+ for (let index = 0; index < samples.length; index += 1) {
35
+ const value = clamp((_a = samples[index]) !== null && _a !== void 0 ? _a : 0);
36
+ view.setInt16(offset, value < 0 ? value * 0x8000 : value * 0x7fff, true);
37
+ offset += 2;
38
+ }
39
+ return new Uint8Array(buffer);
40
+ }
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@kognitivedev/voice-recording",
3
+ "version": "0.2.29",
4
+ "main": "dist/index.js",
5
+ "types": "dist/index.d.ts",
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "dev": "tsc -w --noCheck",
12
+ "prepublishOnly": "npm run build",
13
+ "test": "vitest run"
14
+ },
15
+ "dependencies": {
16
+ "@kognitivedev/shared": "^0.2.29"
17
+ },
18
+ "devDependencies": {
19
+ "@types/node": "^20.0.0",
20
+ "typescript": "^5.0.0",
21
+ "vitest": "^3.0.0"
22
+ },
23
+ "description": "Voice session recording controllers and backend adapters for Kognitive",
24
+ "keywords": [
25
+ "kognitive",
26
+ "voice",
27
+ "recording",
28
+ "audio"
29
+ ],
30
+ "license": "MIT",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "https://github.com/kognitivedev/kognitive",
34
+ "directory": "packages/voice-recording"
35
+ },
36
+ "homepage": "https://kognitive.dev"
37
+ }
@@ -0,0 +1,51 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { createVoiceRecordingBackendAdapter } from "../backend-adapter";
3
+
4
+ describe("@kognitivedev/voice-recording", () => {
5
+ it("uses the configured backend endpoints", async () => {
6
+ const fetch = vi.fn()
7
+ .mockResolvedValueOnce(new Response(JSON.stringify({ recordingSessionId: "rec-1" }), { status: 200 }))
8
+ .mockResolvedValueOnce(new Response(JSON.stringify({
9
+ assetId: "asset-1",
10
+ storageKey: "voice-recordings/project/session/mixed.webm",
11
+ uploadUrl: "https://upload.example/asset-1",
12
+ }), { status: 200 }))
13
+ .mockResolvedValueOnce(new Response(null, { status: 200 }))
14
+ .mockResolvedValueOnce(new Response(JSON.stringify({
15
+ recordingSessionId: "rec-1",
16
+ sessionId: "call-1",
17
+ status: "completed",
18
+ provider: "openai-realtime",
19
+ modelId: "gpt-realtime-1.5",
20
+ assets: [],
21
+ }), { status: 200 }));
22
+ const adapter = createVoiceRecordingBackendAdapter({
23
+ baseUrl: "https://example.test",
24
+ fetch,
25
+ });
26
+
27
+ await adapter.initSession({
28
+ sessionId: "call-1",
29
+ userId: "user-1",
30
+ provider: "openai-realtime",
31
+ modelId: "gpt-realtime-1.5",
32
+ });
33
+ const target = await adapter.createAssetUpload({
34
+ recordingSessionId: "rec-1",
35
+ sessionId: "call-1",
36
+ kind: "mixed",
37
+ mimeType: "audio/webm",
38
+ byteSize: 12,
39
+ });
40
+ await adapter.uploadBinary(target, new Blob(["hello"], { type: "audio/webm" }));
41
+ const manifest = await adapter.completeSession({
42
+ recordingSessionId: "rec-1",
43
+ sessionId: "call-1",
44
+ status: "completed",
45
+ assets: [],
46
+ });
47
+
48
+ expect(fetch).toHaveBeenCalledTimes(4);
49
+ expect(manifest.status).toBe("completed");
50
+ });
51
+ });
@@ -0,0 +1,93 @@
1
+ import type {
2
+ VoiceRecordingAdapter,
3
+ VoiceRecordingManifest,
4
+ VoiceRecordingSessionMetadata,
5
+ VoiceRecordingUploadAssetInput,
6
+ VoiceRecordingUploadTarget,
7
+ } from "./types";
8
+
9
/**
 * Returns `value` without any "/" characters at its end, so endpoint paths can be
 * appended to a base URL without producing a double slash.
 */
function trimTrailingSlash(value: string) {
    let end = value.length;
    while (end > 0 && value.charAt(end - 1) === "/") {
        end -= 1;
    }
    return value.slice(0, end);
}
12
+
13
/**
 * Creates a {@link VoiceRecordingAdapter} that talks to a backend over HTTP.
 *
 * @param config.baseUrl Prefix every endpoint path is appended to; trailing slashes are
 *   stripped first.
 * @param config.fetch Optional fetch implementation; the global `fetch` is used when omitted.
 * @param config.headers Extra headers sent with every backend request. They are NOT sent with
 *   the binary upload, which only uses the headers carried by the upload target.
 * @param config.endpoints Optional overrides for the default endpoint paths.
 * @returns The adapter.
 * @throws Every adapter method rejects with an `Error` when the backend answers with a non-OK
 *   status, except `fetchManifest`, which resolves to `null` on a 404.
 */
export function createVoiceRecordingBackendAdapter(config: {
    baseUrl: string;
    fetch?: typeof fetch;
    headers?: Record<string, string>;
    endpoints?: {
        init?: string;
        complete?: (recordingSessionId: string) => string;
        createAsset?: (recordingSessionId: string) => string;
        manifest?: (sessionId: string) => string;
    };
}): VoiceRecordingAdapter {
    const sessionsPath = "/api/kognitive/cognitive/voice/recordings/sessions";
    const baseUrl = trimTrailingSlash(config.baseUrl);
    const fetchImpl = config.fetch ?? fetch;

    const endpoint = {
        init: config.endpoints?.init ?? `${sessionsPath}/init`,
        complete: config.endpoints?.complete
            ?? ((recordingSessionId: string) => `${sessionsPath}/${encodeURIComponent(recordingSessionId)}/complete`),
        createAsset: config.endpoints?.createAsset
            ?? ((recordingSessionId: string) => `${sessionsPath}/${encodeURIComponent(recordingSessionId)}/assets`),
        manifest: config.endpoints?.manifest
            ?? ((sessionId: string) => `${sessionsPath}/${encodeURIComponent(sessionId)}`),
    };

    // A body that is missing or is not valid JSON is treated as an empty object.
    const readJson = (response: Response): Promise<unknown> => response.json().catch(() => ({}));

    // Builds the error for a non-OK response. The body may legally be `null`, a primitive, or
    // carry a non-string `error`, so the backend-provided message is only used when it is a
    // non-empty string; otherwise the status-based fallback is used.
    const toError = (payload: unknown, fallback: string): Error => {
        const detail = typeof payload === "object" && payload !== null
            ? (payload as { error?: unknown }).error
            : undefined;
        return new Error(typeof detail === "string" && detail !== "" ? detail : fallback);
    };

    const requestJson = async <T>(url: string, init?: RequestInit): Promise<T> => {
        const response = await fetchImpl(`${baseUrl}${url}`, {
            ...init,
            // Later entries win: per-call headers override config headers, which override the default.
            headers: {
                "Content-Type": "application/json",
                ...(config.headers ?? {}),
                ...(init?.headers ?? {}),
            },
        });
        const payload = await readJson(response);
        if (!response.ok) {
            throw toError(payload, `Voice recording request failed (${response.status})`);
        }
        return payload as T;
    };

    const postJson = <T>(url: string, body: unknown): Promise<T> =>
        requestJson<T>(url, { method: "POST", body: JSON.stringify(body) });

    return {
        async initSession(input: VoiceRecordingSessionMetadata) {
            return postJson<{ recordingSessionId: string; sessionDbId?: string | null }>(endpoint.init, input);
        },
        async createAssetUpload(input: Omit<VoiceRecordingUploadAssetInput, "blob">): Promise<VoiceRecordingUploadTarget> {
            return postJson<VoiceRecordingUploadTarget>(endpoint.createAsset(input.recordingSessionId), input);
        },
        async uploadBinary(target: VoiceRecordingUploadTarget, blob: Blob) {
            // The upload URL is used verbatim (it is not resolved against baseUrl).
            const response = await fetchImpl(target.uploadUrl, {
                method: "PUT",
                headers: target.uploadHeaders,
                body: blob,
            });
            if (!response.ok) {
                throw new Error(`Voice recording upload failed (${response.status})`);
            }
        },
        async completeSession(input) {
            return postJson<VoiceRecordingManifest>(endpoint.complete(input.recordingSessionId), input);
        },
        async fetchManifest(sessionId: string) {
            const response = await fetchImpl(`${baseUrl}${endpoint.manifest(sessionId)}`, {
                headers: config.headers,
            });
            // An unknown session is reported as "no manifest" rather than as an error.
            if (response.status === 404) return null;
            const payload = await readJson(response);
            if (!response.ok) {
                throw toError(payload, `Voice recording manifest request failed (${response.status})`);
            }
            return payload as VoiceRecordingManifest;
        },
    };
}
93
+
@@ -0,0 +1,277 @@
1
+ import type {
2
+ VoiceRecordingAdapter,
3
+ VoiceRecordingAsset,
4
+ VoiceRecordingAssetKind,
5
+ VoiceRecordingController,
6
+ VoiceRecordingControllerState,
7
+ VoiceRecordingManifest,
8
+ VoiceRecordingSessionMetadata,
9
+ } from "./types";
10
+ import { encodePcm16Wav } from "./wav";
11
+
12
+ type AttachedSources = {
13
+ inputStream?: MediaStream | null;
14
+ outputStream?: MediaStream | null;
15
+ mixedStream?: MediaStream | null;
16
+ };
17
+
18
/**
 * Picks the first container/codec combination that the runtime's
 * `MediaRecorder` reports as supported.
 *
 * @returns The first supported MIME type, or `""` when `MediaRecorder` is not
 *   available or none of the candidates is supported.
 */
function detectMixedMimeType() {
  // Availability does not depend on the candidate, so check it once up front.
  if (typeof MediaRecorder === "undefined") return "";
  const candidates = [
    "audio/webm;codecs=opus",
    "audio/webm",
    "audio/ogg;codecs=opus",
  ];
  return candidates.find((candidate) => MediaRecorder.isTypeSupported(candidate)) ?? "";
}
31
+
32
/**
 * Computes the SHA-256 digest of a blob's bytes.
 *
 * @param blob - The data to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
async function computeChecksum(blob: Blob) {
  const digestBytes = new Uint8Array(
    await crypto.subtle.digest("SHA-256", await blob.arrayBuffer()),
  );
  let hex = "";
  for (const byte of digestBytes) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
37
+
38
/**
 * Mixes the given input and/or output streams into a single stream through a
 * dedicated `AudioContext`.
 *
 * @param inputStream - Optional stream to mix in.
 * @param outputStream - Optional stream to mix in.
 * @returns `null` when neither stream is provided; otherwise the mixed stream
 *   together with the `AudioContext` that produces it. The caller is
 *   responsible for closing that context.
 * @throws Rethrows any error raised while wiring the audio graph, after
 *   closing the context that was created for it.
 */
function buildMixedStream(inputStream?: MediaStream | null, outputStream?: MediaStream | null) {
  if (!inputStream && !outputStream) return null;
  const context = new AudioContext();
  try {
    const destination = context.createMediaStreamDestination();
    for (const stream of [inputStream, outputStream]) {
      if (stream) {
        context.createMediaStreamSource(stream).connect(destination);
      }
    }
    return {
      context,
      stream: destination.stream,
    };
  } catch (error) {
    // createMediaStreamSource() can throw (e.g. a stream without an audio
    // track); release the context instead of leaking it.
    void context.close().catch(() => {});
    throw error;
  }
}
55
+
56
/**
 * Starts a `MediaRecorder` on the given stream and collects its output.
 *
 * @param stream - The stream to record.
 * @param mimeType - Preferred MIME type; an empty string lets the recorder
 *   choose its own default.
 * @returns A handle whose `stop()` ends the recording (if it is still running)
 *   and resolves with everything recorded as a single `Blob`.
 */
function createMixedRecorder(stream: MediaStream, mimeType: string) {
  const chunks: BlobPart[] = [];
  const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
  const done = new Promise<Blob>((resolve) => {
    recorder.addEventListener("dataavailable", (event) => {
      if (event.data && event.data.size > 0) {
        chunks.push(event.data);
      }
    });
    recorder.addEventListener("stop", () => {
      resolve(new Blob(chunks, { type: recorder.mimeType || mimeType || "audio/webm" }));
    });
  });
  // Emit a chunk every second.
  recorder.start(1000);
  return {
    stop: async () => {
      // The recorder may already be inactive (its tracks ended, or stop() was
      // called before). Calling MediaRecorder.stop() in that state can throw
      // InvalidStateError, which would discard audio that was already recorded.
      if (recorder.state !== "inactive") {
        recorder.stop();
      }
      return done;
    },
  };
}
77
+
78
/**
 * Captures mono PCM samples from a stream using a `ScriptProcessorNode` on a
 * dedicated `AudioContext`.
 *
 * @param stream - The stream to capture.
 * @returns A handle whose `stop()` tears the audio graph down and resolves
 *   with the captured audio as a 16-bit WAV blob plus its sample rate,
 *   duration in milliseconds and channel count.
 * @throws Rethrows any error raised while wiring the audio graph, after
 *   closing the context that was created for it.
 */
function createPcmCapture(stream: MediaStream) {
  const context = new AudioContext();
  const chunks: Float32Array[] = [];
  let sampleCount = 0;
  let source: MediaStreamAudioSourceNode;
  let processor: ScriptProcessorNode;
  try {
    source = context.createMediaStreamSource(stream);
    processor = context.createScriptProcessor(4096, 1, 1);
    processor.onaudioprocess = (event) => {
      // Copy the samples so they are retained after the callback returns.
      const input = event.inputBuffer.getChannelData(0);
      const copy = new Float32Array(input.length);
      copy.set(input);
      chunks.push(copy);
      sampleCount += copy.length;
    };
    source.connect(processor);
    processor.connect(context.destination);
  } catch (error) {
    // createMediaStreamSource() can throw (e.g. a stream without an audio
    // track); release the context instead of leaking it.
    void context.close().catch(() => {});
    throw error;
  }
  return {
    stop: async () => {
      processor.disconnect();
      source.disconnect();
      await context.close().catch(() => {});
      // Concatenate all captured chunks into one contiguous sample buffer.
      const samples = new Float32Array(sampleCount);
      let offset = 0;
      for (const chunk of chunks) {
        samples.set(chunk, offset);
        offset += chunk.length;
      }
      const bytes = encodePcm16Wav(samples, context.sampleRate, 1);
      // Copy the encoded bytes into a buffer of exactly the WAV size before
      // wrapping it in a Blob.
      const wavBuffer = new Uint8Array(bytes.byteLength);
      wavBuffer.set(bytes);
      return {
        blob: new Blob([wavBuffer.buffer], { type: "audio/wav" }),
        sampleRate: context.sampleRate,
        durationMs: sampleCount > 0 ? Math.round((sampleCount / context.sampleRate) * 1000) : 0,
        channels: 1,
      };
    },
  };
}
116
+
117
/**
 * Creates a controller that records the attached audio sources and uploads
 * the resulting assets through the given adapter.
 *
 * Lifecycle: `attachSources()` → `start()` → `stop()` / `flush()` / `abort()`.
 * The current status, last manifest and last error are available from
 * `getState()`.
 *
 * @param config - `adapter` performs session initialisation, asset upload and
 *   session completion.
 * @returns The controller. Its methods do not rely on `this`, so they can be
 *   detached or destructured safely.
 */
export function createVoiceRecordingController(config: {
  adapter: VoiceRecordingAdapter;
}): VoiceRecordingController {
  type UploadedAsset = Omit<VoiceRecordingAsset, "playbackUrl">;

  let sources: AttachedSources = {};
  let metadata: VoiceRecordingSessionMetadata | null = null;
  let recordingSessionId: string | null = null;
  let state: VoiceRecordingControllerState = { status: "idle", manifest: null, lastError: null };
  let startedAt = 0;
  let inputCapture: ReturnType<typeof createPcmCapture> | null = null;
  let outputCapture: ReturnType<typeof createPcmCapture> | null = null;
  let mixedRecorder: ReturnType<typeof createMixedRecorder> | null = null;
  let mixedContext: { context: AudioContext; stream: MediaStream } | null = null;

  /** Merges a partial update into the controller state. */
  const setState = (patch: Partial<VoiceRecordingControllerState>) => {
    state = {
      ...state,
      ...patch,
    };
  };

  /** Closes the AudioContext created for mixing, if any; safe to call repeatedly. */
  const closeMixedContext = async () => {
    const current = mixedContext;
    mixedContext = null;
    await current?.context.close().catch(() => {});
  };

  /**
   * Registers an asset with the adapter and uploads its bytes.
   * Returns `null` when there is no active session or the blob is empty.
   */
  const uploadAsset = async (kind: VoiceRecordingAssetKind, blob: Blob, extra: {
    durationMs?: number | null;
    sampleRate?: number | null;
    channels?: number | null;
  } = {}): Promise<UploadedAsset | null> => {
    if (!recordingSessionId || !metadata || blob.size === 0) return null;
    const checksum = await computeChecksum(blob);
    const mimeType = blob.type || (kind === "mixed" ? "audio/webm" : "audio/wav");
    const durationMs = extra.durationMs ?? null;
    const sampleRate = extra.sampleRate ?? null;
    const channels = extra.channels ?? null;
    const target = await config.adapter.createAssetUpload({
      recordingSessionId,
      sessionId: metadata.sessionId,
      kind,
      mimeType,
      byteSize: blob.size,
      durationMs,
      sampleRate,
      channels,
      checksum,
    });
    await config.adapter.uploadBinary(target, blob);
    return {
      id: target.assetId,
      kind,
      mimeType,
      byteSize: blob.size,
      durationMs,
      sampleRate,
      channels,
      checksum,
      storageKey: target.storageKey,
    };
  };

  /**
   * Stops all captures, uploads the recorded assets and completes the session.
   * Passing `"abort"` as the reason completes the session as aborted.
   * On failure the session is reported as failed and the error is rethrown.
   */
  const stop = async (reason?: string): Promise<VoiceRecordingManifest | null> => {
    const activeMetadata = metadata;
    const activeSessionId = recordingSessionId;
    if (!activeMetadata || !activeSessionId) return state.manifest ?? null;
    if (state.status !== "recording" && state.status !== "failed") return state.manifest ?? null;
    setState({ status: "stopping" });
    try {
      const uploaded = await Promise.all([
        mixedRecorder
          ? mixedRecorder.stop().then((blob) => uploadAsset("mixed", blob, {
            durationMs: Date.now() - startedAt,
            channels: 2,
          }))
          : Promise.resolve(null),
        inputCapture
          ? inputCapture.stop().then((result) => uploadAsset("user_input", result.blob, result))
          : Promise.resolve(null),
        outputCapture
          ? outputCapture.stop().then((result) => uploadAsset("assistant_output", result.blob, result))
          : Promise.resolve(null),
      ]);
      const assets = uploaded.filter((asset): asset is UploadedAsset => asset !== null);
      await closeMixedContext();
      const manifest = await config.adapter.completeSession({
        recordingSessionId: activeSessionId,
        sessionId: activeMetadata.sessionId,
        status: reason === "abort" ? "aborted" : "completed",
        totalDurationMs: Date.now() - startedAt,
        assets,
      });
      setState({
        status: manifest.status === "completed" ? "completed" : manifest.status === "aborted" ? "aborted" : "failed",
        manifest,
      });
      return manifest;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      setState({ status: "failed", lastError: message });
      // Every capture has been asked to stop by now, so the mixing context is
      // no longer needed; release it on the failure path as well.
      await closeMixedContext();
      // Best effort: tell the backend the session failed, but surface the original error.
      await config.adapter.completeSession({
        recordingSessionId: activeSessionId,
        sessionId: activeMetadata.sessionId,
        status: "failed",
        totalDurationMs: Date.now() - startedAt,
        assets: [],
        errorMessage: message,
      }).catch(() => {});
      throw error;
    }
  };

  return {
    attachSources(nextSources) {
      sources = nextSources;
    },
    async start(nextMetadata) {
      // A session that is being set up, is running, or is still uploading owns
      // the shared session id and capture handles; starting another one now
      // would clobber them.
      if (
        state.status === "recording"
        || state.status === "initializing"
        || state.status === "stopping"
      ) {
        return;
      }
      metadata = nextMetadata;
      setState({ status: "initializing", lastError: null });
      try {
        const initialized = await config.adapter.initSession(nextMetadata);
        recordingSessionId = initialized.recordingSessionId;
        startedAt = Date.now();

        inputCapture = sources.inputStream ? createPcmCapture(sources.inputStream) : null;
        outputCapture = sources.outputStream ? createPcmCapture(sources.outputStream) : null;

        // Prefer a caller-provided mixed stream; otherwise mix input and output ourselves.
        let mixedStream = sources.mixedStream ?? null;
        if (!mixedStream) {
          mixedContext = buildMixedStream(sources.inputStream, sources.outputStream);
          mixedStream = mixedContext?.stream ?? null;
        }
        const mimeType = detectMixedMimeType();
        mixedRecorder = mixedStream && typeof MediaRecorder !== "undefined"
          ? createMixedRecorder(mixedStream, mimeType)
          : null;

        setState({ status: "recording" });
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        setState({ status: "failed", lastError: message });
        throw error;
      }
    },
    stop,
    async flush() {
      // Call the closure directly rather than `this.stop`, so a detached
      // `flush` reference keeps working.
      return stop("flush");
    },
    async abort() {
      const activeMetadata = metadata;
      const activeSessionId = recordingSessionId;
      if (!activeMetadata || !activeSessionId) {
        setState({ status: "aborted" });
        return;
      }
      // Stop everything without uploading; individual failures are ignored.
      await Promise.allSettled([
        inputCapture?.stop(),
        outputCapture?.stop(),
        mixedRecorder?.stop(),
      ]);
      await closeMixedContext();
      const manifest = await config.adapter.completeSession({
        recordingSessionId: activeSessionId,
        sessionId: activeMetadata.sessionId,
        status: "aborted",
        totalDurationMs: Date.now() - startedAt,
        assets: [],
        errorMessage: "aborted",
      }).catch(() => null);
      setState({ status: "aborted", manifest });
    },
    getState() {
      return state;
    },
  };
}
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from "./types";
2
+ export * from "./tokens";
3
+ export { createVoiceRecordingBackendAdapter } from "./backend-adapter";
4
+ export { createVoiceRecordingController } from "./controller";
package/src/tokens.ts ADDED
@@ -0,0 +1,63 @@
1
/**
 * Resolves the HMAC signing secret: the explicit argument first, then the
 * `VOICE_RECORDING_SIGNING_SECRET` environment variable, then a built-in
 * default.
 *
 * NOTE(review): the built-in default is a fixed, publicly visible string, so
 * tokens signed with it can be forged by anyone who has read this package. It
 * is kept only for backward compatibility; deployments should always supply a
 * secret.
 */
function getSecret(secret?: string) {
  // `process` does not exist in every runtime; never throw just for looking.
  const fromEnv = typeof process !== "undefined"
    ? process.env?.VOICE_RECORDING_SIGNING_SECRET
    : undefined;
  return secret || fromEnv || "kognitive-voice-recording-secret";
}

/** Encodes a UTF-8 string as base64url. */
function encodeBase64Url(value: string) {
  return Buffer.from(value, "utf8").toString("base64url");
}

/** Decodes a base64url string back to UTF-8 text. */
function decodeBase64Url(value: string) {
  return Buffer.from(value, "base64url").toString("utf8");
}

/** Computes the base64url-encoded HMAC-SHA-256 signature of `value`. */
async function sign(value: string, secret?: string) {
  const key = await crypto.subtle.importKey(
    "raw",
    new TextEncoder().encode(getSecret(secret)),
    { name: "HMAC", hash: "SHA-256" },
    false,
    ["sign"],
  );
  const signature = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(value));
  return Buffer.from(signature).toString("base64url");
}

/**
 * Compares two strings without returning early on the first mismatch, so the
 * comparison time does not reveal how many leading characters matched.
 */
function constantTimeEquals(left: string, right: string) {
  if (left.length !== right.length) return false;
  let difference = 0;
  for (let index = 0; index < left.length; index += 1) {
    difference |= left.charCodeAt(index) ^ right.charCodeAt(index);
  }
  return difference === 0;
}

/** Claims carried by a signed voice-recording token. */
export interface VoiceRecordingSignedTokenPayload {
  assetId: string;
  storageKey: string;
  projectId: string;
  operation: "upload" | "download";
  /** Expiry as a millisecond timestamp; verification compares it with `Date.now()`. */
  exp: number;
}

/**
 * Creates a signed token of the form `<base64url(JSON payload)>.<signature>`.
 *
 * @param payload - The claims to embed in the token.
 * @param secret - Optional signing secret; see `getSecret` for the fallbacks.
 * @returns The signed token string.
 */
export async function createVoiceRecordingSignedToken(
  payload: VoiceRecordingSignedTokenPayload,
  secret?: string,
) {
  const encodedPayload = encodeBase64Url(JSON.stringify(payload));
  const signature = await sign(encodedPayload, secret);
  return `${encodedPayload}.${signature}`;
}

/**
 * Verifies a token produced by `createVoiceRecordingSignedToken`.
 *
 * @param token - The token to verify; `null`, `undefined` and `""` are rejected.
 * @param secret - Optional signing secret; must match the one used to sign.
 * @returns The decoded payload, or `null` when the token is malformed, its
 *   signature does not match, a required claim is missing, or it has expired.
 */
export async function verifyVoiceRecordingSignedToken(
  token: string | null | undefined,
  secret?: string,
): Promise<VoiceRecordingSignedTokenPayload | null> {
  if (!token) return null;
  // A valid token has exactly two segments; reject anything with trailing data.
  const segments = token.split(".");
  if (segments.length !== 2) return null;
  const [encodedPayload, signature] = segments;
  if (!encodedPayload || !signature) return null;
  const expected = await sign(encodedPayload, secret);
  // Constant-time comparison: a plain `!==` leaks how much of the signature matched.
  if (!constantTimeEquals(expected, signature)) return null;

  try {
    const parsed = JSON.parse(decodeBase64Url(encodedPayload)) as Partial<VoiceRecordingSignedTokenPayload>;
    if (!parsed.assetId || !parsed.storageKey || !parsed.projectId || !parsed.operation || !parsed.exp) {
      return null;
    }
    if (parsed.exp < Date.now()) return null;
    return parsed as VoiceRecordingSignedTokenPayload;
  } catch {
    return null;
  }
}
63
+
package/src/types.ts ADDED
@@ -0,0 +1,106 @@
1
/** Identifies which audio track a recorded asset holds. */
export type VoiceRecordingAssetKind = "mixed" | "user_input" | "assistant_output" | "manager_input";

/** Recording consent flags attached to a session and echoed in its manifest. */
export interface VoiceRecordingConsent {
  recordingEnabled?: boolean;
  consentGranted?: boolean;
}

/** Audio streams a controller can record from. */
export interface VoiceRecordingSources {
  inputStream?: MediaStream | null;
  outputStream?: MediaStream | null;
  mixedStream?: MediaStream | null;
}

/** Describes the voice session being recorded; sent to the adapter when recording starts. */
export interface VoiceRecordingSessionMetadata {
  sessionId: string;
  userId: string;
  provider: string;
  modelId: string;
  voice?: string | null;
  transport?: string | null;
  metadata?: Record<string, unknown>;
  consent?: VoiceRecordingConsent;
}

/** A single stored audio asset belonging to a recording session. */
export interface VoiceRecordingAsset {
  id: string;
  kind: VoiceRecordingAssetKind;
  mimeType: string;
  byteSize: number;
  durationMs?: number | null;
  sampleRate?: number | null;
  channels?: number | null;
  checksum?: string | null;
  storageKey: string;
  playbackUrl?: string | null;
}

/** Backend view of a recording session together with its assets. */
export interface VoiceRecordingManifest {
  recordingSessionId: string;
  sessionId: string;
  sessionDbId?: string | null;
  status: "pending" | "recording" | "uploading" | "completed" | "failed" | "aborted";
  provider: string;
  modelId: string;
  voice?: string | null;
  transport?: string | null;
  totalDurationMs?: number | null;
  consent?: VoiceRecordingConsent | null;
  summary?: {
    hasAssets: boolean;
    assetKinds: VoiceRecordingAssetKind[];
  };
  assets: VoiceRecordingAsset[];
}

/** Where and how to upload the bytes of a newly registered asset. */
export interface VoiceRecordingUploadTarget {
  assetId: string;
  storageKey: string;
  uploadUrl: string;
  uploadHeaders?: Record<string, string>;
}

/** Description of an asset to upload, including its binary content. */
export interface VoiceRecordingUploadAssetInput {
  recordingSessionId: string;
  sessionId: string;
  kind: VoiceRecordingAssetKind;
  blob: Blob;
  mimeType: string;
  byteSize: number;
  durationMs?: number | null;
  sampleRate?: number | null;
  channels?: number | null;
  checksum?: string | null;
}

/** Transport used by the controller to talk to a recording backend. */
export interface VoiceRecordingAdapter {
  /** Opens a recording session and returns its identifiers. */
  initSession(input: VoiceRecordingSessionMetadata): Promise<{
    recordingSessionId: string;
    sessionDbId?: string | null;
  }>;
  /** Registers an asset (without its bytes) and returns the upload target. */
  createAssetUpload(input: Omit<VoiceRecordingUploadAssetInput, "blob">): Promise<VoiceRecordingUploadTarget>;
  /** Uploads the asset bytes to the given target. */
  uploadBinary(target: VoiceRecordingUploadTarget, blob: Blob): Promise<void>;
  /** Finalises the session with its outcome and uploaded assets. */
  completeSession(input: {
    recordingSessionId: string;
    sessionId: string;
    status: "completed" | "failed" | "aborted";
    totalDurationMs?: number | null;
    assets: Array<Omit<VoiceRecordingAsset, "playbackUrl">>;
    errorMessage?: string | null;
  }): Promise<VoiceRecordingManifest>;
  /** Fetches the manifest for a session; `null` when none is available. */
  fetchManifest(sessionId: string): Promise<VoiceRecordingManifest | null>;
}

/** Snapshot of a controller's lifecycle status, last manifest and last error message. */
export interface VoiceRecordingControllerState {
  status: "idle" | "initializing" | "recording" | "stopping" | "completed" | "failed" | "aborted";
  manifest?: VoiceRecordingManifest | null;
  lastError?: string | null;
}

/** Drives a recording session from start to completion. */
export interface VoiceRecordingController {
  attachSources(sources: VoiceRecordingSources): void;
  start(metadata: VoiceRecordingSessionMetadata): Promise<void>;
  stop(reason?: string): Promise<VoiceRecordingManifest | null>;
  flush(): Promise<VoiceRecordingManifest | null>;
  abort(reason?: string): Promise<void>;
  getState(): VoiceRecordingControllerState;
}
package/src/wav.ts ADDED
@@ -0,0 +1,42 @@
1
/** Limits a sample to the range [-1, 1]. */
function clamp(sample: number) {
  if (sample > 1) return 1;
  if (sample < -1) return -1;
  return sample;
}

/**
 * Encodes floating-point samples as a 16-bit PCM WAV file.
 *
 * @param samples - Samples in the range [-1, 1]; values outside it are clamped.
 * @param sampleRate - Sample rate written to the header.
 * @param channels - Channel count written to the header (default 1).
 * @returns The complete file: a 44-byte header followed by little-endian
 *   16-bit samples.
 */
export function encodePcm16Wav(samples: Float32Array, sampleRate: number, channels = 1): Uint8Array {
  const headerSize = 44;
  const bytesPerSample = 2;
  const dataSize = samples.length * bytesPerSample;
  const blockAlign = channels * bytesPerSample;
  const view = new DataView(new ArrayBuffer(headerSize + dataSize));

  const putTag = (at: number, tag: string) => {
    Array.from(tag, (char) => char.charCodeAt(0)).forEach((code, position) => {
      view.setUint8(at + position, code);
    });
  };

  // RIFF container header.
  putTag(0, "RIFF");
  view.setUint32(4, 36 + dataSize, true);
  putTag(8, "WAVE");

  // "fmt " chunk: 16-byte body, format tag 1, 16 bits per sample.
  putTag(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, channels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, 16, true);

  // "data" chunk header followed by the samples.
  putTag(36, "data");
  view.setUint32(40, dataSize, true);

  samples.forEach((sample, position) => {
    const value = clamp(sample);
    // Negative and positive halves use different scales so both -1 and 1 fit in 16 bits.
    const scaled = value < 0 ? value * 0x8000 : value * 0x7fff;
    view.setInt16(headerSize + position * bytesPerSample, scaled, true);
  });

  return new Uint8Array(view.buffer);
}
42
+
package/tsconfig.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "extends": "../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "module": "commonjs",
5
+ "rootDir": "src",
6
+ "outDir": "dist",
7
+ "declaration": true,
8
+ "noEmit": false,
9
+ "incremental": false
10
+ },
11
+ "include": [
12
+ "src"
13
+ ],
14
+ "exclude": [
15
+ "src/__tests__"
16
+ ]
17
+ }