@absolutejs/voice 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -71,6 +71,7 @@ var __decorateElement = (array, flags, name, decorators, target, extra) => {
71
71
 
72
72
  // src/plugin.ts
73
73
  import { Elysia } from "elysia";
74
+ import { resolve } from "path";
74
75
 
75
76
  // src/htmx.ts
76
77
  var DEFAULT_HTMX_TARGETS = {
@@ -680,6 +681,51 @@ var createVoiceSession = (options) => {
680
681
  };
681
682
 
682
683
  // src/plugin.ts
684
+ var HTMX_BOOTSTRAP_DIST_CANDIDATES = [
685
+ resolve(import.meta.dir, "client", "htmxBootstrap.js"),
686
+ resolve(import.meta.dir, "..", "dist", "client", "htmxBootstrap.js")
687
+ ];
688
+ var HTMX_BOOTSTRAP_SOURCE_CANDIDATES = [
689
+ resolve(import.meta.dir, "client", "htmxBootstrap.ts"),
690
+ resolve(import.meta.dir, "..", "src", "client", "htmxBootstrap.ts")
691
+ ];
692
/**
 * Lazily loads the browser bootstrap script served by the HTMX bootstrap route.
 *
 * Resolution order:
 *   1. The first existing prebuilt file in `HTMX_BOOTSTRAP_DIST_CANDIDATES`.
 *   2. The first existing source file in `HTMX_BOOTSTRAP_SOURCE_CANDIDATES`,
 *      bundled on demand with `Bun.build` (ESM, minified, browser target).
 *
 * The pending/successful promise is memoized so the lookup or build runs once.
 * A failed attempt clears the memo, so a transient failure does not poison
 * every later request; the next call simply retries.
 *
 * @returns A promise resolving to the bootstrap JavaScript source text.
 * @throws {Error} When the on-demand build fails or no candidate file exists.
 */
var loadHTMXBootstrap = (() => {
  let cached = null;
  const load = async () => {
    for (const candidate of HTMX_BOOTSTRAP_DIST_CANDIDATES) {
      const asset = Bun.file(candidate);
      if (await asset.exists()) {
        return await asset.text();
      }
    }
    for (const candidate of HTMX_BOOTSTRAP_SOURCE_CANDIDATES) {
      const asset = Bun.file(candidate);
      if (!await asset.exists()) {
        continue;
      }
      const build = await Bun.build({
        entrypoints: [candidate],
        format: "esm",
        minify: true,
        target: "browser"
      });
      if (!build.success || build.outputs.length === 0) {
        const log = build.logs.map((entry) => entry.message).join("\n");
        throw new Error(`Failed to build the voice HTMX bootstrap bundle.${log ? `\n${log}` : ""}`);
      }
      return await build.outputs[0].text();
    }
    throw new Error("Unable to locate the voice HTMX bootstrap client.");
  };
  return () => {
    if (!cached) {
      const pending = load();
      cached = pending;
      // Drop a rejected attempt from the memo. Callers still observe the
      // rejection through the promise they were handed.
      pending.catch(() => {
        if (cached === pending) {
          cached = null;
        }
      });
    }
    return cached;
  };
})();
683
729
  var isArrayBufferView = (value) => typeof value === "object" && value !== null && ArrayBuffer.isView(value);
684
730
  var isVoiceClientMessage = (value) => {
685
731
  if (!value || typeof value !== "object" || !("type" in value)) {
@@ -755,6 +801,7 @@ var voice = (config) => {
755
801
  const onTurn = normalizeOnTurn(config.onTurn);
756
802
  const htmxOptions = config.htmx && typeof config.htmx === "object" ? config.htmx : undefined;
757
803
  const htmxRoute = htmxOptions?.route ?? `${config.path}/htmx/session`;
804
+ const htmxBootstrapRoute = htmxOptions?.bootstrapRoute ?? `${config.path}/htmx/bootstrap.js`;
758
805
  const htmxRenderers = resolveVoiceHTMXRenderers(config.htmx && config.htmx !== true ? config.htmx : undefined);
759
806
  const htmxTargets = resolveVoiceHTMXTargets(htmxOptions?.targets);
760
807
  const htmxRoutes = () => {
@@ -778,7 +825,11 @@ var voice = (config) => {
778
825
  }, htmxRenderers, htmxTargets), {
779
826
  headers: { "Content-Type": "text/html; charset=utf-8" }
780
827
  });
781
- });
828
+ }).get(htmxBootstrapRoute, async () => new Response(await loadHTMXBootstrap(), {
829
+ headers: {
830
+ "Content-Type": "application/javascript; charset=utf-8"
831
+ }
832
+ }));
782
833
  };
783
834
  return new Elysia({ name: "absolutejs-voice" }).ws(config.path, {
784
835
  close: async (ws, code, reason) => {
package/dist/plugin.d.ts CHANGED
@@ -47,6 +47,18 @@ export declare const voice: <TContext = unknown, TSession extends VoiceSessionRe
47
47
  };
48
48
  };
49
49
  };
50
+ } & {
51
+ [x: string]: {
52
+ get: {
53
+ body: unknown;
54
+ params: {};
55
+ query: unknown;
56
+ headers: unknown;
57
+ response: {
58
+ 200: Response;
59
+ };
60
+ };
61
+ };
50
62
  }), {
51
63
  derive: {};
52
64
  resolve: {};
@@ -0,0 +1,13 @@
1
+ import type { Transcript } from '../types';
2
/**
 * Result of comparing a transcript against its expected text, as produced by
 * `scoreTranscriptAccuracy`.
 *
 * - `actualText` / `expectedText`: the two inputs after normalization
 *   (lower-cased, punctuation other than apostrophes replaced by spaces,
 *   whitespace collapsed).
 * - `wordDistance` / `charDistance`: Levenshtein edit distance over words and
 *   over characters of the normalized texts.
 * - `wordErrorRate` / `charErrorRate`: the distance divided by the length of
 *   the expected side.
 * - `threshold`: the word-error-rate limit that was applied (0.35 when the
 *   caller did not pass one).
 * - `passesThreshold`: true when `wordErrorRate <= threshold`.
 */
+ export type VoiceTranscriptAccuracy = {
3
+ actualText: string;
4
+ charDistance: number;
5
+ charErrorRate: number;
6
+ expectedText: string;
7
+ passesThreshold: boolean;
8
+ threshold: number;
9
+ wordDistance: number;
10
+ wordErrorRate: number;
11
+ };
12
+ export declare const mergeFinalTranscriptText: (transcripts: Transcript[]) => string;
13
+ export declare const scoreTranscriptAccuracy: (actualText: string, expectedText: string, threshold?: number) => VoiceTranscriptAccuracy;
@@ -0,0 +1,76 @@
1
+ import type { STTAdapter } from '../types';
2
+ import { type VoiceSTTAdapterHarnessOptions, type VoiceSTTAdapterHarnessResult } from './stt';
3
+ import type { VoiceTestFixture } from './fixtures';
4
+ export type VoiceExpectedTermAccuracy = {
5
+ allMatched: boolean;
6
+ expectedTerms: string[];
7
+ matchedTerms: string[];
8
+ missingTerms: string[];
9
+ recall: number;
10
+ };
11
+ export type VoiceSTTBenchmarkFixtureResult = {
12
+ accuracy: VoiceSTTAdapterHarnessResult['accuracy'];
13
+ closeCount: number;
14
+ difficulty?: VoiceTestFixture['difficulty'];
15
+ elapsedMs: number;
16
+ endOfTurnCount: number;
17
+ errorCount: number;
18
+ expectedTerms: VoiceExpectedTermAccuracy;
19
+ finalCount: number;
20
+ finalText: string;
21
+ fixtureId: string;
22
+ fragmentationCount: number;
23
+ passes: boolean;
24
+ partialCount: number;
25
+ tags: string[];
26
+ timeToEndOfTurnMs?: number;
27
+ timeToFirstFinalMs?: number;
28
+ timeToFirstPartialMs?: number;
29
+ title: string;
30
+ };
31
+ export type VoiceSTTBenchmarkSummary = {
32
+ adapterId: string;
33
+ averageCharErrorRate: number;
34
+ averageElapsedMs: number;
35
+ averageEndOfTurnCount: number;
36
+ averageFinalCount: number;
37
+ averageTermRecall: number;
38
+ averageTimeToEndOfTurnMs?: number;
39
+ averageTimeToFirstFinalMs?: number;
40
+ averageTimeToFirstPartialMs?: number;
41
+ averageWordErrorRate: number;
42
+ fixtureCount: number;
43
+ fixturesWithErrors: number;
44
+ fixturesWithFragmentation: number;
45
+ passCount: number;
46
+ passRate: number;
47
+ totalErrorCount: number;
48
+ wordAccuracyRate: number;
49
+ };
50
+ export type VoiceSTTBenchmarkReport = {
51
+ adapterId: string;
52
+ fixtures: VoiceSTTBenchmarkFixtureResult[];
53
+ generatedAt: number;
54
+ summary: VoiceSTTBenchmarkSummary;
55
+ };
56
+ export type VoiceSTTBenchmarkComparisonEntry = {
57
+ adapterId: string;
58
+ summary: VoiceSTTBenchmarkSummary;
59
+ };
60
+ export type VoiceSTTBenchmarkComparison = {
61
+ bestByPassRate?: VoiceSTTBenchmarkComparisonEntry;
62
+ bestByTermRecall?: VoiceSTTBenchmarkComparisonEntry;
63
+ bestByWordErrorRate?: VoiceSTTBenchmarkComparisonEntry;
64
+ entries: VoiceSTTBenchmarkComparisonEntry[];
65
+ };
66
+ export type VoiceSTTBenchmarkOptions = VoiceSTTAdapterHarnessOptions & {
67
+ fixtureOptions?: Record<string, Omit<VoiceSTTAdapterHarnessOptions, 'fixtureOptions'>>;
68
+ };
69
+ export declare const summarizeSTTBenchmark: (adapterId: string, fixtures: VoiceSTTBenchmarkFixtureResult[]) => VoiceSTTBenchmarkSummary;
70
+ export declare const compareSTTBenchmarks: (reports: VoiceSTTBenchmarkReport[]) => VoiceSTTBenchmarkComparison;
71
+ export declare const runSTTAdapterBenchmark: ({ adapter, adapterId, fixtures, options }: {
72
+ adapter: STTAdapter;
73
+ adapterId: string;
74
+ fixtures: VoiceTestFixture[];
75
+ options?: VoiceSTTBenchmarkOptions;
76
+ }) => Promise<VoiceSTTBenchmarkReport>;
@@ -0,0 +1,21 @@
1
+ import type { AudioFormat } from '../types';
2
+ export type VoiceTestFixtureManifestEntry = {
3
+ id: string;
4
+ title: string;
5
+ audioPath: string;
6
+ expectedText: string;
7
+ expectedTerms?: string[];
8
+ chunkDurationMs?: number;
9
+ language?: string;
10
+ difficulty?: 'clean' | 'noisy' | 'challenging';
11
+ tags?: string[];
12
+ tailPaddingMs?: number;
13
+ format?: Partial<AudioFormat>;
14
+ };
15
+ export type VoiceTestFixture = Omit<VoiceTestFixtureManifestEntry, 'audioPath'> & {
16
+ audio: Uint8Array;
17
+ audioPath: string;
18
+ format: AudioFormat;
19
+ };
20
+ export declare const getVoiceFixtureDirectory: () => Promise<string>;
21
+ export declare const loadVoiceTestFixtures: (fixtureDirectory?: string) => Promise<VoiceTestFixture[]>;
@@ -0,0 +1,4 @@
1
+ export * from './accuracy';
2
+ export * from './benchmark';
3
+ export * from './fixtures';
4
+ export * from './stt';
@@ -0,0 +1,446 @@
1
+ // @bun
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __name = (target, name) => {
6
+ Object.defineProperty(target, "name", {
7
+ value: name,
8
+ enumerable: false,
9
+ configurable: true
10
+ });
11
+ return target;
12
+ };
13
+ var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
14
+ var __typeError = (msg) => {
15
+ throw TypeError(msg);
16
+ };
17
+ var __defNormalProp = (obj, key, value) => (key in obj) ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
18
+ var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
19
+ var __privateIn = (member, obj) => Object(obj) !== obj ? __typeError('Cannot use the "in" operator on this value') : member.has(obj);
20
+ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
21
+ var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
22
+ var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
23
+ var __decoratorStart = (base) => [, , , __create(base?.[__knownSymbol("metadata")] ?? null)];
24
+ var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
25
+ var __expectFn = (fn) => fn !== undefined && typeof fn !== "function" ? __typeError("Function expected") : fn;
26
+ var __decoratorContext = (kind, name, done, metadata, fns) => ({
27
+ kind: __decoratorStrings[kind],
28
+ name,
29
+ metadata,
30
+ addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null))
31
+ });
32
+ var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
33
+ var __runInitializers = (array, flags, self, value) => {
34
+ for (var i = 0, fns = array[flags >> 1], n = fns && fns.length;i < n; i++)
35
+ flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
36
+ return value;
37
+ };
38
+ var __decorateElement = (array, flags, name, decorators, target, extra) => {
39
+ var fn, it, done, ctx, access, k = flags & 7, s = !!(flags & 8), p = !!(flags & 16);
40
+ var j = k > 3 ? array.length + 1 : k ? s ? 1 : 2 : 0, key = __decoratorStrings[k + 5];
41
+ var initializers = k > 3 && (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
42
+ var desc = k && (!p && !s && (target = target.prototype), k < 5 && (k > 3 || !p) && __getOwnPropDesc(k < 4 ? target : {
43
+ get [name]() {
44
+ return __privateGet(this, extra);
45
+ },
46
+ set [name](x) {
47
+ __privateSet(this, extra, x);
48
+ }
49
+ }, name));
50
+ k ? p && k < 4 && __name(extra, (k > 2 ? "set " : k > 1 ? "get " : "") + name) : __name(target, name);
51
+ for (var i = decorators.length - 1;i >= 0; i--) {
52
+ ctx = __decoratorContext(k, name, done = {}, array[3], extraInitializers);
53
+ if (k) {
54
+ ctx.static = s, ctx.private = p, access = ctx.access = { has: p ? (x) => __privateIn(target, x) : (x) => (name in x) };
55
+ if (k ^ 3)
56
+ access.get = p ? (x) => (k ^ 1 ? __privateGet : __privateMethod)(x, target, k ^ 4 ? extra : desc.get) : (x) => x[name];
57
+ if (k > 2)
58
+ access.set = p ? (x, y) => __privateSet(x, target, y, k ^ 4 ? extra : desc.set) : (x, y) => x[name] = y;
59
+ }
60
+ it = (0, decorators[i])(k ? k < 4 ? p ? extra : desc[key] : k > 4 ? undefined : { get: desc.get, set: desc.set } : target, ctx);
61
+ done._ = 1;
62
+ if (k ^ 4 || it === undefined)
63
+ __expectFn(it) && (k > 4 ? initializers.unshift(it) : k ? p ? extra = it : desc[key] = it : target = it);
64
+ else if (typeof it !== "object" || it === null)
65
+ __typeError("Object expected");
66
+ else
67
+ __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
68
+ }
69
+ return k || __decoratorMetadata(array, target), desc && __defProp(target, name, desc), p ? k ^ 4 ? extra : desc : target;
70
+ };
71
+
72
+ // src/turnDetection.ts
73
+ var DEFAULT_SILENCE_MS = 700;
74
+ var DEFAULT_SPEECH_THRESHOLD = 0.015;
75
/**
 * Returns a byte view over `audio` without copying it.
 *
 * @param audio An `ArrayBuffer` or any `ArrayBufferView`.
 * @returns A `Uint8Array` spanning exactly the bytes of `audio`.
 */
var toUint8Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Uint8Array(audio);
  }
  return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
};
/**
 * Computes the RMS level of a buffer of 16-bit signed PCM samples.
 *
 * Samples are read as little-endian through a `DataView`, so the result does
 * not depend on platform byte order and views that start at an odd byte offset
 * are accepted (an `Int16Array` over such a view throws a `RangeError`).
 * A trailing odd byte is ignored.
 *
 * @param audio An `ArrayBuffer` or `ArrayBufferView` holding the samples.
 * @returns The RMS of the samples scaled by 1/32768, or 0 when there is no
 *          complete sample.
 */
var measureAudioLevel = (audio) => {
  const bytes = toUint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    const normalized = view.getInt16(index * 2, true) / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / sampleCount);
};
97
/** Trims `value` and collapses every whitespace run into a single space. */
var normalizeText = (value) => {
  const trimmed = value.trim();
  return trimmed.replace(/\s+/g, " ");
};
/**
 * Joins transcript texts into one string, folding consecutive overlaps.
 *
 * For each normalized, non-empty text compared with the most recent kept
 * segment:
 *   - it is dropped when the kept segment already contains it;
 *   - it replaces the kept segment when it contains that segment;
 *   - otherwise it is appended as a new segment.
 *
 * @param transcripts Iterable of objects with a `text` string.
 * @returns The kept segments joined by single spaces.
 */
var mergeTranscriptTexts = (transcripts) => {
  const segments = [];
  for (const { text } of transcripts) {
    const candidate = normalizeText(text);
    if (candidate.length === 0) {
      continue;
    }
    const lastIndex = segments.length - 1;
    const tail = lastIndex >= 0 ? segments[lastIndex] : undefined;
    if (tail === undefined) {
      segments.push(candidate);
    } else if (tail.includes(candidate)) {
      // Repeat or fragment of what we already have: nothing to add.
    } else if (candidate.includes(tail)) {
      segments[lastIndex] = candidate;
    } else {
      segments.push(candidate);
    }
  }
  return segments.join(" ").trim();
};
121
/**
 * Produces the text for a turn: the merged transcript text when it is
 * non-empty, otherwise the normalized partial text.
 *
 * @param transcripts Transcripts handed to `mergeTranscriptTexts`.
 * @param partialText Fallback text used when the merge yields nothing.
 */
var buildTurnText = (transcripts, partialText) => mergeTranscriptTexts(transcripts) || normalizeText(partialText);
128
+
129
+ // src/testing/accuracy.ts
130
/**
 * Canonicalizes text before scoring: lower-cases it, replaces every character
 * that is not a letter, number, whitespace or apostrophe with a space, then
 * collapses whitespace and trims.
 */
var normalizeAccuracyText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const collapsed = withoutPunctuation.replace(/\s+/g, " ");
  return collapsed.trim();
};
131
/**
 * Levenshtein edit distance between two indexable sequences (strings or
 * arrays), using two rolling rows. Elements are compared with `===`.
 *
 * @param left  First sequence.
 * @param right Second sequence.
 * @returns The minimum number of insertions, deletions and substitutions
 *          needed to turn `left` into `right`.
 */
var levenshteinDistance = (left, right) => {
  const rowCount = left.length;
  const columnCount = right.length;
  if (rowCount === 0) {
    return columnCount;
  }
  if (columnCount === 0) {
    return rowCount;
  }
  let above = Array.from({ length: columnCount + 1 }, (_, index) => index);
  let active = new Array(columnCount + 1).fill(0);
  for (let row = 1; row <= rowCount; row += 1) {
    const leftItem = left[row - 1];
    active[0] = row;
    for (let column = 1; column <= columnCount; column += 1) {
      const insertion = active[column - 1] + 1;
      const deletion = above[column] + 1;
      const substitution = above[column - 1] + (leftItem === right[column - 1] ? 0 : 1);
      active[column] = Math.min(insertion, deletion, substitution);
    }
    // The finished row becomes the reference row for the next iteration.
    [above, active] = [active, above];
  }
  return above[columnCount];
};
155
/**
 * Builds the turn text from only those transcripts flagged `isFinal`, with an
 * empty partial-text fallback.
 */
var mergeFinalTranscriptText = (transcripts) => {
  const finals = transcripts.filter(({ isFinal }) => isFinal);
  return buildTurnText(finals, "");
};
156
/**
 * Scores a transcript against the expected text using word- and
 * character-level Levenshtein distance over the normalized texts.
 *
 * Error rates are the distance divided by the expected length. When the
 * expected side is empty the rate is 0 only if the actual side is empty too;
 * any transcribed content against an empty expectation counts as a full error
 * (rate 1) instead of silently scoring as perfect. The character rate divides
 * by the code-point count so it uses the same unit as the character distance.
 *
 * @param actualText   Text produced by the system under test.
 * @param expectedText Reference text.
 * @param threshold    Maximum acceptable word error rate (default 0.35).
 * @returns Normalized texts, distances, error rates, the applied threshold and
 *          whether `wordErrorRate <= threshold`.
 */
var scoreTranscriptAccuracy = (actualText, expectedText, threshold = 0.35) => {
  const normalizedActual = normalizeAccuracyText(actualText);
  const normalizedExpected = normalizeAccuracyText(expectedText);
  const actualWords = normalizedActual ? normalizedActual.split(" ") : [];
  const expectedWords = normalizedExpected ? normalizedExpected.split(" ") : [];
  const actualChars = Array.from(normalizedActual);
  const expectedChars = Array.from(normalizedExpected);
  const wordDistance = levenshteinDistance(actualWords, expectedWords);
  const charDistance = levenshteinDistance(actualChars, expectedChars);
  const toErrorRate = (distance, referenceLength) => {
    if (referenceLength > 0) {
      return distance / referenceLength;
    }
    return distance > 0 ? 1 : 0;
  };
  const wordErrorRate = toErrorRate(wordDistance, expectedWords.length);
  const charErrorRate = toErrorRate(charDistance, expectedChars.length);
  return {
    actualText: normalizedActual,
    charDistance,
    charErrorRate,
    expectedText: normalizedExpected,
    passesThreshold: wordErrorRate <= threshold,
    threshold,
    wordDistance,
    wordErrorRate
  };
};
176
+ // src/testing/stt.ts
177
+ var chunkAudio = (audio, bytesPerChunk) => {
178
+ const chunks = [];
179
+ for (let offset = 0;offset < audio.byteLength; offset += bytesPerChunk) {
180
+ chunks.push(audio.slice(offset, offset + bytesPerChunk));
181
+ }
182
+ return chunks;
183
+ };
184
/** Allocates `byteLength` zero-valued bytes, used as silent padding audio. */
var createSilence = (byteLength) => {
  const silence = new Uint8Array(byteLength);
  return silence;
};
185
/**
 * Polls until no activity has been recorded for `settleMs`, or until
 * `idleTimeoutMs` has elapsed since the call started, whichever comes first.
 *
 * @param readLastActivityAt Returns the timestamp (ms) of the latest activity.
 * @param idleTimeoutMs      Upper bound on the total wait.
 * @param settleMs           Quiet period that counts as idle.
 */
var waitForIdle = async (readLastActivityAt, idleTimeoutMs, settleMs) => {
  const deadline = Date.now() + idleTimeoutMs;
  for (;;) {
    if (!(Date.now() < deadline)) {
      return;
    }
    const quietForMs = Date.now() - readLastActivityAt();
    if (quietForMs >= settleMs) {
      return;
    }
    // Poll at most every 50 ms, or faster when the settle window is shorter.
    await Bun.sleep(Math.min(50, settleMs));
  }
};
194
/**
 * Streams one fixture through an STT adapter and records everything the
 * session emits.
 *
 * The fixture audio is sent in chunks of `chunkDurationMs`, followed by
 * `tailPaddingMs` of silence, sleeping between chunks (`waitForRealtimeMs`
 * when positive, otherwise `chunkDurationMs`). The harness then waits until
 * the session has been quiet for `settleMs` or `idleTimeoutMs` has elapsed,
 * closes the session and scores the merged final transcript.
 *
 * Chunk and tail sizes are whole PCM frames (`channels` x 2 bytes), so a
 * sample is never split across two `send` calls. The code treats audio as
 * 16-bit samples, as the original byte-rate computation did.
 *
 * @param adapter STT adapter; `open` must yield a session with `on`, `send`
 *                and `close`.
 * @param fixture Fixture providing `id`, `audio`, `format`, `expectedText` and
 *                optional `chunkDurationMs` / `tailPaddingMs` defaults.
 * @param options Overrides: `chunkDurationMs` (100), `tailPaddingMs` (1000),
 *                `idleTimeoutMs` (8000), `settleMs` (500),
 *                `waitForRealtimeMs` (0), `transcriptThreshold`.
 * @returns Collected events, merged final text, accuracy and `startedAt`.
 */
var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
  const startedAt = Date.now();
  const partialEvents = [];
  const finalEvents = [];
  const endOfTurnEvents = [];
  const errorEvents = [];
  const closeEvents = [];
  const chunkDurationMs = options.chunkDurationMs ?? fixture.chunkDurationMs ?? 100;
  const tailPaddingMs = options.tailPaddingMs ?? fixture.tailPaddingMs ?? 1000;
  const idleTimeoutMs = options.idleTimeoutMs ?? 8000;
  const settleMs = options.settleMs ?? 500;
  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
  let lastActivityAt = Date.now();
  const markActive = () => {
    lastActivityAt = Date.now();
  };
  const session = await adapter.open({
    format: fixture.format,
    sessionId: `fixture-${fixture.id}`
  });
  const unsubscribers = [
    session.on("partial", (event) => {
      partialEvents.push(event);
      markActive();
    }),
    session.on("final", (event) => {
      finalEvents.push(event);
      markActive();
    }),
    session.on("endOfTurn", (event) => {
      endOfTurnEvents.push(event);
      markActive();
    }),
    session.on("error", (event) => {
      errorEvents.push(event);
      markActive();
    }),
    session.on("close", (event) => {
      closeEvents.push(event);
      markActive();
    })
  ];
  try {
    const bytesPerFrame = fixture.format.channels * 2;
    const framesPerMillisecond = fixture.format.sampleRateHz / 1000;
    // Round down to whole frames so no sample straddles a chunk boundary.
    const framesPerChunk = Math.max(1, Math.floor(framesPerMillisecond * chunkDurationMs));
    const bytesPerChunk = framesPerChunk * bytesPerFrame;
    const chunks = chunkAudio(fixture.audio, bytesPerChunk);
    const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
    for (const chunk of chunks) {
      await session.send(chunk);
      await Bun.sleep(realtimeDelayMs);
    }
    if (tailPaddingMs > 0) {
      const tailFrames = Math.max(1, Math.floor(framesPerMillisecond * tailPaddingMs));
      const tailBytes = tailFrames * bytesPerFrame;
      for (const chunk of chunkAudio(createSilence(tailBytes), bytesPerChunk)) {
        await session.send(chunk);
        await Bun.sleep(realtimeDelayMs);
      }
    }
    await waitForIdle(() => lastActivityAt, idleTimeoutMs, settleMs);
  } finally {
    try {
      // Close before unsubscribing so a "close" event can still be recorded.
      await session.close("fixture-complete");
    } finally {
      // Always detach listeners, even when closing the session throws.
      for (const unsubscribe of unsubscribers) {
        unsubscribe();
      }
    }
  }
  const finalText = mergeFinalTranscriptText(finalEvents.map((event) => event.transcript));
  return {
    accuracy: scoreTranscriptAccuracy(finalText, fixture.expectedText, options.transcriptThreshold),
    closeEvents,
    endOfTurnEvents,
    errorEvents,
    finalEvents,
    finalText,
    partialEvents,
    startedAt
  };
};
271
+
272
+ // src/testing/benchmark.ts
273
/**
 * Canonicalizes text for term matching: lower-cases it, replaces every
 * character that is not a letter, number, whitespace or apostrophe with a
 * space, then collapses whitespace and trims.
 */
var normalizeBenchmarkText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
/**
 * Measures how many expected terms appear (as substrings) in the transcript.
 *
 * Terms that normalize to an empty string (e.g. pure punctuation) carry no
 * information and are dropped up front, so matched + missing always equals the
 * number of scored terms and `recall` is 1 exactly when `allMatched` is true.
 *
 * @param actualText    Transcript text to search.
 * @param expectedTerms Terms that should appear; `undefined`/`null` means none.
 * @returns Normalized scored terms, the matched and missing subsets, the
 *          recall (1 when there is nothing to match) and `allMatched`.
 */
var scoreExpectedTerms = (actualText, expectedTerms) => {
  const normalizedActual = normalizeBenchmarkText(actualText);
  const normalizedExpectedTerms = (expectedTerms ?? []).map((entry) => normalizeBenchmarkText(entry)).filter((term) => term.length > 0);
  const matchedTerms = [];
  const missingTerms = [];
  for (const term of normalizedExpectedTerms) {
    if (normalizedActual.includes(term)) {
      matchedTerms.push(term);
    } else {
      missingTerms.push(term);
    }
  }
  const denominator = normalizedExpectedTerms.length;
  const recall = denominator > 0 ? matchedTerms.length / denominator : 1;
  return {
    allMatched: missingTerms.length === 0,
    expectedTerms: normalizedExpectedTerms,
    matchedTerms,
    missingTerms,
    recall
  };
};
289
/**
 * Arithmetic mean of the finite numbers in `values`; every other entry
 * (undefined, NaN, infinities, non-numbers) is ignored.
 *
 * @returns The mean, or `undefined` when no finite number is present.
 */
var average = (values) => {
  let total = 0;
  let count = 0;
  for (const value of values) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      continue;
    }
    total += value;
    count += 1;
  }
  return count === 0 ? undefined : total / count;
};
296
/**
 * Rounds a finite number to `digits` decimal places (default 4).
 *
 * @returns The rounded value, or `undefined` when `value` is not a finite
 *          number.
 */
var roundMetric = (value, digits = 4) => {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  if (!isUsable) {
    return undefined;
  }
  const scale = 10 ** digits;
  const scaled = Math.round(value * scale);
  return scaled / scale;
};
303
/**
 * Flattens a harness result into the per-fixture benchmark record.
 *
 * Latencies are measured from `result.startedAt` to the `receivedAt` of the
 * first event of each kind and are `undefined` when no such event arrived.
 * A fixture passes when it produced no error events, a non-blank final text,
 * and an accuracy within the threshold.
 *
 * @param fixture   Fixture that was run.
 * @param result    Output of the harness run for that fixture.
 * @param elapsedMs Wall-clock duration of the run.
 */
var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
  const { accuracy, closeEvents, endOfTurnEvents, errorEvents, finalEvents, finalText, partialEvents } = result;
  const latencyOfFirst = (events) => {
    const first = events[0];
    return first ? first.receivedAt - result.startedAt : undefined;
  };
  const timeToFirstPartialMs = latencyOfFirst(partialEvents);
  const timeToFirstFinalMs = latencyOfFirst(finalEvents);
  const timeToEndOfTurnMs = latencyOfFirst(endOfTurnEvents);
  const expectedTerms = scoreExpectedTerms(finalText, fixture.expectedTerms);
  const finalCount = finalEvents.length;
  const errorCount = errorEvents.length;
  return {
    accuracy,
    closeCount: closeEvents.length,
    difficulty: fixture.difficulty,
    elapsedMs,
    endOfTurnCount: endOfTurnEvents.length,
    errorCount,
    expectedTerms,
    finalCount,
    finalText,
    fixtureId: fixture.id,
    // Every final beyond the first counts as one fragmentation.
    fragmentationCount: Math.max(0, finalCount - 1),
    passes: errorCount === 0 && finalText.trim().length > 0 && accuracy.passesThreshold,
    partialCount: partialEvents.length,
    tags: fixture.tags ?? [],
    timeToEndOfTurnMs,
    timeToFirstFinalMs,
    timeToFirstPartialMs,
    title: fixture.title
  };
};
329
/**
 * Aggregates per-fixture benchmark results into one summary for an adapter.
 *
 * Averages are rounded (4 digits for rates, 2 for counts and durations).
 * Mandatory averages fall back to 0 when nothing can be averaged; the three
 * latency averages stay `undefined` in that case.
 *
 * @param adapterId Identifier copied into the summary.
 * @param fixtures  Per-fixture benchmark results.
 */
var summarizeSTTBenchmark = (adapterId, fixtures) => {
  const mean = (select) => average(fixtures.map(select));
  const countWhere = (predicate) => fixtures.filter(predicate).length;
  const fixtureCount = fixtures.length;
  const hasFixtures = fixtureCount > 0;
  const passCount = countWhere((fixture) => fixture.passes);
  const meanWordErrorRate = mean((fixture) => fixture.accuracy.wordErrorRate);
  return {
    adapterId,
    averageCharErrorRate: roundMetric(mean((fixture) => fixture.accuracy.charErrorRate)) ?? 0,
    averageElapsedMs: roundMetric(mean((fixture) => fixture.elapsedMs), 2) ?? 0,
    averageEndOfTurnCount: roundMetric(mean((fixture) => fixture.endOfTurnCount), 2) ?? 0,
    averageFinalCount: roundMetric(mean((fixture) => fixture.finalCount), 2) ?? 0,
    averageTermRecall: roundMetric(mean((fixture) => fixture.expectedTerms.recall)) ?? 0,
    averageTimeToEndOfTurnMs: roundMetric(mean((fixture) => fixture.timeToEndOfTurnMs), 2),
    averageTimeToFirstFinalMs: roundMetric(mean((fixture) => fixture.timeToFirstFinalMs), 2),
    averageTimeToFirstPartialMs: roundMetric(mean((fixture) => fixture.timeToFirstPartialMs), 2),
    averageWordErrorRate: roundMetric(meanWordErrorRate) ?? 0,
    fixtureCount,
    fixturesWithErrors: countWhere((fixture) => fixture.errorCount > 0),
    fixturesWithFragmentation: countWhere((fixture) => fixture.fragmentationCount > 0),
    passCount,
    passRate: hasFixtures ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
    totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
    wordAccuracyRate: hasFixtures ? roundMetric(1 - (meanWordErrorRate ?? 0)) ?? 0 : 0
  };
};
352
+ var compareSTTBenchmarks = (reports) => {
353
+ const entries = reports.map((report) => ({
354
+ adapterId: report.adapterId,
355
+ summary: report.summary
356
+ }));
357
+ const bestByMetric = (selectMetric, direction) => entries.reduce((best, entry) => {
358
+ if (!best) {
359
+ return entry;
360
+ }
361
+ const next = selectMetric(entry);
362
+ const current = selectMetric(best);
363
+ if (direction === "max" ? next > current : next < current) {
364
+ return entry;
365
+ }
366
+ return best;
367
+ }, undefined);
368
+ return {
369
+ bestByPassRate: bestByMetric((entry) => entry.summary.passRate, "max"),
370
+ bestByTermRecall: bestByMetric((entry) => entry.summary.averageTermRecall, "max"),
371
+ bestByWordErrorRate: bestByMetric((entry) => entry.summary.averageWordErrorRate, "min"),
372
+ entries
373
+ };
374
+ };
375
/**
 * Runs every fixture through the adapter, one after another, and builds a
 * benchmark report.
 *
 * Harness options for a fixture are the shared `options` overlaid with
 * `options.fixtureOptions[fixture.id]` when present.
 *
 * @returns `{ adapterId, fixtures, generatedAt, summary }`, where
 *          `generatedAt` is the completion timestamp in milliseconds.
 */
var runSTTAdapterBenchmark = async ({ adapter, adapterId, fixtures, options = {} }) => {
  const fixtureResults = [];
  for (const fixture of fixtures) {
    const began = Date.now();
    const overrides = options.fixtureOptions?.[fixture.id] ?? {};
    const harnessOptions = { ...options, ...overrides };
    const outcome = await runSTTAdapterFixture(adapter, fixture, harnessOptions);
    const elapsedMs = Date.now() - began;
    fixtureResults.push(toFixtureBenchmarkResult(fixture, outcome, elapsedMs));
  }
  return {
    adapterId,
    fixtures: fixtureResults,
    generatedAt: Date.now(),
    summary: summarizeSTTBenchmark(adapterId, fixtureResults)
  };
};
397
+ // src/testing/fixtures.ts
398
+ import { resolve } from "path";
399
+ var DEFAULT_AUDIO_FORMAT = {
400
+ channels: 1,
401
+ container: "raw",
402
+ encoding: "pcm_s16le",
403
+ sampleRateHz: 16000
404
+ };
405
+ var FIXTURE_DIR_CANDIDATES = [
406
+ resolve(import.meta.dir, "..", "..", "fixtures"),
407
+ resolve(import.meta.dir, "..", "..", "..", "fixtures"),
408
+ resolve(import.meta.dir, "..", "..", "..", "..", "fixtures")
409
+ ];
410
/**
 * Finds the bundled fixture directory: the first entry of
 * `FIXTURE_DIR_CANDIDATES` that contains a `manifest.json`. Candidates are
 * probed in order, one at a time.
 *
 * @throws {Error} When no candidate contains a manifest.
 */
var resolveFixtureDirectory = async () => {
  for (const directory of FIXTURE_DIR_CANDIDATES) {
    const manifestPath = resolve(directory, "manifest.json");
    const manifestExists = await Bun.file(manifestPath).exists();
    if (manifestExists) {
      return directory;
    }
  }
  throw new Error("Unable to locate the bundled voice test fixtures. Expected fixtures/manifest.json next to the package root.");
};
/** Public accessor for the resolved fixture directory. */
var getVoiceFixtureDirectory = async () => {
  return resolveFixtureDirectory();
};
419
/**
 * Loads every fixture listed in `manifest.json` together with its audio bytes.
 *
 * Audio files are read from the `pcm` subdirectory of the fixture directory,
 * and each entry's `format` is layered over `DEFAULT_AUDIO_FORMAT`.
 *
 * @param fixtureDirectory Directory containing `manifest.json`; when omitted
 *                         the bundled fixture directory is located.
 * @returns The manifest entries extended with `audio`, an absolute `audioPath`
 *          and a complete `format`.
 * @throws {TypeError} When the manifest is not a JSON array.
 */
var loadVoiceTestFixtures = async (fixtureDirectory) => {
  const resolvedFixtureDirectory = fixtureDirectory ?? await resolveFixtureDirectory();
  const manifestPath = resolve(resolvedFixtureDirectory, "manifest.json");
  const manifest = await Bun.file(manifestPath).json();
  // Fail with a clear message instead of an opaque "manifest.map is not a function".
  if (!Array.isArray(manifest)) {
    throw new TypeError(`Voice fixture manifest at ${manifestPath} must be a JSON array of fixture entries.`);
  }
  return await Promise.all(manifest.map(async (entry) => {
    const audioPath = resolve(resolvedFixtureDirectory, "pcm", entry.audioPath);
    const audio = new Uint8Array(await Bun.file(audioPath).arrayBuffer());
    return {
      ...entry,
      audio,
      audioPath,
      format: {
        ...DEFAULT_AUDIO_FORMAT,
        ...entry.format
      }
    };
  }));
};
437
+ export {
438
+ summarizeSTTBenchmark,
439
+ scoreTranscriptAccuracy,
440
+ runSTTAdapterFixture,
441
+ runSTTAdapterBenchmark,
442
+ mergeFinalTranscriptText,
443
+ loadVoiceTestFixtures,
444
+ getVoiceFixtureDirectory,
445
+ compareSTTBenchmarks
446
+ };
@@ -0,0 +1,22 @@
1
+ import { type VoiceTranscriptAccuracy } from './accuracy';
2
+ import type { STTAdapter, VoiceCloseEvent, VoiceEndOfTurnEvent, VoiceErrorEvent, VoiceFinalEvent, VoicePartialEvent } from '../types';
3
+ import type { VoiceTestFixture } from './fixtures';
4
/**
 * Tuning knobs for `runSTTAdapterFixture`. Every field is optional.
 *
 * - `chunkDurationMs`: audio duration per sent chunk; falls back to the
 *   fixture's own value, then 100.
 * - `idleTimeoutMs`: upper bound on the wait for the session to go quiet
 *   after all audio is sent; defaults to 8000.
 * - `settleMs`: quiet period that counts as idle; defaults to 500.
 * - `tailPaddingMs`: silence appended after the fixture audio; falls back to
 *   the fixture's own value, then 1000. No padding is sent unless it is
 *   greater than 0.
 * - `transcriptThreshold`: word-error-rate limit forwarded to
 *   `scoreTranscriptAccuracy`, which uses 0.35 when it is omitted.
 * - `waitForRealtimeMs`: delay between chunks when greater than 0; otherwise
 *   the delay equals `chunkDurationMs`.
 */
+ export type VoiceSTTAdapterHarnessOptions = {
5
+ chunkDurationMs?: number;
6
+ idleTimeoutMs?: number;
7
+ settleMs?: number;
8
+ tailPaddingMs?: number;
9
+ transcriptThreshold?: number;
10
+ waitForRealtimeMs?: number;
11
+ };
12
+ export type VoiceSTTAdapterHarnessResult = {
13
+ accuracy: VoiceTranscriptAccuracy;
14
+ closeEvents: VoiceCloseEvent[];
15
+ endOfTurnEvents: VoiceEndOfTurnEvent[];
16
+ errorEvents: VoiceErrorEvent[];
17
+ finalEvents: VoiceFinalEvent[];
18
+ finalText: string;
19
+ partialEvents: VoicePartialEvent[];
20
+ startedAt: number;
21
+ };
22
+ export declare const runSTTAdapterFixture: (adapter: STTAdapter, fixture: VoiceTestFixture, options?: VoiceSTTAdapterHarnessOptions) => Promise<VoiceSTTAdapterHarnessResult>;