@absolutejs/voice 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ import type { Transcript } from '../types';
2
/**
 * Metrics produced by `scoreTranscriptAccuracy` when comparing a transcript with its reference.
 * `actualText` and `expectedText` hold the normalized forms that were compared, the `*Distance`
 * fields are Levenshtein edit distances, and `passesThreshold` is `wordErrorRate <= threshold`.
 */
+ export type VoiceTranscriptAccuracy = {
3
+ actualText: string;
4
+ charDistance: number;
5
+ charErrorRate: number;
6
+ expectedText: string;
7
+ passesThreshold: boolean;
8
+ threshold: number;
9
+ wordDistance: number;
10
+ wordErrorRate: number;
11
+ };
12
/** Merges the text of the transcripts flagged `isFinal` into a single string; non-final transcripts are ignored. */
+ export declare const mergeFinalTranscriptText: (transcripts: Transcript[]) => string;
13
/** Scores `actualText` against `expectedText` by word and character edit distance; `threshold` defaults to 0.35 in the implementation. */
+ export declare const scoreTranscriptAccuracy: (actualText: string, expectedText: string, threshold?: number) => VoiceTranscriptAccuracy;
@@ -0,0 +1,76 @@
1
+ import type { STTAdapter } from '../types';
2
+ import { type VoiceSTTAdapterHarnessOptions, type VoiceSTTAdapterHarnessResult } from './stt';
3
+ import type { VoiceTestFixture } from './fixtures';
4
/**
 * Outcome of checking which expected terms appear in a transcript.
 * Terms are reported in normalized form; `recall` is the matched share of the expected terms.
 */
+ export type VoiceExpectedTermAccuracy = {
5
+ allMatched: boolean;
6
+ expectedTerms: string[];
7
+ matchedTerms: string[];
8
+ missingTerms: string[];
9
+ recall: number;
10
+ };
11
/**
 * Benchmark record for one fixture run.
 * The `*Count` fields are the numbers of events of each kind captured by the harness, and the
 * `timeTo*Ms` fields measure from the harness start to the first event of that kind (absent when
 * no such event arrived). `fragmentationCount` is the number of final events beyond the first.
 * `passes` requires no error events, a non-empty final text, and `accuracy.passesThreshold`.
 */
+ export type VoiceSTTBenchmarkFixtureResult = {
12
+ accuracy: VoiceSTTAdapterHarnessResult['accuracy'];
13
+ closeCount: number;
14
+ difficulty?: VoiceTestFixture['difficulty'];
15
+ elapsedMs: number;
16
+ endOfTurnCount: number;
17
+ errorCount: number;
18
+ expectedTerms: VoiceExpectedTermAccuracy;
19
+ finalCount: number;
20
+ finalText: string;
21
+ fixtureId: string;
22
+ fragmentationCount: number;
23
+ passes: boolean;
24
+ partialCount: number;
25
+ tags: string[];
26
+ timeToEndOfTurnMs?: number;
27
+ timeToFirstFinalMs?: number;
28
+ timeToFirstPartialMs?: number;
29
+ title: string;
30
+ };
31
/**
 * Aggregate statistics over all fixture results of one adapter.
 * Averages are rounded means over the fixtures; the optional `averageTimeTo*Ms` fields are
 * absent when no fixture reported that timing. `passRate` is `passCount / fixtureCount`, and
 * `wordAccuracyRate` is one minus the mean word error rate.
 */
+ export type VoiceSTTBenchmarkSummary = {
32
+ adapterId: string;
33
+ averageCharErrorRate: number;
34
+ averageElapsedMs: number;
35
+ averageEndOfTurnCount: number;
36
+ averageFinalCount: number;
37
+ averageTermRecall: number;
38
+ averageTimeToEndOfTurnMs?: number;
39
+ averageTimeToFirstFinalMs?: number;
40
+ averageTimeToFirstPartialMs?: number;
41
+ averageWordErrorRate: number;
42
+ fixtureCount: number;
43
+ fixturesWithErrors: number;
44
+ fixturesWithFragmentation: number;
45
+ passCount: number;
46
+ passRate: number;
47
+ totalErrorCount: number;
48
+ wordAccuracyRate: number;
49
+ };
50
/** Full benchmark output for one adapter: per-fixture results, their summary, and the `Date.now()` timestamp taken when the report was assembled. */
+ export type VoiceSTTBenchmarkReport = {
51
+ adapterId: string;
52
+ fixtures: VoiceSTTBenchmarkFixtureResult[];
53
+ generatedAt: number;
54
+ summary: VoiceSTTBenchmarkSummary;
55
+ };
56
/** One adapter's summary as it appears in a cross-adapter comparison. */
+ export type VoiceSTTBenchmarkComparisonEntry = {
57
+ adapterId: string;
58
+ summary: VoiceSTTBenchmarkSummary;
59
+ };
60
/**
 * Cross-adapter comparison. `entries` lists every compared adapter in input order; each `bestBy*`
 * field names the leading entry for that metric and is absent when there are no entries.
 */
+ export type VoiceSTTBenchmarkComparison = {
61
+ bestByPassRate?: VoiceSTTBenchmarkComparisonEntry;
62
+ bestByTermRecall?: VoiceSTTBenchmarkComparisonEntry;
63
+ bestByWordErrorRate?: VoiceSTTBenchmarkComparisonEntry;
64
+ entries: VoiceSTTBenchmarkComparisonEntry[];
65
+ };
66
/** Harness options for a benchmark run; `fixtureOptions` is keyed by fixture id and its values override the shared options for that fixture. */
+ export type VoiceSTTBenchmarkOptions = VoiceSTTAdapterHarnessOptions & {
67
+ fixtureOptions?: Record<string, Omit<VoiceSTTAdapterHarnessOptions, 'fixtureOptions'>>;
68
+ };
69
/** Aggregates per-fixture benchmark results into a summary labelled with `adapterId`. */
+ export declare const summarizeSTTBenchmark: (adapterId: string, fixtures: VoiceSTTBenchmarkFixtureResult[]) => VoiceSTTBenchmarkSummary;
70
/** Compares benchmark reports and picks the leader by pass rate, term recall, and word error rate. */
+ export declare const compareSTTBenchmarks: (reports: VoiceSTTBenchmarkReport[]) => VoiceSTTBenchmarkComparison;
71
/** Runs every fixture through the adapter, one after another, and resolves with the per-fixture results plus their summary. */
+ export declare const runSTTAdapterBenchmark: ({ adapter, adapterId, fixtures, options }: {
72
+ adapter: STTAdapter;
73
+ adapterId: string;
74
+ fixtures: VoiceTestFixture[];
75
+ options?: VoiceSTTBenchmarkOptions;
76
+ }) => Promise<VoiceSTTBenchmarkReport>;
@@ -0,0 +1,21 @@
1
+ import type { AudioFormat } from '../types';
2
/**
 * One entry of the fixture `manifest.json`.
 * `audioPath` is resolved against the `pcm` subdirectory of the fixture directory when fixtures
 * are loaded; `expectedText` is the reference transcript and `expectedTerms` the phrases whose
 * presence the benchmark checks.
 */
+ export type VoiceTestFixtureManifestEntry = {
3
+ id: string;
4
+ title: string;
5
+ audioPath: string;
6
+ expectedText: string;
7
+ expectedTerms?: string[];
8
+ chunkDurationMs?: number;
9
+ language?: string;
10
+ difficulty?: 'clean' | 'noisy' | 'challenging';
11
+ tags?: string[];
12
+ tailPaddingMs?: number;
13
+ format?: Partial<AudioFormat>;
14
+ };
15
/** A loaded fixture: the manifest entry plus the audio bytes, the resolved `audioPath`, and a complete `format` (manifest values layered over the loader's defaults). */
+ export type VoiceTestFixture = Omit<VoiceTestFixtureManifestEntry, 'audioPath'> & {
16
+ audio: Uint8Array;
17
+ audioPath: string;
18
+ format: AudioFormat;
19
+ };
20
/** Resolves the directory containing the bundled fixture `manifest.json`; rejects when none of the candidate locations has one. */
+ export declare const getVoiceFixtureDirectory: () => Promise<string>;
21
/** Loads every fixture listed in `manifest.json` of `fixtureDirectory` (or of the bundled fixture directory when omitted), including its audio bytes. */
+ export declare const loadVoiceTestFixtures: (fixtureDirectory?: string) => Promise<VoiceTestFixture[]>;
@@ -0,0 +1,4 @@
1
+ export * from './accuracy';
2
+ export * from './benchmark';
3
+ export * from './fixtures';
4
+ export * from './stt';
@@ -0,0 +1,446 @@
1
+ // @bun
2
// Runtime helper preamble: property definition shortcuts, private-member access checks,
// and the machinery that applies decorators to classes and class elements.
// NOTE(review): this looks like bundler-emitted boilerplate, and none of the module code
// visible after it references these helpers — confirm before hand-editing or removing.
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __name = (target, name) => {
6
+ Object.defineProperty(target, "name", {
7
+ value: name,
8
+ enumerable: false,
9
+ configurable: true
10
+ });
11
+ return target;
12
+ };
13
+ var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
14
+ var __typeError = (msg) => {
15
+ throw TypeError(msg);
16
+ };
17
+ var __defNormalProp = (obj, key, value) => (key in obj) ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
18
+ var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
19
+ var __privateIn = (member, obj) => Object(obj) !== obj ? __typeError('Cannot use the "in" operator on this value') : member.has(obj);
20
+ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
21
+ var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
22
+ var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
23
+ var __decoratorStart = (base) => [, , , __create(base?.[__knownSymbol("metadata")] ?? null)];
24
+ var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
25
+ var __expectFn = (fn) => fn !== undefined && typeof fn !== "function" ? __typeError("Function expected") : fn;
26
+ var __decoratorContext = (kind, name, done, metadata, fns) => ({
27
+ kind: __decoratorStrings[kind],
28
+ name,
29
+ metadata,
30
+ addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null))
31
+ });
32
+ var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
33
+ var __runInitializers = (array, flags, self, value) => {
34
+ for (var i = 0, fns = array[flags >> 1], n = fns && fns.length;i < n; i++)
35
+ flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
36
+ return value;
37
+ };
38
+ var __decorateElement = (array, flags, name, decorators, target, extra) => {
39
+ var fn, it, done, ctx, access, k = flags & 7, s = !!(flags & 8), p = !!(flags & 16);
40
+ var j = k > 3 ? array.length + 1 : k ? s ? 1 : 2 : 0, key = __decoratorStrings[k + 5];
41
+ var initializers = k > 3 && (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
42
+ var desc = k && (!p && !s && (target = target.prototype), k < 5 && (k > 3 || !p) && __getOwnPropDesc(k < 4 ? target : {
43
+ get [name]() {
44
+ return __privateGet(this, extra);
45
+ },
46
+ set [name](x) {
47
+ __privateSet(this, extra, x);
48
+ }
49
+ }, name));
50
+ k ? p && k < 4 && __name(extra, (k > 2 ? "set " : k > 1 ? "get " : "") + name) : __name(target, name);
51
+ for (var i = decorators.length - 1;i >= 0; i--) {
52
+ ctx = __decoratorContext(k, name, done = {}, array[3], extraInitializers);
53
+ if (k) {
54
+ ctx.static = s, ctx.private = p, access = ctx.access = { has: p ? (x) => __privateIn(target, x) : (x) => (name in x) };
55
+ if (k ^ 3)
56
+ access.get = p ? (x) => (k ^ 1 ? __privateGet : __privateMethod)(x, target, k ^ 4 ? extra : desc.get) : (x) => x[name];
57
+ if (k > 2)
58
+ access.set = p ? (x, y) => __privateSet(x, target, y, k ^ 4 ? extra : desc.set) : (x, y) => x[name] = y;
59
+ }
60
+ it = (0, decorators[i])(k ? k < 4 ? p ? extra : desc[key] : k > 4 ? undefined : { get: desc.get, set: desc.set } : target, ctx);
61
+ done._ = 1;
62
+ if (k ^ 4 || it === undefined)
63
+ __expectFn(it) && (k > 4 ? initializers.unshift(it) : k ? p ? extra = it : desc[key] = it : target = it);
64
+ else if (typeof it !== "object" || it === null)
65
+ __typeError("Object expected");
66
+ else
67
+ __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
68
+ }
69
+ return k || __decoratorMetadata(array, target), desc && __defProp(target, name, desc), p ? k ^ 4 ? extra : desc : target;
70
+ };
71
+
72
+ // src/turnDetection.ts
73
// Turn-detection defaults carried over from src/turnDetection.ts.
// NOTE(review): the names suggest a silence window in milliseconds and an RMS level threshold,
// but neither constant is referenced by the code visible in this bundle — confirm their use.
+ var DEFAULT_SILENCE_MS = 700;
74
+ var DEFAULT_SPEECH_THRESHOLD = 0.015;
75
/**
 * Returns a byte view over `audio` without copying.
 * An `ArrayBuffer` is wrapped whole; any other input is treated as a typed-array-like view and
 * re-exposed over the same buffer region (`byteOffset` / `byteLength` preserved).
 */
var toUint8Array = (audio) =>
  audio instanceof ArrayBuffer
    ? new Uint8Array(audio)
    : new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
81
+ var measureAudioLevel = (audio) => {
82
+ const bytes = toUint8Array(audio);
83
+ if (bytes.byteLength < 2) {
84
+ return 0;
85
+ }
86
+ const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
87
+ if (samples.length === 0) {
88
+ return 0;
89
+ }
90
+ let sumSquares = 0;
91
+ for (const sample of samples) {
92
+ const normalized = sample / 32768;
93
+ sumSquares += normalized * normalized;
94
+ }
95
+ return Math.sqrt(sumSquares / samples.length);
96
+ };
97
/** Strips leading/trailing whitespace and collapses every internal whitespace run to one space. */
var normalizeText = (value) => {
  const trimmed = value.trim();
  return trimmed.replace(/\s+/g, " ");
};
98
+ var mergeTranscriptTexts = (transcripts) => {
99
+ const merged = [];
100
+ for (const transcript of transcripts) {
101
+ const nextText = normalizeText(transcript.text);
102
+ if (!nextText) {
103
+ continue;
104
+ }
105
+ const previous = merged.at(-1);
106
+ if (!previous) {
107
+ merged.push(nextText);
108
+ continue;
109
+ }
110
+ if (nextText === previous || previous.includes(nextText)) {
111
+ continue;
112
+ }
113
+ if (nextText.includes(previous)) {
114
+ merged[merged.length - 1] = nextText;
115
+ continue;
116
+ }
117
+ merged.push(nextText);
118
+ }
119
+ return merged.join(" ").trim();
120
+ };
121
+ var buildTurnText = (transcripts, partialText) => {
122
+ const finalText = mergeTranscriptTexts(transcripts);
123
+ if (finalText) {
124
+ return finalText;
125
+ }
126
+ return normalizeText(partialText);
127
+ };
128
+
129
+ // src/testing/accuracy.ts
130
/**
 * Canonicalizes text for accuracy scoring: lowercases it, replaces every character that is not
 * a letter, digit, whitespace, or apostrophe with a space, then collapses whitespace and trims.
 */
var normalizeAccuracyText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const collapsed = withoutPunctuation.replace(/\s+/g, " ");
  return collapsed.trim();
};
131
/**
 * Levenshtein edit distance between two indexable sequences (strings or arrays), comparing
 * elements with `===`. Only two rows of the distance table are kept at a time.
 */
var levenshteinDistance = (left, right) => {
  const rowCount = left.length;
  const columnCount = right.length;
  if (rowCount === 0) {
    return columnCount;
  }
  if (columnCount === 0) {
    return rowCount;
  }
  // `above` is the completed row for the previous prefix of `left`; `active` is being filled.
  let above = Array.from({ length: columnCount + 1 }, (_, index) => index);
  let active = new Array(columnCount + 1).fill(0);
  for (let i = 1; i <= rowCount; i += 1) {
    active[0] = i;
    for (let j = 1; j <= columnCount; j += 1) {
      const insertion = active[j - 1] + 1;
      const deletion = above[j] + 1;
      const substitution = above[j - 1] + (left[i - 1] === right[j - 1] ? 0 : 1);
      active[j] = Math.min(insertion, deletion, substitution);
    }
    // The finished row becomes the reference for the next one; the old reference is reused.
    [above, active] = [active, above];
  }
  return above[columnCount];
};
155
+ var mergeFinalTranscriptText = (transcripts) => buildTurnText(transcripts.filter((transcript) => transcript.isFinal), "");
156
+ var scoreTranscriptAccuracy = (actualText, expectedText, threshold = 0.35) => {
157
+ const normalizedActual = normalizeAccuracyText(actualText);
158
+ const normalizedExpected = normalizeAccuracyText(expectedText);
159
+ const actualWords = normalizedActual ? normalizedActual.split(" ") : [];
160
+ const expectedWords = normalizedExpected ? normalizedExpected.split(" ") : [];
161
+ const wordDistance = levenshteinDistance(actualWords, expectedWords);
162
+ const charDistance = levenshteinDistance(Array.from(normalizedActual), Array.from(normalizedExpected));
163
+ const wordErrorRate = expectedWords.length > 0 ? wordDistance / expectedWords.length : 0;
164
+ const charErrorRate = normalizedExpected.length > 0 ? charDistance / normalizedExpected.length : 0;
165
+ return {
166
+ actualText: normalizedActual,
167
+ charDistance,
168
+ charErrorRate,
169
+ expectedText: normalizedExpected,
170
+ passesThreshold: wordErrorRate <= threshold,
171
+ threshold,
172
+ wordDistance,
173
+ wordErrorRate
174
+ };
175
+ };
176
+ // src/testing/stt.ts
177
+ var chunkAudio = (audio, bytesPerChunk) => {
178
+ const chunks = [];
179
+ for (let offset = 0;offset < audio.byteLength; offset += bytesPerChunk) {
180
+ chunks.push(audio.slice(offset, offset + bytesPerChunk));
181
+ }
182
+ return chunks;
183
+ };
184
/** Allocates `byteLength` zero-filled bytes, used as silent padding audio. */
var createSilence = (byteLength) => {
  const silence = new Uint8Array(byteLength);
  return silence;
};
185
/**
 * Polls until no activity has been reported for at least `settleMs`, or until `idleTimeoutMs`
 * has elapsed since the call, whichever comes first. Resolves without a value in both cases.
 *
 * @param readLastActivityAt Callback returning the timestamp (ms) of the latest activity.
 * @param idleTimeoutMs Upper bound on the total wait.
 * @param settleMs Quiet period that counts as idle; also caps the poll interval (max 50 ms).
 */
var waitForIdle = async (readLastActivityAt, idleTimeoutMs, settleMs) => {
  const deadline = Date.now() + idleTimeoutMs;
  const pollIntervalMs = Math.min(50, settleMs);
  while (Date.now() < deadline) {
    const quietForMs = Date.now() - readLastActivityAt();
    if (quietForMs >= settleMs) {
      return;
    }
    await Bun.sleep(pollIntervalMs);
  }
};
194
/**
 * Streams one fixture through an STT adapter session and collects everything it emits.
 *
 * The fixture audio is sent in chunks of `chunkDurationMs`, pausing between chunks, followed by
 * `tailPaddingMs` of silence. The harness then waits until the session has been quiet for
 * `settleMs` (bounded by `idleTimeoutMs`), closes the session, and scores the merged final text
 * against the fixture's expected text.
 *
 * Fixes over the straightforward implementation:
 *  - chunk and padding sizes are rounded down to whole sample frames (2 bytes per channel), so
 *    sample rates such as 22050 Hz no longer split a 16-bit sample across two chunks;
 *  - listeners are unsubscribed even when `session.close` rejects.
 *
 * @param adapter STT adapter whose `open` resolves to a session with `on`, `send`, and `close`.
 * @param fixture Loaded fixture (audio bytes, format, expected text, optional timing hints).
 * @param options Timing and threshold overrides; option values win over fixture values.
 * @returns Captured events per kind, the merged final text, its accuracy, and the start time.
 */
var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
  const startedAt = Date.now();
  const partialEvents = [];
  const finalEvents = [];
  const endOfTurnEvents = [];
  const errorEvents = [];
  const closeEvents = [];
  const chunkDurationMs = options.chunkDurationMs ?? fixture.chunkDurationMs ?? 100;
  const tailPaddingMs = options.tailPaddingMs ?? fixture.tailPaddingMs ?? 1000;
  const idleTimeoutMs = options.idleTimeoutMs ?? 8000;
  const settleMs = options.settleMs ?? 500;
  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
  let lastActivityAt = Date.now();
  const session = await adapter.open({
    format: fixture.format,
    sessionId: `fixture-${fixture.id}`
  });
  // Every event is stored in its bucket and refreshes the activity clock used by waitForIdle.
  const capture = (eventName, bucket) => session.on(eventName, (event) => {
    bucket.push(event);
    lastActivityAt = Date.now();
  });
  const unsubscribers = [
    capture("partial", partialEvents),
    capture("final", finalEvents),
    capture("endOfTurn", endOfTurnEvents),
    capture("error", errorEvents),
    capture("close", closeEvents)
  ];
  try {
    // The harness assumes 16-bit PCM: one frame is two bytes per channel.
    const bytesPerFrame = fixture.format.channels * 2;
    const bytesPerMillisecond = fixture.format.sampleRateHz * bytesPerFrame / 1000;
    const toFrameAlignedBytes = (durationMs) => {
      const wholeFrames = Math.floor(bytesPerMillisecond * durationMs / bytesPerFrame);
      return Math.max(bytesPerFrame, wholeFrames * bytesPerFrame);
    };
    const bytesPerChunk = toFrameAlignedBytes(chunkDurationMs);
    const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
    const sendPaced = async (audio) => {
      for (const chunk of chunkAudio(audio, bytesPerChunk)) {
        await session.send(chunk);
        await Bun.sleep(realtimeDelayMs);
      }
    };
    await sendPaced(fixture.audio);
    if (tailPaddingMs > 0) {
      await sendPaced(createSilence(toFrameAlignedBytes(tailPaddingMs)));
    }
    await waitForIdle(() => lastActivityAt, idleTimeoutMs, settleMs);
  } finally {
    try {
      // Close while still subscribed so the resulting close event is captured.
      await session.close("fixture-complete");
    } finally {
      for (const unsubscribe of unsubscribers) {
        unsubscribe();
      }
    }
  }
  const finalText = mergeFinalTranscriptText(finalEvents.map((event) => event.transcript));
  return {
    accuracy: scoreTranscriptAccuracy(finalText, fixture.expectedText, options.transcriptThreshold),
    closeEvents,
    endOfTurnEvents,
    errorEvents,
    finalEvents,
    finalText,
    partialEvents,
    startedAt
  };
};
271
+
272
+ // src/testing/benchmark.ts
273
/**
 * Canonicalizes text for term matching: lowercases it, replaces every character that is not a
 * letter, digit, whitespace, or apostrophe with a space, then collapses whitespace and trims.
 */
var normalizeBenchmarkText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const collapsed = withoutPunctuation.replace(/\s+/g, " ");
  return collapsed.trim();
};
274
+ var scoreExpectedTerms = (actualText, expectedTerms) => {
275
+ const normalizedActual = normalizeBenchmarkText(actualText);
276
+ const normalizedExpectedTerms = (expectedTerms ?? []).map((entry) => normalizeBenchmarkText(entry));
277
+ const matchedTerms = normalizedExpectedTerms.filter((term) => term.length > 0 && normalizedActual.includes(term));
278
+ const missingTerms = normalizedExpectedTerms.filter((term) => term.length > 0 && !matchedTerms.includes(term));
279
+ const denominator = normalizedExpectedTerms.length;
280
+ const recall = denominator > 0 ? matchedTerms.length / denominator : 1;
281
+ return {
282
+ allMatched: missingTerms.length === 0,
283
+ expectedTerms: normalizedExpectedTerms,
284
+ matchedTerms,
285
+ missingTerms,
286
+ recall
287
+ };
288
+ };
289
/** Arithmetic mean of the finite numbers in `values`; other entries are ignored. Returns `undefined` when no finite number is present. */
var average = (values) => {
  let total = 0;
  let count = 0;
  for (const value of values) {
    if (typeof value === "number" && Number.isFinite(value)) {
      total += value;
      count += 1;
    }
  }
  return count === 0 ? undefined : total / count;
};
296
/** Rounds a finite number to `digits` decimal places (default 4); returns `undefined` for anything else. */
var roundMetric = (value, digits = 4) => {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  if (!isUsable) {
    return undefined;
  }
  const scale = Math.pow(10, digits);
  return Math.round(value * scale) / scale;
};
303
+ var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
304
+ const timeToFirstPartialMs = result.partialEvents[0] ? result.partialEvents[0].receivedAt - result.startedAt : undefined;
305
+ const timeToFirstFinalMs = result.finalEvents[0] ? result.finalEvents[0].receivedAt - result.startedAt : undefined;
306
+ const timeToEndOfTurnMs = result.endOfTurnEvents[0] ? result.endOfTurnEvents[0].receivedAt - result.startedAt : undefined;
307
+ const expectedTerms = scoreExpectedTerms(result.finalText, fixture.expectedTerms);
308
+ return {
309
+ accuracy: result.accuracy,
310
+ closeCount: result.closeEvents.length,
311
+ difficulty: fixture.difficulty,
312
+ elapsedMs,
313
+ endOfTurnCount: result.endOfTurnEvents.length,
314
+ errorCount: result.errorEvents.length,
315
+ expectedTerms,
316
+ finalCount: result.finalEvents.length,
317
+ finalText: result.finalText,
318
+ fixtureId: fixture.id,
319
+ fragmentationCount: Math.max(0, result.finalEvents.length - 1),
320
+ passes: result.errorEvents.length === 0 && result.finalText.trim().length > 0 && result.accuracy.passesThreshold,
321
+ partialCount: result.partialEvents.length,
322
+ tags: fixture.tags ?? [],
323
+ timeToEndOfTurnMs,
324
+ timeToFirstFinalMs,
325
+ timeToFirstPartialMs,
326
+ title: fixture.title
327
+ };
328
+ };
329
+ var summarizeSTTBenchmark = (adapterId, fixtures) => {
330
+ const fixtureCount = fixtures.length;
331
+ const passCount = fixtures.filter((fixture) => fixture.passes).length;
332
+ return {
333
+ adapterId,
334
+ averageCharErrorRate: roundMetric(average(fixtures.map((fixture) => fixture.accuracy.charErrorRate))) ?? 0,
335
+ averageElapsedMs: roundMetric(average(fixtures.map((fixture) => fixture.elapsedMs)), 2) ?? 0,
336
+ averageEndOfTurnCount: roundMetric(average(fixtures.map((fixture) => fixture.endOfTurnCount)), 2) ?? 0,
337
+ averageFinalCount: roundMetric(average(fixtures.map((fixture) => fixture.finalCount)), 2) ?? 0,
338
+ averageTermRecall: roundMetric(average(fixtures.map((fixture) => fixture.expectedTerms.recall))) ?? 0,
339
+ averageTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToEndOfTurnMs)), 2),
340
+ averageTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstFinalMs)), 2),
341
+ averageTimeToFirstPartialMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstPartialMs)), 2),
342
+ averageWordErrorRate: roundMetric(average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate))) ?? 0,
343
+ fixtureCount,
344
+ fixturesWithErrors: fixtures.filter((fixture) => fixture.errorCount > 0).length,
345
+ fixturesWithFragmentation: fixtures.filter((fixture) => fixture.fragmentationCount > 0).length,
346
+ passCount,
347
+ passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
348
+ totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
349
+ wordAccuracyRate: fixtureCount > 0 ? roundMetric(1 - (average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate)) ?? 0)) ?? 0 : 0
350
+ };
351
+ };
352
+ var compareSTTBenchmarks = (reports) => {
353
+ const entries = reports.map((report) => ({
354
+ adapterId: report.adapterId,
355
+ summary: report.summary
356
+ }));
357
+ const bestByMetric = (selectMetric, direction) => entries.reduce((best, entry) => {
358
+ if (!best) {
359
+ return entry;
360
+ }
361
+ const next = selectMetric(entry);
362
+ const current = selectMetric(best);
363
+ if (direction === "max" ? next > current : next < current) {
364
+ return entry;
365
+ }
366
+ return best;
367
+ }, undefined);
368
+ return {
369
+ bestByPassRate: bestByMetric((entry) => entry.summary.passRate, "max"),
370
+ bestByTermRecall: bestByMetric((entry) => entry.summary.averageTermRecall, "max"),
371
+ bestByWordErrorRate: bestByMetric((entry) => entry.summary.averageWordErrorRate, "min"),
372
+ entries
373
+ };
374
+ };
375
/**
 * Benchmarks an adapter against a list of fixtures, one fixture at a time.
 * Shared `options` apply to every fixture; `options.fixtureOptions[fixture.id]`, when present,
 * overrides them for that fixture. Each fixture's wall-clock duration is recorded, and the
 * collected results are summarized into the returned report.
 */
var runSTTAdapterBenchmark = async ({ adapter, adapterId, fixtures, options = {} }) => {
  const fixtureResults = [];
  for (const fixture of fixtures) {
    const beganAt = Date.now();
    const harnessOptions = {
      ...options,
      ...(options.fixtureOptions?.[fixture.id] ?? {})
    };
    const harnessResult = await runSTTAdapterFixture(adapter, fixture, harnessOptions);
    const elapsedMs = Date.now() - beganAt;
    fixtureResults.push(toFixtureBenchmarkResult(fixture, harnessResult, elapsedMs));
  }
  return {
    adapterId,
    fixtures: fixtureResults,
    generatedAt: Date.now(),
    summary: summarizeSTTBenchmark(adapterId, fixtureResults)
  };
};
397
+ // src/testing/fixtures.ts
398
+ import { resolve } from "path";
399
// Baseline audio format for loaded fixtures; any `format` fields given in a manifest entry
// are spread over these values when the fixture is loaded.
+ var DEFAULT_AUDIO_FORMAT = {
400
+ channels: 1,
401
+ container: "raw",
402
+ encoding: "pcm_s16le",
403
+ sampleRateHz: 16000
404
+ };
405
// Directories probed, in order, for a fixtures/manifest.json. Each candidate climbs one level
// further up from this module's directory (two, three, then four levels).
+ var FIXTURE_DIR_CANDIDATES = [
406
+ resolve(import.meta.dir, "..", "..", "fixtures"),
407
+ resolve(import.meta.dir, "..", "..", "..", "fixtures"),
408
+ resolve(import.meta.dir, "..", "..", "..", "..", "fixtures")
409
+ ];
410
/**
 * Finds the first candidate directory that contains a `manifest.json`.
 * Candidates are checked one at a time, in list order.
 * @throws {Error} When no candidate directory has a manifest.
 */
var resolveFixtureDirectory = async () => {
  for (const directory of FIXTURE_DIR_CANDIDATES) {
    const manifestPath = resolve(directory, "manifest.json");
    const manifestExists = await Bun.file(manifestPath).exists();
    if (manifestExists) {
      return directory;
    }
  }
  throw new Error("Unable to locate the bundled voice test fixtures. Expected fixtures/manifest.json next to the package root.");
};
418
/** Public accessor for the bundled fixture directory; delegates to `resolveFixtureDirectory`. */
var getVoiceFixtureDirectory = async () => {
  return resolveFixtureDirectory();
};
419
/**
 * Loads every fixture described by `manifest.json` in the fixture directory.
 *
 * Audio files are read from the `pcm` subdirectory, all in parallel. Each returned fixture is
 * the manifest entry plus the audio bytes, the resolved audio path, and a complete format
 * (entry values layered over the default format).
 *
 * @param fixtureDirectory Directory containing `manifest.json`; defaults to the bundled one.
 * @returns The loaded fixtures, in manifest order.
 * @throws {TypeError} When the manifest is valid JSON but not an array, reported with the
 *         manifest path rather than as an opaque "map is not a function".
 */
var loadVoiceTestFixtures = async (fixtureDirectory) => {
  const resolvedFixtureDirectory = fixtureDirectory ?? await resolveFixtureDirectory();
  const manifestPath = resolve(resolvedFixtureDirectory, "manifest.json");
  const manifest = await Bun.file(manifestPath).json();
  if (!Array.isArray(manifest)) {
    throw new TypeError(`Voice fixture manifest at ${manifestPath} must be a JSON array of fixture entries.`);
  }
  return await Promise.all(manifest.map(async (entry) => {
    const audioPath = resolve(resolvedFixtureDirectory, "pcm", entry.audioPath);
    const audio = new Uint8Array(await Bun.file(audioPath).arrayBuffer());
    return {
      ...entry,
      audio,
      audioPath,
      format: {
        ...DEFAULT_AUDIO_FORMAT,
        ...entry.format
      }
    };
  }));
};
437
+ export {
438
+ summarizeSTTBenchmark,
439
+ scoreTranscriptAccuracy,
440
+ runSTTAdapterFixture,
441
+ runSTTAdapterBenchmark,
442
+ mergeFinalTranscriptText,
443
+ loadVoiceTestFixtures,
444
+ getVoiceFixtureDirectory,
445
+ compareSTTBenchmarks
446
+ };
@@ -0,0 +1,22 @@
1
+ import { type VoiceTranscriptAccuracy } from './accuracy';
2
+ import type { STTAdapter, VoiceCloseEvent, VoiceEndOfTurnEvent, VoiceErrorEvent, VoiceFinalEvent, VoicePartialEvent } from '../types';
3
+ import type { VoiceTestFixture } from './fixtures';
4
/**
 * Tuning knobs for `runSTTAdapterFixture`. Defaults applied by the implementation:
 * `chunkDurationMs` and `tailPaddingMs` fall back to the fixture's own values, then 100 and 1000;
 * `idleTimeoutMs` is 8000; `settleMs` is 500; `transcriptThreshold` is the word-error-rate
 * threshold handed to the accuracy scorer; `waitForRealtimeMs` is the pause between chunks and
 * falls back to `chunkDurationMs` when it is not greater than zero.
 */
+ export type VoiceSTTAdapterHarnessOptions = {
5
+ chunkDurationMs?: number;
6
+ idleTimeoutMs?: number;
7
+ settleMs?: number;
8
+ tailPaddingMs?: number;
9
+ transcriptThreshold?: number;
10
+ waitForRealtimeMs?: number;
11
+ };
12
/**
 * Everything captured while running one fixture: the events received per kind, the merged text
 * of the final transcripts, its accuracy against the fixture's expected text, and the
 * `Date.now()` timestamp taken when the run started.
 */
+ export type VoiceSTTAdapterHarnessResult = {
13
+ accuracy: VoiceTranscriptAccuracy;
14
+ closeEvents: VoiceCloseEvent[];
15
+ endOfTurnEvents: VoiceEndOfTurnEvent[];
16
+ errorEvents: VoiceErrorEvent[];
17
+ finalEvents: VoiceFinalEvent[];
18
+ finalText: string;
19
+ partialEvents: VoicePartialEvent[];
20
+ startedAt: number;
21
+ };
22
/** Streams one fixture's audio through an adapter session in paced chunks and resolves with the captured events and accuracy. */
+ export declare const runSTTAdapterFixture: (adapter: STTAdapter, fixture: VoiceTestFixture, options?: VoiceSTTAdapterHarnessOptions) => Promise<VoiceSTTAdapterHarnessResult>;
@@ -0,0 +1,32 @@
1
+ [
2
+ {
3
+ "id": "quietly-alone-clean",
4
+ "title": "Short clean utterance",
5
+ "audioPath": "quietly-alone-clean.pcm",
6
+ "expectedText": "GO QUIETLY ALONE NO HARM WILL BEFALL YOU",
7
+ "expectedTerms": ["quietly alone", "no harm"],
8
+ "chunkDurationMs": 100,
9
+ "difficulty": "clean",
10
+ "tags": ["clean", "short", "librispeech"]
11
+ },
12
+ {
13
+ "id": "traveled-back-route-clean",
14
+ "title": "Long clean utterance",
15
+ "audioPath": "traveled-back-route-clean.pcm",
16
+ "expectedText": "WE PASSED AROUND ATLANTA CROSSED THE CHATTAHOOCHEE AND TRAVELED BACK OVER THE SAME ROUTE ON WHICH WE HAD MADE THE ARDUOUS CAMPAIGN UNDER JOE JOHNSTON",
17
+ "expectedTerms": ["atlanta", "chattahoochee", "joe johnston"],
18
+ "chunkDurationMs": 100,
19
+ "difficulty": "clean",
20
+ "tags": ["clean", "long", "librispeech"]
21
+ },
22
+ {
23
+ "id": "rainstorms-noisy",
24
+ "title": "Noisy utterance with synthetic pink noise",
25
+ "audioPath": "rainstorms-noisy.pcm",
26
+ "expectedText": "SLIGHT RAINSTORMS ARE LIKELY TO BE ENCOUNTERED IN A TRIP ROUND THE MOUNTAIN BUT ONE MAY EASILY FIND SHELTER BENEATH WELL THATCHED TREES THAT SHED THE RAIN LIKE A ROOF",
27
+ "expectedTerms": ["rainstorms", "thatched trees"],
28
+ "chunkDurationMs": 100,
29
+ "difficulty": "noisy",
30
+ "tags": ["noisy", "long", "synthetic-noise", "librispeech"]
31
+ }
32
+ ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.17",
3
+ "version": "0.0.18",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",
@@ -8,6 +8,7 @@
8
8
  },
9
9
  "files": [
10
10
  "dist",
11
+ "fixtures",
11
12
  "README.md"
12
13
  ],
13
14
  "main": "./dist/index.js",
@@ -15,11 +16,19 @@
15
16
  "license": "CC BY-NC 4.0",
16
17
  "author": "Alex Kahn",
17
18
  "scripts": {
18
- "build": "rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
19
+ "bench:assemblyai": "bun run ./scripts/benchmark-stt.ts assemblyai",
20
+ "bench:deepgram": "bun run ./scripts/benchmark-stt.ts deepgram",
21
+ "bench:stt": "bun run ./scripts/benchmark-stt.ts all",
22
+ "build": "rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
19
23
  "format": "prettier --write \"./**/*.{js,jsx,ts,tsx,json,md}\"",
20
24
  "lint": "eslint ./src",
21
25
  "release": "bun run format && bun run build && bun publish",
22
- "test": "echo \"Error: no test specified\" && exit 1",
26
+ "test": "bun test ./test/*.test.ts",
27
+ "test:adapters": "bun test ./test/live/*.test.ts",
28
+ "test:assemblyai": "bun test ./test/live/assemblyai.live.test.ts",
29
+ "test:deepgram": "bun test ./test/live/deepgram.live.test.ts",
30
+ "test:elevenlabs": "bun test ./test/live/elevenlabs.live.test.ts",
31
+ "test:openai": "bun test ./test/live/openai.live.test.ts",
23
32
  "typecheck": "bun run tsc --noEmit"
24
33
  },
25
34
  "exports": {
@@ -49,10 +58,17 @@
49
58
  "./angular": {
50
59
  "import": "./dist/angular/index.js",
51
60
  "types": "./dist/angular/index.d.ts"
61
+ },
62
+ "./testing": {
63
+ "import": "./dist/testing/index.js",
64
+ "types": "./dist/testing/index.d.ts"
52
65
  }
53
66
  },
54
67
  "typesVersions": {
55
68
  "*": {
69
+ "testing": [
70
+ "dist/testing/index.d.ts"
71
+ ],
56
72
  "client": [
57
73
  "dist/client/index.d.ts"
58
74
  ],