npm - @absolutejs/voice - Versions diffs - 0.0.16 → 0.0.18 - Mend

@absolutejs/voice 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/client/htmxBootstrap.d.ts +1 -0
package/dist/client/htmxBootstrap.js +888 -0
package/dist/index.js +52 -1
package/dist/plugin.d.ts +12 -0
package/dist/testing/accuracy.d.ts +13 -0
package/dist/testing/benchmark.d.ts +76 -0
package/dist/testing/fixtures.d.ts +21 -0
package/dist/testing/index.d.ts +4 -0
package/dist/testing/index.js +446 -0
package/dist/testing/stt.d.ts +22 -0
package/dist/types.d.ts +1 -0
package/fixtures/manifest.json +32 -0
package/fixtures/pcm/quietly-alone-clean.pcm +0 -0
package/fixtures/pcm/rainstorms-noisy.pcm +0 -0
package/fixtures/pcm/traveled-back-route-clean.pcm +0 -0
package/package.json +19 -3

package/dist/index.js CHANGED Viewed

@@ -71,6 +71,7 @@ var __decorateElement = (array, flags, name, decorators, target, extra) => {
 // src/plugin.ts
 import { Elysia } from "elysia";
+import { resolve } from "path";
 // src/htmx.ts
 var DEFAULT_HTMX_TARGETS = {
@@ -680,6 +681,51 @@ var createVoiceSession = (options) => {
 };
 // src/plugin.ts
// Candidate paths for the prebuilt HTMX bootstrap bundle, resolved against
// this module's directory (import.meta.dir). loadHTMXBootstrap tries them in
// order and returns the text of the first file that exists.
+var HTMX_BOOTSTRAP_DIST_CANDIDATES = [
+  resolve(import.meta.dir, "client", "htmxBootstrap.js"),
+  resolve(import.meta.dir, "..", "dist", "client", "htmxBootstrap.js")
+];
// Candidate paths for the TypeScript source of the HTMX bootstrap client,
// resolved against this module's directory. loadHTMXBootstrap only consults
// them when none of the prebuilt bundle candidates exists, and then bundles
// the first one found with Bun.build.
+var HTMX_BOOTSTRAP_SOURCE_CANDIDATES = [
+  resolve(import.meta.dir, "client", "htmxBootstrap.ts"),
+  resolve(import.meta.dir, "..", "src", "client", "htmxBootstrap.ts")
+];
// Loads the JavaScript text returned by the HTMX bootstrap route.
// The IIFE creates a private `cached` slot and returns the loader function.
// Returns: a promise for the bundle source as a string.
// Rejects: when bundling the source fails or produces no output, or when no
// candidate file exists at all.
+var loadHTMXBootstrap = (() => {
   // Holds the promise of the first load so the lookup/build runs only once.
+  let cached = null;
+  return () => {
+    if (cached) {
+      return cached;
+    }
     // NOTE(review): the promise is stored before it settles and is never
     // cleared, so a rejection is memoized too; every later call will get the
     // same failure. Confirm whether a retry after failure is wanted.
+    cached = (async () => {
       // Preferred path: serve an already-built bundle from disk.
+      for (const candidate of HTMX_BOOTSTRAP_DIST_CANDIDATES) {
+        const asset = Bun.file(candidate);
+        if (await asset.exists()) {
+          return await asset.text();
+        }
+      }
       // Fallback: bundle the TypeScript source for the browser on the fly.
+      for (const candidate of HTMX_BOOTSTRAP_SOURCE_CANDIDATES) {
+        const asset = Bun.file(candidate);
+        if (!await asset.exists()) {
+          continue;
+        }
+        const build = await Bun.build({
+          entrypoints: [candidate],
+          format: "esm",
+          minify: true,
+          target: "browser"
+        });
         // Surface the build log messages in the thrown error when available.
+        if (!build.success || build.outputs.length === 0) {
+          const log = build.logs.map((entry) => entry.message).join(`
+`);
+          throw new Error(`Failed to build the voice HTMX bootstrap bundle.${log ? `
+${log}` : ""}`);
+        }
         // Only the first build output is served.
+        return await build.outputs[0].text();
+      }
+      throw new Error("Unable to locate the voice HTMX bootstrap client.");
+    })();
+    return cached;
+  };
+})();
 // Returns true when `value` is a non-null object for which ArrayBuffer.isView
 // reports true (a typed array or DataView).
 var isArrayBufferView = (value) => typeof value === "object" && value !== null && ArrayBuffer.isView(value);
 var isVoiceClientMessage = (value) => {
   if (!value || typeof value !== "object" || !("type" in value)) {
@@ -755,6 +801,7 @@ var voice = (config) => {
   const onTurn = normalizeOnTurn(config.onTurn);
   const htmxOptions = config.htmx && typeof config.htmx === "object" ? config.htmx : undefined;
   const htmxRoute = htmxOptions?.route ?? `${config.path}/htmx/session`;
+  const htmxBootstrapRoute = htmxOptions?.bootstrapRoute ?? `${config.path}/htmx/bootstrap.js`;
   const htmxRenderers = resolveVoiceHTMXRenderers(config.htmx && config.htmx !== true ? config.htmx : undefined);
   const htmxTargets = resolveVoiceHTMXTargets(htmxOptions?.targets);
   const htmxRoutes = () => {
@@ -778,7 +825,11 @@ var voice = (config) => {
       }, htmxRenderers, htmxTargets), {
         headers: { "Content-Type": "text/html; charset=utf-8" }
       });
-    });
+    }).get(htmxBootstrapRoute, async () => new Response(await loadHTMXBootstrap(), {
+      headers: {
+        "Content-Type": "application/javascript; charset=utf-8"
+      }
+    }));
   };
   return new Elysia({ name: "absolutejs-voice" }).ws(config.path, {
     close: async (ws, code, reason) => {

package/dist/plugin.d.ts CHANGED Viewed

@@ -47,6 +47,18 @@ export declare const voice: <TContext = unknown, TSession extends VoiceSessionRe
             };
         };
     };
+} & {
+    [x: string]: {
+        get: {
+            body: unknown;
+            params: {};
+            query: unknown;
+            headers: unknown;
+            response: {
+                200: Response;
+            };
+        };
+    };
 }), {
     derive: {};
     resolve: {};

package/dist/testing/accuracy.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import type { Transcript } from '../types';
// Result of comparing a recognized transcript with its expected text, in the
// shape returned by scoreTranscriptAccuracy.
+export type VoiceTranscriptAccuracy = {
     // Recognized text after normalization (lowercased, punctuation replaced by
     // spaces, whitespace collapsed and trimmed).
+    actualText: string;
     // Levenshtein distance between the normalized texts, character by character.
+    charDistance: number;
     // charDistance divided by the normalized expected length; 0 when the expected text is empty.
+    charErrorRate: number;
     // Expected text after the same normalization.
+    expectedText: string;
     // True when wordErrorRate is less than or equal to threshold.
+    passesThreshold: boolean;
     // Word-error-rate limit that was applied.
+    threshold: number;
     // Levenshtein distance between the space-separated word lists.
+    wordDistance: number;
     // wordDistance divided by the expected word count; 0 when there are no expected words.
+    wordErrorRate: number;
+};
// Merges the text of the transcripts flagged `isFinal` into a single string;
// non-final transcripts are ignored.
+export declare const mergeFinalTranscriptText: (transcripts: Transcript[]) => string;
// Scores `actualText` against `expectedText` using word- and character-level
// edit distance. `threshold` is the maximum word error rate that still passes;
// the bundled implementation defaults it to 0.35.
+export declare const scoreTranscriptAccuracy: (actualText: string, expectedText: string, threshold?: number) => VoiceTranscriptAccuracy;

package/dist/testing/benchmark.d.ts ADDED Viewed

@@ -0,0 +1,76 @@
+import type { STTAdapter } from '../types';
+import { type VoiceSTTAdapterHarnessOptions, type VoiceSTTAdapterHarnessResult } from './stt';
+import type { VoiceTestFixture } from './fixtures';
// Outcome of checking which expected terms occur in a final transcript.
+export type VoiceExpectedTermAccuracy = {
     // True when missingTerms is empty.
+    allMatched: boolean;
     // The expected terms after text normalization.
+    expectedTerms: string[];
     // Non-empty normalized terms found as substrings of the normalized transcript.
+    matchedTerms: string[];
     // Non-empty normalized terms that were not found.
+    missingTerms: string[];
     // matchedTerms.length divided by expectedTerms.length; 1 when no terms were expected.
+    recall: number;
+};
// Per-fixture benchmark record derived from one harness run.
+export type VoiceSTTBenchmarkFixtureResult = {
     // Transcript accuracy reported by the harness for this fixture.
+    accuracy: VoiceSTTAdapterHarnessResult['accuracy'];
     // Number of "close" events observed.
+    closeCount: number;
     // Difficulty label copied from the fixture, if it has one.
+    difficulty?: VoiceTestFixture['difficulty'];
     // Wall-clock duration of the fixture run in milliseconds.
+    elapsedMs: number;
     // Number of "endOfTurn" events observed.
+    endOfTurnCount: number;
     // Number of "error" events observed.
+    errorCount: number;
     // Expected-term matching result for the final text.
+    expectedTerms: VoiceExpectedTermAccuracy;
     // Number of "final" events observed.
+    finalCount: number;
     // Merged text of the final transcripts.
+    finalText: string;
     // Identifier of the fixture that produced this result.
+    fixtureId: string;
     // Final events beyond the first one: max(0, finalCount - 1).
+    fragmentationCount: number;
     // True when there were no errors, the final text is non-empty and the accuracy threshold passed.
+    passes: boolean;
     // Number of "partial" events observed.
+    partialCount: number;
     // Tags copied from the fixture; an empty array when the fixture has none.
+    tags: string[];
     // Milliseconds from harness start to the first event of each kind;
     // left undefined when no such event arrived.
+    timeToEndOfTurnMs?: number;
+    timeToFirstFinalMs?: number;
+    timeToFirstPartialMs?: number;
     // Title copied from the fixture.
+    title: string;
+};
// Aggregate statistics over all fixture results of one adapter, as produced
// by summarizeSTTBenchmark. Averages are rounded; those that are not optional
// fall back to 0 when no value is available.
+export type VoiceSTTBenchmarkSummary = {
     // Identifier of the adapter the summary belongs to.
+    adapterId: string;
+    averageCharErrorRate: number;
+    averageElapsedMs: number;
+    averageEndOfTurnCount: number;
+    averageFinalCount: number;
+    averageTermRecall: number;
     // Timing averages only include fixtures that reported the value and are
     // undefined when none did.
+    averageTimeToEndOfTurnMs?: number;
+    averageTimeToFirstFinalMs?: number;
+    averageTimeToFirstPartialMs?: number;
+    averageWordErrorRate: number;
     // Number of fixture results summarized.
+    fixtureCount: number;
     // Fixtures with at least one error event.
+    fixturesWithErrors: number;
     // Fixtures with a fragmentationCount above zero.
+    fixturesWithFragmentation: number;
     // Fixtures whose `passes` flag is true.
+    passCount: number;
     // passCount divided by fixtureCount; 0 when there are no fixtures.
+    passRate: number;
     // Sum of errorCount across all fixtures.
+    totalErrorCount: number;
     // 1 minus the average word error rate; 0 when there are no fixtures.
+    wordAccuracyRate: number;
+};
// Complete benchmark output for one adapter, as returned by runSTTAdapterBenchmark.
+export type VoiceSTTBenchmarkReport = {
+    adapterId: string;
     // One result per fixture, in the order the fixtures were run.
+    fixtures: VoiceSTTBenchmarkFixtureResult[];
     // Date.now() value taken when the report was assembled.
+    generatedAt: number;
+    summary: VoiceSTTBenchmarkSummary;
+};
// One adapter's summary as it appears in a benchmark comparison.
+export type VoiceSTTBenchmarkComparisonEntry = {
+    adapterId: string;
+    summary: VoiceSTTBenchmarkSummary;
+};
// Side-by-side comparison of several adapters' summaries, as returned by
// compareSTTBenchmarks. The `bestBy*` fields are absent when no reports were given.
+export type VoiceSTTBenchmarkComparison = {
     // Entry with the highest passRate.
+    bestByPassRate?: VoiceSTTBenchmarkComparisonEntry;
     // Entry with the highest averageTermRecall.
+    bestByTermRecall?: VoiceSTTBenchmarkComparisonEntry;
     // Entry with the lowest averageWordErrorRate.
+    bestByWordErrorRate?: VoiceSTTBenchmarkComparisonEntry;
     // All entries, in the order of the input reports.
+    entries: VoiceSTTBenchmarkComparisonEntry[];
+};
// Harness options shared by every fixture in a benchmark run, plus optional
// per-fixture overrides.
+export type VoiceSTTBenchmarkOptions = VoiceSTTAdapterHarnessOptions & {
     // Overrides keyed by fixture id; the bundled runner spreads them over the shared options.
+    fixtureOptions?: Record<string, Omit<VoiceSTTAdapterHarnessOptions, 'fixtureOptions'>>;
+};
// Aggregates per-fixture results into a summary for the given adapter id.
+export declare const summarizeSTTBenchmark: (adapterId: string, fixtures: VoiceSTTBenchmarkFixtureResult[]) => VoiceSTTBenchmarkSummary;
// Collects the summaries of several reports and picks the best entry by pass
// rate, by term recall and by word error rate.
+export declare const compareSTTBenchmarks: (reports: VoiceSTTBenchmarkReport[]) => VoiceSTTBenchmarkComparison;
// Runs every fixture through the adapter and resolves to a report containing
// the per-fixture results and their summary.
+export declare const runSTTAdapterBenchmark: ({ adapter, adapterId, fixtures, options }: {
     // Speech-to-text adapter under test.
+    adapter: STTAdapter;
     // Label recorded in the report and its summary.
+    adapterId: string;
     // Fixtures to run.
+    fixtures: VoiceTestFixture[];
     // Shared harness options and optional per-fixture overrides.
+    options?: VoiceSTTBenchmarkOptions;
+}) => Promise<VoiceSTTBenchmarkReport>;

package/dist/testing/fixtures.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { AudioFormat } from '../types';
// One entry of the fixture manifest (manifest.json) as stored on disk.
+export type VoiceTestFixtureManifestEntry = {
     // Fixture identifier; also used to key per-fixture benchmark options.
+    id: string;
+    title: string;
     // Audio file location; the bundled loader resolves it inside the fixture
     // directory's "pcm" subfolder.
+    audioPath: string;
     // Reference transcript the recognized text is scored against.
+    expectedText: string;
     // Terms checked for presence in the final transcript.
+    expectedTerms?: string[];
     // Per-fixture chunk duration; harness options take precedence, and 100 is
     // used when neither is set.
+    chunkDurationMs?: number;
+    language?: string;
+    difficulty?: 'clean' | 'noisy' | 'challenging';
+    tags?: string[];
     // Trailing silence to send after the audio; harness options take
     // precedence, and 1000 is used when neither is set.
+    tailPaddingMs?: number;
     // Overrides merged over the loader's default audio format.
+    format?: Partial<AudioFormat>;
+};
// A manifest entry after loading: audio bytes attached and format completed.
+export type VoiceTestFixture = Omit<VoiceTestFixtureManifestEntry, 'audioPath'> & {
     // Raw contents of the audio file.
+    audio: Uint8Array;
     // Resolved path the audio was read from.
+    audioPath: string;
     // Full audio format (defaults merged with the entry's overrides).
+    format: AudioFormat;
+};
// Resolves to the directory of the bundled fixtures (the one containing
// manifest.json); rejects when it cannot be located.
+export declare const getVoiceFixtureDirectory: () => Promise<string>;
// Loads all fixtures listed in manifest.json of `fixtureDirectory`, or of the
// bundled fixture directory when the argument is omitted.
+export declare const loadVoiceTestFixtures: (fixtureDirectory?: string) => Promise<VoiceTestFixture[]>;

package/dist/testing/index.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export * from './accuracy';
+export * from './benchmark';
+export * from './fixtures';
+export * from './stt';

package/dist/testing/index.js ADDED Viewed

@@ -0,0 +1,446 @@
+// @bun
// Runtime helper prelude. NOTE(review): these look like bundler-generated
// support functions (the file is marked `// @bun`); none of them is referenced
// by the testing code further down in this file, only by each other and by
// __decorateElement.
+var __create = Object.create;
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
// Sets a non-enumerable, configurable "name" property on `target` and returns it.
+var __name = (target, name) => {
+  Object.defineProperty(target, "name", {
+    value: name,
+    enumerable: false,
+    configurable: true
+  });
+  return target;
+};
// Returns Symbol[name] when it exists, otherwise a registry symbol "Symbol.<name>".
+var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
// Throws a TypeError with the given message.
+var __typeError = (msg) => {
+  throw TypeError(msg);
+};
// Defines `key` via defineProperty when it already exists on `obj`, otherwise plain assignment.
+var __defNormalProp = (obj, key, value) => (key in obj) ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Private-member helpers: `member` is a collection queried with .has/.get/.set.
+var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
+var __privateIn = (member, obj) => Object(obj) !== obj ? __typeError('Cannot use the "in" operator on this value') : member.has(obj);
+var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
+var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
+var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
// Decorator bookkeeping: slot 3 of the array holds the metadata object.
+var __decoratorStart = (base) => [, , , __create(base?.[__knownSymbol("metadata")] ?? null)];
+var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
// Returns `fn` unchanged unless it is defined and not a function, in which case it throws.
+var __expectFn = (fn) => fn !== undefined && typeof fn !== "function" ? __typeError("Function expected") : fn;
// Builds the context object handed to a decorator; addInitializer throws once `done._` is set.
+var __decoratorContext = (kind, name, done, metadata, fns) => ({
+  kind: __decoratorStrings[kind],
+  name,
+  metadata,
+  addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null))
+});
+var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
// Runs the initializer list stored at array[flags >> 1]; with the low flag bit
// set the initializers are called for effect only, otherwise each one maps `value`.
+var __runInitializers = (array, flags, self, value) => {
+  for (var i = 0, fns = array[flags >> 1], n = fns && fns.length;i < n; i++)
+    flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
+  return value;
+};
// Applies `decorators` (iterated last to first) to a class or class element.
// `flags` packs the element kind in its low three bits (index into
// __decoratorStrings) plus a static bit (8) and a private bit (16).
// NOTE(review): presumably generated decorator runtime support; it is not
// called anywhere in the visible part of this file. Logic left untouched.
+var __decorateElement = (array, flags, name, decorators, target, extra) => {
+  var fn, it, done, ctx, access, k = flags & 7, s = !!(flags & 8), p = !!(flags & 16);
+  var j = k > 3 ? array.length + 1 : k ? s ? 1 : 2 : 0, key = __decoratorStrings[k + 5];
+  var initializers = k > 3 && (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
+  var desc = k && (!p && !s && (target = target.prototype), k < 5 && (k > 3 || !p) && __getOwnPropDesc(k < 4 ? target : {
+    get [name]() {
+      return __privateGet(this, extra);
+    },
+    set [name](x) {
+      __privateSet(this, extra, x);
+    }
+  }, name));
+  k ? p && k < 4 && __name(extra, (k > 2 ? "set " : k > 1 ? "get " : "") + name) : __name(target, name);
+  for (var i = decorators.length - 1;i >= 0; i--) {
+    ctx = __decoratorContext(k, name, done = {}, array[3], extraInitializers);
+    if (k) {
+      ctx.static = s, ctx.private = p, access = ctx.access = { has: p ? (x) => __privateIn(target, x) : (x) => (name in x) };
+      if (k ^ 3)
+        access.get = p ? (x) => (k ^ 1 ? __privateGet : __privateMethod)(x, target, k ^ 4 ? extra : desc.get) : (x) => x[name];
+      if (k > 2)
+        access.set = p ? (x, y) => __privateSet(x, target, y, k ^ 4 ? extra : desc.set) : (x, y) => x[name] = y;
+    }
+    it = (0, decorators[i])(k ? k < 4 ? p ? extra : desc[key] : k > 4 ? undefined : { get: desc.get, set: desc.set } : target, ctx);
     // Marks the context as finished so a late addInitializer call throws.
+    done._ = 1;
+    if (k ^ 4 || it === undefined)
+      __expectFn(it) && (k > 4 ? initializers.unshift(it) : k ? p ? extra = it : desc[key] = it : target = it);
+    else if (typeof it !== "object" || it === null)
+      __typeError("Object expected");
+    else
+      __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
+  }
+  return k || __decoratorMetadata(array, target), desc && __defProp(target, name, desc), p ? k ^ 4 ? extra : desc : target;
+};
+// src/turnDetection.ts
// Turn-detection defaults carried over from src/turnDetection.ts.
// NOTE(review): neither constant is read anywhere in the visible part of this
// bundle; presumably a silence duration in milliseconds and a level threshold
// on the 0..1 scale produced by measureAudioLevel - confirm against the source.
+var DEFAULT_SILENCE_MS = 700;
+var DEFAULT_SPEECH_THRESHOLD = 0.015;
// Returns a Uint8Array view over `audio` without copying.
// An ArrayBuffer is wrapped whole; any other value is treated as a view and
// its buffer/byteOffset/byteLength are reused.
+var toUint8Array = (audio) => {
+  if (audio instanceof ArrayBuffer) {
+    return new Uint8Array(audio);
+  }
+  return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
+};
// Computes the root-mean-square level of `audio`, read as 16-bit signed
// samples and scaled by 1/32768. Returns 0 for fewer than two bytes.
// A trailing odd byte is ignored.
+var measureAudioLevel = (audio) => {
+  const bytes = toUint8Array(audio);
+  if (bytes.byteLength < 2) {
+    return 0;
+  }
   // NOTE(review): the Int16Array constructor requires the byte offset to be a
   // multiple of 2, so a view that starts at an odd byteOffset would throw a
   // RangeError here. The samples are also read in platform byte order.
   // Confirm that callers always pass aligned, native-endian buffers.
+  const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
+  if (samples.length === 0) {
+    return 0;
+  }
+  let sumSquares = 0;
+  for (const sample of samples) {
+    const normalized = sample / 32768;
+    sumSquares += normalized * normalized;
+  }
+  return Math.sqrt(sumSquares / samples.length);
+};
// Trims `value` and collapses every run of whitespace into a single space.
+var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
// Joins the normalized `text` of each transcript with spaces while dropping
// repeats relative to the most recently kept segment.
// Returns the merged string ("" when nothing usable was supplied).
+var mergeTranscriptTexts = (transcripts) => {
+  const merged = [];
+  for (const transcript of transcripts) {
+    const nextText = normalizeText(transcript.text);
     // Skip transcripts that are empty after normalization.
+    if (!nextText) {
+      continue;
+    }
+    const previous = merged.at(-1);
+    if (!previous) {
+      merged.push(nextText);
+      continue;
+    }
     // The new text is already covered by the previous segment: drop it.
+    if (nextText === previous || previous.includes(nextText)) {
+      continue;
+    }
     // The new text extends the previous segment: replace rather than append.
+    if (nextText.includes(previous)) {
+      merged[merged.length - 1] = nextText;
+      continue;
+    }
+    merged.push(nextText);
+  }
+  return merged.join(" ").trim();
+};
// Returns the merged text of `transcripts`; when that is empty, falls back to
// the normalized `partialText`.
+var buildTurnText = (transcripts, partialText) => {
+  const finalText = mergeTranscriptTexts(transcripts);
+  if (finalText) {
+    return finalText;
+  }
+  return normalizeText(partialText);
+};
+// src/testing/accuracy.ts
// Prepares text for scoring: lowercases it, replaces every character that is
// not a Unicode letter, a Unicode number, whitespace or an apostrophe with a
// space, collapses whitespace runs and trims the result.
+var normalizeAccuracyText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
// Levenshtein edit distance between two indexable sequences (`left` and
// `right` are compared element by element with ===, so both strings and
// arrays work). Insertions, deletions and substitutions each cost 1.
// Only two rows of the distance table are kept at a time.
+var levenshteinDistance = (left, right) => {
   // Against an empty sequence the distance is the other sequence's length.
+  if (left.length === 0) {
+    return right.length;
+  }
+  if (right.length === 0) {
+    return left.length;
+  }
+  const previous = new Array(right.length + 1).fill(0);
+  const current = new Array(right.length + 1).fill(0);
   // Row 0: turning an empty prefix into the first `column` elements of right.
+  for (let column = 0;column <= right.length; column += 1) {
+    previous[column] = column;
+  }
+  for (let row = 1;row <= left.length; row += 1) {
+    current[0] = row;
+    for (let column = 1;column <= right.length; column += 1) {
+      const substitutionCost = left[row - 1] === right[column - 1] ? 0 : 1;
+      current[column] = Math.min(current[column - 1] + 1, previous[column] + 1, previous[column - 1] + substitutionCost);
+    }
     // Copy the finished row so it becomes `previous` for the next iteration.
+    for (let column = 0;column <= right.length; column += 1) {
+      previous[column] = current[column];
+    }
+  }
+  return previous[right.length];
+};
// Merges the text of the transcripts whose `isFinal` flag is truthy; the
// partial-text fallback of buildTurnText is disabled by passing "".
+var mergeFinalTranscriptText = (transcripts) => buildTurnText(transcripts.filter((transcript) => transcript.isFinal), "");
// Scores `actualText` against `expectedText` after normalizing both.
// Returns the normalized texts, the word and character edit distances, the
// matching error rates, and whether the word error rate is within `threshold`
// (default 0.35).
+var scoreTranscriptAccuracy = (actualText, expectedText, threshold = 0.35) => {
+  const normalizedActual = normalizeAccuracyText(actualText);
+  const normalizedExpected = normalizeAccuracyText(expectedText);
   // Guard against "".split(" ") producing [""], which would count as one word.
+  const actualWords = normalizedActual ? normalizedActual.split(" ") : [];
+  const expectedWords = normalizedExpected ? normalizedExpected.split(" ") : [];
+  const wordDistance = levenshteinDistance(actualWords, expectedWords);
+  const charDistance = levenshteinDistance(Array.from(normalizedActual), Array.from(normalizedExpected));
   // NOTE(review): with an empty expected text both rates are 0, so any actual
   // text passes a non-negative threshold. Confirm this is intended.
+  const wordErrorRate = expectedWords.length > 0 ? wordDistance / expectedWords.length : 0;
   // NOTE(review): charDistance is computed over code points (Array.from) but
   // the divisor is the UTF-16 length of the string; the two differ when the
   // expected text contains characters outside the Basic Multilingual Plane.
+  const charErrorRate = normalizedExpected.length > 0 ? charDistance / normalizedExpected.length : 0;
+  return {
+    actualText: normalizedActual,
+    charDistance,
+    charErrorRate,
+    expectedText: normalizedExpected,
+    passesThreshold: wordErrorRate <= threshold,
+    threshold,
+    wordDistance,
+    wordErrorRate
+  };
+};
+// src/testing/stt.ts
// Splits `audio` into consecutive slices of at most `bytesPerChunk` bytes;
// the last slice may be shorter. Returns an empty array for empty audio.
// NOTE(review): `bytesPerChunk` must be positive - with 0 the offset never
// advances and the loop does not terminate. The caller in this file passes at
// least 2.
+var chunkAudio = (audio, bytesPerChunk) => {
+  const chunks = [];
+  for (let offset = 0;offset < audio.byteLength; offset += bytesPerChunk) {
+    chunks.push(audio.slice(offset, offset + bytesPerChunk));
+  }
+  return chunks;
+};
// Returns a zero-filled Uint8Array of `byteLength` bytes, used as trailing silence.
+var createSilence = (byteLength) => new Uint8Array(byteLength);
// Waits until no activity has been recorded for at least `settleMs`, or until
// `idleTimeoutMs` has elapsed, whichever comes first. Resolves to undefined in
// both cases.
// `readLastActivityAt` returns the timestamp (Date.now() scale) of the most
// recent activity.
+var waitForIdle = async (readLastActivityAt, idleTimeoutMs, settleMs) => {
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < idleTimeoutMs) {
+    if (Date.now() - readLastActivityAt() >= settleMs) {
+      return;
+    }
     // Poll at most every 50 ms, or faster when the settle window is shorter.
+    await Bun.sleep(Math.min(50, settleMs));
+  }
+};
// Streams one fixture's audio through an STT adapter session and collects the
// events it emits.
// Parameters:
//   adapter - object whose open() resolves to a session with on/send/close.
//   fixture - loaded fixture (audio bytes, format, expected text, ...).
//   options - optional harness settings; each falls back to the fixture value
//             and/or the defaults below.
// Returns: the recorded events per kind, the merged final text, its accuracy
// score and the start timestamp.
+var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
+  const startedAt = Date.now();
+  const partialEvents = [];
+  const finalEvents = [];
+  const endOfTurnEvents = [];
+  const errorEvents = [];
+  const closeEvents = [];
+  const chunkDurationMs = options.chunkDurationMs ?? fixture.chunkDurationMs ?? 100;
+  const tailPaddingMs = options.tailPaddingMs ?? fixture.tailPaddingMs ?? 1000;
+  const idleTimeoutMs = options.idleTimeoutMs ?? 8000;
+  const settleMs = options.settleMs ?? 500;
+  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
   // Timestamp of the latest event; waitForIdle uses it to detect quiescence.
+  let lastActivityAt = Date.now();
+  const markActive = () => {
+    lastActivityAt = Date.now();
+  };
+  const session = await adapter.open({
+    format: fixture.format,
+    sessionId: `fixture-${fixture.id}`
+  });
   // Each on() call is expected to return a function that removes the listener.
+  const unsubscribers = [
+    session.on("partial", (event) => {
+      partialEvents.push(event);
+      markActive();
+    }),
+    session.on("final", (event) => {
+      finalEvents.push(event);
+      markActive();
+    }),
+    session.on("endOfTurn", (event) => {
+      endOfTurnEvents.push(event);
+      markActive();
+    }),
+    session.on("error", (event) => {
+      errorEvents.push(event);
+      markActive();
+    }),
+    session.on("close", (event) => {
+      closeEvents.push(event);
+      markActive();
+    })
+  ];
+  try {
     // The factor 2 is the sample width in bytes.
     // NOTE(review): this assumes 16-bit samples regardless of fixture.format.encoding - confirm.
+    const bytesPerMillisecond = fixture.format.sampleRateHz * fixture.format.channels * 2 / 1000;
+    const bytesPerChunk = Math.max(2, Math.floor(bytesPerMillisecond * chunkDurationMs));
+    const chunks = chunkAudio(fixture.audio, bytesPerChunk);
     // A waitForRealtimeMs of 0 (the default) means "pace at the chunk duration".
+    const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
+    for (const chunk of chunks) {
+      await session.send(chunk);
+      await Bun.sleep(realtimeDelayMs);
+    }
     // Follow the audio with zero-filled padding, sent at the same pace.
+    if (tailPaddingMs > 0) {
+      const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
+      for (const chunk of chunkAudio(createSilence(tailBytes), bytesPerChunk)) {
+        await session.send(chunk);
+        await Bun.sleep(realtimeDelayMs);
+      }
+    }
+    await waitForIdle(() => lastActivityAt, idleTimeoutMs, settleMs);
+  } finally {
     // NOTE(review): if close() rejects, the unsubscribe loop below is skipped.
+    await session.close("fixture-complete");
+    for (const unsubscribe of unsubscribers) {
+      unsubscribe();
+    }
+  }
+  const finalText = mergeFinalTranscriptText(finalEvents.map((event) => event.transcript));
+  return {
+    accuracy: scoreTranscriptAccuracy(finalText, fixture.expectedText, options.transcriptThreshold),
+    closeEvents,
+    endOfTurnEvents,
+    errorEvents,
+    finalEvents,
+    finalText,
+    partialEvents,
+    startedAt
+  };
+};
+// src/testing/benchmark.ts
// Same normalization as normalizeAccuracyText: lowercase, replace anything
// that is not a letter, number, whitespace or apostrophe with a space,
// collapse whitespace, trim.
+var normalizeBenchmarkText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
// Checks which of `expectedTerms` (may be undefined) occur in `actualText`.
// Both sides are normalized first and matching is a plain substring test, so
// a term also matches inside a longer word.
// Returns the normalized terms, the matched and missing subsets, the recall,
// and whether nothing is missing.
+var scoreExpectedTerms = (actualText, expectedTerms) => {
+  const normalizedActual = normalizeBenchmarkText(actualText);
+  const normalizedExpectedTerms = (expectedTerms ?? []).map((entry) => normalizeBenchmarkText(entry));
+  const matchedTerms = normalizedExpectedTerms.filter((term) => term.length > 0 && normalizedActual.includes(term));
+  const missingTerms = normalizedExpectedTerms.filter((term) => term.length > 0 && !matchedTerms.includes(term));
   // NOTE(review): terms that normalize to "" are excluded from both matched
   // and missing but still count in the denominator, so they lower recall while
   // allMatched can remain true. Confirm whether they should be filtered out.
+  const denominator = normalizedExpectedTerms.length;
   // With no expected terms the recall is defined as 1.
+  const recall = denominator > 0 ? matchedTerms.length / denominator : 1;
+  return {
+    allMatched: missingTerms.length === 0,
+    expectedTerms: normalizedExpectedTerms,
+    matchedTerms,
+    missingTerms,
+    recall
+  };
+};
// Arithmetic mean of the finite numbers in `values`; non-numbers, NaN and
// infinities are ignored. Returns undefined when no finite number remains.
+var average = (values) => {
+  const filtered = values.filter((value) => typeof value === "number" && Number.isFinite(value));
+  if (filtered.length === 0) {
+    return;
+  }
+  return filtered.reduce((sum, value) => sum + value, 0) / filtered.length;
+};
// Rounds `value` to `digits` decimal places (default 4).
// Returns undefined when `value` is not a finite number.
+var roundMetric = (value, digits = 4) => {
+  if (typeof value !== "number" || !Number.isFinite(value)) {
+    return;
+  }
+  const factor = 10 ** digits;
+  return Math.round(value * factor) / factor;
+};
// Converts one harness result into the per-fixture benchmark record.
// `elapsedMs` is supplied by the caller and copied through unchanged.
+var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
   // Latencies are measured from the harness start to the first event of each
   // kind, using the event's `receivedAt`; undefined when no such event exists.
+  const timeToFirstPartialMs = result.partialEvents[0] ? result.partialEvents[0].receivedAt - result.startedAt : undefined;
+  const timeToFirstFinalMs = result.finalEvents[0] ? result.finalEvents[0].receivedAt - result.startedAt : undefined;
+  const timeToEndOfTurnMs = result.endOfTurnEvents[0] ? result.endOfTurnEvents[0].receivedAt - result.startedAt : undefined;
+  const expectedTerms = scoreExpectedTerms(result.finalText, fixture.expectedTerms);
+  return {
+    accuracy: result.accuracy,
+    closeCount: result.closeEvents.length,
+    difficulty: fixture.difficulty,
+    elapsedMs,
+    endOfTurnCount: result.endOfTurnEvents.length,
+    errorCount: result.errorEvents.length,
+    expectedTerms,
+    finalCount: result.finalEvents.length,
+    finalText: result.finalText,
+    fixtureId: fixture.id,
     // Every final event beyond the first counts as fragmentation.
+    fragmentationCount: Math.max(0, result.finalEvents.length - 1),
     // A fixture passes only without errors, with non-empty text, and within the accuracy threshold.
+    passes: result.errorEvents.length === 0 && result.finalText.trim().length > 0 && result.accuracy.passesThreshold,
+    partialCount: result.partialEvents.length,
+    tags: fixture.tags ?? [],
+    timeToEndOfTurnMs,
+    timeToFirstFinalMs,
+    timeToFirstPartialMs,
+    title: fixture.title
+  };
+};
// Aggregates per-fixture benchmark results into one summary for `adapterId`.
// Rates are rounded to 4 decimal places, counts and durations to 2.
// Averages fall back to 0 when unavailable, except the three timing averages,
// which stay undefined when no fixture reported a value.
+var summarizeSTTBenchmark = (adapterId, fixtures) => {
+  const fixtureCount = fixtures.length;
+  const passCount = fixtures.filter((fixture) => fixture.passes).length;
+  return {
+    adapterId,
+    averageCharErrorRate: roundMetric(average(fixtures.map((fixture) => fixture.accuracy.charErrorRate))) ?? 0,
+    averageElapsedMs: roundMetric(average(fixtures.map((fixture) => fixture.elapsedMs)), 2) ?? 0,
+    averageEndOfTurnCount: roundMetric(average(fixtures.map((fixture) => fixture.endOfTurnCount)), 2) ?? 0,
+    averageFinalCount: roundMetric(average(fixtures.map((fixture) => fixture.finalCount)), 2) ?? 0,
+    averageTermRecall: roundMetric(average(fixtures.map((fixture) => fixture.expectedTerms.recall))) ?? 0,
+    averageTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToEndOfTurnMs)), 2),
+    averageTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstFinalMs)), 2),
+    averageTimeToFirstPartialMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstPartialMs)), 2),
+    averageWordErrorRate: roundMetric(average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate))) ?? 0,
+    fixtureCount,
+    fixturesWithErrors: fixtures.filter((fixture) => fixture.errorCount > 0).length,
+    fixturesWithFragmentation: fixtures.filter((fixture) => fixture.fragmentationCount > 0).length,
+    passCount,
+    passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
+    totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
     // Computed from the unrounded average word error rate.
+    wordAccuracyRate: fixtureCount > 0 ? roundMetric(1 - (average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate)) ?? 0)) ?? 0 : 0
+  };
+};
// Builds a comparison across several benchmark reports: one entry per report
// plus the best entry by pass rate (highest), term recall (highest) and word
// error rate (lowest). With no reports the best-of fields are undefined.
+var compareSTTBenchmarks = (reports) => {
+  const entries = reports.map((report) => ({
+    adapterId: report.adapterId,
+    summary: report.summary
+  }));
   // Picks the entry with the largest ("max") or smallest (otherwise) metric.
   // The comparison is strict, so on a tie the earlier entry is kept.
+  const bestByMetric = (selectMetric, direction) => entries.reduce((best, entry) => {
+    if (!best) {
+      return entry;
+    }
+    const next = selectMetric(entry);
+    const current = selectMetric(best);
+    if (direction === "max" ? next > current : next < current) {
+      return entry;
+    }
+    return best;
+  }, undefined);
+  return {
+    bestByPassRate: bestByMetric((entry) => entry.summary.passRate, "max"),
+    bestByTermRecall: bestByMetric((entry) => entry.summary.averageTermRecall, "max"),
+    bestByWordErrorRate: bestByMetric((entry) => entry.summary.averageWordErrorRate, "min"),
+    entries
+  };
+};
// Runs every fixture through `adapter`, one after another, and returns a
// report with the per-fixture results, their summary and a generation
// timestamp. `options` defaults to an empty object.
+var runSTTAdapterBenchmark = async ({
+  adapter,
+  adapterId,
+  fixtures,
+  options = {}
+}) => {
+  const results = [];
+  for (const fixture of fixtures) {
+    const startedAt = Date.now();
     // Per-fixture overrides (keyed by fixture id) win over the shared options.
+    const fixtureResult = await runSTTAdapterFixture(adapter, fixture, {
+      ...options,
+      ...options.fixtureOptions?.[fixture.id] ?? {}
+    });
+    results.push(toFixtureBenchmarkResult(fixture, fixtureResult, Date.now() - startedAt));
+  }
+  return {
+    adapterId,
+    fixtures: results,
+    generatedAt: Date.now(),
+    summary: summarizeSTTBenchmark(adapterId, results)
+  };
+};
+// src/testing/fixtures.ts
+import { resolve } from "path";
// Audio format assumed for a fixture; a manifest entry's own `format` fields
// are merged over these values by loadVoiceTestFixtures.
+var DEFAULT_AUDIO_FORMAT = {
+  channels: 1,
+  container: "raw",
+  encoding: "pcm_s16le",
+  sampleRateHz: 16000
+};
// Possible locations of the bundled "fixtures" directory: two, three and four
// levels above this module's directory. resolveFixtureDirectory tries them in
// order.
+var FIXTURE_DIR_CANDIDATES = [
+  resolve(import.meta.dir, "..", "..", "fixtures"),
+  resolve(import.meta.dir, "..", "..", "..", "fixtures"),
+  resolve(import.meta.dir, "..", "..", "..", "..", "fixtures")
+];
// Returns the first candidate directory that contains a manifest.json.
// Throws an Error when none of the candidates does.
+var resolveFixtureDirectory = async () => {
+  for (const candidate of FIXTURE_DIR_CANDIDATES) {
+    if (await Bun.file(resolve(candidate, "manifest.json")).exists()) {
+      return candidate;
+    }
+  }
+  throw new Error("Unable to locate the bundled voice test fixtures. Expected fixtures/manifest.json next to the package root.");
+};
// Public wrapper around resolveFixtureDirectory; resolves to the fixture
// directory path or rejects when it cannot be found.
+var getVoiceFixtureDirectory = async () => resolveFixtureDirectory();
// Reads manifest.json from `fixtureDirectory` (or from the bundled fixture
// directory when omitted) and loads every entry's audio file.
// Returns one fixture object per manifest entry, in manifest order, with the
// audio bytes, the resolved audio path and a completed format.
+var loadVoiceTestFixtures = async (fixtureDirectory) => {
+  const resolvedFixtureDirectory = fixtureDirectory ?? await resolveFixtureDirectory();
+  const manifestFile = Bun.file(resolve(resolvedFixtureDirectory, "manifest.json"));
   // NOTE(review): the parsed manifest is used without validation; it is
   // assumed to be an array of entries carrying at least `audioPath`.
+  const manifest = await manifestFile.json();
   // All audio files are read concurrently.
+  return await Promise.all(manifest.map(async (entry) => {
     // Audio paths in the manifest are relative to the "pcm" subdirectory.
+    const audioPath = resolve(resolvedFixtureDirectory, "pcm", entry.audioPath);
+    const audio = new Uint8Array(await Bun.file(audioPath).arrayBuffer());
+    return {
+      ...entry,
+      audio,
+      audioPath,
+      format: {
+        ...DEFAULT_AUDIO_FORMAT,
+        ...entry.format
+      }
+    };
+  }));
+};
+export {
+  summarizeSTTBenchmark,
+  scoreTranscriptAccuracy,
+  runSTTAdapterFixture,
+  runSTTAdapterBenchmark,
+  mergeFinalTranscriptText,
+  loadVoiceTestFixtures,
+  getVoiceFixtureDirectory,
+  compareSTTBenchmarks
+};

package/dist/testing/stt.d.ts ADDED Viewed

@@ -0,0 +1,22 @@
+import { type VoiceTranscriptAccuracy } from './accuracy';
+import type { STTAdapter, VoiceCloseEvent, VoiceEndOfTurnEvent, VoiceErrorEvent, VoiceFinalEvent, VoicePartialEvent } from '../types';
+import type { VoiceTestFixture } from './fixtures';
// Tuning options for runSTTAdapterFixture. The defaults named below are the
// ones applied by the bundled implementation.
+export type VoiceSTTAdapterHarnessOptions = {
     // Duration of audio per sent chunk; falls back to the fixture's value, then 100.
+    chunkDurationMs?: number;
     // Upper bound on the wait for the adapter to go quiet; default 8000.
+    idleTimeoutMs?: number;
     // Quiet period after the last event that counts as "idle"; default 500.
+    settleMs?: number;
     // Length of the silence sent after the audio; falls back to the fixture's value, then 1000.
+    tailPaddingMs?: number;
     // Word-error-rate threshold handed to scoreTranscriptAccuracy.
+    transcriptThreshold?: number;
     // Delay between chunks when greater than 0; otherwise the chunk duration is used.
+    waitForRealtimeMs?: number;
+};
// Everything recorded while one fixture was streamed through an adapter.
+export type VoiceSTTAdapterHarnessResult = {
     // Accuracy of finalText against the fixture's expected text.
+    accuracy: VoiceTranscriptAccuracy;
     // Events received from the session, grouped by kind, in arrival order.
+    closeEvents: VoiceCloseEvent[];
+    endOfTurnEvents: VoiceEndOfTurnEvent[];
+    errorEvents: VoiceErrorEvent[];
+    finalEvents: VoiceFinalEvent[];
     // Merged text of the transcripts carried by the final events.
+    finalText: string;
+    partialEvents: VoicePartialEvent[];
     // Date.now() value taken when the harness run began.
+    startedAt: number;
+};
// Streams a single fixture through `adapter` and resolves to the collected
// events, the merged final text and its accuracy score.
+export declare const runSTTAdapterFixture: (adapter: STTAdapter, fixture: VoiceTestFixture, options?: VoiceSTTAdapterHarnessOptions) => Promise<VoiceSTTAdapterHarnessResult>;