@absolutejs/voice 0.0.22-beta.470 → 0.0.22-beta.472
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1359 -0
- package/dist/multilingualProof.d.ts +77 -0
- package/dist/vapiAdapter.d.ts +160 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -34851,6 +34851,280 @@ var createVoiceApiRequestTool = (options) => {
|
|
|
34851
34851
|
resultToMessage: options.formatResult ?? ((result) => result.ok ? `API request ${options.name} succeeded (${String(result.status)}).` : `API request ${options.name} failed with status ${String(result.status)}.`)
|
|
34852
34852
|
});
|
|
34853
34853
|
};
|
|
34854
|
+
// src/vapiAdapter.ts
|
|
34855
|
+
// Built-in template variables. Each resolver reads the clock when it is called
// and derives its value from Date#toISOString, so all three are UTC strings.
var VAPI_BUILT_IN_VARIABLES = {
  // Calendar date: the first ten characters of the ISO timestamp ("YYYY-MM-DD").
  date: () => {
    const iso = new Date().toISOString();
    return iso.slice(0, 10);
  },
  // Full ISO-8601 timestamp.
  now: () => new Date().toISOString(),
  // Time of day: characters 11..18 of the ISO timestamp ("HH:MM:SS").
  time: () => {
    const iso = new Date().toISOString();
    return iso.slice(11, 19);
  }
};
// Matches `{{ name }}` and `{{ name | filter }}`; capture group 1 is the variable path.
// Global flag: callers must not rely on `lastIndex` being zero after `.test()`/`.exec()`.
var VAPI_TEMPLATE_REGEX = /\{\{\s*([^}|]+?)\s*(?:\|[^}]*)?\}\}/g;
// Matches any `{{ ... }}` placeholder that contains a `|` filter section.
var VAPI_TEMPLATE_FILTER_REGEX = /\{\{[^}]*\|[^}]*\}\}/;
|
|
34862
|
+
/**
 * Picks the system prompt text out of a Vapi model config.
 *
 * Order of preference: a truthy `model.systemPrompt`, then the content of the
 * first message with role "system" and string content (when non-empty), then
 * `fallback`.
 *
 * @param model Vapi model config; may be undefined.
 * @param fallback Value returned when no prompt is found.
 */
var extractSystemPrompt = (model, fallback) => {
  const directPrompt = model?.systemPrompt;
  if (directPrompt) {
    return directPrompt;
  }
  const isStringSystemMessage = (entry) => entry.role === "system" && typeof entry.content === "string";
  const messagePrompt = model?.messages?.find(isStringSystemMessage)?.content;
  return messagePrompt ? messagePrompt : fallback;
};
|
|
34870
|
+
/**
 * Compiles a Vapi system-prompt template.
 *
 * @param template Prompt text, possibly containing `{{ variable }}` placeholders.
 * @param variableResolver Optional `(path, input) => value`; returning `undefined`
 *   leaves the placeholder untouched.
 * @returns `undefined` for an empty template, the template itself when it has no
 *   placeholders, otherwise a function `(input) => string` that expands the
 *   placeholders on every call.
 */
var compileVapiSystem = (template, variableResolver) => {
  if (!template) {
    return;
  }
  // String#search ignores the regex's global flag and leaves `lastIndex` alone,
  // so this check cannot be skewed by earlier use of the shared /g regex.
  if (template.search(VAPI_TEMPLATE_REGEX) === -1) {
    return template;
  }
  return (input) => template.replace(VAPI_TEMPLATE_REGEX, (match, rawPath) => {
    const path = rawPath.trim();
    // Own-property check only: a plain `[path]` lookup would also find
    // Object.prototype members, so `{{toString}}` or `{{constructor}}` would be
    // invoked as a built-in and `{{__proto__}}` would throw.
    if (Object.hasOwn(VAPI_BUILT_IN_VARIABLES, path)) {
      return VAPI_BUILT_IN_VARIABLES[path]({ context: input.context, session: input.session });
    }
    if (variableResolver) {
      const resolved = variableResolver(path, input);
      if (resolved !== undefined) {
        return String(resolved);
      }
    }
    // Unknown variable: keep the original placeholder text.
    return match;
  });
};
|
|
34891
|
+
/**
 * Normalizes a raw HTTP method name.
 *
 * @param raw Method string in any case; null/undefined means "GET".
 * @returns The upper-cased method when it is GET, POST, PUT, DELETE or PATCH;
 *   "GET" for anything else.
 */
var httpMethodFor = (raw) => {
  const candidate = (raw ?? "GET").toUpperCase();
  switch (candidate) {
    case "GET":
    case "POST":
    case "PUT":
    case "DELETE":
    case "PATCH":
      return candidate;
    default:
      return "GET";
  }
};
|
|
34898
|
+
/**
 * Translates one Vapi tool definition into a voice tool.
 *
 * @param raw Vapi tool definition; `raw.type` defaults to "function".
 * @param options Adapter options (`fetch`, `knowledgeBase`, `dtmfSendFactory`,
 *   `customToolFactory`).
 * @param unsupported Array that receives a `{ detail, field }` entry whenever
 *   the tool cannot be translated.
 * @returns The created tool, or `undefined` when the tool was reported as unsupported.
 */
var buildToolFromVapi = (raw, options, unsupported) => {
  const kind = raw.type ?? "function";
  const describedAs = raw.function?.description ?? raw.description;
  const functionName = raw.function?.name;
  // Records why a tool could not be translated; the caller then returns undefined.
  const reject = (field, detail) => {
    unsupported.push({ detail, field });
  };
  switch (kind) {
    case "endCall":
      return createVoiceEndCallTool({
        description: describedAs,
        name: functionName ?? "endCall"
      });
    case "transferCall": {
      const destinations = [];
      (raw.destinations ?? []).forEach((entry, index) => {
        const target = entry.number ?? entry.sipUri;
        if (!target) {
          // Entries without a phone number or SIP URI cannot be dialed; drop them.
          return;
        }
        destinations.push({
          description: entry.description,
          id: target.replace(/[^a-zA-Z0-9_-]/g, "-") || `destination-${String(index)}`,
          message: entry.message,
          metadata: entry.metadata,
          target
        });
      });
      if (destinations.length === 0) {
        reject("tools[].destinations", "transferCall tool has no usable destinations (need number or sipUri).");
        return;
      }
      return createVoiceTransferCallTool({
        description: describedAs,
        destinations,
        name: functionName ?? "transferCall"
      });
    }
    case "dtmf": {
      const send = options.dtmfSendFactory?.({ raw });
      if (!send) {
        reject("tools[].type=dtmf", "dtmf tool requires a dtmfSendFactory to map to the telephony adapter.");
        return;
      }
      return createVoiceDTMFTool({
        description: describedAs,
        name: functionName ?? "sendDTMF",
        send
      });
    }
    case "voicemail":
      return createVoiceVoicemailDetectionTool({
        description: describedAs,
        name: functionName ?? "markVoicemail"
      });
    case "apiRequest": {
      if (!raw.url) {
        reject("tools[].url", "apiRequest tool is missing url.");
        return;
      }
      return createVoiceApiRequestTool({
        description: describedAs ?? `Call ${raw.url}`,
        fetch: options.fetch,
        headers: raw.headers,
        method: httpMethodFor(raw.method),
        name: functionName ?? raw.name ?? "apiRequest",
        parameters: raw.function?.parameters,
        url: raw.url
      });
    }
    case "query": {
      const knowledgeBase = options.knowledgeBase;
      if (!knowledgeBase) {
        reject("tools[].type=query", "query tool requires a knowledgeBase option (a VoiceRAGCollectionLike).");
        return;
      }
      // Caller-supplied toolOptions are spread last, so they override the Vapi-derived fields.
      return createVoiceRAGTool(knowledgeBase.collection, {
        description: describedAs,
        name: functionName ?? "searchKnowledgeBase",
        ...knowledgeBase.toolOptions ?? {}
      });
    }
    case "function": {
      const serverUrl = raw.server?.url;
      if (serverUrl) {
        // Server-backed functions are mapped to a POST API-request tool.
        return createVoiceApiRequestTool({
          description: describedAs ?? `Custom function tool (server: ${raw.server.url}).`,
          fetch: options.fetch,
          headers: raw.server.headers,
          method: "POST",
          name: functionName ?? raw.name ?? "customFunction",
          parameters: raw.function?.parameters,
          url: raw.server.url
        });
      }
      const customTool = options.customToolFactory?.({ raw });
      if (customTool) {
        return customTool;
      }
      reject(
        `tools[].name=${raw.function?.name ?? raw.name ?? "<unnamed>"}`,
        "function tool without server.url; provide customToolFactory to handle it."
      );
      return;
    }
    default:
      reject("tools[].type", `Unrecognized Vapi tool type "${kind}".`);
      return;
  }
};
|
|
35012
|
+
/**
 * Copies the route-level settings of a Vapi assistant config into a flat hints object.
 *
 * `stt` is built from `config.transcriber` and `tts` from `config.voice`; each is
 * `undefined` when the source section is missing. The compliance flags come
 * from `config.compliancePlan`.
 */
var collectRouteHints = (config) => {
  const { compliancePlan, transcriber, voice } = config;
  const stt = transcriber ? {
    confidenceThreshold: transcriber.confidenceThreshold,
    language: transcriber.language,
    model: transcriber.model,
    provider: transcriber.provider
  } : undefined;
  const tts = voice ? {
    provider: voice.provider,
    speed: voice.speed,
    stability: voice.stability,
    style: voice.style,
    voiceId: voice.voiceId
  } : undefined;
  return {
    backgroundDenoisingEnabled: config.backgroundDenoisingEnabled,
    backgroundSound: config.backgroundSound,
    endCallMessage: config.endCallMessage,
    endCallPhrases: config.endCallPhrases,
    firstMessage: config.firstMessage,
    firstMessageMode: config.firstMessageMode,
    hipaaEnabled: compliancePlan?.hipaaEnabled,
    maxDurationSeconds: config.maxDurationSeconds,
    pciEnabled: compliancePlan?.pciEnabled,
    recordingEnabled: config.recordingEnabled,
    silenceTimeoutSeconds: config.silenceTimeoutSeconds,
    stt,
    tts,
    voicemailMessage: config.voicemailMessage
  };
};
|
|
35039
|
+
/**
 * Appends `{ detail, field }` entries to `unsupported` for assistant-level Vapi
 * settings that are not translated automatically.
 *
 * @param config Vapi assistant config.
 * @param unsupported Array to append to (mutated in place).
 * @param hasKnowledgeBaseOption Whether the caller supplied a knowledgeBase option.
 */
var collectAssistantLevelUnsupported = (config, unsupported, hasKnowledgeBaseOption) => {
  const report = (field, detail) => {
    unsupported.push({ detail, field });
  };
  if (!hasKnowledgeBaseOption && config.knowledgeBaseId) {
    report("knowledgeBaseId", "knowledgeBaseId is set but no knowledgeBase option was provided; configure one to surface a query tool.");
  }
  if (config.voicemailDetection?.provider) {
    report("voicemailDetection.provider", "voicemailDetection.provider (ML detection) has no direct voice equivalent; wire your carrier's AMD callback and call VoiceSessionHandle.markVoicemail yourself.");
  }
  if (config.startSpeakingPlan) {
    report("startSpeakingPlan", "startSpeakingPlan is a route-level concern; map it to the voice() plugin / barge-in config.");
  }
  if (config.stopSpeakingPlan) {
    report("stopSpeakingPlan", "stopSpeakingPlan is a route-level concern; map it to the voice() plugin / barge-in config.");
  }
  if (config.monitorPlan) {
    report("monitorPlan", "monitorPlan (listenUrl / controlUrl) has no direct voice equivalent yet; use live-ops routes for control.");
  }
  const firstMessageMode = config.firstMessageMode;
  if (firstMessageMode) {
    report("firstMessageMode", `firstMessageMode "${config.firstMessageMode}" is a route concern; the system/firstMessage hints are in routeHints.`);
  }
  // Flag prompts that contain `{{ var | filter }}` placeholders.
  const promptText = extractSystemPrompt(config.model, undefined);
  const usesFilters = Boolean(promptText) && VAPI_TEMPLATE_FILTER_REGEX.test(promptText);
  if (usesFilters) {
    report("model.messages[0].content (filters)", "system prompt uses LiquidJS filter syntax (e.g. {{ now | date: ... }}); only bare {{var}} expansion is auto-translated.");
  }
};
|
|
35084
|
+
/**
 * Builds a voice assistant from a Vapi assistant config.
 *
 * @param config Vapi assistant config.
 * @param options Adapter options; `modelFactory` is required, and `assistantId`,
 *   `systemFallback`, `variableResolver` and `knowledgeBase` are read when present
 *   (tool-related options are forwarded to `buildToolFromVapi`).
 * @returns `{ assistant, assistantOptions, modelAgent, routeHints, tools, unsupported }`,
 *   where `unsupported` lists every setting that could not be translated.
 */
var fromVapiAssistantConfig = (config, options) => {
  const unsupported = [];
  const vapiModel = config.model;
  const model = options.modelFactory({
    model: vapiModel?.model,
    provider: vapiModel?.provider,
    raw: vapiModel ?? {},
    temperature: vapiModel?.temperature
  });
  const promptTemplate = extractSystemPrompt(vapiModel, options.systemFallback);
  const system = compileVapiSystem(promptTemplate, options.variableResolver);
  const tools = [];
  for (const rawTool of vapiModel?.tools ?? []) {
    const built = buildToolFromVapi(rawTool, options, unsupported);
    if (built) {
      tools.push(built);
    }
  }
  const knowledgeBase = options.knowledgeBase;
  if (config.knowledgeBaseId && knowledgeBase) {
    // Add a RAG tool for the assistant-level knowledge base unless a tool with
    // the same name was already produced from the tools list.
    const ragToolName = knowledgeBase.toolOptions?.name ?? "searchKnowledgeBase";
    const alreadyPresent = tools.some((tool) => tool.name === ragToolName);
    if (!alreadyPresent) {
      tools.push(createVoiceRAGTool(knowledgeBase.collection, knowledgeBase.toolOptions));
    }
  }
  const savedToolIds = vapiModel?.toolIds;
  if (savedToolIds && savedToolIds.length > 0) {
    unsupported.push({
      detail: "model.toolIds references saved Vapi tools; provide their definitions via tools[] or customToolFactory.",
      field: "model.toolIds"
    });
  }
  collectAssistantLevelUnsupported(config, unsupported, Boolean(knowledgeBase));
  const assistantOptions = {
    id: options.assistantId ?? "vapi-imported-assistant",
    model,
    system,
    tools
  };
  const assistant = createVoiceAssistant(assistantOptions);
  const routeHints = collectRouteHints(config);
  return {
    assistant,
    assistantOptions,
    modelAgent: assistant.agent,
    routeHints,
    tools,
    unsupported
  };
};
|
|
34854
35128
|
// src/agentSquadContract.ts
|
|
34855
35129
|
// Trims surrounding whitespace and lower-cases the value.
var normalizeIncludes = (value) => {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
};
|
|
34856
35130
|
var resolveOutcome4 = (result) => {
|
|
@@ -43099,6 +43373,1087 @@ var createVoiceProofPackRoutes = (options) => {
|
|
|
43099
43373
|
}
|
|
43100
43374
|
return app;
|
|
43101
43375
|
};
|
|
43376
|
+
// src/testing/fixtures.ts
|
|
43377
|
+
import { resolve as resolve2 } from "path";
|
|
43378
|
+
// Ids of the fixtures that createJargonVoiceTestFixtures derives its variants from.
var JARGON_FIXTURE_IDS = [
  "traveled-back-route-clean",
  "dialogue-two-clean",
  "dialogue-three-clean",
  "dialogue-two-noisy",
  "dialogue-three-mixed"
];
// Audio format applied to a manifest entry; the entry's own `format` fields override these.
var DEFAULT_AUDIO_FORMAT = {
  channels: 1,
  container: "raw",
  encoding: "pcm_s16le",
  sampleRateHz: 16000
};
// Sample rate used for telephony fixtures when the caller does not pass one.
var DEFAULT_TELEPHONY_SAMPLE_RATE_HZ = 8000;
// Length of the silent gap inserted between speakers in synthetic multi-speaker fixtures.
var DEFAULT_MULTI_SPEAKER_SILENCE_MS = 350;
|
|
43393
|
+
// Candidate locations of the bundled `fixtures` directory: two, three and four
// levels above this module's directory, probed in that order.
var FIXTURE_DIR_CANDIDATES = [2, 3, 4].map((levelsUp) => {
  const parents = new Array(levelsUp).fill("..");
  return resolve2(import.meta.dir, ...parents, "fixtures");
});
// Environment variables that may list extra fixture directories.
var EXTERNAL_FIXTURE_ENV_KEYS = [
  "VOICE_FIXTURE_DIR",
  "VOICE_FIXTURE_DIRS"
];
|
|
43402
|
+
/**
 * Finds the bundled fixture directory.
 *
 * Probes FIXTURE_DIR_CANDIDATES in order and resolves with the first one that
 * contains a manifest.json.
 *
 * @throws {Error} when no candidate contains a manifest.
 */
var resolveFixtureDirectory = async () => {
  for (let position = 0; position < FIXTURE_DIR_CANDIDATES.length; position += 1) {
    const directory = FIXTURE_DIR_CANDIDATES[position];
    const manifestPath = resolve2(directory, "manifest.json");
    const manifestFound = await Bun.file(manifestPath).exists();
    if (manifestFound) {
      return directory;
    }
  }
  throw new Error("Unable to locate the bundled voice test fixtures. Expected fixtures/manifest.json next to the package root.");
};
|
|
43410
|
+
// Resolves with the bundled fixture directory; rejects when it cannot be located.
var getVoiceFixtureDirectory = async () => {
  const directory = await resolveFixtureDirectory();
  return directory;
};
|
|
43411
|
+
// Drops blank entries and repeated directories, keeping the first occurrence of each.
var toUniqueDirectories = (directories) => {
  const seen = new Set();
  const unique = [];
  for (const directory of directories) {
    const isBlank = directory.trim().length === 0;
    if (isBlank || seen.has(directory)) {
      continue;
    }
    seen.add(directory);
    unique.push(directory);
  }
  return unique;
};
|
|
43412
|
+
// Splits a comma- or newline-separated list into trimmed, non-empty entries.
// A null or undefined value yields an empty list.
var splitFixtureDirectoryValue = (value) => {
  const entries = [];
  for (const segment of (value ?? "").split(/[\n,]/)) {
    const trimmed = segment.trim();
    if (trimmed.length > 0) {
      entries.push(trimmed);
    }
  }
  return entries;
};
|
|
43413
|
+
/**
 * Normalizes the caller's fixture-directory input to a list.
 *
 * Accepts a single path string, an array of paths, or an options object with a
 * `directories` array; anything else yields an empty list.
 */
var resolveFixtureInputDirectories = (input) => {
  if (Array.isArray(input)) {
    return input;
  }
  return typeof input === "string" ? [input] : input?.directories ?? [];
};
|
|
43422
|
+
// Bundled fixtures are included unless the input is an options object (not an
// array) whose `includeBundled` is exactly `false`.
var shouldIncludeBundledFixtures = (input) => {
  const isOptionsObject = Boolean(input) && typeof input === "object" && !Array.isArray(input);
  const optedOut = isOptionsObject && input.includeBundled === false;
  return !optedOut;
};
|
|
43428
|
+
/**
 * Collects the fixture directories configured by the caller and the environment.
 *
 * Directories from `input` come first, followed by those listed in the
 * EXTERNAL_FIXTURE_ENV_KEYS environment variables. Each is resolved to an
 * absolute path and duplicates are removed.
 *
 * @throws {Error} when a directory does not contain a manifest.json.
 */
var resolveConfiguredFixtureDirectories = async (input) => {
  const explicit = resolveFixtureInputDirectories(input);
  const fromEnvironment = EXTERNAL_FIXTURE_ENV_KEYS.flatMap((key) => splitFixtureDirectoryValue(process.env[key]));
  const absolute = [...explicit, ...fromEnvironment].map((directory) => resolve2(directory));
  const uniqueDirectories = toUniqueDirectories(absolute);
  // Checked one at a time so the error names the first missing manifest in list order.
  for (const directory of uniqueDirectories) {
    const manifestPath = resolve2(directory, "manifest.json");
    if (!await Bun.file(manifestPath).exists()) {
      throw new Error(`Voice fixture directory "${directory}" is missing manifest.json.`);
    }
  }
  return uniqueDirectories;
};
|
|
43442
|
+
/**
 * Resolves the full list of fixture directories to load.
 *
 * The bundled directory comes first, followed by the configured ones. When the
 * input opts out of the bundled fixtures, only the configured directories are
 * returned.
 *
 * @throws {Error} when bundled fixtures are excluded and nothing else is configured.
 */
var resolveVoiceFixtureDirectories = async (input) => {
  const configured = await resolveConfiguredFixtureDirectories(input);
  if (shouldIncludeBundledFixtures(input)) {
    const bundled = await resolveFixtureDirectory();
    return [bundled, ...configured];
  }
  if (configured.length === 0) {
    throw new Error("No voice fixture directories were configured. Provide directories or set VOICE_FIXTURE_DIR/VOICE_FIXTURE_DIRS.");
  }
  return configured;
};
|
|
43452
|
+
// Rounds to the nearest integer and clamps to the signed 16-bit range [-32768, 32767].
var clampSample = (value) => {
  const rounded = Math.round(value);
  const capped = Math.min(32767, rounded);
  return Math.max(-32768, capped);
};
|
|
43453
|
+
/**
 * Copies a byte view into a fresh Int16Array of 16-bit samples.
 *
 * A trailing odd byte, which is half a sample, is ignored; without this the
 * Int16Array constructor throws a RangeError on odd-length input.
 * Samples are read in platform byte order.
 *
 * @param audio Uint8Array (or other typed-array view) holding 16-bit samples.
 * @returns A new Int16Array that does not share memory with `audio`.
 */
var toPcm16Samples = (audio) => {
  const evenByteLength = audio.byteLength - audio.byteLength % 2;
  const copy = audio.buffer.slice(audio.byteOffset, audio.byteOffset + evenByteLength);
  return new Int16Array(copy);
};
|
|
43454
|
+
// Copies the bytes backing a sample view into a fresh Uint8Array.
// The result does not share memory with `samples`.
var toPcm16Bytes = (samples) => {
  const { buffer, byteLength, byteOffset } = samples;
  const copy = buffer.slice(byteOffset, byteOffset + byteLength);
  return new Uint8Array(copy);
};
|
|
43455
|
+
/**
 * Allocates zero-filled audio for the given duration at two bytes per sample
 * (presumably 16-bit mono — the channel count is not taken into account).
 *
 * The length is computed in whole samples and then doubled, so the byte count is
 * always even. An odd-length gap would shift every 16-bit sample concatenated
 * after it by one byte.
 *
 * @param sampleRateHz Samples per second.
 * @param durationMs Duration of the silence in milliseconds.
 * @returns A zero-filled Uint8Array holding at least one sample (2 bytes).
 */
var createSilenceBytes = (sampleRateHz, durationMs) => {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = Math.max(1, Math.round(sampleRateHz * durationMs / 1000));
  return new Uint8Array(sampleCount * BYTES_PER_SAMPLE);
};
|
|
43456
|
+
/**
 * Joins byte chunks, in order, into one newly allocated Uint8Array.
 *
 * @param chunks Array of Uint8Array chunks (may be empty).
 */
var concatAudioChunks = (chunks) => {
  let totalByteLength = 0;
  for (const chunk of chunks) {
    totalByteLength += chunk.byteLength;
  }
  const joined = new Uint8Array(totalByteLength);
  let writeAt = 0;
  chunks.forEach((chunk) => {
    joined.set(chunk, writeAt);
    writeAt += chunk.byteLength;
  });
  return joined;
};
|
|
43466
|
+
/**
 * Resamples 16-bit samples with linear interpolation.
 *
 * @param samples Source samples (Int16Array).
 * @param sourceRate Sample rate of `samples`.
 * @param targetRate Desired sample rate.
 * @returns `samples` itself when the rates match or the input is empty; otherwise
 *   a new Int16Array of `max(1, round(length * targetRate / sourceRate))` samples.
 */
var resamplePcm16Mono = (samples, sourceRate, targetRate) => {
  const nothingToDo = sourceRate === targetRate || samples.length === 0;
  if (nothingToDo) {
    return samples;
  }
  const ratio = targetRate / sourceRate;
  const lastSourceIndex = samples.length - 1;
  const targetLength = Math.max(1, Math.round(samples.length * ratio));
  return Int16Array.from({ length: targetLength }, (_, index) => {
    // Position of this output sample on the source timeline.
    const position = index / ratio;
    const lowerIndex = Math.floor(position);
    // The final output samples may fall past the last source sample; hold it.
    const upperIndex = Math.min(lowerIndex + 1, lastSourceIndex);
    const lower = samples[lowerIndex] ?? 0;
    const upper = samples[upperIndex] ?? lower;
    const weight = position - lowerIndex;
    return clampSample(lower + (upper - lower) * weight);
  });
};
|
|
43484
|
+
/**
 * Encodes one 16-bit linear PCM sample as an 8-bit G.711 mu-law byte.
 *
 * The biased magnitude is clamped to the 16-bit limit (32767, i.e. an input clip
 * of 32635 plus the 132 bias) so all eight segments are reachable. Clamping to
 * the 14-bit limit 8191 while using the 16-bit bias and segment mask caps the
 * exponent at 5 and hard-clips every sample louder than about ±8059.
 *
 * @param sample Signed 16-bit sample.
 * @returns Mu-law byte in 0..255 (bit-inverted, as on the wire).
 */
var toMuLaw = (sample) => {
  const MU_LAW_BIAS = 132;
  const MU_LAW_MAX = 32767;
  const sign = sample < 0 ? 128 : 0;
  const magnitude = Math.min(MU_LAW_MAX, Math.abs(sample) + MU_LAW_BIAS);
  // Segment number = position of the highest set bit, searched downward from bit 14.
  let exponent = 7;
  for (let mask = 16384; (magnitude & mask) === 0 && exponent > 0; mask >>= 1) {
    exponent -= 1;
  }
  // Four bits immediately below the segment's leading bit.
  const mantissa = magnitude >> exponent + 3 & 15;
  return ~(sign | exponent << 4 | mantissa) & 255;
};
|
|
43496
|
+
var fromMuLaw = (encoded) => {
|
|
43497
|
+
const normalized = ~encoded & 255;
|
|
43498
|
+
const sign = normalized & 128;
|
|
43499
|
+
const exponent = normalized >> 4 & 7;
|
|
43500
|
+
const mantissa = normalized & 15;
|
|
43501
|
+
const magnitude = ((mantissa | 16) << exponent + 3) - 132;
|
|
43502
|
+
return sign ? -magnitude : magnitude;
|
|
43503
|
+
};
|
|
43504
|
+
/**
 * Produces a degraded copy of 16-bit PCM audio: resampled to
 * `targetSampleRateHz`, passed through a mu-law encode/decode round trip, and
 * attenuated to 92% amplitude.
 *
 * @param audio Source bytes.
 * @param format Source format; only `sampleRateHz` is read.
 * @param targetSampleRateHz Output sample rate.
 * @returns New Uint8Array with the degraded samples.
 */
var applyTelephonyDegradation = (audio, format, targetSampleRateHz) => {
  const ATTENUATION = 0.92;
  const narrowband = resamplePcm16Mono(toPcm16Samples(audio), format.sampleRateHz, targetSampleRateHz);
  const degraded = narrowband.map((sample) => {
    const companded = fromMuLaw(toMuLaw(sample ?? 0));
    return clampSample(companded * ATTENUATION);
  });
  return toPcm16Bytes(degraded);
};
|
|
43514
|
+
// Accent fixtures (tagged "accent" or "speech-accent-archive") are left out of
// the telephony set unless `options.includeAccents` is truthy.
var shouldIncludeTelephonyFixture = (fixture, options) => {
  const tags = new Set(fixture.tags ?? []);
  const isAccentFixture = tags.has("accent") || tags.has("speech-accent-archive");
  return options.includeAccents || !isAccentFixture ? true : false;
};
|
|
43521
|
+
/**
 * Derives telephony variants of the given fixtures.
 *
 * Each included fixture is copied with degraded audio, its format's sample rate
 * replaced, `-telephony` appended to the id, the "narrowband" and "telephony"
 * tags added, and the title suffixed.
 *
 * @param fixtures Source fixtures.
 * @param options `targetSampleRateHz` (defaults to DEFAULT_TELEPHONY_SAMPLE_RATE_HZ)
 *   and `includeAccents`.
 */
var createTelephonyVoiceTestFixtures = (fixtures, options = {}) => {
  const targetSampleRateHz = options.targetSampleRateHz ?? DEFAULT_TELEPHONY_SAMPLE_RATE_HZ;
  const toTelephonyVariant = (fixture) => {
    const tags = new Set([...fixture.tags ?? [], "narrowband", "telephony"]);
    return {
      ...fixture,
      audio: applyTelephonyDegradation(fixture.audio, fixture.format, targetSampleRateHz),
      format: {
        ...fixture.format,
        sampleRateHz: targetSampleRateHz
      },
      id: `${fixture.id}-telephony`,
      tags: Array.from(tags),
      title: `${fixture.title} (telephony narrowband)`
    };
  };
  const eligible = fixtures.filter((fixture) => shouldIncludeTelephonyFixture(fixture, options));
  return eligible.map(toTelephonyVariant);
};
|
|
43535
|
+
/**
 * Returns the first fixture whose `id` matches.
 *
 * @throws {Error} when no fixture has that id.
 */
var requireFixture = (fixtures, id) => {
  const position = fixtures.findIndex((entry) => entry.id === id);
  const fixture = position === -1 ? undefined : fixtures[position];
  if (fixture) {
    return fixture;
  }
  throw new Error(`Missing bundled voice fixture "${id}" required for multi-speaker benchmarks.`);
};
|
|
43542
|
+
/**
 * Builds two synthetic multi-speaker fixtures by concatenating existing
 * fixtures with a silent gap between speakers: a two-speaker handoff (A, B) and
 * a three-speaker handoff (A, B, C).
 *
 * Every field not overridden here is inherited from speaker A's fixture, and the
 * gap length is computed from speaker A's sample rate.
 *
 * @param fixtures Fixtures that must contain "quietly-alone-clean",
 *   "traveled-back-route-clean" and "rainstorms-noisy".
 * @param options `silenceMs` overrides DEFAULT_MULTI_SPEAKER_SILENCE_MS.
 * @throws {Error} (from requireFixture) when a required fixture is missing.
 */
var createMultiSpeakerVoiceTestFixtures = (fixtures, options = {}) => {
  const silenceMs = options.silenceMs ?? DEFAULT_MULTI_SPEAKER_SILENCE_MS;
  const [speakerA, speakerB, speakerC] = [
    "quietly-alone-clean",
    "traveled-back-route-clean",
    "rainstorms-noisy"
  ].map((id) => requireFixture(fixtures, id));
  const gap = createSilenceBytes(speakerA.format.sampleRateHz, silenceMs);
  const speakerLabels = ["speaker-a", "speaker-b", "speaker-c"];
  // Assembles one synthetic fixture from an ordered list of speaker fixtures.
  const composeHandoff = (speakers, id, title, extraTags) => {
    const audioParts = speakers.flatMap((speaker, position) => position === 0 ? [speaker.audio] : [gap, speaker.audio]);
    const mergedTerms = new Set(speakers.flatMap((speaker) => [...speaker.expectedTerms ?? []]));
    return {
      ...speakerA,
      audio: concatAudioChunks(audioParts),
      audioPath: speakers.map((speaker) => `${speaker.audioPath}`).join("+"),
      expectedSpeakerTurns: speakers.map((speaker, position) => ({
        speaker: speakerLabels[position],
        text: speaker.expectedText
      })),
      expectedTerms: Array.from(mergedTerms),
      expectedText: speakers.map((speaker) => `${speaker.expectedText}`).join(" ").trim(),
      expectedTurnTexts: speakers.map((speaker) => speaker.expectedText),
      id,
      tags: ["multi-speaker", "handoff", "synthetic", ...extraTags],
      title
    };
  };
  return [
    composeHandoff([speakerA, speakerB], "multi-speaker-handoff-clean", "Synthetic two-speaker handoff", ["clean"]),
    composeHandoff([speakerA, speakerB, speakerC], "multi-speaker-handoff-three", "Synthetic three-speaker handoff (A-B-C)", ["challenging", "noisy"])
  ];
};
|
|
43607
|
+
/**
 * Derives "jargon" variants of the fixtures listed in JARGON_FIXTURE_IDS.
 *
 * Every listed fixture must exist (requireFixture throws otherwise). Fixtures
 * without any `expectedTerms` are skipped; the rest are copied with `-jargon`
 * appended to the id, the "domain-heavy" and "jargon" tags added, and the
 * title suffixed.
 */
var createJargonVoiceTestFixtures = (fixtures) => {
  const variants = [];
  for (const id of JARGON_FIXTURE_IDS) {
    const fixture = requireFixture(fixtures, id);
    const termCount = fixture.expectedTerms?.length ?? 0;
    if (termCount > 0) {
      const tags = new Set([...fixture.tags ?? [], "domain-heavy", "jargon"]);
      variants.push({
        ...fixture,
        id: `${fixture.id}-jargon`,
        tags: Array.from(tags),
        title: `${fixture.title} (jargon)`
      });
    }
  }
  return variants;
};
|
|
43613
|
+
/**
 * Loads every fixture listed in the manifest.json of each resolved fixture directory.
 *
 * For each manifest entry the audio file at `<directory>/pcm/<entry.audioPath>`
 * is read into memory, and the entry's `format` is merged over DEFAULT_AUDIO_FORMAT.
 *
 * @param fixtureDirectory Directory input forwarded to resolveVoiceFixtureDirectories.
 * @returns Fixtures in manifest order, directory by directory.
 * @throws {Error} when the same fixture id appears more than once.
 */
var loadVoiceTestFixtures = async (fixtureDirectory) => {
  const directories = await resolveVoiceFixtureDirectories(fixtureDirectory);
  // Keyed by fixture id; a Map keeps insertion order, which is the load order.
  const fixturesById = new Map();
  for (const directory of directories) {
    const manifest = await Bun.file(resolve2(directory, "manifest.json")).json();
    for (const entry of manifest) {
      if (fixturesById.has(entry.id)) {
        throw new Error(`Duplicate voice fixture id "${entry.id}" found while loading "${directory}".`);
      }
      const audioPath = resolve2(directory, "pcm", entry.audioPath);
      const audioBuffer = await Bun.file(audioPath).arrayBuffer();
      fixturesById.set(entry.id, {
        ...entry,
        audio: new Uint8Array(audioBuffer),
        audioPath,
        format: {
          ...DEFAULT_AUDIO_FORMAT,
          ...entry.format
        }
      });
    }
  }
  return Array.from(fixturesById.values());
};
|
|
43640
|
+
|
|
43641
|
+
// src/testing/accuracy.ts
|
|
43642
|
+
// Prepares text for comparison: lower-case, replace every character that is not
// a letter, digit, whitespace or apostrophe with a space, collapse runs of
// whitespace to one space, and trim.
var normalizeAccuracyText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
};
|
|
43643
|
+
/**
 * Levenshtein edit distance between two indexable sequences (arrays or strings),
 * comparing elements with `===`. Insertions, deletions and substitutions each
 * cost 1.
 */
var levenshteinDistance2 = (left, right) => {
  const rowCount = left.length;
  const columnCount = right.length;
  if (rowCount === 0) {
    return columnCount;
  }
  if (columnCount === 0) {
    return rowCount;
  }
  // Two rolling rows of the DP table; they swap roles after each row.
  let above = Array.from({ length: columnCount + 1 }, (_, column) => column);
  let active = new Array(columnCount + 1).fill(0);
  for (let row = 1; row <= rowCount; row += 1) {
    active[0] = row;
    for (let column = 1; column <= columnCount; column += 1) {
      const insertion = active[column - 1] + 1;
      const deletion = above[column] + 1;
      const substitution = above[column - 1] + (left[row - 1] === right[column - 1] ? 0 : 1);
      active[column] = Math.min(insertion, deletion, substitution);
    }
    [above, active] = [active, above];
  }
  return above[columnCount];
};
|
|
43667
|
+
// Keeps only the transcripts flagged `isFinal` and joins them via
// buildTurnText, passing an empty string as its second argument.
var mergeFinalTranscriptText = (transcripts) => {
  const finals = transcripts.filter(({ isFinal }) => isFinal);
  return buildTurnText(finals, "");
};
|
|
43668
|
+
/**
 * Scores a transcript against the expected text.
 *
 * Both texts are normalized first. Word error rate is the word-level edit
 * distance divided by the number of expected words; character error rate is the
 * code-point-level edit distance divided by the number of expected code points.
 * Both rates are 0 when the expected text is empty.
 *
 * @param actualText Transcript produced by the system under test.
 * @param expectedText Reference transcript.
 * @param threshold Maximum word error rate that still passes (default 0.35).
 * @returns `{ actualText, charDistance, charErrorRate, expectedText,
 *   passesThreshold, threshold, wordDistance, wordErrorRate }`, with the two
 *   texts in normalized form.
 */
var scoreTranscriptAccuracy = (actualText, expectedText, threshold = 0.35) => {
  const normalizedActual = normalizeAccuracyText(actualText);
  const normalizedExpected = normalizeAccuracyText(expectedText);
  const actualWords = normalizedActual ? normalizedActual.split(" ") : [];
  const expectedWords = normalizedExpected ? normalizedExpected.split(" ") : [];
  // Code points, not UTF-16 units: the distance and its denominator must count
  // characters the same way, or astral-plane letters halve the reported rate.
  const actualChars = Array.from(normalizedActual);
  const expectedChars = Array.from(normalizedExpected);
  const wordDistance = levenshteinDistance2(actualWords, expectedWords);
  const charDistance = levenshteinDistance2(actualChars, expectedChars);
  const wordErrorRate = expectedWords.length > 0 ? wordDistance / expectedWords.length : 0;
  const charErrorRate = expectedChars.length > 0 ? charDistance / expectedChars.length : 0;
  return {
    actualText: normalizedActual,
    charDistance,
    charErrorRate,
    expectedText: normalizedExpected,
    passesThreshold: wordErrorRate <= threshold,
    threshold,
    wordDistance,
    wordErrorRate
  };
};
|
|
43688
|
+
|
|
43689
|
+
// src/testing/stt.ts
|
|
43690
|
+
/**
 * Splits audio bytes into consecutive chunks of at most `bytesPerChunk` bytes.
 * Each chunk is an independent copy; the last one may be shorter.
 *
 * @param audio Uint8Array to split.
 * @param bytesPerChunk Chunk size in bytes; must be greater than zero.
 * @returns Array of Uint8Array chunks (empty for empty input).
 * @throws {RangeError} when `bytesPerChunk` is not a positive number. A zero or
 *   negative size would never advance the offset and loop forever.
 */
var chunkAudio = (audio, bytesPerChunk) => {
  if (!(bytesPerChunk > 0)) {
    throw new RangeError(`bytesPerChunk must be a positive number, received ${String(bytesPerChunk)}.`);
  }
  const chunks = [];
  for (let offset = 0; offset < audio.byteLength; offset += bytesPerChunk) {
    chunks.push(audio.slice(offset, offset + bytesPerChunk));
  }
  return chunks;
};
|
|
43697
|
+
// Allocates `byteLength3` zero-filled bytes.
var createSilence = (byteLength3) => {
  const silence = new Uint8Array(byteLength3);
  return silence;
};
|
|
43698
|
+
/**
 * Waits until no activity has been recorded for `settleMs`, or until
 * `idleTimeoutMs` has elapsed, whichever comes first.
 *
 * @param readLastActivityAt Returns the timestamp (ms since epoch) of the latest activity.
 * @param idleTimeoutMs Upper bound on the total wait.
 * @param settleMs Quiet period that counts as idle; polled at most every 50 ms.
 * @returns A promise that resolves with no value in either case.
 */
var waitForIdle = async (readLastActivityAt, idleTimeoutMs, settleMs) => {
  const beganAt = Date.now();
  for (;;) {
    const withinBudget = Date.now() - beganAt < idleTimeoutMs;
    if (!withinBudget) {
      return;
    }
    const quietForMs = Date.now() - readLastActivityAt();
    if (quietForMs >= settleMs) {
      return;
    }
    await Bun.sleep(Math.min(50, settleMs));
  }
};
|
|
43707
|
+
/**
 * Streams one audio fixture through a speech-to-text adapter session and
 * collects every event the session emits, then scores the resulting text.
 *
 * Flow: open a session, subscribe to "partial", "final", "endOfTurn", "error"
 * and "close", send the fixture audio chunk by chunk with a pause after each
 * chunk, send zero-filled tail padding, wait for the session to go quiet, then
 * close the session and unsubscribe.
 *
 * @param {object} adapter - Object exposing `open(options)` that resolves to a
 *   session with `on(type, handler)` (returning an unsubscribe function),
 *   `send(chunk)` and `close(reason)`.
 * @param {object} fixture - Reads `id`, `audio`, `expectedText`,
 *   `format.sampleRateHz`, `format.channels`, and the optional
 *   `chunkDurationMs` / `tailPaddingMs`.
 * @param {object} [options] - Optional `chunkDurationMs` (default 100),
 *   `tailPaddingMs` (default 1000), `idleTimeoutMs` (default 8000), `settleMs`
 *   (default 500), `waitForRealtimeMs` (default 0), `openOptions` (object or
 *   function of the fixture) and `transcriptThreshold`.
 * @returns {Promise<object>} The collected event arrays plus `accuracy`,
 *   `finalText`, `startedAt` and `speechEndedAt` (epoch milliseconds).
 */
var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
  const startedAt = Date.now();
  const partialEvents = [];
  const finalEvents = [];
  const endOfTurnEvents = [];
  const errorEvents = [];
  const closeEvents = [];
  const chunkDurationMs = options.chunkDurationMs ?? fixture.chunkDurationMs ?? 100;
  const tailPaddingMs = options.tailPaddingMs ?? fixture.tailPaddingMs ?? 1000;
  const idleTimeoutMs = options.idleTimeoutMs ?? 8000;
  const settleMs = options.settleMs ?? 500;
  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
  let lastActivityAt = Date.now();
  let speechEndedAt = startedAt;
  const markActive = () => {
    lastActivityAt = Date.now();
  };
  const resolvedOpenOptions = typeof options.openOptions === "function" ? options.openOptions(fixture) : options.openOptions;
  const session = await adapter.open({
    format: fixture.format,
    sessionId: `fixture-${fixture.id}`,
    ...resolvedOpenOptions ?? {}
  });
  // Every event is stored in its own list and also counts as activity.
  const recordInto = (sink) => (event) => {
    sink.push(event);
    markActive();
  };
  const unsubscribers = [
    session.on("partial", recordInto(partialEvents)),
    session.on("final", recordInto(finalEvents)),
    session.on("endOfTurn", recordInto(endOfTurnEvents)),
    session.on("error", recordInto(errorEvents)),
    session.on("close", recordInto(closeEvents))
  ];
  try {
    // The factor 2 is bytes per sample (presumably 16-bit PCM — confirm against the fixture format).
    const bytesPerMillisecond = fixture.format.sampleRateHz * fixture.format.channels * 2 / 1000;
    const bytesPerChunk = Math.max(2, Math.floor(bytesPerMillisecond * chunkDurationMs));
    const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
    const streamChunks = async (chunks) => {
      for (const chunk of chunks) {
        await session.send(chunk);
        markActive();
        await Bun.sleep(realtimeDelayMs);
      }
    };
    await streamChunks(chunkAudio(fixture.audio, bytesPerChunk));
    speechEndedAt = Date.now();
    if (tailPaddingMs > 0) {
      const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
      await streamChunks(chunkAudio(createSilence(tailBytes), bytesPerChunk));
    }
    await waitForIdle(() => lastActivityAt, idleTimeoutMs, settleMs);
  } finally {
    try {
      // Close first so a "close" event emitted during shutdown is still recorded.
      await session.close("fixture-complete");
    } finally {
      // Always detach the listeners, even when close() rejects.
      for (const unsubscribe of unsubscribers) {
        unsubscribe();
      }
    }
  }
  const finalTranscripts = finalEvents.map((event) => ({
    ...event.transcript,
    endedAtMs: event.receivedAt - startedAt,
    startedAtMs: event.receivedAt - startedAt
  }));
  // Listeners are detached by now, so the last final event is fixed.
  const lastFinalReceivedAt = finalEvents.at(-1)?.receivedAt ?? 0;
  // Most recent non-empty partial that arrived at or after the last final.
  const trailingPartial = [...partialEvents].reverse().find((event) => {
    const text = event.transcript.text.trim();
    if (!text) {
      return false;
    }
    return event.receivedAt >= lastFinalReceivedAt;
  });
  const finalText = trailingPartial && finalTranscripts.length > 0 ? buildTurnText(finalTranscripts, trailingPartial.transcript.text, {
    partialEndedAtMs: trailingPartial.receivedAt - startedAt,
    partialStartedAtMs: trailingPartial.receivedAt - startedAt
  }) : mergeFinalTranscriptText(finalTranscripts);
  return {
    accuracy: scoreTranscriptAccuracy(finalText, fixture.expectedText, options.transcriptThreshold),
    closeEvents,
    endOfTurnEvents,
    errorEvents,
    finalEvents,
    finalText,
    partialEvents,
    speechEndedAt,
    startedAt
  };
};
|
|
43807
|
+
|
|
43808
|
+
// src/testing/benchmark.ts
|
|
43809
|
+
/**
 * Maps a fixture to the single benchmark group it is reported under.
 *
 * Precedence, first match wins: "telephony", "code-switch", "multi-speaker",
 * "jargon", "accent-noisy", "multilingual", "accent", "noisy", "clean",
 * and finally "other". Tags are compared exactly as given (no trimming or
 * case folding).
 *
 * @param {{ tags?: string[], language?: string }} fixture
 * @returns {string} The group name.
 */
var resolveFixtureEnvironment = (fixture) => {
  const tags = new Set(fixture.tags ?? []);
  const taggedWithAny = (...names) => names.some((name) => tags.has(name));
  if (taggedWithAny("telephony")) {
    return "telephony";
  }
  if (taggedWithAny("code-switch", "code_switch")) {
    return "code-switch";
  }
  if (taggedWithAny("multi-speaker")) {
    return "multi-speaker";
  }
  if (taggedWithAny("jargon", "domain-heavy")) {
    return "jargon";
  }
  const accented = taggedWithAny("accent", "speech-accent-archive");
  const noisy = taggedWithAny("noisy", "synthetic-noise", "stress");
  // Any non-empty language code that does not start with "en" counts as multilingual.
  const language = fixture.language?.trim().toLowerCase();
  const nonEnglish = typeof language === "string" && language.length > 0 && !language.startsWith("en");
  if (accented && noisy) {
    return "accent-noisy";
  }
  if (taggedWithAny("multilingual", "bilingual") || nonEnglish) {
    return "multilingual";
  }
  if (accented) {
    return "accent";
  }
  if (noisy) {
    return "noisy";
  }
  return tags.has("clean") ? "clean" : "other";
};
|
|
43845
|
+
/**
 * Canonicalizes text for benchmark comparison: lower-cases it, replaces every
 * character that is not a Unicode letter, Unicode number, whitespace or
 * apostrophe with a space, collapses whitespace runs to one space, and trims.
 *
 * @param {string} value
 * @returns {string} The normalized text.
 */
var normalizeBenchmarkText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
};
|
|
43846
|
+
/**
 * Checks which expected terms occur in a transcript. Both sides are passed
 * through `normalizeBenchmarkText`, and a term counts as matched when the
 * normalized transcript contains it as a substring.
 *
 * Terms that normalize to an empty string can never match, so they are
 * excluded from scoring; otherwise they would lower `recall` while
 * `allMatched` still reported true. They are still echoed in `expectedTerms`
 * so that array keeps lining up with the input.
 *
 * @param {string} actualText - Transcript produced by the adapter.
 * @param {string[] | null | undefined} expectedTerms - Terms that should appear.
 * @returns {{ allMatched: boolean, expectedTerms: string[], matchedTerms: string[], missingTerms: string[], recall: number }}
 *   `recall` is matched / scorable terms, or 1 when there is nothing to score.
 */
var scoreExpectedTerms = (actualText, expectedTerms) => {
  const normalizedActual = normalizeBenchmarkText(actualText);
  const normalizedExpectedTerms = (expectedTerms ?? []).map((entry) => normalizeBenchmarkText(entry));
  const scorableTerms = normalizedExpectedTerms.filter((term) => term.length > 0);
  const matchedTerms = [];
  const missingTerms = [];
  for (const term of scorableTerms) {
    (normalizedActual.includes(term) ? matchedTerms : missingTerms).push(term);
  }
  const recall = scorableTerms.length > 0 ? matchedTerms.length / scorableTerms.length : 1;
  return {
    allMatched: missingTerms.length === 0,
    expectedTerms: normalizedExpectedTerms,
    matchedTerms,
    missingTerms,
    recall
  };
};
|
|
43861
|
+
/**
 * Converts a sequence of speaker labels into a label-independent pattern:
 * each distinct label (compared by its string form) is replaced by the index
 * of its first appearance, counting from 0.
 *
 * @param {unknown[]} speakers
 * @returns {number[]} For example ["a", "b", "a"] becomes [0, 1, 0].
 */
var toPatternKeys = (speakers) => {
  const indexByLabel = new Map();
  return speakers.map((speaker) => {
    const label = String(speaker);
    let assigned = indexByLabel.get(label);
    if (assigned === undefined) {
      // The next free index always equals the number of labels seen so far.
      assigned = indexByLabel.size;
      indexByLabel.set(label, assigned);
    }
    return assigned;
  });
};
|
|
43873
|
+
/**
 * Splits text into its normalized words. Despite the name, this returns the
 * list of words rather than a count; callers read `.length` when they need
 * the number.
 *
 * @param {string} value
 * @returns {string[]} Non-empty tokens of the normalized text.
 */
var countNormalizedWords = (value) => {
  const tokens = normalizeBenchmarkText(value).split(" ");
  return tokens.filter((token) => token !== "");
};
|
|
43874
|
+
/**
 * Measures how much vocabulary two texts share: the number of distinct
 * normalized words present in both, divided by the size of the larger
 * distinct-word set.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} A ratio in [0, 1]; 0 when either text has no words.
 */
var computeWordOverlap = (left, right) => {
  const leftWords = new Set(countNormalizedWords(left));
  const rightWords = new Set(countNormalizedWords(right));
  if (leftWords.size === 0 || rightWords.size === 0) {
    return 0;
  }
  const sharedCount = [...leftWords].filter((word) => rightWords.has(word)).length;
  return sharedCount / Math.max(leftWords.size, rightWords.size);
};
|
|
43888
|
+
/**
 * Heuristically re-labels speaker turns for fixtures tagged both "synthetic"
 * and "handoff" that expect at least three speaker turns. Other fixtures get
 * their turns back untouched.
 *
 * A turn (from the third one on) is given a synthetic `postcluster-N` label
 * when all of the following hold: its speaker differs from the previous
 * turn's speaker, fewer distinct speakers have been seen than there are
 * expected turns, it has at least four normalized words, and its word overlap
 * with the first turn recorded for the same speaker is below 0.35.
 *
 * @param {{ expectedSpeakerTurns?: unknown[], tags?: string[] }} fixture
 * @param {Array<{ speaker?: unknown, text: string }>} turns
 * @returns {{ postClustered: boolean, turns: Array<object> }} `turns` is the
 *   input array itself when no repair is attempted, otherwise shallow copies.
 */
var repairSpeakerTurnReentry = (fixture, turns) => {
  const expectedTurns = fixture.expectedSpeakerTurns ?? [];
  const tags = new Set((fixture.tags ?? []).map((tag) => tag.trim().toLowerCase()));
  const eligible = expectedTurns.length >= 3 && tags.has("synthetic") && tags.has("handoff");
  if (!eligible) {
    return { postClustered: false, turns };
  }
  const keyOf = (speaker) => speaker === undefined ? undefined : String(speaker);
  const repairedTurns = turns.map((turn) => ({ ...turn }));
  const firstTurnBySpeaker = new Map();
  const seenRepairedSpeakers = new Set();
  let postClustered = false;
  let syntheticSpeakerIndex = 0;
  for (const [index, turn] of repairedTurns.entries()) {
    const speakerKey = keyOf(turn.speaker);
    // The previous turn may already carry a synthetic label from an earlier pass.
    const previousSpeakerKey = keyOf(repairedTurns[index - 1]?.speaker);
    if (speakerKey === undefined) {
      continue;
    }
    if (!firstTurnBySpeaker.has(speakerKey)) {
      firstTurnBySpeaker.set(speakerKey, turn);
    }
    seenRepairedSpeakers.add(speakerKey);
    const referenceTurn = firstTurnBySpeaker.get(speakerKey);
    const speakerChanged = previousSpeakerKey !== undefined && previousSpeakerKey !== speakerKey && index > 1;
    const needsAdditionalSpeaker = seenRepairedSpeakers.size < expectedTurns.length;
    const sameSpeakerOverlap = computeWordOverlap(turn.text, referenceTurn.text);
    const currentWordCount = countNormalizedWords(turn.text).length;
    const shouldSplit = speakerChanged && needsAdditionalSpeaker && currentWordCount >= 4 && sameSpeakerOverlap < 0.35;
    if (!shouldSplit) {
      continue;
    }
    turn.speaker = `postcluster-${syntheticSpeakerIndex}`;
    seenRepairedSpeakers.add(String(turn.speaker));
    syntheticSpeakerIndex += 1;
    postClustered = true;
  }
  return { postClustered, turns: repairedTurns };
};
|
|
43931
|
+
/**
 * Compares the speaker-turn sequence produced by an adapter with the sequence
 * a fixture expects.
 *
 * Final events with non-empty text become turns, consecutive turns from the
 * same (defined) speaker are merged, `repairSpeakerTurnReentry` is applied,
 * and the resulting speaker pattern is compared position by position with the
 * expected pattern.
 *
 * @param {{ expectedSpeakerTurns?: Array<{ speaker: unknown }> }} fixture
 * @param {{ finalEvents: Array<{ transcript: { speaker?: unknown, text: string } }> }} result
 * @returns {object | undefined} `undefined` when the fixture expects no
 *   speaker turns; otherwise a report with `available`, `actualTurnCount`,
 *   `expectedTurnCount`, `passes`, `patternMatchRate` and `postClustered`.
 */
var scoreSpeakerTurns = (fixture, result) => {
  const expectedTurns = fixture.expectedSpeakerTurns ?? [];
  if (expectedTurns.length === 0) {
    return;
  }
  const collapsedActualTurns = [];
  for (const event of result.finalEvents) {
    const turn = { speaker: event.transcript.speaker, text: event.transcript.text.trim() };
    if (turn.text.length === 0) {
      continue;
    }
    const previous = collapsedActualTurns[collapsedActualTurns.length - 1];
    const continuesPrevious = Boolean(previous) && previous.speaker !== undefined && turn.speaker !== undefined && String(previous.speaker) === String(turn.speaker);
    if (continuesPrevious) {
      previous.text = `${previous.text} ${turn.text}`.trim();
    } else {
      collapsedActualTurns.push(turn);
    }
  }
  const { postClustered, turns: scoredTurns } = repairSpeakerTurnReentry(fixture, collapsedActualTurns);
  // Without a speaker label on every turn there is no pattern to compare.
  if (scoredTurns.some((turn) => turn.speaker === undefined)) {
    return {
      available: false,
      actualTurnCount: scoredTurns.length,
      expectedTurnCount: expectedTurns.length,
      passes: false,
      patternMatchRate: 0,
      postClustered
    };
  }
  const actualPattern = toPatternKeys(scoredTurns.map((turn) => turn.speaker));
  const expectedPattern = toPatternKeys(expectedTurns.map((turn) => turn.speaker));
  const comparedLength = Math.min(actualPattern.length, expectedPattern.length);
  const longestLength = Math.max(actualPattern.length, expectedPattern.length, 1);
  const matches = actualPattern.slice(0, comparedLength).filter((key, index) => key === expectedPattern[index]).length;
  const patternMatchRate = roundMetric4(matches / longestLength) ?? 0;
  return {
    available: true,
    actualTurnCount: scoredTurns.length,
    expectedTurnCount: expectedTurns.length,
    passes: scoredTurns.length === expectedTurns.length && patternMatchRate === 1,
    patternMatchRate,
    postClustered
  };
};
|
|
43981
|
+
/**
 * Arithmetic mean of the finite numbers in `values`. Entries that are not
 * finite numbers (undefined, NaN, Infinity, strings, ...) are ignored.
 *
 * @param {unknown[]} values
 * @returns {number | undefined} The mean, or `undefined` when no entry is a
 *   finite number.
 */
var average2 = (values) => {
  let total = 0;
  let count = 0;
  for (const value of values) {
    if (typeof value === "number" && Number.isFinite(value)) {
      total += value;
      count += 1;
    }
  }
  return count === 0 ? undefined : total / count;
};
|
|
43988
|
+
/**
 * Rounds a metric to a fixed number of decimal places.
 *
 * @param {unknown} value - Value to round.
 * @param {number} [digits=4] - Decimal places to keep.
 * @returns {number | undefined} The rounded number, or `undefined` when
 *   `value` is not a finite number.
 */
var roundMetric4 = (value, digits = 4) => {
  // Number.isFinite is false for every non-number, so it also covers the type check.
  if (!Number.isFinite(value)) {
    return undefined;
  }
  const scale = Math.pow(10, digits);
  return Math.round(value * scale) / scale;
};
|
|
43995
|
+
/**
 * Aggregates per-fixture benchmark results by their `group` field and returns
 * one summary per group, sorted by group name.
 *
 * @param {Array<object>} fixtures - Per-fixture results; reads `group`,
 *   `passes`, `accuracy.wordErrorRate`, `expectedTerms.recall`, `elapsedMs`,
 *   `speakerTurns?.patternMatchRate`, `errorCount` and `fragmentationCount`.
 * @returns {Array<object>} Group summaries. `averageSpeakerTurnMatchRate` is
 *   `undefined` for groups without speaker-turn data; the other averages fall
 *   back to 0.
 */
var calculateGroupSummary = (fixtures) => {
  const resultsByGroup = new Map();
  for (const fixture of fixtures) {
    const bucket = resultsByGroup.get(fixture.group);
    if (bucket === undefined) {
      resultsByGroup.set(fixture.group, [fixture]);
    } else {
      bucket.push(fixture);
    }
  }
  const summarizeGroup = ([group, results]) => {
    const meanOf = (select) => average2(results.map(select));
    const countWhere = (predicate) => results.filter(predicate).length;
    const fixtureCount = results.length;
    const passCount = countWhere((fixture) => fixture.passes);
    const averageWordErrorRate = meanOf((result) => result.accuracy.wordErrorRate) ?? 0;
    const averageTermRecall = meanOf((result) => result.expectedTerms.recall) ?? 0;
    const averageElapsedMs = meanOf((result) => result.elapsedMs);
    const averageSpeakerTurnMatchRate = meanOf((result) => result.speakerTurns?.patternMatchRate);
    return {
      averageElapsedMs: roundMetric4(averageElapsedMs, 2) ?? 0,
      averageSpeakerTurnMatchRate: roundMetric4(averageSpeakerTurnMatchRate),
      averageTermRecall: roundMetric4(averageTermRecall) ?? 0,
      averageWordErrorRate: roundMetric4(averageWordErrorRate) ?? 0,
      fixturesWithErrors: countWhere((fixture) => fixture.errorCount > 0),
      fixturesWithFragments: countWhere((fixture) => fixture.fragmentationCount > 0),
      fixtureCount,
      group,
      passCount,
      passRate: fixtureCount > 0 ? roundMetric4(passCount / fixtureCount) ?? 0 : 0,
      // Word accuracy is reported as the complement of the average word error rate.
      wordAccuracyRate: roundMetric4(1 - averageWordErrorRate) ?? 0
    };
  };
  return [...resultsByGroup].map(summarizeGroup).sort((left, right) => left.group.localeCompare(right.group));
};
|
|
44025
|
+
/**
 * Turns the raw outcome of one fixture run into a flat benchmark record:
 * event counts, latency measurements, term recall, speaker-turn score, group
 * and the overall pass verdict.
 *
 * @param {object} fixture - Reads `id`, `title`, `difficulty`, `tags`,
 *   `expectedTerms`; also passed to the sibling scorers.
 * @param {object} result - Output of the fixture run (event arrays,
 *   `finalText`, `accuracy`, `startedAt`, `speechEndedAt`).
 * @param {number} elapsedMs - Wall-clock duration of the run.
 * @returns {object} The per-fixture benchmark record. Latency fields are
 *   `undefined` when the corresponding event never arrived.
 */
var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
  const [firstPartial] = result.partialEvents;
  const [firstFinal] = result.finalEvents;
  const [firstEndOfTurn] = result.endOfTurnEvents;
  // Latency from the start of the run to a given event.
  const sinceStart = (event) => event ? event.receivedAt - result.startedAt : undefined;
  // Latency from the end of the speech audio, clamped at zero for events that arrived earlier.
  const sinceSpeechEnd = (timestamp) => typeof timestamp === "number" ? Math.max(0, timestamp - result.speechEndedAt) : undefined;
  const timeToFirstPartialMs = sinceStart(firstPartial);
  const timeToFirstFinalMs = sinceStart(firstFinal);
  const timeToEndOfTurnMs = sinceStart(firstEndOfTurn);
  const postSpeechTimeToFirstFinalMs = sinceSpeechEnd(firstFinal?.receivedAt);
  const postSpeechTimeToEndOfTurnMs = sinceSpeechEnd(firstEndOfTurn?.receivedAt);
  const expectedTerms = scoreExpectedTerms(result.finalText, fixture.expectedTerms);
  const speakerTurns = scoreSpeakerTurns(fixture, result);
  const finalCount = result.finalEvents.length;
  const errorCount = result.errorEvents.length;
  return {
    accuracy: result.accuracy,
    closeCount: result.closeEvents.length,
    difficulty: fixture.difficulty,
    elapsedMs,
    endOfTurnCount: result.endOfTurnEvents.length,
    errorCount,
    expectedTerms,
    finalCount,
    finalText: result.finalText,
    fixtureId: fixture.id,
    // Every final beyond the first counts as one fragment.
    fragmentationCount: Math.max(0, finalCount - 1),
    group: resolveFixtureEnvironment(fixture),
    passes: errorCount === 0 && result.finalText.trim().length > 0 && result.accuracy.passesThreshold && (!speakerTurns || speakerTurns.passes),
    partialCount: result.partialEvents.length,
    speakerTurns,
    postSpeechTimeToEndOfTurnMs,
    postSpeechTimeToFirstFinalMs,
    tags: fixture.tags ?? [],
    timeToEndOfTurnMs,
    timeToFirstFinalMs,
    timeToFirstPartialMs,
    title: fixture.title
  };
};
|
|
44064
|
+
/**
 * Rolls per-fixture benchmark records up into one adapter-level summary:
 * averaged error rates, latencies and counts, pass statistics, and the
 * per-group breakdown.
 *
 * @param {string} adapterId - Identifier echoed into the summary.
 * @param {Array<object>} fixtures - Per-fixture benchmark records.
 * @returns {object} The summary. Latency and speaker-turn averages are
 *   `undefined` when no fixture supplied a value; the remaining averages
 *   fall back to 0.
 */
var summarizeSTTBenchmark = (adapterId, fixtures) => {
  const fixtureCount = fixtures.length;
  const passCount = fixtures.filter((fixture) => fixture.passes).length;
  // Rounded mean of one field across all fixtures (default precision when digits is omitted).
  const meanOf = (select, digits) => roundMetric4(average2(fixtures.map((fixture) => select(fixture))), digits);
  const countWhere = (predicate) => fixtures.filter(predicate).length;
  const rawWordErrorRate = average2(fixtures.map((fixture) => fixture.accuracy.wordErrorRate));
  return {
    adapterId,
    averageCharErrorRate: meanOf((fixture) => fixture.accuracy.charErrorRate) ?? 0,
    averageElapsedMs: meanOf((fixture) => fixture.elapsedMs, 2) ?? 0,
    averageEndOfTurnCount: meanOf((fixture) => fixture.endOfTurnCount, 2) ?? 0,
    averageFinalCount: meanOf((fixture) => fixture.finalCount, 2) ?? 0,
    averageSpeakerTurnMatchRate: meanOf((fixture) => fixture.speakerTurns?.patternMatchRate),
    averageTermRecall: meanOf((fixture) => fixture.expectedTerms.recall) ?? 0,
    averagePostSpeechTimeToEndOfTurnMs: meanOf((fixture) => fixture.postSpeechTimeToEndOfTurnMs, 2),
    averagePostSpeechTimeToFirstFinalMs: meanOf((fixture) => fixture.postSpeechTimeToFirstFinalMs, 2),
    averageTimeToEndOfTurnMs: meanOf((fixture) => fixture.timeToEndOfTurnMs, 2),
    averageTimeToFirstFinalMs: meanOf((fixture) => fixture.timeToFirstFinalMs, 2),
    averageTimeToFirstPartialMs: meanOf((fixture) => fixture.timeToFirstPartialMs, 2),
    averageWordErrorRate: roundMetric4(rawWordErrorRate) ?? 0,
    fixtureCount,
    fixturesWithErrors: countWhere((fixture) => fixture.errorCount > 0),
    fixturesWithFragmentation: countWhere((fixture) => fixture.fragmentationCount > 0),
    groupSummaries: calculateGroupSummary(fixtures),
    passCount,
    passRate: fixtureCount > 0 ? roundMetric4(passCount / fixtureCount) ?? 0 : 0,
    totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
    // Complement of the unrounded average word error rate; 0 when there are no fixtures.
    wordAccuracyRate: fixtureCount > 0 ? roundMetric4(1 - (rawWordErrorRate ?? 0)) ?? 0 : 0
  };
};
|
|
44091
|
+
/**
 * Checks a benchmark report against optional acceptance thresholds and
 * computes a weighted quality score.
 *
 * @param {{ adapterId: string, summary: object }} report - Benchmark report;
 *   reads `summary.passRate`, `summary.averageTermRecall`,
 *   `summary.wordAccuracyRate` and `summary.groupSummaries`.
 * @param {object} [thresholds] - Optional `overallPassRate`, `termRecall`,
 *   `wordAccuracyRate`, and `groupPassRate` (a map from group name to
 *   `{ passRate?, wordAccuracyRate? }`). Unset thresholds are not checked.
 * @returns {{ adapterId: string, failures: string[], passed: boolean, score: number }}
 *   `score` is 0.45 * passRate + 0.35 * wordAccuracyRate + 0.2 * averageTermRecall,
 *   rounded to three decimals.
 */
var evaluateSTTBenchmarkAcceptance = (report, thresholds = {}) => {
  const failures = [];
  const { summary } = report;
  const asPercent = (rate) => `${(rate * 100).toFixed(2)}%`;
  const {
    overallPassRate,
    termRecall: minTermRecall,
    wordAccuracyRate: minWordAccuracy,
    groupPassRate: groupThresholds
  } = thresholds;
  if (overallPassRate !== undefined && summary.passRate < overallPassRate) {
    failures.push(`overall passRate ${asPercent(summary.passRate)} below ${asPercent(overallPassRate)}`);
  }
  if (minTermRecall !== undefined && summary.averageTermRecall < minTermRecall) {
    failures.push(`overall term recall ${summary.averageTermRecall.toFixed(4)} below ${minTermRecall.toFixed(4)}`);
  }
  if (minWordAccuracy !== undefined && summary.wordAccuracyRate < minWordAccuracy) {
    failures.push(`overall word accuracy ${asPercent(summary.wordAccuracyRate)} below ${asPercent(minWordAccuracy)}`);
  }
  // Groups without a configured threshold are not checked.
  for (const groupSummary of groupThresholds ? summary.groupSummaries : []) {
    const threshold = groupThresholds[groupSummary.group];
    if (!threshold) {
      continue;
    }
    const { group } = groupSummary;
    if (threshold.passRate !== undefined && groupSummary.passRate < threshold.passRate) {
      failures.push(`${group} passRate ${asPercent(groupSummary.passRate)} below ${asPercent(threshold.passRate)}`);
    }
    if (threshold.wordAccuracyRate !== undefined && groupSummary.wordAccuracyRate < threshold.wordAccuracyRate) {
      failures.push(`${group} wordAccuracy ${asPercent(groupSummary.wordAccuracyRate)} below ${asPercent(threshold.wordAccuracyRate)}`);
    }
  }
  const weightedScore = summary.passRate * 0.45 + summary.wordAccuracyRate * 0.35 + summary.averageTermRecall * 0.2;
  return {
    adapterId: report.adapterId,
    failures,
    passed: failures.length === 0,
    score: roundMetric4(weightedScore, 3) ?? 0
  };
};
|
|
44129
|
+
/**
 * Ranks several adapter benchmark reports against each other.
 *
 * @param {Array<{ adapterId: string, summary: object }>} reports
 * @returns {{ bestByPassRate: object | undefined, bestByTermRecall: object | undefined, bestByWordErrorRate: object | undefined, entries: Array<{ adapterId: string, summary: object }> }}
 *   `entries` lists every report's id and summary; each `bestBy*` field is the
 *   winning entry (highest pass rate, highest term recall, lowest word error
 *   rate) or `undefined` for an empty input. On ties the earliest entry wins.
 */
var compareSTTBenchmarks = (reports) => {
  const entries = reports.map((report) => {
    const { adapterId, summary } = report;
    return { adapterId, summary };
  });
  const pickBest = (selectMetric, direction) => {
    let best;
    for (const entry of entries) {
      if (!best) {
        best = entry;
        continue;
      }
      const candidate = selectMetric(entry);
      const incumbent = selectMetric(best);
      // Strict comparison: an equal score never displaces the earlier entry.
      const improves = direction === "max" ? candidate > incumbent : candidate < incumbent;
      if (improves) {
        best = entry;
      }
    }
    return best;
  };
  return {
    bestByPassRate: pickBest((entry) => entry.summary.passRate, "max"),
    bestByTermRecall: pickBest((entry) => entry.summary.averageTermRecall, "max"),
    bestByWordErrorRate: pickBest((entry) => entry.summary.averageWordErrorRate, "min"),
    entries
  };
};
|
|
44152
|
+
/**
 * Runs every fixture through an adapter, one after another, and assembles a
 * benchmark report.
 *
 * @param {object} params
 * @param {object} params.adapter - Speech-to-text adapter under test.
 * @param {string} params.adapterId - Identifier recorded in the report.
 * @param {Array<object>} params.fixtures - Fixtures to run, in order.
 * @param {object} [params.options] - Run options applied to every fixture;
 *   `options.fixtureOptions[fixture.id]` overrides them for a single fixture.
 * @returns {Promise<{ adapterId: string, fixtures: Array<object>, generatedAt: number, summary: object }>}
 */
var runSTTAdapterBenchmark = async ({
  adapter,
  adapterId,
  fixtures,
  options = {}
}) => {
  const results = [];
  // Sequential on purpose: each run is timed and awaited before the next starts.
  for (const fixture of fixtures) {
    const perFixtureOverrides = options.fixtureOptions?.[fixture.id] ?? {};
    const startedAt = Date.now();
    const fixtureResult = await runSTTAdapterFixture(adapter, fixture, { ...options, ...perFixtureOverrides });
    const elapsedMs = Date.now() - startedAt;
    results.push(toFixtureBenchmarkResult(fixture, fixtureResult, elapsedMs));
  }
  const summary = summarizeSTTBenchmark(adapterId, results);
  return {
    adapterId,
    fixtures: results,
    generatedAt: Date.now(),
    summary
  };
};
|
|
44174
|
+
/**
 * Combines the reports of repeated benchmark runs into per-fixture aggregates
 * and an overall stability summary.
 *
 * @param {{ adapterId: string, reports: Array<{ fixtures: Array<object>, summary: { passCount: number } }> }} input
 * @returns {object} `fixtures` holds one aggregate per fixture id (average,
 *   best and worst word error rate, pass rate, run count); `summary` counts
 *   fixtures that passed every run as stable and fixtures that passed only
 *   some runs as flaky.
 */
var summarizeSTTBenchmarkSeries = (input) => {
  const runsByFixtureId = new Map();
  for (const report of input.reports) {
    for (const fixture of report.fixtures) {
      const runs = runsByFixtureId.get(fixture.fixtureId);
      if (runs === undefined) {
        runsByFixtureId.set(fixture.fixtureId, [fixture]);
      } else {
        runs.push(fixture);
      }
    }
  }
  const aggregateFixture = ([fixtureId, results]) => {
    const wordErrorRates = results.map((result) => result.accuracy.wordErrorRate);
    const hasRates = wordErrorRates.length > 0;
    const passCount = results.filter((result) => result.passes).length;
    // Descriptive fields are taken from the first recorded run of the fixture.
    const [sample] = results;
    return {
      averageElapsedMs: roundMetric4(average2(results.map((result) => result.elapsedMs)), 2) ?? 0,
      averagePassRate: roundMetric4(results.length > 0 ? passCount / results.length : 0) ?? 0,
      averageWordErrorRate: roundMetric4(average2(wordErrorRates)) ?? 0,
      bestWordErrorRate: roundMetric4(hasRates ? Math.min(...wordErrorRates) : 0) ?? 0,
      fixtureId,
      group: sample.group,
      passCount,
      runCount: results.length,
      tags: sample.tags,
      title: sample.title,
      worstWordErrorRate: roundMetric4(hasRates ? Math.max(...wordErrorRates) : 0) ?? 0
    };
  };
  const fixtureAggregates = [...runsByFixtureId.entries()].map(aggregateFixture);
  const meanAcrossFixtures = (select, digits) => roundMetric4(average2(fixtureAggregates.map((fixture) => select(fixture))), digits);
  let totalRunCount = 0;
  let totalPassCount = 0;
  for (const report of input.reports) {
    totalRunCount += report.fixtures.length;
    totalPassCount += report.summary.passCount;
  }
  return {
    adapterId: input.adapterId,
    fixtures: fixtureAggregates,
    generatedAt: Date.now(),
    runCount: input.reports.length,
    summary: {
      adapterId: input.adapterId,
      averageElapsedMs: meanAcrossFixtures((fixture) => fixture.averageElapsedMs, 2) ?? 0,
      averagePassRate: meanAcrossFixtures((fixture) => fixture.averagePassRate) ?? 0,
      averageWordErrorRate: meanAcrossFixtures((fixture) => fixture.averageWordErrorRate) ?? 0,
      fixtureCount: fixtureAggregates.length,
      flakyFixtureCount: fixtureAggregates.filter((fixture) => fixture.averagePassRate > 0 && fixture.averagePassRate < 1).length,
      generatedRunCount: input.reports.length,
      stableFixtureCount: fixtureAggregates.filter((fixture) => fixture.averagePassRate === 1).length,
      totalPassCount,
      totalRunCount
    }
  };
};
|
|
44222
|
+
/**
 * Repeats the full adapter benchmark several times, one run after another,
 * and summarizes the series.
 *
 * @param {object} params
 * @param {object} params.adapter - Speech-to-text adapter under test.
 * @param {string} params.adapterId - Identifier recorded in the reports.
 * @param {Array<object>} params.fixtures - Fixtures used for every run.
 * @param {object} [params.options] - Options forwarded to each benchmark run.
 * @param {number} params.runs - Requested number of runs. Fractions are
 *   rounded down and values below 1 are raised to 1. A missing, NaN or
 *   infinite value also results in exactly one run.
 * @returns {Promise<object>} The series summary.
 */
var runSTTAdapterBenchmarkSeries = async ({
  adapter,
  adapterId,
  fixtures,
  options = {},
  runs
}) => {
  const reports = [];
  // Math.max(1, NaN) is NaN, which would make the loop below run zero times
  // (and Infinity would never finish), so non-finite requests fall back to 1.
  const requestedRuns = Math.floor(runs);
  const runCount = Number.isFinite(requestedRuns) ? Math.max(1, requestedRuns) : 1;
  for (let runIndex = 0; runIndex < runCount; runIndex += 1) {
    reports.push(await runSTTAdapterBenchmark({
      adapter,
      adapterId,
      fixtures,
      options
    }));
  }
  return summarizeSTTBenchmarkSeries({
    adapterId,
    reports
  });
};
|
|
44244
|
+
|
|
44245
|
+
// src/multilingualProof.ts
|
|
44246
|
+
/**
 * Plain arithmetic mean. Unlike a filtering average, every entry is summed
 * as-is, so non-numeric entries propagate into the result.
 *
 * @param {number[]} values
 * @returns {number} The mean, or 0 for an empty list.
 */
var average3 = (values) => {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  for (let index = 0; index < count; index += 1) {
    total += values[index];
  }
  return total / count;
};
|
|
44254
|
+
/**
 * Computes aggregate accuracy metrics for a set of per-fixture benchmark
 * records. A missing word error rate or term recall counts as 0.
 *
 * @param {Array<{ accuracy: { wordErrorRate?: number }, expectedTerms: { recall?: number }, passes: unknown }>} results
 * @returns {{ averageTermRecall: number, averageWordAccuracyRate: number, averageWordErrorRate: number, fixtureCount: number, passCount: number, passRate: number }}
 *   All-zero metrics when `results` is empty.
 */
var computeMetrics = (results) => {
  const fixtureCount = results.length;
  if (fixtureCount === 0) {
    return {
      averageTermRecall: 0,
      averageWordAccuracyRate: 0,
      averageWordErrorRate: 0,
      fixtureCount: 0,
      passCount: 0,
      passRate: 0
    };
  }
  const wordErrorRates = results.map(({ accuracy }) => accuracy.wordErrorRate ?? 0);
  // Word accuracy is the per-fixture complement of the word error rate.
  const wordAccuracyRates = wordErrorRates.map((rate) => 1 - rate);
  const termRecalls = results.map(({ expectedTerms }) => expectedTerms.recall ?? 0);
  let passCount = 0;
  for (const result of results) {
    if (result.passes) {
      passCount += 1;
    }
  }
  return {
    averageTermRecall: average3(termRecalls),
    averageWordAccuracyRate: average3(wordAccuracyRates),
    averageWordErrorRate: average3(wordErrorRates),
    fixtureCount,
    passCount,
    passRate: passCount / fixtureCount
  };
};
|
|
44278
|
+
/**
 * Resolves the thresholds that apply to one language: values from the
 * matching `perLanguage` entry (matched case-insensitively on `language`)
 * take precedence, and anything the entry leaves unset falls back to
 * `defaults`.
 *
 * @param {string} language - Language code being evaluated.
 * @param {object | undefined} defaults - Fallback thresholds.
 * @param {Array<object> | undefined} perLanguage - Per-language overrides.
 * @returns {{ label: string | undefined, language: string, maxAverageWordErrorRate: number | undefined, minAverageWordAccuracyRate: number | undefined, minPassRate: number | undefined, minTermRecall: number | undefined }}
 */
var resolveLanguageThreshold = (language, defaults, perLanguage) => {
  const isSameLanguage = (entry) => entry.language.toLowerCase() === language.toLowerCase();
  const explicit = perLanguage?.find(isSameLanguage);
  const resolve = (key) => explicit?.[key] ?? defaults?.[key];
  return {
    // The label only ever comes from the explicit entry, never from the defaults.
    label: explicit?.label,
    language,
    maxAverageWordErrorRate: resolve("maxAverageWordErrorRate"),
    minAverageWordAccuracyRate: resolve("minAverageWordAccuracyRate"),
    minPassRate: resolve("minPassRate"),
    minTermRecall: resolve("minTermRecall")
  };
};
|
|
44289
|
+
/**
 * Evaluates one language's benchmark results against its thresholds.
 *
 * @param {string} language - Language code used in failure messages.
 * @param {Array<object>} fixtureResults - Per-fixture records for the language.
 * @param {object} thresholds - Resolved thresholds; unset limits are skipped.
 * @returns {{ applied: object, failures: string[], fixtureIds: unknown[], label: unknown, language: string, metrics: object, passes: boolean }}
 */
var evaluateLanguage = (language, fixtureResults, thresholds) => {
  const metrics = computeMetrics(fixtureResults);
  const failures = [];
  const {
    maxAverageWordErrorRate,
    minAverageWordAccuracyRate,
    minPassRate,
    minTermRecall
  } = thresholds;
  const fixed3 = (value) => value.toFixed(3);
  // Records a "below floor" failure when a floor is configured and the metric falls short of it.
  const requireAtLeast = (name, actual, floor) => {
    if (floor !== undefined && actual < floor) {
      failures.push(`${language}: ${name} ${fixed3(actual)} below floor ${fixed3(floor)}.`);
    }
  };
  if (maxAverageWordErrorRate !== undefined && metrics.averageWordErrorRate > maxAverageWordErrorRate) {
    failures.push(`${language}: avg WER ${fixed3(metrics.averageWordErrorRate)} exceeds budget ${fixed3(maxAverageWordErrorRate)}.`);
  }
  requireAtLeast("avg WAR", metrics.averageWordAccuracyRate, minAverageWordAccuracyRate);
  requireAtLeast("pass rate", metrics.passRate, minPassRate);
  requireAtLeast("term recall", metrics.averageTermRecall, minTermRecall);
  return {
    applied: thresholds,
    failures,
    fixtureIds: fixtureResults.map((result) => result.fixtureId),
    label: thresholds.label,
    language,
    metrics,
    passes: failures.length === 0
  };
};
|
|
44314
|
+
/**
 * Determines the fixtures for a proof run. Fixtures passed in directly are
 * returned as a shallow copy, and `options.filter` is not applied to them.
 * Otherwise fixtures are loaded from `options.fixtureDirectories` and, when a
 * filter is given, narrowed by it.
 *
 * @param {{ fixtures?: Array<object>, fixtureDirectories?: unknown, filter?: (fixture: object) => boolean }} options
 * @returns {Promise<Array<object>>}
 */
var collectFixtures = async (options) => {
  const hasInlineFixtures = options.fixtures !== undefined;
  if (hasInlineFixtures) {
    return options.fixtures.slice();
  }
  const loaded = await loadVoiceTestFixtures(options.fixtureDirectories);
  if (!options.filter) {
    return loaded;
  }
  return loaded.filter(options.filter);
};
|
|
44321
|
+
/**
 * Buckets benchmark results by the language of the fixture they belong to.
 * Results whose fixture has no language, or whose fixture id is not found,
 * go into the "unknown" bucket.
 *
 * @param {Array<{ fixtureId: unknown }>} results - Per-fixture benchmark records.
 * @param {Array<{ id: unknown, language?: string }>} fixtures - Fixture definitions.
 * @returns {Map<string, Array<object>>} Language code mapped to results, in
 *   first-seen order.
 */
var groupByLanguage = (results, fixtures) => {
  const languageByFixtureId = new Map();
  fixtures.forEach((fixture) => {
    languageByFixtureId.set(fixture.id, fixture.language ?? "unknown");
  });
  const buckets = new Map();
  for (const result of results) {
    const language = languageByFixtureId.get(result.fixtureId) ?? "unknown";
    if (!buckets.has(language)) {
      buckets.set(language, []);
    }
    buckets.get(language).push(result);
  }
  return buckets;
};
|
|
44335
|
+
/**
 * Benchmarks one or more speech-to-text adapters over a shared fixture set and
 * judges each adapter per language against configurable thresholds.
 *
 * Adapters are benchmarked sequentially. For every language present in the
 * fixtures (fixtures without one count as "unknown") the adapter's results
 * are evaluated; an adapter passes when no language reports a failure, and
 * the proof passes when every adapter passes.
 *
 * @param {object} options
 * @param {Array<{ adapter: object, adapterId: string, benchmarkOptions?: object }>} options.adapters
 * @param {Array<object>} [options.fixtures] - Inline fixtures (otherwise loaded
 *   from `options.fixtureDirectories`, optionally narrowed by `options.filter`).
 * @param {object} [options.defaultThresholds] - Fallback thresholds.
 * @param {Array<object>} [options.perLanguage] - Per-language threshold overrides.
 * @returns {Promise<{ adapters: Array<object>, generatedAt: number, passes: boolean, summary: object }>}
 * @throws {Error} When no adapter is supplied or no fixtures are found.
 */
var runVoiceMultilingualProof = async (options) => {
  if (options.adapters.length === 0) {
    throw new Error("runVoiceMultilingualProof requires at least one adapter entry.");
  }
  const fixtures = await collectFixtures(options);
  if (fixtures.length === 0) {
    throw new Error("runVoiceMultilingualProof found zero fixtures. Did you set VOICE_FIXTURE_DIR or pass fixtures/fixtureDirectories?");
  }
  const languageCodes = new Set(fixtures.map((fixture) => fixture.language ?? "unknown"));
  const adapterReports = [];
  for (const { adapter, adapterId, benchmarkOptions } of options.adapters) {
    const benchmark = await runSTTAdapterBenchmark({
      adapter,
      adapterId,
      fixtures,
      options: benchmarkOptions
    });
    const resultsByLanguage = groupByLanguage(benchmark.fixtures, fixtures);
    const languageReports = [];
    for (const language of languageCodes) {
      const bucket = resultsByLanguage.get(language) ?? [];
      // Languages for which this adapter produced no results are left out of the report.
      if (bucket.length > 0) {
        const thresholds = resolveLanguageThreshold(language, options.defaultThresholds, options.perLanguage);
        languageReports.push(evaluateLanguage(language, bucket, thresholds));
      }
    }
    const overall = computeMetrics(benchmark.fixtures);
    const failures = languageReports.flatMap((report) => report.failures);
    adapterReports.push({
      adapterId,
      benchmark,
      failures,
      fixtureCount: benchmark.fixtures.length,
      languageReports,
      overall,
      passes: failures.length === 0
    });
  }
  const failedAdapters = [];
  for (const report of adapterReports) {
    if (!report.passes) {
      failedAdapters.push(report.adapterId);
    }
  }
  return {
    adapters: adapterReports,
    generatedAt: Date.now(),
    passes: failedAdapters.length === 0,
    summary: {
      adapterCount: adapterReports.length,
      failedAdapters,
      fixtureCount: fixtures.length,
      languageCount: languageCodes.size
    }
  };
};
|
|
44386
|
+
/**
 * Renders a multilingual proof report as a Markdown document.
 *
 * The output has a header (generation time, overall status, counts), an
 * optional "Failed adapters" line, and one section per adapter containing
 * overall metrics, a per-language table, and a bullet list of failures when
 * there are any. Rates are printed with three decimals; pass rates in the
 * metric columns are printed as percentages with one decimal.
 *
 * @param {object} report - Report shaped like the result of `runVoiceMultilingualProof`.
 * @returns {string} Markdown text with lines joined by "\n".
 */
var renderVoiceMultilingualProofMarkdown = (report) => {
  const fixed = (value) => value.toFixed(3);
  const percent = (ratio) => `${(ratio * 100).toFixed(1)}%`;
  const verdict = (ok) => ok ? "pass" : "fail";

  // Threshold field -> prefix used in the "Threshold" column, in display order.
  const thresholdRules = [
    ["maxAverageWordErrorRate", "WER<="],
    ["minAverageWordAccuracyRate", "WAR>="],
    ["minPassRate", "pass>="],
    ["minTermRecall", "recall>="]
  ];
  const describeThresholds = (applied) => {
    const parts = [];
    for (const [field, prefix] of thresholdRules) {
      if (applied[field] !== undefined) {
        parts.push(`${prefix}${fixed(applied[field])}`);
      }
    }
    // An em dash marks a language with no configured thresholds.
    return parts.join(", ") || "\u2014";
  };
  const languageRow = (entry) => {
    const thresholdText = describeThresholds(entry.applied);
    const suffix = entry.label ? ` (${entry.label})` : "";
    const { metrics } = entry;
    return `| ${entry.language}${suffix} | ${String(metrics.fixtureCount)} | ${fixed(metrics.averageWordErrorRate)} | ${fixed(metrics.averageWordAccuracyRate)} | ${percent(metrics.passRate)} | ${thresholdText} | ${verdict(entry.passes)} |`;
  };

  const out = [];
  out.push(`# Voice Multilingual STT Proof`);
  out.push("");
  out.push(`Generated: ${new Date(report.generatedAt).toISOString()}`);
  out.push(`Status: ${report.passes ? "**PASS**" : "**FAIL**"}`);
  out.push(`Adapters: ${String(report.summary.adapterCount)}; Fixtures: ${String(report.summary.fixtureCount)}; Languages: ${String(report.summary.languageCount)}.`);
  out.push("");
  if (report.summary.failedAdapters.length > 0) {
    out.push(`Failed adapters: ${report.summary.failedAdapters.join(", ")}.`);
    out.push("");
  }
  for (const adapter of report.adapters) {
    out.push(
      `## ${adapter.adapterId} \u2014 ${verdict(adapter.passes)}`,
      "",
      `- Fixtures: ${String(adapter.fixtureCount)}`,
      `- Avg WER: ${fixed(adapter.overall.averageWordErrorRate)}`,
      `- Avg WAR: ${fixed(adapter.overall.averageWordAccuracyRate)}`,
      `- Pass rate: ${percent(adapter.overall.passRate)}`,
      "",
      `| Language | Fixtures | Avg WER | Avg WAR | Pass rate | Threshold | Status |`,
      `| --- | ---: | ---: | ---: | ---: | --- | --- |`
    );
    for (const entry of adapter.languageReports) {
      out.push(languageRow(entry));
    }
    if (adapter.failures.length > 0) {
      out.push("", "Failures:");
      adapter.failures.forEach((failure) => {
        out.push(`- ${failure}`);
      });
    }
    out.push("");
  }
  return out.join("\n");
};
|
|
44427
|
+
/**
 * Condenses a multilingual proof report into a single readiness-check entry.
 *
 * - No adapters in the report  -> status "warn", value 0.
 * - No failed adapters         -> status "pass", value = adapter count, with a
 *   per-adapter WER summary as the detail.
 * - Otherwise                  -> status "fail", value = number of failed
 *   adapters; the detail names them and appends up to three failure messages
 *   from each failing adapter.
 *
 * @param {object} report - Report shaped like the result of `runVoiceMultilingualProof`.
 * @param {object} [options] - Optional `label` (defaults to "Multilingual STT proof")
 *   and `baseHref` (copied to `href`, which is present even when undefined).
 * @returns {{detail: string, href: *, label: string, status: string, value: number}}
 */
var buildVoiceMultilingualProofReadinessCheck = (report, options = {}) => {
  const label = options.label ?? "Multilingual STT proof";
  // Every outcome shares the same shape; only detail/status/value differ.
  const check = (detail, status, value) => ({
    detail,
    href: options.baseHref,
    label,
    status,
    value
  });

  if (report.adapters.length === 0) {
    return check("No STT adapters were exercised against the multilingual corpus.", "warn", 0);
  }

  const failedAdapters = report.summary.failedAdapters;
  if (failedAdapters.length > 0) {
    const failureMessages = [];
    for (const adapter of report.adapters) {
      if (!adapter.passes) {
        // Cap at three messages per adapter to keep the detail line short.
        failureMessages.push(...adapter.failures.slice(0, 3));
      }
    }
    return check(`Failed adapters: ${failedAdapters.join(", ")}. ${failureMessages.join(" ")}`, "fail", failedAdapters.length);
  }

  const summaries = report.adapters.map((adapter) => {
    const wer = adapter.overall.averageWordErrorRate.toFixed(3);
    return `${adapter.adapterId}: WER ${wer} across ${String(adapter.fixtureCount)} fixtures`;
  });
  return check(summaries.join("; "), "pass", report.summary.adapterCount);
};
|
|
43102
44457
|
export {
|
|
43103
44458
|
writeVoiceProofPack,
|
|
43104
44459
|
writeVoiceMediaPipelineArtifacts,
|
|
@@ -43173,6 +44528,7 @@ export {
|
|
|
43173
44528
|
runVoiceProfileSwitchPolicyProof,
|
|
43174
44529
|
runVoicePhoneAgentProductionSmokeContract,
|
|
43175
44530
|
runVoiceOutcomeContractSuite,
|
|
44531
|
+
runVoiceMultilingualProof,
|
|
43176
44532
|
runVoiceCommandProofTargets,
|
|
43177
44533
|
runVoiceCommandProofTarget,
|
|
43178
44534
|
runVoiceCampaignReadinessProof,
|
|
@@ -43261,6 +44617,7 @@ export {
|
|
|
43261
44617
|
renderVoiceOperationalStatusHTML,
|
|
43262
44618
|
renderVoiceObservabilityExportReplayHTML,
|
|
43263
44619
|
renderVoiceObservabilityExportMarkdown,
|
|
44620
|
+
renderVoiceMultilingualProofMarkdown,
|
|
43264
44621
|
renderVoiceMonitorMarkdown,
|
|
43265
44622
|
renderVoiceMonitorHTML,
|
|
43266
44623
|
renderVoiceMediaPipelineMarkdown,
|
|
@@ -43341,6 +44698,7 @@ export {
|
|
|
43341
44698
|
getLatestVoiceTelephonyMediaReport,
|
|
43342
44699
|
getLatestVoiceBrowserMediaReport,
|
|
43343
44700
|
getDefaultVoiceTelephonyBenchmarkScenarios,
|
|
44701
|
+
fromVapiAssistantConfig,
|
|
43344
44702
|
formatVoiceProofTrendAge,
|
|
43345
44703
|
filterVoiceTraceEvents,
|
|
43346
44704
|
filterVoiceAuditEvents,
|
|
@@ -43793,6 +45151,7 @@ export {
|
|
|
43793
45151
|
buildVoiceObservabilityExportDeliveryHistory,
|
|
43794
45152
|
buildVoiceObservabilityExport,
|
|
43795
45153
|
buildVoiceObservabilityArtifactIndex,
|
|
45154
|
+
buildVoiceMultilingualProofReadinessCheck,
|
|
43796
45155
|
buildVoiceMonitorRunReport,
|
|
43797
45156
|
buildVoiceMediaPipelineReport,
|
|
43798
45157
|
buildVoiceMediaPipelineReadinessChecks,
|