@absolutejs/voice 0.0.22-beta.308 → 0.0.22-beta.309

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3525,7 +3525,7 @@ const report = await runTTSAdapterFixture(
3525
3525
 
3526
3526
  For server-to-server use, realtime adapters open provider-specific streaming connections, send session configuration, stream text or PCM input, and emit normalized transcript/audio/error/close events. OpenAI Realtime uses raw 24kHz mono `pcm_s16le` audio. The main `voice(...)` route can run in cascaded mode with `stt` plus optional `tts`, or direct realtime mode with `realtime`. Browser demos should make sure the captured PCM format matches `realtimeInputFormat` or resample before sending audio.
3527
3527
 
3528
- Use `createVoiceRealtimeProviderContractMatrixPreset(...)` to prove which realtime providers are production-ready. Pipecat is represented as an explicit bridge seam by default, not core-owned media infrastructure:
3528
+ Use `createVoiceRealtimeProviderContractMatrixPreset(...)` to prove which realtime providers are production-ready. Native media-pipeline primitives such as `VoiceMediaFrame` and `buildVoiceMediaPipelineCalibrationReport(...)` are the path for advanced pipeline behavior in AbsoluteJS apps.
3529
3529
 
3530
3530
  ```ts
3531
3531
  import {
package/dist/index.d.ts CHANGED
@@ -15,6 +15,8 @@ export type { VoiceRealtimeChannelAssertionInput, VoiceRealtimeChannelAssertionR
15
15
  export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractMatrixPreset, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
16
16
  export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
17
17
  export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
18
+ export { buildVoiceMediaPipelineCalibrationReport, createVoiceMediaFrame } from './mediaPipeline';
19
+ export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus } from './mediaPipeline';
18
20
  export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
19
21
  export { buildVoiceDeliverySinkReport, createVoiceDeliverySinkDescriptor, createVoiceDeliverySinkPair, createVoiceDeliverySinkRoutes, createVoiceFileDeliverySink, createVoicePostgresDeliverySink, createVoiceS3DeliverySink, createVoiceSQLiteDeliverySink, createVoiceWebhookDeliverySink, renderVoiceDeliverySinkHTML } from './deliverySinkRoutes';
20
22
  export { buildVoiceOpsActionHistoryReport, createVoiceOpsActionAuditRoutes, recordVoiceOpsActionAudit, renderVoiceOpsActionHistoryHTML } from './opsActionAuditRoutes';
package/dist/index.js CHANGED
@@ -11253,13 +11253,11 @@ var defaultRequiredCapabilities = [
11253
11253
  ];
11254
11254
  var defaultProviderEnv = {
11255
11255
  "gemini-live": ["GEMINI_API_KEY"],
11256
- "openai-realtime": ["OPENAI_API_KEY"],
11257
- "pipecat-bridge": []
11256
+ "openai-realtime": ["OPENAI_API_KEY"]
11258
11257
  };
11259
11258
  var defaultRealtimeProviders = [
11260
11259
  "openai-realtime",
11261
- "gemini-live",
11262
- "pipecat-bridge"
11260
+ "gemini-live"
11263
11261
  ];
11264
11262
  var statusRank = {
11265
11263
  pass: 0,
@@ -11277,7 +11275,7 @@ var createVoiceRealtimeProviderContractMatrixPreset = (options = {}) => {
11277
11275
  contracts: providers.map((provider) => {
11278
11276
  const providerKey = String(provider);
11279
11277
  const requiredEnv = options.requiredEnv?.[providerKey] ?? defaultProviderEnv[providerKey] ?? [];
11280
- const implementationStatus = options.implementationStatus?.[providerKey] ?? (providerKey === "pipecat-bridge" ? "planned" : "available");
11278
+ const implementationStatus = options.implementationStatus?.[providerKey] ?? "available";
11281
11279
  const configured = options.configured?.[providerKey] ?? (implementationStatus === "planned" ? false : requiredEnv.every((name) => Boolean(options.env?.[name])));
11282
11280
  return {
11283
11281
  capabilities: options.capabilities?.[providerKey] ?? defaultRequiredCapabilities,
@@ -11611,6 +11609,80 @@ var createVoiceDiagnosticsRoutes = (options) => {
11611
11609
  });
11612
11610
  return routes;
11613
11611
  };
11612
+ // src/mediaPipeline.ts
11613
/** Formats an audio format as `container/encoding/<rate>hz/<channels>ch` for use in issue messages. */
var formatLabel2 = (format) => `${format.container}/${format.encoding}/${String(format.sampleRateHz)}hz/${String(format.channels)}ch`;
/** Returns true only when container, encoding, sample rate and channel count are all strictly equal. */
var formatMatches2 = (actual, expected) => actual.container === expected.container && actual.encoding === expected.encoding && actual.sampleRateHz === expected.sampleRateHz && actual.channels === expected.channels;
/** Appends a `{ code, message, severity }` record to `issues`, mutating the array in place. */
var pushIssue = (issues, severity, code, message) => {
  issues.push({ code, message, severity });
};
/** Reads `frame.metadata[key]` and returns it only when it is a finite number; otherwise `undefined`. */
var numericMetadata = (frame, key) => {
  const value = frame.metadata?.[key];
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
};
/** Identity helper: returns the frame object it was given, unchanged. */
var createVoiceMediaFrame = (frame) => frame;
/**
 * Summarises a list of media frames into a calibration report.
 *
 * Counts frames by `kind`, derives the lowest assistant-audio latency and the
 * highest `metadata.jitterMs`, compares observed formats against the expected
 * ones, and records an issue for every check that fails.
 *
 * Severity rules:
 * - a format mismatch is a "warning" when the sample rates agree and an
 *   "error" when they differ;
 * - exceeding `maxFirstAudioLatencyMs` is an "error";
 * - every other finding is a "warning".
 *
 * `status` is "fail" when any issue is an error, "warn" when there are only
 * warnings, and "pass" when there are no issues.
 *
 * @param input Calibration options; `undefined` or `null` is treated as `{}`.
 * @returns The report object; `checkedAt` is `Date.now()` at call time.
 */
var buildVoiceMediaPipelineCalibrationReport = (input) => {
  // Treat a missing (or explicitly null) input like an empty options object.
  const options = input ?? {};
  const frames = options.frames ?? [];
  const issues = [];
  const inputFrames = frames.filter((frame) => frame.kind === "input-audio");
  const assistantFrames = frames.filter((frame) => frame.kind === "assistant-audio");
  const turnCommitFrames = frames.filter((frame) => frame.kind === "turn-commit").length;
  const interruptionFrames = frames.filter((frame) => frame.kind === "interruption").length;
  const traceLinkedFrames = frames.filter((frame) => frame.traceEventId).length;
  const backpressureFrames = frames.filter((frame) => Boolean(frame.metadata?.backpressure)).length;

  // Lowest finite latency among assistant-audio frames. A plain loop is used
  // instead of Math.min(...values): spreading one argument per frame can
  // exceed the engine's argument limit on long sessions, and a NaN latency
  // would otherwise turn the minimum into NaN and slip past the budget check.
  let firstAudioLatencyMs;
  for (const frame of assistantFrames) {
    const latency = frame.latencyMs;
    if (typeof latency === "number" && Number.isFinite(latency) && (firstAudioLatencyMs === undefined || latency < firstAudioLatencyMs)) {
      firstAudioLatencyMs = latency;
    }
  }

  // Highest finite `metadata.jitterMs` across all frames (same loop rationale).
  let jitterMs;
  for (const frame of frames) {
    const jitter = numericMetadata(frame, "jitterMs");
    if (jitter !== undefined && (jitterMs === undefined || jitter > jitterMs)) {
      jitterMs = jitter;
    }
  }

  // An explicitly supplied format wins; otherwise use the first frame of the
  // matching kind that carries one.
  const inputFormat = options.inputFormat ?? inputFrames.find((frame) => frame.format)?.format;
  const outputFormat = options.outputFormat ?? assistantFrames.find((frame) => frame.format)?.format;
  const expectedInputFormat = options.expectedInputFormat;
  const expectedOutputFormat = options.expectedOutputFormat;

  // Track each direction separately so the reported target rate belongs to
  // the side that actually mismatched. The input side takes precedence when
  // both mismatch.
  const inputNeedsResampling = Boolean(expectedInputFormat && inputFormat && inputFormat.sampleRateHz !== expectedInputFormat.sampleRateHz);
  const outputNeedsResampling = Boolean(expectedOutputFormat && outputFormat && outputFormat.sampleRateHz !== expectedOutputFormat.sampleRateHz);
  const resamplingRequired = inputNeedsResampling || outputNeedsResampling;
  const resamplingTargetHz = inputNeedsResampling ? expectedInputFormat.sampleRateHz : outputNeedsResampling ? expectedOutputFormat.sampleRateHz : undefined;

  if (inputFrames.length === 0) {
    pushIssue(issues, "warning", "media.input_audio_missing", "No input audio frames were observed.");
  }
  if (assistantFrames.length === 0) {
    pushIssue(issues, "warning", "media.assistant_audio_missing", "No assistant audio frames were observed.");
  }
  if (expectedInputFormat && inputFormat && !formatMatches2(inputFormat, expectedInputFormat)) {
    pushIssue(issues, inputNeedsResampling ? "error" : "warning", "media.input_format_mismatch", `Input format ${formatLabel2(inputFormat)} does not match expected ${formatLabel2(expectedInputFormat)}.`);
  }
  if (expectedOutputFormat && outputFormat && !formatMatches2(outputFormat, expectedOutputFormat)) {
    pushIssue(issues, outputNeedsResampling ? "error" : "warning", "media.output_format_mismatch", `Output format ${formatLabel2(outputFormat)} does not match expected ${formatLabel2(expectedOutputFormat)}.`);
  }
  if (firstAudioLatencyMs !== undefined && options.maxFirstAudioLatencyMs !== undefined && firstAudioLatencyMs > options.maxFirstAudioLatencyMs) {
    pushIssue(issues, "error", "media.first_audio_latency", `First audio latency ${String(firstAudioLatencyMs)}ms exceeds budget ${String(options.maxFirstAudioLatencyMs)}ms.`);
  }
  if (jitterMs !== undefined && options.maxJitterMs !== undefined && jitterMs > options.maxJitterMs) {
    pushIssue(issues, "warning", "media.jitter", `Media jitter ${String(jitterMs)}ms exceeds budget ${String(options.maxJitterMs)}ms.`);
  }
  if (options.maxBackpressureFrames !== undefined && backpressureFrames > options.maxBackpressureFrames) {
    pushIssue(issues, "warning", "media.backpressure", `Backpressure frame count ${String(backpressureFrames)} exceeds budget ${String(options.maxBackpressureFrames)}.`);
  }
  if (options.requireInterruptionFrame && interruptionFrames === 0) {
    pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
  }
  if (options.requireTraceEvidence && traceLinkedFrames === 0) {
    pushIssue(issues, "warning", "media.trace_evidence_missing", "No media frames were linked to trace evidence.");
  }

  const hasError = issues.some((issue) => issue.severity === "error");
  return {
    assistantAudioFrames: assistantFrames.length,
    backpressureFrames,
    checkedAt: Date.now(),
    firstAudioLatencyMs,
    inputAudioFrames: inputFrames.length,
    inputFormat,
    interruptionFrames,
    issues,
    jitterMs,
    outputFormat,
    resamplingRequired,
    resamplingTargetHz,
    status: hasError ? "fail" : issues.length > 0 ? "warn" : "pass",
    surface: options.surface ?? "voice-media-pipeline",
    traceLinkedFrames,
    turnCommitFrames
  };
};
11614
11686
  // src/demoReadyRoutes.ts
11615
11687
  import { Elysia as Elysia12 } from "elysia";
11616
11688
  var escapeHtml15 = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
@@ -34384,6 +34456,7 @@ export {
34384
34456
  createVoiceMemoryAuditSinkDeliveryStore,
34385
34457
  createVoiceMemoryAuditEventStore,
34386
34458
  createVoiceMemoryAssistantMemoryStore,
34459
+ createVoiceMediaFrame,
34387
34460
  createVoiceLiveOpsRoutes,
34388
34461
  createVoiceLiveOpsController,
34389
34462
  createVoiceLiveLatencyRoutes,
@@ -34532,6 +34605,7 @@ export {
34532
34605
  buildVoiceObservabilityExport,
34533
34606
  buildVoiceObservabilityArtifactIndex,
34534
34607
  buildVoiceMonitorRunReport,
34608
+ buildVoiceMediaPipelineCalibrationReport,
34535
34609
  buildVoiceLiveOpsControlState,
34536
34610
  buildVoiceLatencySLOGate,
34537
34611
  buildVoiceIncidentBundle,
@@ -0,0 +1,56 @@
1
+ import type { AudioFormat } from './types';
2
+ export type VoiceMediaFrameKind = 'assistant-audio' | 'input-audio' | 'interruption' | 'metadata' | 'transcript' | 'turn-commit';
3
+ export type VoiceMediaFrameSource = 'browser' | 'provider' | 'telephony' | 'voice-runtime';
4
+ export type VoiceMediaPipelineStatus = 'fail' | 'pass' | 'warn';
5
+ export type VoiceMediaFrame = {
6
+ at?: number;
7
+ audio?: ArrayBuffer | ArrayBufferView;
8
+ durationMs?: number;
9
+ format?: AudioFormat;
10
+ id: string;
11
+ kind: VoiceMediaFrameKind;
12
+ latencyMs?: number;
13
+ metadata?: Record<string, unknown>;
14
+ sessionId?: string;
15
+ source: VoiceMediaFrameSource | (string & {});
16
+ traceEventId?: string;
17
+ turnId?: string;
18
+ };
19
+ export type VoiceMediaPipelineCalibrationInput = {
20
+ expectedInputFormat?: AudioFormat;
21
+ expectedOutputFormat?: AudioFormat;
22
+ frames?: readonly VoiceMediaFrame[];
23
+ inputFormat?: AudioFormat;
24
+ maxBackpressureFrames?: number;
25
+ maxFirstAudioLatencyMs?: number;
26
+ maxJitterMs?: number;
27
+ outputFormat?: AudioFormat;
28
+ requireInterruptionFrame?: boolean;
29
+ requireTraceEvidence?: boolean;
30
+ surface?: string;
31
+ };
32
+ export type VoiceMediaPipelineCalibrationIssue = {
33
+ code: string;
34
+ message: string;
35
+ severity: 'error' | 'warning';
36
+ };
37
+ export type VoiceMediaPipelineCalibrationReport = {
38
+ assistantAudioFrames: number;
39
+ backpressureFrames: number;
40
+ checkedAt: number;
41
+ firstAudioLatencyMs?: number;
42
+ inputAudioFrames: number;
43
+ inputFormat?: AudioFormat;
44
+ interruptionFrames: number;
45
+ issues: VoiceMediaPipelineCalibrationIssue[];
46
+ jitterMs?: number;
47
+ outputFormat?: AudioFormat;
48
+ resamplingRequired: boolean;
49
+ resamplingTargetHz?: number;
50
+ status: VoiceMediaPipelineStatus;
51
+ surface: string;
52
+ traceLinkedFrames: number;
53
+ turnCommitFrames: number;
54
+ };
55
+ export declare const createVoiceMediaFrame: (frame: VoiceMediaFrame) => VoiceMediaFrame;
56
+ export declare const buildVoiceMediaPipelineCalibrationReport: (input?: VoiceMediaPipelineCalibrationInput) => VoiceMediaPipelineCalibrationReport;
@@ -17,7 +17,7 @@ export type VoiceRealtimeProviderContractDefinition<TProvider extends string = s
17
17
  selected?: boolean;
18
18
  traceHref?: string;
19
19
  };
20
- export type VoiceRealtimeProviderPresetProvider = 'gemini-live' | 'openai-realtime' | 'pipecat-bridge' | (string & {});
20
+ export type VoiceRealtimeProviderPresetProvider = 'gemini-live' | 'openai-realtime' | (string & {});
21
21
  export type VoiceRealtimeProviderContractMatrixPresetOptions<TProvider extends string = VoiceRealtimeProviderPresetProvider> = {
22
22
  capabilities?: Record<string, readonly VoiceRealtimeProviderContractCapability[]>;
23
23
  configured?: Record<string, boolean>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.308",
3
+ "version": "0.0.22-beta.309",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",