@oscharko-dev/keiko-evaluations 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/discussion/fixtures/correction.d.ts +5 -0
  3. package/dist/discussion/fixtures/correction.d.ts.map +1 -0
  4. package/dist/discussion/fixtures/correction.js +53 -0
  5. package/dist/discussion/fixtures/index.d.ts +5 -0
  6. package/dist/discussion/fixtures/index.d.ts.map +1 -0
  7. package/dist/discussion/fixtures/index.js +17 -0
  8. package/dist/discussion/fixtures/no-voice.d.ts +6 -0
  9. package/dist/discussion/fixtures/no-voice.d.ts.map +1 -0
  10. package/dist/discussion/fixtures/no-voice.js +79 -0
  11. package/dist/discussion/fixtures/voice.d.ts +5 -0
  12. package/dist/discussion/fixtures/voice.d.ts.map +1 -0
  13. package/dist/discussion/fixtures/voice.js +57 -0
  14. package/dist/discussion/index.d.ts +6 -0
  15. package/dist/discussion/index.d.ts.map +1 -0
  16. package/dist/discussion/index.js +9 -0
  17. package/dist/discussion/render.d.ts +3 -0
  18. package/dist/discussion/render.d.ts.map +1 -0
  19. package/dist/discussion/render.js +49 -0
  20. package/dist/discussion/runner.d.ts +13 -0
  21. package/dist/discussion/runner.d.ts.map +1 -0
  22. package/dist/discussion/runner.js +80 -0
  23. package/dist/discussion/scorer.d.ts +8 -0
  24. package/dist/discussion/scorer.d.ts.map +1 -0
  25. package/dist/discussion/scorer.js +225 -0
  26. package/dist/discussion/types.d.ts +71 -0
  27. package/dist/discussion/types.d.ts.map +1 -0
  28. package/dist/discussion/types.js +29 -0
  29. package/dist/index.d.ts +2 -0
  30. package/dist/index.d.ts.map +1 -1
  31. package/dist/index.js +6 -0
  32. package/dist/voice-action/fixtures/adversarial.d.ts +9 -0
  33. package/dist/voice-action/fixtures/adversarial.d.ts.map +1 -0
  34. package/dist/voice-action/fixtures/adversarial.js +163 -0
  35. package/dist/voice-action/fixtures/index.d.ts +5 -0
  36. package/dist/voice-action/fixtures/index.d.ts.map +1 -0
  37. package/dist/voice-action/fixtures/index.js +17 -0
  38. package/dist/voice-action/fixtures/no-voice.d.ts +5 -0
  39. package/dist/voice-action/fixtures/no-voice.d.ts.map +1 -0
  40. package/dist/voice-action/fixtures/no-voice.js +37 -0
  41. package/dist/voice-action/fixtures/segment.d.ts +11 -0
  42. package/dist/voice-action/fixtures/segment.d.ts.map +1 -0
  43. package/dist/voice-action/fixtures/segment.js +25 -0
  44. package/dist/voice-action/fixtures/voice.d.ts +6 -0
  45. package/dist/voice-action/fixtures/voice.d.ts.map +1 -0
  46. package/dist/voice-action/fixtures/voice.js +74 -0
  47. package/dist/voice-action/index.d.ts +6 -0
  48. package/dist/voice-action/index.d.ts.map +1 -0
  49. package/dist/voice-action/index.js +10 -0
  50. package/dist/voice-action/render.d.ts +3 -0
  51. package/dist/voice-action/render.d.ts.map +1 -0
  52. package/dist/voice-action/render.js +49 -0
  53. package/dist/voice-action/runner.d.ts +14 -0
  54. package/dist/voice-action/runner.d.ts.map +1 -0
  55. package/dist/voice-action/runner.js +149 -0
  56. package/dist/voice-action/scorer.d.ts +8 -0
  57. package/dist/voice-action/scorer.d.ts.map +1 -0
  58. package/dist/voice-action/scorer.js +247 -0
  59. package/dist/voice-action/types.d.ts +82 -0
  60. package/dist/voice-action/types.d.ts.map +1 -0
  61. package/dist/voice-action/types.js +30 -0
  62. package/dist/voice-twin/capability.d.ts +4 -0
  63. package/dist/voice-twin/capability.d.ts.map +1 -0
  64. package/dist/voice-twin/capability.js +26 -0
  65. package/dist/voice-twin/fixtures/full-realtime.d.ts +3 -0
  66. package/dist/voice-twin/fixtures/full-realtime.d.ts.map +1 -0
  67. package/dist/voice-twin/fixtures/full-realtime.js +36 -0
  68. package/dist/voice-twin/fixtures/index.d.ts +5 -0
  69. package/dist/voice-twin/fixtures/index.d.ts.map +1 -0
  70. package/dist/voice-twin/fixtures/index.js +21 -0
  71. package/dist/voice-twin/fixtures/no-voice.d.ts +3 -0
  72. package/dist/voice-twin/fixtures/no-voice.d.ts.map +1 -0
  73. package/dist/voice-twin/fixtures/no-voice.js +33 -0
  74. package/dist/voice-twin/fixtures/privacy.d.ts +3 -0
  75. package/dist/voice-twin/fixtures/privacy.d.ts.map +1 -0
  76. package/dist/voice-twin/fixtures/privacy.js +69 -0
  77. package/dist/voice-twin/fixtures/speech-output.d.ts +3 -0
  78. package/dist/voice-twin/fixtures/speech-output.d.ts.map +1 -0
  79. package/dist/voice-twin/fixtures/speech-output.js +32 -0
  80. package/dist/voice-twin/fixtures/stt-only.d.ts +3 -0
  81. package/dist/voice-twin/fixtures/stt-only.d.ts.map +1 -0
  82. package/dist/voice-twin/fixtures/stt-only.js +35 -0
  83. package/dist/voice-twin/index.d.ts +10 -0
  84. package/dist/voice-twin/index.d.ts.map +1 -0
  85. package/dist/voice-twin/index.js +14 -0
  86. package/dist/voice-twin/metrics.d.ts +10 -0
  87. package/dist/voice-twin/metrics.d.ts.map +1 -0
  88. package/dist/voice-twin/metrics.js +142 -0
  89. package/dist/voice-twin/privacy.d.ts +9 -0
  90. package/dist/voice-twin/privacy.d.ts.map +1 -0
  91. package/dist/voice-twin/privacy.js +100 -0
  92. package/dist/voice-twin/profiles.d.ts +15 -0
  93. package/dist/voice-twin/profiles.d.ts.map +1 -0
  94. package/dist/voice-twin/profiles.js +58 -0
  95. package/dist/voice-twin/render.d.ts +3 -0
  96. package/dist/voice-twin/render.d.ts.map +1 -0
  97. package/dist/voice-twin/render.js +53 -0
  98. package/dist/voice-twin/runner.d.ts +13 -0
  99. package/dist/voice-twin/runner.d.ts.map +1 -0
  100. package/dist/voice-twin/runner.js +141 -0
  101. package/dist/voice-twin/scorer.d.ts +8 -0
  102. package/dist/voice-twin/scorer.d.ts.map +1 -0
  103. package/dist/voice-twin/scorer.js +323 -0
  104. package/dist/voice-twin/types.d.ts +149 -0
  105. package/dist/voice-twin/types.d.ts.map +1 -0
  106. package/dist/voice-twin/types.js +45 -0
  107. package/package.json +9 -9
@@ -0,0 +1,323 @@
1
+ // Voice Digital Twin scorer (Epic #491, Issue #505; ADR-0068).
2
+ //
3
+ // Pure per-dimension scoring + suite aggregation across the eleven capstone dimensions. Each dimension is a
4
+ // pure function over the fixture + its derived observation. A dimension a fixture does not declare is
5
+ // `not-applicable` and excluded from aggregation. Each dimension combines a STRUCTURAL gate (the contract
6
+ // fact the cell / metric must satisfy) with the fixture oracle, so weakening a capability table, leaking a
7
+ // forbidden message kind into the STT profile, dropping the loopback control plane, letting an unapproved
8
+ // egress through, or breaking a metric flips the corresponding dimension to FAIL. Rationales are
9
+ // harness-authored and content-free (counts, closed-vocabulary labels, numbers).
10
+ import { VOICE_CONTROL_TRANSPORTS, } from "@oscharko-dev/keiko-contracts";
11
+ import { VOICE_TWIN_REPLAY_CAPACITY } from "./metrics.js";
12
+ import { VOICE_TWIN_DIMENSIONS, } from "./types.js";
13
// AC3: control-message kinds that must be absent from the speech-to-text-only allow-list. They cover
// WebRTC signalling (SDP offer / answer, ICE candidates), media-track state, playback state, and the
// interrupt control message.
const STT_FORBIDDEN_KINDS = [
    "signal.sdp.offer", "signal.sdp.answer", "signal.ice.candidate",
    "media.track.state", "playback.state", "control.interrupt",
];

// AC4: control-message kinds that must be present in the full-realtime allow-list, namely SDP
// offer / answer signalling plus media-track state.
const REALTIME_REQUIRED_KINDS = ["signal.sdp.offer", "signal.sdp.answer", "media.track.state"];
29
/**
 * Collapse a list of structural checks into one dimension result.
 *
 * @param {string} dimension - Dimension label copied verbatim into the result.
 * @param {ReadonlyArray<{label: string, ok: boolean}>} checks - Checks to evaluate; a check counts as
 *   failed when its `ok` is falsy.
 * @returns {{dimension: string, outcome: "pass" | "fail", rationale: string}} `pass` only when at least
 *   one check was supplied and none failed; otherwise `fail`, with the failed labels joined by "; ".
 */
function gate(dimension, checks) {
    // Fail closed: a gate with nothing to verify proves nothing, so it must not report "0/0 ... met".
    if (checks.length === 0) {
        return {
            dimension,
            outcome: "fail",
            rationale: "failed: no structural checks declared.",
        };
    }
    const failedLabels = checks.filter((check) => !check.ok).map((check) => check.label);
    if (failedLabels.length > 0) {
        return {
            dimension,
            outcome: "fail",
            rationale: `failed: ${failedLabels.join("; ")}.`,
        };
    }
    const total = String(checks.length);
    return {
        dimension,
        outcome: "pass",
        rationale: `${total}/${total} structural checks met.`,
    };
}
44
+ // ─── Capability-matrix consistency ───────────────────────────────────────────────────
45
/**
 * Score the capability-matrix-consistency dimension for one observation.
 *
 * @param fixture - Fixture whose `oracle.expectedEffectiveProfile` is the expected profile.
 * @param obs - Observation whose `cell` carries the derived capability facts.
 * @returns The gate result for "capability-matrix-consistency".
 */
function scoreCapabilityMatrix(fixture, obs) {
    const { effectiveProfile, controlPlaneIsLoopback, egressClass } = obs.cell;
    // The `none` profile must egress nowhere; every other profile must egress only to the configured
    // model endpoint.
    const requiredEgressClass = effectiveProfile === "none" ? "none" : "configured-model-endpoint";
    const checks = [
        {
            label: "effective profile matches the oracle expectation",
            ok: effectiveProfile === fixture.oracle.expectedEffectiveProfile,
        },
        {
            label: "control plane is loopback",
            ok: controlPlaneIsLoopback,
        },
        {
            label: "egress class matches the effective profile",
            ok: egressClass === requiredEgressClass,
        },
    ];
    return gate("capability-matrix-consistency", checks);
}
65
+ // ─── No-voice dormancy (AC1 / AC2) ───────────────────────────────────────────────────
66
/**
 * Score the no-voice-dormancy dimension: the cell must show the `none` profile with no allowed
 * control-message kinds, no media transport, disabled negotiation, and no egress.
 *
 * @param obs - Observation whose `cell` is inspected.
 * @returns The gate result for "no-voice-dormancy".
 */
function scoreNoVoiceDormancy(obs) {
    const { effectiveProfile, allowedKinds, mediaTransport, negotiation, egressClass } = obs.cell;
    const checks = [
        { label: "effective profile is none", ok: effectiveProfile === "none" },
        { label: "no allowed control message kinds", ok: allowedKinds.length === 0 },
        { label: "media transport is none", ok: mediaTransport === "none" },
        { label: "negotiation is disabled", ok: negotiation === "disabled" },
        { label: "egress class is none", ok: egressClass === "none" },
    ];
    return gate("no-voice-dormancy", checks);
}
76
+ // ─── STT affordance bounding (AC3) ───────────────────────────────────────────────────
77
/**
 * Score the stt-affordance-bounding dimension: the cell must be the speech-to-text profile, permit
 * `transcript.committed`, permit none of the STT-forbidden kinds, use the gateway-batch media
 * transport, and have negotiation disabled.
 *
 * @param obs - Observation whose `cell` is inspected.
 * @returns The gate result for "stt-affordance-bounding".
 */
function scoreSttAffordanceBounding(obs) {
    const { effectiveProfile, allowedKinds, mediaTransport, negotiation } = obs.cell;
    // True only when no forbidden kind appears anywhere in the allow-list.
    const noForbiddenKindAllowed = !STT_FORBIDDEN_KINDS.some((kind) => allowedKinds.includes(kind));
    const checks = [
        {
            label: "effective profile is speech-to-text",
            ok: effectiveProfile === "speech-to-text",
        },
        {
            label: "transcript dictation kinds permitted",
            ok: allowedKinds.includes("transcript.committed"),
        },
        {
            label: "WebRTC / media / playback / interrupt kinds excluded",
            ok: noForbiddenKindAllowed,
        },
        {
            label: "media transport is gateway-batch not webrtc",
            ok: mediaTransport === "gateway-batch",
        },
        {
            label: "negotiation is disabled",
            ok: negotiation === "disabled",
        },
    ];
    return gate("stt-affordance-bounding", checks);
}
95
+ // ─── Transport plane separation (AC4) ────────────────────────────────────────────────
96
/**
 * Score the transport-plane-separation dimension: the cell must be the full-realtime profile on the
 * webrtc media transport with proxied-sdp negotiation, permit every realtime-required kind, and keep
 * a loopback control plane while `loopback-websocket` is among the contract's control transports.
 *
 * @param obs - Observation whose `cell` is inspected.
 * @returns The gate result for "transport-plane-separation".
 */
function scoreTransportPlaneSeparation(obs) {
    const { effectiveProfile, allowedKinds, mediaTransport, negotiation, controlPlaneIsLoopback } = obs.cell;
    // True only when no required kind is missing from the allow-list.
    const everyRealtimeKindAllowed = !REALTIME_REQUIRED_KINDS.some((kind) => !allowedKinds.includes(kind));
    const checks = [
        { label: "effective profile is full-realtime", ok: effectiveProfile === "full-realtime" },
        { label: "media transport is webrtc", ok: mediaTransport === "webrtc" },
        { label: "negotiation is proxied-sdp", ok: negotiation === "proxied-sdp" },
        { label: "SDP / media-track signalling kinds present", ok: everyRealtimeKindAllowed },
        {
            label: "control plane is loopback (loopback-websocket available)",
            // The contract's transport list is consulted only when the cell itself reports loopback.
            ok: controlPlaneIsLoopback && VOICE_CONTROL_TRANSPORTS.includes("loopback-websocket"),
        },
    ];
    return gate("transport-plane-separation", checks);
}
111
+ // ─── External-destination privacy (AC5) ──────────────────────────────────────────────
112
/**
 * Score the external-destination-privacy dimension by comparing the egress audit and manifest scan
 * with the fixture oracle.
 *
 * The dimension passes when the observed verdicts equal the expected ones, so a privacy-negative
 * fixture passes precisely when the audit reports `approved === false`.
 *
 * @param fixture - Fixture whose `oracle` holds the expected egress / manifest verdicts.
 * @param obs - Observation carrying `egressAudit` and `manifestScan`.
 * @returns The gate result for "external-destination-privacy".
 */
function scoreExternalDestinationPrivacy(fixture, obs) {
    const { egressAudit, manifestScan } = obs;
    const { expectedEgressApproved, expectedManifestsClean } = fixture.oracle;
    // Approved-expected fixtures must show zero unapproved destinations; negative fixtures must show
    // at least one, so the negative case is actually exercised rather than merely asserted.
    let unapprovedCountAgrees;
    if (expectedEgressApproved) {
        unapprovedCountAgrees = egressAudit.unapprovedCount === 0;
    }
    else {
        unapprovedCountAgrees = egressAudit.unapprovedCount > 0;
    }
    const checks = [
        {
            label: "egress audit verdict matches oracle expectation",
            ok: egressAudit.approved === expectedEgressApproved,
        },
        {
            label: "manifest scan verdict matches oracle expectation",
            ok: manifestScan.clean === expectedManifestsClean,
        },
        {
            label: "unapproved-destination count agrees with the verdict",
            ok: unapprovedCountAgrees,
        },
    ];
    return gate("external-destination-privacy", checks);
}
134
+ // ─── AC6 metric dimensions ───────────────────────────────────────────────────────────
135
/**
 * Score the interruption-metric dimension from `obs.metrics.interruption`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreInterruptionMetric(obs) {
    const { interruption } = obs.metrics;
    return interruption === undefined
        ? missingMetric("interruption-metric")
        : gate("interruption-metric", [
            {
                label: "interrupt allowed for the capable profile",
                ok: interruption.interruptAllowed,
            },
            {
                label: "speaking -> interrupted is reachable",
                ok: interruption.interruptedPhaseReachable,
            },
        ]);
}
145
/**
 * Score the end-of-turn-metric dimension from `obs.metrics.endOfTurn`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreEndOfTurnMetric(obs) {
    const { endOfTurn } = obs.metrics;
    return endOfTurn === undefined
        ? missingMetric("end-of-turn-metric")
        : gate("end-of-turn-metric", [
            {
                label: "transcript.committed permitted for the profile",
                ok: endOfTurn.committedKindAllowed,
            },
            {
                label: "transcript capture permitted for the profile",
                ok: endOfTurn.captureAllowed,
            },
            {
                label: "committed projection is non-empty",
                ok: endOfTurn.committedSegmentCount > 0,
            },
            {
                label: "committed projection excludes partial / stable",
                ok: endOfTurn.excludesUncommitted,
            },
        ]);
}
157
/**
 * Score the transcript-correction-metric dimension from `obs.metrics.transcriptCorrection`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreTranscriptCorrectionMetric(obs) {
    const { transcriptCorrection } = obs.metrics;
    return transcriptCorrection === undefined
        ? missingMetric("transcript-correction-metric")
        : gate("transcript-correction-metric", [
            {
                label: "stable -> corrected transition allowed",
                ok: transcriptCorrection.stableToCorrectedAllowed,
            },
            {
                label: "committed -> corrected transition allowed",
                ok: transcriptCorrection.committedToCorrectedAllowed,
            },
            {
                label: "corrected is a consumable state",
                ok: transcriptCorrection.correctedIsConsumable,
            },
            {
                label: "superseded committed text dropped from projection",
                ok: transcriptCorrection.supersededTextDropped,
            },
        ]);
}
172
/**
 * Score the provider-failure-recovery-metric dimension from `obs.metrics.providerFailureRecovery`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreProviderFailureRecoveryMetric(obs) {
    const { providerFailureRecovery } = obs.metrics;
    return providerFailureRecovery === undefined
        ? missingMetric("provider-failure-recovery-metric")
        : gate("provider-failure-recovery-metric", [
            {
                label: "provider-error is a transcript state",
                ok: providerFailureRecovery.providerErrorIsState,
            },
            {
                label: "provider-error is not consumable",
                ok: providerFailureRecovery.providerErrorNotConsumable,
            },
            {
                label: "playback failed is a settled phase",
                ok: providerFailureRecovery.playbackFailedIsSettled,
            },
            {
                label: "a recovery transition exists out of failure",
                ok: providerFailureRecovery.recoveryTransitionExists,
            },
        ]);
}
184
/**
 * Score the buffer-boundedness-metric dimension from `obs.metrics.bufferBoundedness`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreBufferBoundednessMetric(obs) {
    const { bufferBoundedness } = obs.metrics;
    if (bufferBoundedness === undefined) {
        return missingMetric("buffer-boundedness-metric");
    }
    const { capacity, maxObservedLength, admittedCount, ephemeralBuffered, evictedOldestOnOverflow } = bufferBoundedness;
    // When eviction happened the ring must have reached EXACTLY its capacity; a plain <= comparison
    // against the record's own capacity would hold even for a ring that never filled. Without
    // eviction, staying at or below capacity is sufficient.
    let fillAgreesWithOverflow;
    if (evictedOldestOnOverflow) {
        fillAgreesWithOverflow = maxObservedLength === capacity;
    }
    else {
        fillAgreesWithOverflow = maxObservedLength <= capacity;
    }
    const checks = [
        {
            label: "capacity equals the documented ring size",
            ok: capacity === VOICE_TWIN_REPLAY_CAPACITY,
        },
        {
            label: "ring fills exactly to capacity under overflow",
            ok: fillAgreesWithOverflow,
        },
        { label: "no ephemeral kind ever buffered", ok: !ephemeralBuffered },
        { label: "overflow evicts the oldest entry", ok: evictedOldestOnOverflow },
        { label: "at least one replay-eligible kind admitted", ok: admittedCount > 0 },
    ];
    return gate("buffer-boundedness-metric", checks);
}
208
// Latency posture class expected for each media transport. In this compiled output a transport with
// no entry looks up as `undefined`, which cannot equal a reported latency class, so the first check
// below fails. NOTE(review): the original comment says a missing transport is a compile error, which
// presumably refers to a total-map type in the TypeScript source; confirm there.
const EXPECTED_LATENCY_CLASS = {
    none: "none",
    "gateway-batch": "batch",
    webrtc: "interactive-realtime",
};

/**
 * Score the latency-class-metric dimension from `obs.metrics.latencyClass`.
 *
 * @param obs - Observation whose metric bundle is read.
 * @returns The missing-metric failure when no record is present, otherwise the gate result.
 */
function scoreLatencyClassMetric(obs) {
    const record = obs.metrics.latencyClass;
    if (record === undefined) {
        return missingMetric("latency-class-metric");
    }
    const classForTransport = EXPECTED_LATENCY_CLASS[record.mediaTransport];
    const reportsInteractiveClass = record.latencyClass === "interactive-realtime";
    // Only the webrtc transport may be flagged interactive; any other transport must not be.
    let interactivityMatchesTransport;
    if (record.mediaTransport === "webrtc") {
        interactivityMatchesTransport = record.isInteractive;
    }
    else {
        interactivityMatchesTransport = !record.isInteractive;
    }
    const checks = [
        {
            label: "latency class matches the media-transport mapping",
            ok: record.latencyClass === classForTransport,
        },
        {
            label: "isInteractive agrees with an interactive-realtime class",
            ok: record.isInteractive === reportsInteractiveClass,
        },
        {
            label: "webrtc is interactive while batch / none are not",
            ok: interactivityMatchesTransport,
        },
    ];
    return gate("latency-class-metric", checks);
}
237
/**
 * Build the failure result used when a dimension has no metric record to score.
 *
 * @param {string} dimension - Dimension label, copied into the result and its rationale.
 * @returns {{dimension: string, outcome: "fail", rationale: string}} A failing dimension result.
 */
function missingMetric(dimension) {
    const rationale = `failed: ${dimension} declared but no metric record was derived.`;
    return { dimension, outcome: "fail", rationale };
}
244
+ // ─── Dispatch ─────────────────────────────────────────────────────────────────────────
245
// Metric dimensions (the five AC6 metrics plus the latency-posture class) get their own dispatcher,
// kept apart from the structural one so that neither exceeds the cyclomatic complexity ceiling.
/**
 * Route a metric dimension to its scorer.
 *
 * @param {string} dimension - Dimension label to dispatch on.
 * @param obs - Observation handed to the selected scorer.
 * @returns The scorer's result, or `undefined` when `dimension` is not a metric dimension.
 */
function scoreMetricDimension(dimension, obs) {
    // A Map (rather than a plain object) so only the listed labels can ever match.
    const metricScorers = new Map([
        ["interruption-metric", scoreInterruptionMetric],
        ["end-of-turn-metric", scoreEndOfTurnMetric],
        ["transcript-correction-metric", scoreTranscriptCorrectionMetric],
        ["provider-failure-recovery-metric", scoreProviderFailureRecoveryMetric],
        ["buffer-boundedness-metric", scoreBufferBoundednessMetric],
        ["latency-class-metric", scoreLatencyClassMetric],
    ]);
    const scorer = metricScorers.get(dimension);
    if (scorer === undefined) {
        return undefined;
    }
    return scorer(obs);
}
266
/**
 * Score a single dimension for one fixture / observation pair.
 *
 * The five structural dimensions are handled here; everything else is delegated to the metric
 * dispatcher, and a label that dispatcher does not recognise is reported as a missing-metric failure.
 *
 * @param {string} dimension - Dimension label to score.
 * @param fixture - Fixture, passed on to the scorers that consult the oracle.
 * @param obs - Observation derived for the fixture.
 * @returns The dimension result.
 */
function scoreDimension(dimension, fixture, obs) {
    if (dimension === "capability-matrix-consistency") {
        return scoreCapabilityMatrix(fixture, obs);
    }
    if (dimension === "no-voice-dormancy") {
        return scoreNoVoiceDormancy(obs);
    }
    if (dimension === "stt-affordance-bounding") {
        return scoreSttAffordanceBounding(obs);
    }
    if (dimension === "transport-plane-separation") {
        return scoreTransportPlaneSeparation(obs);
    }
    if (dimension === "external-destination-privacy") {
        return scoreExternalDestinationPrivacy(fixture, obs);
    }
    const metricResult = scoreMetricDimension(dimension, obs);
    return metricResult ?? missingMetric(dimension);
}
282
/**
 * Produce one result per entry of `VOICE_TWIN_DIMENSIONS`, in that order, for a single fixture.
 *
 * @param fixture - Fixture whose `dimensions` set names the dimensions it exercises.
 * @param obs - Observation derived for the fixture.
 * @returns An array of dimension results; dimensions absent from `fixture.dimensions` are reported
 *   as `not-applicable` instead of being scored.
 */
export function scoreVoiceTwinQuality(fixture, obs) {
    return VOICE_TWIN_DIMENSIONS.map((dimension) => {
        if (!fixture.dimensions.has(dimension)) {
            return {
                dimension,
                outcome: "not-applicable",
                rationale: "not exercised by this fixture.",
            };
        }
        return scoreDimension(dimension, fixture, obs);
    });
}
295
+ // ─── Suite aggregation ─────────────────────────────────────────────────────────────────
296
/**
 * Tally one dimension's outcomes across every fixture's result list.
 *
 * @param {string} dimension - Dimension label to tally.
 * @param results - One list of dimension results per fixture.
 * @returns The pass / fail / not-applicable counts plus `passRate`, which is `null` when no fixture
 *   produced a pass or fail for this dimension.
 */
function aggregateDimension(dimension, results) {
    // One outcome per fixture; `undefined` when the fixture's list has no entry for this dimension.
    const outcomes = Array.from(results, (fixtureResults) => fixtureResults.find((entry) => entry.dimension === dimension)?.outcome);
    const passCount = outcomes.filter((outcome) => outcome === "pass").length;
    const failCount = outcomes.filter((outcome) => outcome === "fail").length;
    // Anything that is neither a pass nor a fail (including a missing entry) counts as not applicable.
    const notApplicableCount = outcomes.length - passCount - failCount;
    const scoredCount = passCount + failCount;
    const passRate = scoredCount === 0 ? null : passCount / scoredCount;
    return { dimension, passCount, failCount, notApplicableCount, passRate };
}
321
/**
 * Aggregate per-fixture dimension results into one scorecard entry per dimension.
 *
 * @param results - One list of dimension results per fixture.
 * @returns One aggregate per entry of `VOICE_TWIN_DIMENSIONS`, in that order.
 */
export function aggregateVoiceTwinQuality(results) {
    const scorecard = [];
    for (const dimension of VOICE_TWIN_DIMENSIONS) {
        scorecard.push(aggregateDimension(dimension, results));
    }
    return scorecard;
}
@@ -0,0 +1,149 @@
1
+ import type { VoiceControlMessageKind, VoiceMediaTransport, VoiceNegotiationMode, VoiceProfile } from "@oscharko-dev/keiko-contracts";
2
+ export declare const VOICE_TWIN_EVAL_SCHEMA_VERSION: "1";
3
+ export type VoiceTwinDimension = "capability-matrix-consistency" | "no-voice-dormancy" | "stt-affordance-bounding" | "transport-plane-separation" | "external-destination-privacy" | "interruption-metric" | "end-of-turn-metric" | "transcript-correction-metric" | "provider-failure-recovery-metric" | "buffer-boundedness-metric" | "latency-class-metric";
4
+ export declare const VOICE_TWIN_DIMENSIONS: readonly VoiceTwinDimension[];
5
+ export type VoiceEnvironmentProfile = "azure-foundry" | "customer-hosted" | "no-voice-env";
6
+ export declare const VOICE_ENVIRONMENT_PROFILES: readonly VoiceEnvironmentProfile[];
7
+ export type EgressDestinationClass = "none" | "configured-model-endpoint" | "unapproved-external";
8
+ export type VoiceTwinFixtureCategory = "no-voice" | "stt-only" | "speech-output" | "full-realtime" | "privacy";
9
+ export declare const VOICE_TWIN_FIXTURE_CATEGORIES: readonly VoiceTwinFixtureCategory[];
10
+ export interface VoiceTwinOracle {
11
+ readonly expectedEffectiveProfile: VoiceProfile;
12
+ readonly expectedEgressApproved: boolean;
13
+ readonly expectedManifestsClean: boolean;
14
+ }
15
+ export interface VoiceTwinManifestFixture {
16
+ readonly packageName: string;
17
+ readonly dependencyNames: readonly string[];
18
+ }
19
+ export interface VoiceTwinEgressDestination {
20
+ readonly class: EgressDestinationClass;
21
+ }
22
+ export interface VoiceTwinFixture {
23
+ readonly name: string;
24
+ readonly category: VoiceTwinFixtureCategory;
25
+ readonly description: string;
26
+ readonly environment: VoiceEnvironmentProfile;
27
+ readonly advertisedProfile: VoiceProfile;
28
+ readonly egressLedger: readonly VoiceTwinEgressDestination[];
29
+ readonly manifests: readonly VoiceTwinManifestFixture[];
30
+ readonly dimensions: ReadonlySet<VoiceTwinDimension>;
31
+ readonly oracle: VoiceTwinOracle;
32
+ }
33
+ export interface VoiceTwinCapabilityCell {
34
+ readonly effectiveProfile: VoiceProfile;
35
+ readonly allowedKinds: readonly VoiceControlMessageKind[];
36
+ readonly mediaTransport: VoiceMediaTransport;
37
+ readonly negotiation: VoiceNegotiationMode;
38
+ readonly controlPlaneIsLoopback: boolean;
39
+ readonly mediaPlaneId: "media";
40
+ readonly egressClass: EgressDestinationClass;
41
+ }
42
+ export interface InterruptionMetricRecord {
43
+ readonly interruptAllowed: boolean;
44
+ readonly interruptedPhaseReachable: boolean;
45
+ }
46
+ export interface EndOfTurnMetricRecord {
47
+ readonly committedKindAllowed: boolean;
48
+ readonly captureAllowed: boolean;
49
+ readonly committedSegmentCount: number;
50
+ readonly excludesUncommitted: boolean;
51
+ }
52
+ export interface TranscriptCorrectionMetricRecord {
53
+ readonly stableToCorrectedAllowed: boolean;
54
+ readonly committedToCorrectedAllowed: boolean;
55
+ readonly correctedIsConsumable: boolean;
56
+ readonly supersededTextDropped: boolean;
57
+ }
58
+ export interface ProviderFailureRecoveryMetricRecord {
59
+ readonly providerErrorIsState: boolean;
60
+ readonly providerErrorNotConsumable: boolean;
61
+ readonly playbackFailedIsSettled: boolean;
62
+ readonly recoveryTransitionExists: boolean;
63
+ }
64
+ export interface BufferBoundednessMetricRecord {
65
+ readonly capacity: number;
66
+ readonly maxObservedLength: number;
67
+ readonly admittedCount: number;
68
+ readonly ephemeralBuffered: boolean;
69
+ readonly evictedOldestOnOverflow: boolean;
70
+ }
71
+ export interface LatencyClassMetricRecord {
72
+ readonly mediaTransport: VoiceMediaTransport;
73
+ readonly latencyClass: "none" | "batch" | "interactive-realtime";
74
+ readonly isInteractive: boolean;
75
+ }
76
+ export interface VoiceTwinMetricBundle {
77
+ readonly interruption?: InterruptionMetricRecord;
78
+ readonly endOfTurn?: EndOfTurnMetricRecord;
79
+ readonly transcriptCorrection?: TranscriptCorrectionMetricRecord;
80
+ readonly providerFailureRecovery?: ProviderFailureRecoveryMetricRecord;
81
+ readonly bufferBoundedness?: BufferBoundednessMetricRecord;
82
+ readonly latencyClass?: LatencyClassMetricRecord;
83
+ }
84
+ export interface VoiceTwinEgressAudit {
85
+ readonly approved: boolean;
86
+ readonly unapprovedCount: number;
87
+ }
88
+ export interface VoiceTwinDeniedManifestHit {
89
+ readonly packageName: string;
90
+ readonly deniedPackage: string;
91
+ }
92
+ export interface VoiceTwinManifestScan {
93
+ readonly clean: boolean;
94
+ readonly found: readonly VoiceTwinDeniedManifestHit[];
95
+ }
96
+ export interface VoiceTwinObservation {
97
+ readonly environment: VoiceEnvironmentProfile;
98
+ readonly advertisedProfile: VoiceProfile;
99
+ readonly cell: VoiceTwinCapabilityCell;
100
+ readonly egressAudit: VoiceTwinEgressAudit;
101
+ readonly manifestScan: VoiceTwinManifestScan;
102
+ readonly metrics: VoiceTwinMetricBundle;
103
+ }
104
+ export type VoiceTwinOutcome = "pass" | "fail" | "not-applicable";
105
+ export interface VoiceTwinDimensionResult {
106
+ readonly dimension: VoiceTwinDimension;
107
+ readonly outcome: VoiceTwinOutcome;
108
+ readonly rationale: string;
109
+ }
110
+ export interface VoiceTwinFixtureResult {
111
+ readonly fixtureName: string;
112
+ readonly category: VoiceTwinFixtureCategory;
113
+ readonly environment: VoiceEnvironmentProfile;
114
+ readonly effectiveProfile: VoiceProfile;
115
+ readonly observation: VoiceTwinObservation;
116
+ readonly dimensionResults: readonly VoiceTwinDimensionResult[];
117
+ readonly fullyPassed: boolean;
118
+ }
119
+ export interface VoiceTwinScorecardEntry {
120
+ readonly dimension: VoiceTwinDimension;
121
+ readonly passCount: number;
122
+ readonly failCount: number;
123
+ readonly notApplicableCount: number;
124
+ readonly passRate: number | null;
125
+ }
126
+ export interface VoiceTwinMatrixCell {
127
+ readonly environment: VoiceEnvironmentProfile;
128
+ readonly effectiveProfile: VoiceProfile;
129
+ }
130
+ export interface VoiceTwinEvalSummary {
131
+ readonly totalFixtures: number;
132
+ readonly fullyPassedFixtures: number;
133
+ readonly coversNoVoice: boolean;
134
+ readonly coversSttOnly: boolean;
135
+ readonly coversSpeechOutput: boolean;
136
+ readonly coversFullRealtime: boolean;
137
+ readonly coversAzureFoundry: boolean;
138
+ readonly coversCustomerHosted: boolean;
139
+ readonly coversPrivacyNegative: boolean;
140
+ readonly goNoGo: "GO" | "NO-GO";
141
+ }
142
+ export interface VoiceTwinScorecard {
143
+ readonly schemaVersion: typeof VOICE_TWIN_EVAL_SCHEMA_VERSION;
144
+ readonly fixtureResults: readonly VoiceTwinFixtureResult[];
145
+ readonly dimensions: readonly VoiceTwinScorecardEntry[];
146
+ readonly summary: VoiceTwinEvalSummary;
147
+ readonly coveredMatrixCells: readonly VoiceTwinMatrixCell[];
148
+ }
149
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/voice-twin/types.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EACV,uBAAuB,EACvB,mBAAmB,EACnB,oBAAoB,EACpB,YAAY,EACb,MAAM,+BAA+B,CAAC;AAEvC,eAAO,MAAM,8BAA8B,EAAG,GAAY,CAAC;AAO3D,MAAM,MAAM,kBAAkB,GAC1B,+BAA+B,GAC/B,mBAAmB,GACnB,yBAAyB,GACzB,4BAA4B,GAC5B,8BAA8B,GAC9B,qBAAqB,GACrB,oBAAoB,GACpB,8BAA8B,GAC9B,kCAAkC,GAClC,2BAA2B,GAC3B,sBAAsB,CAAC;AAE3B,eAAO,MAAM,qBAAqB,EAAE,SAAS,kBAAkB,EAYrD,CAAC;AAMX,MAAM,MAAM,uBAAuB,GAAG,eAAe,GAAG,iBAAiB,GAAG,cAAc,CAAC;AAE3F,eAAO,MAAM,0BAA0B,EAAE,SAAS,uBAAuB,EAI/D,CAAC;AAOX,MAAM,MAAM,sBAAsB,GAAG,MAAM,GAAG,2BAA2B,GAAG,qBAAqB,CAAC;AAGlG,MAAM,MAAM,wBAAwB,GAChC,UAAU,GACV,UAAU,GACV,eAAe,GACf,eAAe,GACf,SAAS,CAAC;AAEd,eAAO,MAAM,6BAA6B,EAAE,SAAS,wBAAwB,EAMnE,CAAC;AAKX,MAAM,WAAW,eAAe;IAE9B,QAAQ,CAAC,wBAAwB,EAAE,YAAY,CAAC;IAIhD,QAAQ,CAAC,sBAAsB,EAAE,OAAO,CAAC;IAGzC,QAAQ,CAAC,sBAAsB,EAAE,OAAO,CAAC;CAC1C;AAOD,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,eAAe,EAAE,SAAS,MAAM,EAAE,CAAC;CAC7C;AAED,MAAM,WAAW,0BAA0B;IACzC,QAAQ,CAAC,KAAK,EAAE,sBAAsB,CAAC;CACxC;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,wBAAwB,CAAC;IAC5C,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,uBAAuB,CAAC;IAE9C,QAAQ,CAAC,iBAAiB,EAAE,YAAY,CAAC;IACzC,QAAQ,CAAC,YAAY,EAAE,SAAS,0BAA0B,EAAE,CAAC;IAC7D,QAAQ,CAAC,SAAS,EAAE,SAAS,wBAAwB,EAAE,CAAC;IACxD,QAAQ,CAAC,UAAU,EAAE,WAAW,CAAC,kBAAkB,CAAC,CAAC;IACrD,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC;CAClC;AAMD,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,gBAAgB,EAAE,YAAY,CAAC;IACxC,QAAQ,CAAC,YAAY,EAAE,SAAS,uBAAuB,EAAE,CAAC;IAC1D,QAAQ,CAAC,cAAc,EAAE,mBAAmB,CAAC;IAC7C,QAAQ,CAAC,WAAW,EAAE,oBAAoB,CAAC;IAE3C,QAAQ,CAAC,sBAAsB,EAAE,OAAO,CAAC;IACzC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,WAAW,EAAE,sBAAsB,CAAC;CAC9C;AAGD,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACnC,QAAQ,CAAC,yBAAyB,EAAE,OAAO,CAAC;CAC7C;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACvC,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IAEjC,QAAQ,CAAC,qB
AAqB,EAAE,MAAM,CAAC;IACvC,QAAQ,CAAC,mBAAmB,EAAE,OAAO,CAAC;CACvC;AAED,MAAM,WAAW,gCAAgC;IAC/C,QAAQ,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC3C,QAAQ,CAAC,2BAA2B,EAAE,OAAO,CAAC;IAC9C,QAAQ,CAAC,qBAAqB,EAAE,OAAO,CAAC;IAExC,QAAQ,CAAC,qBAAqB,EAAE,OAAO,CAAC;CACzC;AAED,MAAM,WAAW,mCAAmC;IAClD,QAAQ,CAAC,oBAAoB,EAAE,OAAO,CAAC;IAEvC,QAAQ,CAAC,0BAA0B,EAAE,OAAO,CAAC;IAC7C,QAAQ,CAAC,uBAAuB,EAAE,OAAO,CAAC;IAE1C,QAAQ,CAAC,wBAAwB,EAAE,OAAO,CAAC;CAC5C;AAED,MAAM,WAAW,6BAA6B;IAC5C,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IAEnC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAE/B,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC;IACpC,QAAQ,CAAC,uBAAuB,EAAE,OAAO,CAAC;CAC3C;AAOD,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,cAAc,EAAE,mBAAmB,CAAC;IAC7C,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,GAAG,sBAAsB,CAAC;IACjE,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;CACjC;AAID,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,YAAY,CAAC,EAAE,wBAAwB,CAAC;IACjD,QAAQ,CAAC,SAAS,CAAC,EAAE,qBAAqB,CAAC;IAC3C,QAAQ,CAAC,oBAAoB,CAAC,EAAE,gCAAgC,CAAC;IACjE,QAAQ,CAAC,uBAAuB,CAAC,EAAE,mCAAmC,CAAC;IACvE,QAAQ,CAAC,iBAAiB,CAAC,EAAE,6BAA6B,CAAC;IAC3D,QAAQ,CAAC,YAAY,CAAC,EAAE,wBAAwB,CAAC;CAClD;AAGD,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;CAClC;AAED,MAAM,WAAW,0BAA0B;IACzC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;CAChC;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,SAAS,0BAA0B,EAAE,CAAC;CACvD;AAKD,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,WAAW,EAAE,uBAAuB,CAAC;IAC9C,QAAQ,CAAC,iBAAiB,EAAE,YAAY,CAAC;IACzC,QAAQ,CAAC,IAAI,EAAE,uBAAuB,CAAC;IACvC,QAAQ,CAAC,WAAW,EAAE,oBAAoB,CAAC;IAC3C,QAAQ,CAAC,YAAY,EAAE,qBAAqB,CAAC;IAC7C,QAAQ,CAAC,OAAO,EAAE,qBAAqB,CAAC;CACzC;AAGD,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,MAAM,GAAG,gBAAgB,CAAC;AAElE,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC,OAAO,EAAE,gBAAgB,CAAC;IAGnC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,wBAAwB,CAAC;IAC5C,QAAQ,CAAC,WAAW,EAAE,uBAAuB,CAAC;IAC9C,QA
AQ,CAAC,gBAAgB,EAAE,YAAY,CAAC;IACxC,QAAQ,CAAC,WAAW,EAAE,oBAAoB,CAAC;IAC3C,QAAQ,CAAC,gBAAgB,EAAE,SAAS,wBAAwB,EAAE,CAAC;IAC/D,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;CAC/B;AAED,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;CAClC;AAGD,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,WAAW,EAAE,uBAAuB,CAAC;IAC9C,QAAQ,CAAC,gBAAgB,EAAE,YAAY,CAAC;CACzC;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACrC,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACvC,QAAQ,CAAC,qBAAqB,EAAE,OAAO,CAAC;IACxC,QAAQ,CAAC,MAAM,EAAE,IAAI,GAAG,OAAO,CAAC;CACjC;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,aAAa,EAAE,OAAO,8BAA8B,CAAC;IAC9D,QAAQ,CAAC,cAAc,EAAE,SAAS,sBAAsB,EAAE,CAAC;IAC3D,QAAQ,CAAC,UAAU,EAAE,SAAS,uBAAuB,EAAE,CAAC;IACxD,QAAQ,CAAC,OAAO,EAAE,oBAAoB,CAAC;IAEvC,QAAQ,CAAC,kBAAkB,EAAE,SAAS,mBAAmB,EAAE,CAAC;CAC7D"}
@@ -0,0 +1,45 @@
1
+ // Voice Digital Twin evaluation types (Epic #491, Issue #505 — the capstone; ADR-0068).
2
+ //
3
+ // This subpackage is the PROOF MECHANISM for the whole Voice Digital Twin: that it is high quality when
4
+ // available and harmless when unavailable. It scores the deterministic capability / privacy / metric
5
+ // facts derivable from the keiko-contracts voice leaf modules (`voice-protocol`, `voice-transcript`,
6
+ // `voice-playback`) across the profile × environment matrix from
7
+ // `docs/voice/deployment-profile-matrix.md` §3. It is NOT the agent-trajectory harness (`../types.ts`):
8
+ // the capstone is judged on its own closed set of eleven dimensions, mirroring the Voice Action
9
+ // (`../voice-action/`) and Discussion (`../discussion/`) subpackages.
10
+ //
11
+ // Import boundary (ADR-0019 rule 3l): keiko-evaluations may import ONLY keiko-contracts (and node fs in
12
+ // .test.ts for the repo manifest scan). All voice RUNTIME mechanics (timing engine, turn manager,
13
+ // transcript reducer, playback controller) live in keiko-ui and are OFF LIMITS — every metric is modelled
14
+ // against keiko-contracts importable APIs and the repo manifests.
15
+ //
16
+ // Determinism: every observation is pure data derivation over the frozen contract — no model call, clock
17
+ // read, randomness, or network. Every observation / metric / rationale field is content-free: counts,
18
+ // closed-vocabulary enum labels, booleans, integers. No raw transcript text, SDP, ICE, audio, or a
19
+ // fixture's opaque topic / segment id ever enters the scorecard.
20
+ export const VOICE_TWIN_EVAL_SCHEMA_VERSION = "1";
21
+ export const VOICE_TWIN_DIMENSIONS = [
22
+ "capability-matrix-consistency",
23
+ "no-voice-dormancy",
24
+ "stt-affordance-bounding",
25
+ "transport-plane-separation",
26
+ "external-destination-privacy",
27
+ "interruption-metric",
28
+ "end-of-turn-metric",
29
+ "transcript-correction-metric",
30
+ "provider-failure-recovery-metric",
31
+ "buffer-boundedness-metric",
32
+ "latency-class-metric",
33
+ ];
34
+ export const VOICE_ENVIRONMENT_PROFILES = [
35
+ "azure-foundry",
36
+ "customer-hosted",
37
+ "no-voice-env",
38
+ ];
39
+ export const VOICE_TWIN_FIXTURE_CATEGORIES = [
40
+ "no-voice",
41
+ "stt-only",
42
+ "speech-output",
43
+ "full-realtime",
44
+ "privacy",
45
+ ];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oscharko-dev/keiko-evaluations",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "type": "module",
5
5
  "license": "Apache-2.0",
6
6
  "description": "Internal evaluations package: deterministic evaluation harness — scripted ModelPort fixtures, six scored dimensions, surface-parity check, scorecard renderer, and the offline-vs-live model provider selector (ADR-0012, ADR-0019). Not published independently.",
@@ -26,13 +26,13 @@
26
26
  "node": ">=22"
27
27
  },
28
28
  "dependencies": {
29
- "@oscharko-dev/keiko-contracts": "0.2.8",
30
- "@oscharko-dev/keiko-evidence": "0.2.8",
31
- "@oscharko-dev/keiko-harness": "0.2.8",
32
- "@oscharko-dev/keiko-model-gateway": "0.2.8",
33
- "@oscharko-dev/keiko-security": "0.2.8",
34
- "@oscharko-dev/keiko-tools": "0.2.8",
35
- "@oscharko-dev/keiko-workflows": "0.2.8",
36
- "@oscharko-dev/keiko-workspace": "0.2.8"
29
+ "@oscharko-dev/keiko-contracts": "0.2.9",
30
+ "@oscharko-dev/keiko-evidence": "0.2.9",
31
+ "@oscharko-dev/keiko-harness": "0.2.9",
32
+ "@oscharko-dev/keiko-model-gateway": "0.2.9",
33
+ "@oscharko-dev/keiko-security": "0.2.9",
34
+ "@oscharko-dev/keiko-tools": "0.2.9",
35
+ "@oscharko-dev/keiko-workflows": "0.2.9",
36
+ "@oscharko-dev/keiko-workspace": "0.2.9"
37
37
  }
38
38
  }