@oscharko-dev/keiko-evaluations 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/discussion/fixtures/correction.d.ts +5 -0
- package/dist/discussion/fixtures/correction.d.ts.map +1 -0
- package/dist/discussion/fixtures/correction.js +53 -0
- package/dist/discussion/fixtures/index.d.ts +5 -0
- package/dist/discussion/fixtures/index.d.ts.map +1 -0
- package/dist/discussion/fixtures/index.js +17 -0
- package/dist/discussion/fixtures/no-voice.d.ts +6 -0
- package/dist/discussion/fixtures/no-voice.d.ts.map +1 -0
- package/dist/discussion/fixtures/no-voice.js +79 -0
- package/dist/discussion/fixtures/voice.d.ts +5 -0
- package/dist/discussion/fixtures/voice.d.ts.map +1 -0
- package/dist/discussion/fixtures/voice.js +57 -0
- package/dist/discussion/index.d.ts +6 -0
- package/dist/discussion/index.d.ts.map +1 -0
- package/dist/discussion/index.js +9 -0
- package/dist/discussion/render.d.ts +3 -0
- package/dist/discussion/render.d.ts.map +1 -0
- package/dist/discussion/render.js +49 -0
- package/dist/discussion/runner.d.ts +13 -0
- package/dist/discussion/runner.d.ts.map +1 -0
- package/dist/discussion/runner.js +80 -0
- package/dist/discussion/scorer.d.ts +8 -0
- package/dist/discussion/scorer.d.ts.map +1 -0
- package/dist/discussion/scorer.js +225 -0
- package/dist/discussion/types.d.ts +71 -0
- package/dist/discussion/types.d.ts.map +1 -0
- package/dist/discussion/types.js +29 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/voice-action/fixtures/adversarial.d.ts +9 -0
- package/dist/voice-action/fixtures/adversarial.d.ts.map +1 -0
- package/dist/voice-action/fixtures/adversarial.js +163 -0
- package/dist/voice-action/fixtures/index.d.ts +5 -0
- package/dist/voice-action/fixtures/index.d.ts.map +1 -0
- package/dist/voice-action/fixtures/index.js +17 -0
- package/dist/voice-action/fixtures/no-voice.d.ts +5 -0
- package/dist/voice-action/fixtures/no-voice.d.ts.map +1 -0
- package/dist/voice-action/fixtures/no-voice.js +37 -0
- package/dist/voice-action/fixtures/segment.d.ts +11 -0
- package/dist/voice-action/fixtures/segment.d.ts.map +1 -0
- package/dist/voice-action/fixtures/segment.js +25 -0
- package/dist/voice-action/fixtures/voice.d.ts +6 -0
- package/dist/voice-action/fixtures/voice.d.ts.map +1 -0
- package/dist/voice-action/fixtures/voice.js +74 -0
- package/dist/voice-action/index.d.ts +6 -0
- package/dist/voice-action/index.d.ts.map +1 -0
- package/dist/voice-action/index.js +10 -0
- package/dist/voice-action/render.d.ts +3 -0
- package/dist/voice-action/render.d.ts.map +1 -0
- package/dist/voice-action/render.js +49 -0
- package/dist/voice-action/runner.d.ts +14 -0
- package/dist/voice-action/runner.d.ts.map +1 -0
- package/dist/voice-action/runner.js +149 -0
- package/dist/voice-action/scorer.d.ts +8 -0
- package/dist/voice-action/scorer.d.ts.map +1 -0
- package/dist/voice-action/scorer.js +247 -0
- package/dist/voice-action/types.d.ts +82 -0
- package/dist/voice-action/types.d.ts.map +1 -0
- package/dist/voice-action/types.js +30 -0
- package/dist/voice-twin/capability.d.ts +4 -0
- package/dist/voice-twin/capability.d.ts.map +1 -0
- package/dist/voice-twin/capability.js +26 -0
- package/dist/voice-twin/fixtures/full-realtime.d.ts +3 -0
- package/dist/voice-twin/fixtures/full-realtime.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/full-realtime.js +36 -0
- package/dist/voice-twin/fixtures/index.d.ts +5 -0
- package/dist/voice-twin/fixtures/index.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/index.js +21 -0
- package/dist/voice-twin/fixtures/no-voice.d.ts +3 -0
- package/dist/voice-twin/fixtures/no-voice.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/no-voice.js +33 -0
- package/dist/voice-twin/fixtures/privacy.d.ts +3 -0
- package/dist/voice-twin/fixtures/privacy.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/privacy.js +69 -0
- package/dist/voice-twin/fixtures/speech-output.d.ts +3 -0
- package/dist/voice-twin/fixtures/speech-output.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/speech-output.js +32 -0
- package/dist/voice-twin/fixtures/stt-only.d.ts +3 -0
- package/dist/voice-twin/fixtures/stt-only.d.ts.map +1 -0
- package/dist/voice-twin/fixtures/stt-only.js +35 -0
- package/dist/voice-twin/index.d.ts +10 -0
- package/dist/voice-twin/index.d.ts.map +1 -0
- package/dist/voice-twin/index.js +14 -0
- package/dist/voice-twin/metrics.d.ts +10 -0
- package/dist/voice-twin/metrics.d.ts.map +1 -0
- package/dist/voice-twin/metrics.js +142 -0
- package/dist/voice-twin/privacy.d.ts +9 -0
- package/dist/voice-twin/privacy.d.ts.map +1 -0
- package/dist/voice-twin/privacy.js +100 -0
- package/dist/voice-twin/profiles.d.ts +15 -0
- package/dist/voice-twin/profiles.d.ts.map +1 -0
- package/dist/voice-twin/profiles.js +58 -0
- package/dist/voice-twin/render.d.ts +3 -0
- package/dist/voice-twin/render.d.ts.map +1 -0
- package/dist/voice-twin/render.js +53 -0
- package/dist/voice-twin/runner.d.ts +13 -0
- package/dist/voice-twin/runner.d.ts.map +1 -0
- package/dist/voice-twin/runner.js +141 -0
- package/dist/voice-twin/scorer.d.ts +8 -0
- package/dist/voice-twin/scorer.d.ts.map +1 -0
- package/dist/voice-twin/scorer.js +323 -0
- package/dist/voice-twin/types.d.ts +149 -0
- package/dist/voice-twin/types.d.ts.map +1 -0
- package/dist/voice-twin/types.js +45 -0
- package/package.json +9 -9
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
// Voice Digital Twin AC6 metric derivations (Epic #491, Issue #505; ADR-0068).
|
|
2
|
+
//
|
|
3
|
+
// Each metric is a pure derivation over a frozen keiko-contracts state machine, returning a small
|
|
4
|
+
// content-free record the scorer compares against the fixture oracle. The runtime reducers that drive
|
|
5
|
+
// these state machines live in keiko-ui (turn manager, transcript reducer, playback controller) and are
|
|
6
|
+
// OFF LIMITS here (ADR-0019 rule 3l); this suite proves the CONTRACT the reducers are bound to is
|
|
7
|
+
// regression-safe, while the reducers' own keiko-ui suites prove their behaviour. The five AC6 metrics:
|
|
8
|
+
// interruption, end-of-turn, transcript correction, provider-failure recovery, and the bounded-FIFO
|
|
9
|
+
// buffer model. Pure — no IO, clock, randomness.
|
|
10
|
+
import { VOICE_PLAYBACK_SETTLED_PHASES, VOICE_PLAYBACK_TRANSITIONS, VOICE_PROFILE_MEDIA_TRANSPORT, VOICE_TRANSCRIPT_CONSUMABLE_STATES, VOICE_TRANSCRIPT_SEGMENT_STATES, VOICE_TRANSCRIPT_SEGMENT_TRANSITIONS, canTransitionVoicePlayback, isVoiceReplayEligible, selectCommittedVoiceTranscript, voiceMessageAllowedForProfile, voicePlaybackInterruptAllowedForProfile, voiceTranscriptCaptureAllowed, } from "@oscharko-dev/keiko-contracts";
|
|
11
|
+
// Mirrors the documented keiko-ui replay ring (200) and the keiko-server MAX_REPLAY_EVENTS bound. The
|
|
12
|
+
// value is restated here (not imported) because those packages are off-limits to keiko-evaluations
|
|
13
|
+
// (ADR-0019 rule 3l); the buffer model proves the boundedness invariant the contract's replay-eligibility
|
|
14
|
+
// classification implies, independent of the runtime ring's exact size.
|
|
15
|
+
export const VOICE_TWIN_REPLAY_CAPACITY = 200;
|
|
16
|
+
// A fixed content-free segment builder. `text` is harness-authored filler ("x" repeated) so the committed
|
|
17
|
+
// projection has a non-zero character length without embedding any fixture content; it never leaves the
|
|
18
|
+
// metric as text (only its length / count is recorded).
|
|
19
|
+
function segment(id, seq, state, options = {}) {
|
|
20
|
+
const textBearing = state === "committed" || state === "corrected" || state === "stable";
|
|
21
|
+
return {
|
|
22
|
+
id,
|
|
23
|
+
seq,
|
|
24
|
+
state,
|
|
25
|
+
text: textBearing ? "xx" : "",
|
|
26
|
+
source: "realtime",
|
|
27
|
+
revision: 0,
|
|
28
|
+
replayClass: "replayable",
|
|
29
|
+
redactionClass: textBearing ? "reviewable-text" : "content-free",
|
|
30
|
+
supersedesId: options.supersedesId,
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
// ─── Interruption (barge-in) ─────────────────────────────────────────────────────────
|
|
34
|
+
// Interruption metric for one voice profile.
//
// `interruptAllowed` is the contract's answer from `voicePlaybackInterruptAllowedForProfile(profile)`.
// `interruptedPhaseReachable` is profile-independent: it asks the playback transition check whether
// `speaking` may move to `interrupted`.
export function deriveInterruptionMetric(profile) {
    const interruptAllowed = voicePlaybackInterruptAllowedForProfile(profile);
    const interruptedPhaseReachable = canTransitionVoicePlayback("speaking", "interrupted");
    return { interruptAllowed, interruptedPhaseReachable };
}
|
|
40
|
+
// ─── End-of-turn (committed projection excludes uncommitted) ─────────────────────────
|
|
41
|
+
// End-of-turn metric for one voice profile.
//
// Builds a synthetic segment list with one `partial` and one `stable` segment (both uncommitted) followed
// by one segment per entry of `VOICE_TRANSCRIPT_CONSUMABLE_STATES`, then runs the contract's
// `selectCommittedVoiceTranscript` over it. The record reports the two profile gates, the projection's
// own `segmentCount`, and whether neither uncommitted segment id appears in the projected segments.
export function deriveEndOfTurnMetric(profile) {
    const uncommittedIds = ["eot-partial", "eot-stable"];
    const segments = [segment("eot-partial", 0, "partial"), segment("eot-stable", 1, "stable")];
    // Consumable segments take sequence numbers after the two uncommitted ones (2, 3, ...).
    VOICE_TRANSCRIPT_CONSUMABLE_STATES.forEach((state, index) => {
        segments.push(segment(`eot-consumable-${state}`, 2 + index, state));
    });
    const projection = selectCommittedVoiceTranscript(segments);
    const leakedUncommitted = projection.segments.some((projected) => uncommittedIds.includes(projected.id));
    const committedKindAllowed = voiceMessageAllowedForProfile("transcript.committed", profile);
    const captureAllowed = voiceTranscriptCaptureAllowed(profile);
    return {
        committedKindAllowed,
        captureAllowed,
        committedSegmentCount: projection.segmentCount,
        excludesUncommitted: !leakedUncommitted,
    };
}
|
|
58
|
+
// ─── Transcript correction (supersede drops prior committed text) ────────────────────
|
|
59
|
+
// Transcript-correction metric.
//
// Feeds the committed projection a `committed` segment plus a `corrected` segment that names it via
// `supersedesId`. `supersededTextDropped` is true only when the projection omits the superseded segment
// and keeps the correction, i.e. a correction replaces the prior text and does not duplicate it. The other
// three fields are read straight from the contract tables: whether `stable` and `committed` may each
// transition to `corrected`, and whether `corrected` is a consumable state.
export function deriveTranscriptCorrectionMetric() {
    const supersededId = "corr-committed";
    const correctionId = "corr-corrected";
    const prior = segment(supersededId, 0, "committed");
    const correction = segment(correctionId, 1, "corrected", { supersedesId: supersededId });
    const { segments: projected } = selectCommittedVoiceTranscript([prior, correction]);
    const projectedIds = projected.map((entry) => entry.id);
    const transitions = VOICE_TRANSCRIPT_SEGMENT_TRANSITIONS;
    return {
        stableToCorrectedAllowed: transitions.stable.includes("corrected"),
        committedToCorrectedAllowed: transitions.committed.includes("corrected"),
        correctedIsConsumable: VOICE_TRANSCRIPT_CONSUMABLE_STATES.includes("corrected"),
        supersededTextDropped: !projectedIds.includes(supersededId) && projectedIds.includes(correctionId),
    };
}
|
|
75
|
+
// ─── Provider-failure recovery ───────────────────────────────────────────────────────
|
|
76
|
+
// Provider-failure recovery metric, read entirely from the contract tables (nothing is assumed):
//   - `providerErrorIsState`: `provider-error` is listed among the transcript segment states.
//   - `providerErrorNotConsumable`: `provider-error` is absent from the consumable set, so failed text is
//     never handed to a downstream consumer of that set.
//   - `playbackFailedIsSettled`: `failed` is listed among the settled playback phases.
//   - `recoveryTransitionExists`: the playback transition table lets `failed` re-arm `preparing`.
export function deriveProviderFailureRecoveryMetric() {
    const providerError = "provider-error";
    const metric = {
        providerErrorIsState: VOICE_TRANSCRIPT_SEGMENT_STATES.includes(providerError),
        providerErrorNotConsumable: !VOICE_TRANSCRIPT_CONSUMABLE_STATES.includes(providerError),
        playbackFailedIsSettled: VOICE_PLAYBACK_SETTLED_PHASES.includes("failed"),
        recoveryTransitionExists: VOICE_PLAYBACK_TRANSITIONS.failed.includes("preparing"),
    };
    return metric;
}
|
|
89
|
+
// ─── Bounded-FIFO buffer model ───────────────────────────────────────────────────────
|
|
90
|
+
// Push the given control-message kinds through a fixed-capacity FIFO ring that ONLY admits kinds the
// contract's `isVoiceReplayEligible` classifies as replay-eligible, and record what was observed:
//   - `capacity`: the capacity argument, echoed back unchanged.
//   - `maxObservedLength`: the largest ring length seen after any push.
//   - `admittedCount`: how many of the offered kinds were replay-eligible.
//   - `ephemeralBuffered`: true if a non-eligible kind was ever found inside the ring.
//   - `evictedOldestOnOverflow`: true once a push onto a full ring dropped the oldest entry.
// The runtime ring lives in keiko-ui / keiko-server (off-limits); this models the same invariant over the
// contract classification only.
//
// The ring holds `Math.floor(capacity)` whole slots. A capacity below one (0, negative, NaN) means the
// ring can hold nothing: eligible kinds are still counted in `admittedCount` but never buffered, so the
// observed length cannot exceed the capacity and no eviction is reported for an empty ring.
export function deriveBufferBoundednessMetric(kinds, capacity = VOICE_TWIN_REPLAY_CAPACITY) {
    const slots = Math.floor(capacity);
    const canBuffer = slots >= 1; // false for 0, negatives and NaN
    const ring = [];
    let maxObservedLength = 0;
    let admittedCount = 0;
    let ephemeralBuffered = false;
    let evictedOldestOnOverflow = false;
    for (const kind of kinds) {
        if (!isVoiceReplayEligible(kind)) {
            // An ephemeral kind is never admitted; if it ever entered the ring the invariant is broken.
            ephemeralBuffered = ephemeralBuffered || ring.includes(kind);
            continue;
        }
        admittedCount += 1;
        if (!canBuffer) {
            // A zero-slot ring drops the message outright; there is nothing to evict.
            continue;
        }
        if (ring.length >= slots) {
            // Full ring: drop the oldest entry to make room (FIFO eviction).
            ring.shift();
            evictedOldestOnOverflow = true;
        }
        ring.push(kind);
        maxObservedLength = Math.max(maxObservedLength, ring.length);
    }
    return {
        capacity,
        maxObservedLength,
        admittedCount,
        ephemeralBuffered,
        evictedOldestOnOverflow,
    };
}
|
|
122
|
+
// ─── Latency posture class (Deliverable "latency") ───────────────────────────────────
|
|
123
|
+
// A total, deterministic map from the contract media-transport to the latency POSTURE class the transport
|
|
124
|
+
// implies. This is NOT a wall-clock measurement (which would need a clock + network reads and is out of the
|
|
125
|
+
// pure-harness boundary, covered by the keiko-ui voice-timebase suite, ADR-0061); it is the deterministic
|
|
126
|
+
// class the contract's transport choice fixes: `webrtc` is interactive full-duplex, `gateway-batch` is a
|
|
127
|
+
// single batched request/response, and `none` has no latency surface. Keyed for totality so a new media
|
|
128
|
+
// transport added to the contract is a compile error here, not a silent wrong class.
|
|
129
|
+
const LATENCY_CLASS_BY_TRANSPORT = {
|
|
130
|
+
none: "none",
|
|
131
|
+
"gateway-batch": "batch",
|
|
132
|
+
webrtc: "interactive-realtime",
|
|
133
|
+
};
|
|
134
|
+
// Latency-posture metric for one voice profile: looks up the profile's media transport in the contract
// table, maps that transport to its latency class via `LATENCY_CLASS_BY_TRANSPORT`, and flags the class
// as interactive only when it is exactly "interactive-realtime". No clock or network is consulted.
export function deriveLatencyClassMetric(profile) {
    const mediaTransport = VOICE_PROFILE_MEDIA_TRANSPORT[profile];
    const latencyClass = LATENCY_CLASS_BY_TRANSPORT[mediaTransport];
    const isInteractive = latencyClass === "interactive-realtime";
    return { mediaTransport, latencyClass, isInteractive };
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { EgressDestinationClass, VoiceTwinEgressAudit, VoiceTwinEgressDestination, VoiceTwinManifestFixture, VoiceTwinManifestScan } from "./types.js";
|
|
2
|
+
export declare function auditVoiceEgress(destinations: readonly {
|
|
3
|
+
readonly class: EgressDestinationClass;
|
|
4
|
+
}[]): VoiceTwinEgressAudit;
|
|
5
|
+
export declare const DENIED_MEDIA_PACKAGES: readonly string[];
|
|
6
|
+
export declare const ALLOWED_MEDIA_RUNTIME: readonly string[];
|
|
7
|
+
export declare function scanManifestsForDeniedMediaPackages(manifests: readonly VoiceTwinManifestFixture[]): VoiceTwinManifestScan;
|
|
8
|
+
export type { VoiceTwinEgressDestination };
|
|
9
|
+
//# sourceMappingURL=privacy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"privacy.d.ts","sourceRoot":"","sources":["../../src/voice-twin/privacy.ts"],"names":[],"mappings":"AAcA,OAAO,KAAK,EACV,sBAAsB,EACtB,oBAAoB,EACpB,0BAA0B,EAC1B,wBAAwB,EACxB,qBAAqB,EACtB,MAAM,YAAY,CAAC;AAGpB,wBAAgB,gBAAgB,CAC9B,YAAY,EAAE,SAAS;IAAE,QAAQ,CAAC,KAAK,EAAE,sBAAsB,CAAA;CAAE,EAAE,GAClE,oBAAoB,CAGtB;AASD,eAAO,MAAM,qBAAqB,EAAE,SAAS,MAAM,EA8CzC,CAAC;AAEX,eAAO,MAAM,qBAAqB,EAAE,SAAS,MAAM,EAAoB,CAAC;AAYxE,wBAAgB,mCAAmC,CACjD,SAAS,EAAE,SAAS,wBAAwB,EAAE,GAC7C,qBAAqB,CAmBvB;AAGD,YAAY,EAAE,0BAA0B,EAAE,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// Voice Digital Twin privacy / supply-chain audit (Epic #491, Issue #505; ADR-0068; AC5).
|
|
2
|
+
//
|
|
3
|
+
// Two pure auditors give AC5 its teeth:
|
|
4
|
+
// (a) `auditVoiceEgress` — an egress ledger is approved iff NO destination is `unapproved-external`. A
|
|
5
|
+
// voice cell may egress nowhere or only to the configured model endpoint (matrix §3 /
|
|
6
|
+
// privacy-contract §1); any other destination flips approval to false.
|
|
7
|
+
// (b) `scanManifestsForDeniedMediaPackages` — a pure scan over provided package manifests for the
|
|
8
|
+
// denied runtime-media packages from `docs/voice/supply-chain-policy.md` §1. The real-repo scan
|
|
9
|
+
// (reading the actual `packages/**/package.json`) runs in the .test.ts files (test files may do
|
|
10
|
+
// fs); this function is pure over its inputs so pure negative fixtures can prove a denied package
|
|
11
|
+
// flips `clean=false`.
|
|
12
|
+
//
|
|
13
|
+
// Both are pure (no IO, clock, randomness) and content-free.
|
|
14
|
+
// ─── Egress audit (AC5) ──────────────────────────────────────────────────────────────
|
|
15
|
+
// Audits an egress ledger: counts the destinations whose `class` is exactly "unapproved-external" and
// approves the ledger only when that count is zero. An empty ledger is therefore approved.
export function auditVoiceEgress(destinations) {
    const unapprovedCount = destinations.reduce(
        (count, destination) => (destination.class === "unapproved-external" ? count + 1 : count),
        0,
    );
    const approved = unapprovedCount === 0;
    return { approved, unapprovedCount };
}
|
|
19
|
+
// ─── Supply-chain denylist (docs/voice/supply-chain-policy.md §1) ────────────────────
|
|
20
|
+
// Enumerates the known runtime-media package + scope variants of the policy §1 vendors (WebRTC wrapper /
|
|
21
|
+
// peer / SFU / media-server / SDK packages) the policy forbids adding by default — including the real
|
|
22
|
+
// client / SDK and scoped-vendor variants, not just the bare vendor id. This is a NAME-based scan
|
|
23
|
+
// (exact id OR `@scope/` prefix); the policy §1 catch-all cannot be enforced by an exact-name scan alone,
|
|
24
|
+
// so operators MUST keep this list current as new vendor packages appear. `ws` is the single allowed
|
|
25
|
+
// runtime media-adjacent package (already vetted, scoped). Grouped alphabetically by vendor.
|
|
26
|
+
export const DENIED_MEDIA_PACKAGES = [
|
|
27
|
+
// agora
|
|
28
|
+
"agora",
|
|
29
|
+
"agora-rtc-sdk-ng",
|
|
30
|
+
"@agora-js",
|
|
31
|
+
// daily
|
|
32
|
+
"@daily-co",
|
|
33
|
+
"@daily-co/daily-js",
|
|
34
|
+
// jitsi
|
|
35
|
+
"jitsi",
|
|
36
|
+
"lib-jitsi-meet",
|
|
37
|
+
"@jitsi",
|
|
38
|
+
// janus
|
|
39
|
+
"janus",
|
|
40
|
+
"janus-gateway",
|
|
41
|
+
// kurento
|
|
42
|
+
"kurento",
|
|
43
|
+
// livekit
|
|
44
|
+
"livekit",
|
|
45
|
+
"livekit-client",
|
|
46
|
+
"livekit-server-sdk",
|
|
47
|
+
"@livekit",
|
|
48
|
+
// mediasoup
|
|
49
|
+
"mediasoup",
|
|
50
|
+
"mediasoup-client",
|
|
51
|
+
// opentok
|
|
52
|
+
"opentok",
|
|
53
|
+
// peerjs
|
|
54
|
+
"peerjs",
|
|
55
|
+
"peerjs-server",
|
|
56
|
+
// simple-peer
|
|
57
|
+
"simple-peer",
|
|
58
|
+
// sip
|
|
59
|
+
"sip.js",
|
|
60
|
+
"jssip",
|
|
61
|
+
// socket.io
|
|
62
|
+
"socket.io",
|
|
63
|
+
"socket.io-client",
|
|
64
|
+
"@socket.io",
|
|
65
|
+
// twilio
|
|
66
|
+
"twilio",
|
|
67
|
+
"twilio-video",
|
|
68
|
+
"@twilio",
|
|
69
|
+
// webrtc native bindings
|
|
70
|
+
"wrtc",
|
|
71
|
+
"node-webrtc",
|
|
72
|
+
];
|
|
73
|
+
export const ALLOWED_MEDIA_RUNTIME = ["ws"];
|
|
74
|
+
// Decides whether one dependency name is covered by one denylist entry. Two cases match:
//   1. the name equals the entry exactly, or
//   2. the entry is a scope (starts with "@") and the name sits under it, i.e. starts with `<entry>/`
//      (e.g. `@daily-co/daily-js` is covered by the `@daily-co` entry).
// The trailing slash in case 2 keeps a look-alike scope such as `@daily-co-extra/x` from matching. This
// covers the realistic monorepo shape where a forbidden vendor ships under a scope.
function dependencyMatchesDenied(dependencyName, denied) {
    return (dependencyName === denied ||
        (denied.startsWith("@") && dependencyName.startsWith(`${denied}/`)));
}
|
|
83
|
+
// Scans the provided package manifests for dependencies covered by `DENIED_MEDIA_PACKAGES`.
//
// For every name in each manifest's `dependencyNames`, the FIRST denylist entry that
// `dependencyMatchesDenied` accepts is recorded together with the manifest's `packageName`. The result is
// `{ clean, found }`, where `clean` is true only when nothing was found.
//
// `found` is sorted by `packageName`, then by `deniedPackage`, so the list does not depend on manifest
// ordering. The comparison is by UTF-16 code unit, NOT `localeCompare`: locale-aware collation follows
// the host's default locale, which would make the "canonical" order environment-dependent.
export function scanManifestsForDeniedMediaPackages(manifests) {
    const byCodeUnit = (left, right) => (left < right ? -1 : left > right ? 1 : 0);
    const found = [];
    for (const manifest of manifests) {
        for (const dependency of manifest.dependencyNames) {
            const denied = DENIED_MEDIA_PACKAGES.find((d) => dependencyMatchesDenied(dependency, d));
            if (denied !== undefined) {
                found.push({ packageName: manifest.packageName, deniedPackage: denied });
            }
        }
    }
    // `found` is local to this call, so sorting it in place mutates nothing the caller can see.
    found.sort((a, b) => byCodeUnit(a.packageName, b.packageName) || byCodeUnit(a.deniedPackage, b.deniedPackage));
    return { clean: found.length === 0, found };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { type VoiceProfile } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { EgressDestinationClass, VoiceEnvironmentProfile } from "./types.js";
|
|
3
|
+
export declare const ALL_VOICE_PROFILES: readonly VoiceProfile[];
|
|
4
|
+
export declare function localProfilesMatchContract(): boolean;
|
|
5
|
+
export type VoiceNetworkPosture = "cloud-provider" | "controlled-network" | "no-provider";
|
|
6
|
+
export interface VoiceEnvironmentDescriptor {
|
|
7
|
+
readonly id: VoiceEnvironmentProfile;
|
|
8
|
+
readonly label: string;
|
|
9
|
+
readonly networkPosture: VoiceNetworkPosture;
|
|
10
|
+
readonly egresses: boolean;
|
|
11
|
+
}
|
|
12
|
+
export declare const VOICE_ENVIRONMENT_DESCRIPTORS: Record<VoiceEnvironmentProfile, VoiceEnvironmentDescriptor>;
|
|
13
|
+
export declare function effectiveVoiceProfile(environment: VoiceEnvironmentProfile, advertised: VoiceProfile): VoiceProfile;
|
|
14
|
+
export declare function egressDestinationClassFor(effectiveProfile: VoiceProfile): EgressDestinationClass;
|
|
15
|
+
//# sourceMappingURL=profiles.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"profiles.d.ts","sourceRoot":"","sources":["../../src/voice-twin/profiles.ts"],"names":[],"mappings":"AAUA,OAAO,EAAiC,KAAK,YAAY,EAAE,MAAM,+BAA+B,CAAC;AACjG,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAKlF,eAAO,MAAM,kBAAkB,EAAE,SAAS,YAAY,EAK5C,CAAC;AAIX,wBAAgB,0BAA0B,IAAI,OAAO,CAIpD;AAMD,MAAM,MAAM,mBAAmB,GAAG,gBAAgB,GAAG,oBAAoB,GAAG,aAAa,CAAC;AAE1F,MAAM,WAAW,0BAA0B;IACzC,QAAQ,CAAC,EAAE,EAAE,uBAAuB,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,cAAc,EAAE,mBAAmB,CAAC;IAC7C,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;CAC5B;AAED,eAAO,MAAM,6BAA6B,EAAE,MAAM,CAChD,uBAAuB,EACvB,0BAA0B,CAoBlB,CAAC;AAKX,wBAAgB,qBAAqB,CACnC,WAAW,EAAE,uBAAuB,EACpC,UAAU,EAAE,YAAY,GACvB,YAAY,CAEd;AAKD,wBAAgB,yBAAyB,CAAC,gBAAgB,EAAE,YAAY,GAAG,sBAAsB,CAEhG"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// Voice Digital Twin profile × environment model (Epic #491, Issue #505; ADR-0068).
|
|
2
|
+
//
|
|
3
|
+
// The two orthogonal axes the capstone evaluates: the contract `VoiceProfile` capability axis and the
|
|
4
|
+
// deployment ENVIRONMENT axis (`docs/voice/deployment-profile-matrix.md` §1). `effectiveVoiceProfile`
|
|
5
|
+
// applies the matrix §3 degradation rule — in a no-voice environment EVERY advertised capability degrades
|
|
6
|
+
// to `none` because no provider is configured; in a provider-configured environment the advertised
|
|
7
|
+
// capability is the effective profile. `egressDestinationClassFor` encodes the matrix §3 / privacy-contract
|
|
8
|
+
// §1 external-call rule: an effective-none cell egresses nowhere, any active capability egresses ONLY to
|
|
9
|
+
// the configured model endpoint. Pure.
|
|
10
|
+
import { VOICE_PROFILE_MEDIA_TRANSPORT } from "@oscharko-dev/keiko-contracts";
|
|
11
|
+
// The four capability-axis profiles, declared locally because the contract exports no `VOICE_PROFILES`
|
|
12
|
+
// array. `profiles.test.ts` asserts this equals the keys of `VOICE_PROFILE_MEDIA_TRANSPORT` so it can
|
|
13
|
+
// never drift from the contract.
|
|
14
|
+
export const ALL_VOICE_PROFILES = [
|
|
15
|
+
"none",
|
|
16
|
+
"speech-to-text",
|
|
17
|
+
"speech-output",
|
|
18
|
+
"full-realtime",
|
|
19
|
+
];
|
|
20
|
+
// Drift check between the locally declared `ALL_VOICE_PROFILES` and the contract's media-transport table
// (which is keyed by `VoiceProfile`). Returns true only when both hold exactly the same profile names;
// a profile added to or removed from the contract without updating the local list makes this false.
// Both lists are compared as sorted copies, so declaration order does not matter.
export function localProfilesMatchContract() {
    const contractKeys = Object.keys(VOICE_PROFILE_MEDIA_TRANSPORT).sort();
    const localKeys = [...ALL_VOICE_PROFILES].sort();
    if (contractKeys.length !== localKeys.length) {
        return false;
    }
    return contractKeys.every((key, index) => key === localKeys[index]);
}
|
|
27
|
+
export const VOICE_ENVIRONMENT_DESCRIPTORS = {
|
|
28
|
+
"azure-foundry": {
|
|
29
|
+
id: "azure-foundry",
|
|
30
|
+
label: "Azure AI Foundry dev/academic",
|
|
31
|
+
networkPosture: "cloud-provider",
|
|
32
|
+
egresses: true,
|
|
33
|
+
},
|
|
34
|
+
"customer-hosted": {
|
|
35
|
+
id: "customer-hosted",
|
|
36
|
+
label: "Customer-hosted controlled-network",
|
|
37
|
+
networkPosture: "controlled-network",
|
|
38
|
+
egresses: true,
|
|
39
|
+
},
|
|
40
|
+
"no-voice-env": {
|
|
41
|
+
id: "no-voice-env",
|
|
42
|
+
label: "No voice provider configured",
|
|
43
|
+
networkPosture: "no-provider",
|
|
44
|
+
egresses: false,
|
|
45
|
+
},
|
|
46
|
+
};
|
|
47
|
+
// ─── Effective profile (matrix §3 degradation rule) ──────────────────────────────────
|
|
48
|
+
// Applies the degradation rule (AC1/AC2): when the environment's descriptor has `egresses: false`
// (the no-voice environment, where nothing is configured) every advertised capability degrades to
// `none`; otherwise the advertised capability is returned unchanged as the effective profile.
export function effectiveVoiceProfile(environment, advertised) {
    const { egresses } = VOICE_ENVIRONMENT_DESCRIPTORS[environment];
    if (!egresses) {
        return "none";
    }
    return advertised;
}
|
|
53
|
+
// ─── Egress destination class (matrix §3 external-call rule) ──────────────────────────
|
|
54
|
+
// Maps an effective profile to its egress destination class: `none` egresses nowhere ("none"); every
// other profile egresses only to the configured model endpoint ("configured-model-endpoint"). That is the
// ONLY positive egress class the matrix permits.
export function egressDestinationClassFor(effectiveProfile) {
    if (effectiveProfile === "none") {
        return "none";
    }
    return "configured-model-endpoint";
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"render.d.ts","sourceRoot":"","sources":["../../src/voice-twin/render.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAGV,kBAAkB,EAEnB,MAAM,YAAY,CAAC;AA+BpB,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,kBAAkB,GAAG,MAAM,CAiC5E"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// renderVoiceTwinSummary (Issue #505): VoiceTwinScorecard -> human-readable string. A verdict line, a
|
|
2
|
+
// per-dimension table, the profile / environment matrix-coverage lines, and the privacy line. The
|
|
3
|
+
// scorecard carries only harness-authored, content-free fields (counts, closed-vocabulary labels, numeric
|
|
4
|
+
// scores), so this renderer performs no redaction — it only formats fields that are safe to print.
|
|
5
|
+
// Short printable label for one dimension result: "PASS" or "FAIL" for those outcomes, "n/a" for
// anything else.
function glyph(result) {
    switch (result.outcome) {
        case "pass":
            return "PASS";
        case "fail":
            return "FAIL";
        default:
            return "n/a";
    }
}
|
|
14
|
+
// One bullet line per fixture: name, the [category/environment/effectiveProfile] cell, an OK/FAIL
// verdict taken from `fullyPassed`, then `dimension=GLYPH` pairs for every dimension that was actually
// exercised (not-applicable ones are left out). Trailing whitespace is trimmed, which matters when no
// dimension was exercised.
function fixtureLine(fixture) {
    const exercised = fixture.dimensionResults.filter((result) => result.outcome !== "not-applicable");
    const dims = exercised.map((result) => `${result.dimension}=${glyph(result)}`).join(" ");
    const verdict = fixture.fullyPassed ? "OK" : "FAIL";
    const cell = `${fixture.category}/${fixture.environment}/${fixture.effectiveProfile}`;
    return `- ${fixture.fixtureName} [${cell}] ${verdict} ${dims}`.trimEnd();
}
|
|
22
|
+
// One table row per dimension: the name padded to 32 columns, a verdict padded to 5, the three counts,
// and the pass rate as a whole-number percentage ("n/a" when `passRate` is null). The verdict is FAIL if
// anything failed, PASS if nothing failed and something passed, and "n/a" otherwise.
function dimensionLine(entry) {
    let rate = "n/a";
    if (entry.passRate !== null) {
        rate = `${(entry.passRate * 100).toFixed(0)}%`;
    }
    let verdict = "n/a";
    if (entry.failCount > 0) {
        verdict = "FAIL";
    }
    else if (entry.passCount > 0) {
        verdict = "PASS";
    }
    const columns = [
        ` ${entry.dimension.padEnd(32)}`,
        verdict.padEnd(5),
        `pass=${String(entry.passCount)}`,
        `fail=${String(entry.failCount)}`,
        `n/a=${String(entry.notApplicableCount)}`,
        `rate=${rate}`,
    ];
    return columns.join(" ");
}
|
|
27
|
+
// Renders a truthy value as "yes" and a falsy one as "no".
function yesNo(value) {
    if (value) {
        return "yes";
    }
    return "no";
}
|
|
30
|
+
// Formats a scorecard as a newline-joined, human-readable report, in this order:
//   1. a title line carrying the schema version,
//   2. fixture totals, capability coverage, environment coverage and the privacy line,
//   3. a "Fixtures:" section with one `fixtureLine` per fixture result,
//   4. a "Dimensions:" section with one `dimensionLine` per dimension entry,
//   5. the closing verdict line, chosen by whether `summary.goNoGo` is "GO".
// Sections are separated by a blank line. Fields are printed as given; nothing is redacted here.
export function renderVoiceTwinSummary(scorecard) {
    const { summary } = scorecard;
    const header = [
        `Voice Digital Twin evaluation summary (schema v${scorecard.schemaVersion})`,
        `Fixtures: ${String(summary.totalFixtures)} total, ${String(summary.fullyPassedFixtures)} fully passed`,
        `Capability coverage: no-voice=${yesNo(summary.coversNoVoice)} stt=${yesNo(summary.coversSttOnly)} full-realtime=${yesNo(summary.coversFullRealtime)}`,
        `Environment coverage: azure-foundry=${yesNo(summary.coversAzureFoundry)} customer-hosted=${yesNo(summary.coversCustomerHosted)}`,
        `Privacy: negative-egress-caught=${yesNo(summary.coversPrivacyNegative)}, matrix cells=${String(scorecard.coveredMatrixCells.length)}`,
    ];
    const fixtureRows = Array.from(scorecard.fixtureResults, (fixture) => fixtureLine(fixture));
    const dimensionRows = Array.from(scorecard.dimensions, (entry) => dimensionLine(entry));
    const verdictLine = summary.goNoGo === "GO"
        ? "Verdict: GO - every exercised dimension passed across the profile and environment matrix."
        : "Verdict: NO-GO - a dimension failed or a coverage gate was unmet (see table above).";
    const report = [
        ...header,
        "",
        "Fixtures:",
        ...fixtureRows,
        "",
        "Dimensions:",
        ...dimensionRows,
        "",
        verdictLine,
    ];
    return report.join("\n");
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { type VoiceTwinFixture, type VoiceTwinObservation, type VoiceTwinScorecard } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Derive the deterministic observation for one fixture: the contract-derived capability cell, the egress
|
|
4
|
+
* and supply-chain audits, and the declared AC6 metric records. Pure data derivation over the frozen
|
|
5
|
+
* contract.
|
|
6
|
+
*/
|
|
7
|
+
export declare function deriveVoiceTwinObservation(fixture: VoiceTwinFixture): VoiceTwinObservation;
|
|
8
|
+
/**
|
|
9
|
+
* Run the Voice Digital Twin evaluation suite and return a fully aggregated scorecard. Pure and
|
|
10
|
+
* deterministic. Pass an explicit fixture list to scope the run (the suite tests use the default set).
|
|
11
|
+
*/
|
|
12
|
+
export declare function runVoiceTwinEvaluation(fixtures?: readonly VoiceTwinFixture[]): VoiceTwinScorecard;
|
|
13
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/voice-twin/runner.ts"],"names":[],"mappings":"AAyBA,OAAO,EAIL,KAAK,gBAAgB,EAIrB,KAAK,oBAAoB,EACzB,KAAK,kBAAkB,EAExB,MAAM,YAAY,CAAC;AAmDpB;;;;GAIG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,gBAAgB,GAAG,oBAAoB,CAU1F;AAwED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,GAAE,SAAS,gBAAgB,EAA4B,GAC9D,kBAAkB,CAUpB"}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
// Voice Digital Twin evaluation runner (Epic #491, Issue #505; ADR-0068).
|
|
2
|
+
//
|
|
3
|
+
// Derives a deterministic observation for each fixture from the frozen keiko-contracts voice tables and
|
|
4
|
+
// state machines — the capability cell, the egress / supply-chain audit, and exactly the AC6 metric
|
|
5
|
+
// records the fixture's declared dimensions require — then scores the eleven dimensions, aggregates a
|
|
6
|
+
// scorecard, and derives the offline Go/No-Go verdict and the matrix-coverage flags. Pure: no IO, clock,
|
|
7
|
+
// randomness, or model dispatch.
|
|
8
|
+
import { VOICE_CONTROL_MESSAGE_KINDS, } from "@oscharko-dev/keiko-contracts";
|
|
9
|
+
import { deriveCapabilityCell } from "./capability.js";
|
|
10
|
+
import { VOICE_TWIN_REPLAY_CAPACITY, deriveBufferBoundednessMetric, deriveEndOfTurnMetric, deriveInterruptionMetric, deriveLatencyClassMetric, deriveProviderFailureRecoveryMetric, deriveTranscriptCorrectionMetric, } from "./metrics.js";
|
|
11
|
+
import { auditVoiceEgress, scanManifestsForDeniedMediaPackages } from "./privacy.js";
|
|
12
|
+
import { aggregateVoiceTwinQuality, scoreVoiceTwinQuality } from "./scorer.js";
|
|
13
|
+
import { ALL_VOICE_TWIN_FIXTURES } from "./fixtures/index.js";
|
|
14
|
+
import { VOICE_TWIN_EVAL_SCHEMA_VERSION, } from "./types.js";
|
|
15
|
+
/**
 * Build the control-message kind sequence that the buffer-boundedness model pushes.
 *
 * The full `VOICE_CONTROL_MESSAGE_KINDS` catalog is appended, in catalog order, `capacity + 4` times.
 * The intent is that the replay-eligible subset alone exceeds the ring capacity so eviction is
 * exercised, while `transcript.partial` and other ephemeral kinds stay interleaved to show that they
 * never buffer.
 *
 * @param capacity Replay ring capacity the probe sequence must overflow.
 * @returns A fresh array holding the repeated kind catalog.
 */
function bufferProbeKinds(capacity) {
    const sequence = [];
    // Count down from the number of passes; one pass appends the whole catalog once.
    let remainingPasses = capacity + 4;
    while (remainingPasses > 0) {
        sequence.push(...VOICE_CONTROL_MESSAGE_KINDS);
        remainingPasses -= 1;
    }
    return sequence;
}
|
|
29
|
+
/**
 * Derive exactly the metric records that the fixture's declared dimensions require.
 *
 * Profile-dependent metrics are derived against `cell.effectiveProfile`. A metric whose dimension
 * is not declared is left out of the bundle entirely rather than being set to `undefined`
 * (exactOptionalPropertyTypes forbids assigning `undefined` to an optional prop), which is why the
 * bundle is filled key by key.
 *
 * @param fixture Fixture whose `dimensions` collection (queried via `has`) selects the metrics.
 * @param cell Capability cell supplying `effectiveProfile`.
 * @returns A bundle holding only the requested metric records.
 */
function deriveMetrics(fixture, cell) {
    // [declared dimension, bundle key, lazy derivation]; the order fixes the bundle's key order.
    const derivations = [
        ["interruption-metric", "interruption", () => deriveInterruptionMetric(cell.effectiveProfile)],
        ["end-of-turn-metric", "endOfTurn", () => deriveEndOfTurnMetric(cell.effectiveProfile)],
        ["transcript-correction-metric", "transcriptCorrection", () => deriveTranscriptCorrectionMetric()],
        ["provider-failure-recovery-metric", "providerFailureRecovery", () => deriveProviderFailureRecoveryMetric()],
        [
            "buffer-boundedness-metric",
            "bufferBoundedness",
            () => deriveBufferBoundednessMetric(bufferProbeKinds(VOICE_TWIN_REPLAY_CAPACITY), VOICE_TWIN_REPLAY_CAPACITY),
        ],
        ["latency-class-metric", "latencyClass", () => deriveLatencyClassMetric(cell.effectiveProfile)],
    ];
    const bundle = {};
    for (const [dimension, key, derive] of derivations) {
        // Only run a derivation when the fixture actually declares its dimension.
        if (fixture.dimensions.has(dimension)) {
            bundle[key] = derive();
        }
    }
    return bundle;
}
|
|
54
|
+
/**
 * Build the deterministic observation for a single fixture.
 *
 * The observation combines the capability cell derived from the fixture's environment and
 * advertised profile, the egress audit of its ledger, the denied-media-package scan of its
 * manifests, and the metric records its declared dimensions call for. Pure data derivation over
 * the frozen contract.
 *
 * @param fixture Fixture to observe.
 * @returns The observation record for that fixture.
 */
export function deriveVoiceTwinObservation(fixture) {
    const { environment, advertisedProfile, egressLedger, manifests } = fixture;
    const cell = deriveCapabilityCell(environment, advertisedProfile);
    const egressAudit = auditVoiceEgress(egressLedger);
    const manifestScan = scanManifestsForDeniedMediaPackages(manifests);
    // Metrics are derived against the cell computed above.
    const metrics = deriveMetrics(fixture, cell);
    return { environment, advertisedProfile, cell, egressAudit, manifestScan, metrics };
}
|
|
70
|
+
/**
 * Observe and score one fixture, producing its per-fixture result record.
 *
 * @param fixture Fixture to evaluate.
 * @returns The fixture's identity fields, its observation, its dimension results, and a
 *   `fullyPassed` flag that is true when no dimension result has the outcome `"fail"`.
 */
function runFixture(fixture) {
    const observation = deriveVoiceTwinObservation(fixture);
    const dimensionResults = scoreVoiceTwinQuality(fixture, observation);
    // Any outcome other than "fail" counts as a pass for this flag.
    const hasFailure = dimensionResults.some(({ outcome }) => outcome === "fail");
    const { name: fixtureName, category, environment } = fixture;
    return {
        fixtureName,
        category,
        environment,
        effectiveProfile: observation.cell.effectiveProfile,
        observation,
        dimensionResults,
        fullyPassed: !hasFailure,
    };
}
|
|
83
|
+
/**
 * Collect the distinct (environment, effective profile) cells covered by the fixture results.
 *
 * Cells are de-duplicated on the `environment:effectiveProfile` pair and returned sorted by
 * environment, then by effective profile. The ordering uses plain code-unit string comparison
 * rather than `localeCompare`, so the result does not depend on the host's default locale or ICU
 * data and stays deterministic across machines.
 *
 * @param fixtureResults Per-fixture results; only `environment` and `effectiveProfile` are read.
 * @returns A new, sorted array of `{ environment, effectiveProfile }` records.
 */
function collectMatrixCells(fixtureResults) {
    // Locale-independent three-way comparison of two strings (UTF-16 code-unit order).
    const compareOrdinal = (left, right) => (left < right ? -1 : left > right ? 1 : 0);
    const cells = new Map();
    for (const { environment, effectiveProfile } of fixtureResults) {
        const key = `${environment}:${effectiveProfile}`;
        // The first occurrence wins; later duplicates of the same cell are ignored.
        if (!cells.has(key)) {
            cells.set(key, { environment, effectiveProfile });
        }
    }
    return [...cells.values()].sort((a, b) => compareOrdinal(a.environment, b.environment) ||
        compareOrdinal(a.effectiveProfile, b.effectiveProfile));
}
|
|
98
|
+
function summarize(fixtureResults, dimensions) {
|
|
99
|
+
const allClean = dimensions.every((d) => d.failCount === 0);
|
|
100
|
+
const coversNoVoice = fixtureResults.some((f) => f.effectiveProfile === "none");
|
|
101
|
+
const coversSttOnly = fixtureResults.some((f) => f.effectiveProfile === "speech-to-text");
|
|
102
|
+
const coversSpeechOutput = fixtureResults.some((f) => f.effectiveProfile === "speech-output");
|
|
103
|
+
const coversFullRealtime = fixtureResults.some((f) => f.effectiveProfile === "full-realtime");
|
|
104
|
+
const coversAzureFoundry = fixtureResults.some((f) => f.environment === "azure-foundry");
|
|
105
|
+
const coversCustomerHosted = fixtureResults.some((f) => f.environment === "customer-hosted");
|
|
106
|
+
const coversPrivacyNegative = fixtureResults.some((f) => f.category === "privacy" && !f.observation.egressAudit.approved);
|
|
107
|
+
const coverageMet = coversNoVoice &&
|
|
108
|
+
coversSttOnly &&
|
|
109
|
+
coversSpeechOutput &&
|
|
110
|
+
coversFullRealtime &&
|
|
111
|
+
coversAzureFoundry &&
|
|
112
|
+
coversCustomerHosted &&
|
|
113
|
+
coversPrivacyNegative;
|
|
114
|
+
return {
|
|
115
|
+
totalFixtures: fixtureResults.length,
|
|
116
|
+
fullyPassedFixtures: fixtureResults.filter((f) => f.fullyPassed).length,
|
|
117
|
+
coversNoVoice,
|
|
118
|
+
coversSttOnly,
|
|
119
|
+
coversSpeechOutput,
|
|
120
|
+
coversFullRealtime,
|
|
121
|
+
coversAzureFoundry,
|
|
122
|
+
coversCustomerHosted,
|
|
123
|
+
coversPrivacyNegative,
|
|
124
|
+
goNoGo: allClean && coverageMet ? "GO" : "NO-GO",
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
/**
 * Execute the Voice Digital Twin evaluation suite and assemble the aggregated scorecard.
 *
 * Every fixture is observed and scored, the per-fixture dimension results are aggregated, and the
 * summary (including the Go/No-Go verdict) and the covered matrix cells are derived from those
 * results. Pure and deterministic.
 *
 * @param fixtures Fixtures to run; defaults to `ALL_VOICE_TWIN_FIXTURES`. Pass an explicit list to
 *   scope the run (the suite tests use the default set).
 * @returns The scorecard: schema version, fixture results, aggregated dimensions, summary, and
 *   covered matrix cells.
 */
export function runVoiceTwinEvaluation(fixtures = ALL_VOICE_TWIN_FIXTURES) {
    const fixtureResults = fixtures.map((fixture) => runFixture(fixture));
    const perFixtureDimensions = fixtureResults.map(({ dimensionResults }) => dimensionResults);
    const dimensions = aggregateVoiceTwinQuality(perFixtureDimensions);
    const summary = summarize(fixtureResults, dimensions);
    const coveredMatrixCells = collectMatrixCells(fixtureResults);
    return {
        schemaVersion: VOICE_TWIN_EVAL_SCHEMA_VERSION,
        fixtureResults,
        dimensions,
        summary,
        coveredMatrixCells,
    };
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { type VoiceTwinDimensionResult, type VoiceTwinFixture, type VoiceTwinObservation, type VoiceTwinScorecardEntry } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Score one fixture's observation across all eleven dimensions. A dimension the fixture does not declare is
|
|
4
|
+
* `not-applicable`. Pure.
|
|
5
|
+
*/
|
|
6
|
+
export declare function scoreVoiceTwinQuality(fixture: VoiceTwinFixture, obs: VoiceTwinObservation): readonly VoiceTwinDimensionResult[];
|
|
7
|
+
/**
 * Aggregate per-fixture dimension results into scorecard entries.
 *
 * @param results One inner array of dimension results per fixture; the runner passes each
 *   fixture result's `dimensionResults`.
 * @returns The aggregated scorecard entries. The runner reads `failCount` from each entry;
 *   presumably there is one entry per dimension (TODO confirm against scorer.ts).
 */
export declare function aggregateVoiceTwinQuality(results: readonly (readonly VoiceTwinDimensionResult[])[]): readonly VoiceTwinScorecardEntry[];
|
|
8
|
+
//# sourceMappingURL=scorer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scorer.d.ts","sourceRoot":"","sources":["../../src/voice-twin/scorer.ts"],"names":[],"mappings":"AAgBA,OAAO,EAIL,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,uBAAuB,EAC7B,MAAM,YAAY,CAAC;AAuTpB;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,gBAAgB,EACzB,GAAG,EAAE,oBAAoB,GACxB,SAAS,wBAAwB,EAAE,CAUrC;AA8BD,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,SAAS,CAAC,SAAS,wBAAwB,EAAE,CAAC,EAAE,GACxD,SAAS,uBAAuB,EAAE,CAEpC"}
|