@wibly/sdk 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/package.json +4 -4
- package/src/client.ts +109 -26
- package/src/control.ts +6 -0
- package/src/index.ts +12 -1
- package/src/inference.ts +7 -99
- package/src/react.ts +15 -0
- package/src/time.ts +17 -1
- package/src/tts-playback.test.ts +61 -0
- package/src/tts-playback.ts +99 -0
- package/src/voice.ts +21 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# `@wibly/sdk` — Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.3 — 2026-06-08
|
|
4
|
+
|
|
5
|
+
## 0.1.3 — 2026-06-08
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- `session.ttsPlayback` — client-side TTS playback telemetry
|
|
10
|
+
(`subscribe` / `getSnapshot`) exposing `{ isPlaying, speechLevel }`.
|
|
11
|
+
The Host shell samples speech energy from its single `<audio>` element
|
|
12
|
+
and pushes smoothed levels via `setTtsPlaybackTelemetry(session, …)` so
|
|
13
|
+
Experience bundles can drive amplitude-synced avatar animation without
|
|
14
|
+
high-frequency `voice.*` events on the wire.
|
|
15
|
+
- `@wibly/sdk/react` — `useTtsPlayback()` hook wrapping the telemetry
|
|
16
|
+
store for shell and bundle React trees.
|
|
17
|
+
|
|
18
|
+
## 0.1.2 — 2026-06-08
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
|
|
22
|
+
- `session.host.abort()` — a universal mid-game abort. Emits the new
|
|
23
|
+
`host.abort` control verb; the Runtime force-jumps to the manifest's
|
|
24
|
+
`workflow.abortPhaseId` (or the last declared phase when omitted) from
|
|
25
|
+
any phase. Games no longer need per-phase abort transitions.
|
|
26
|
+
|
|
3
27
|
## 0.1.1 — 2026-05-30
|
|
4
28
|
|
|
5
29
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wibly/sdk",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Wibly @wibly/sdk",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
"access": "public"
|
|
19
19
|
},
|
|
20
20
|
"dependencies": {
|
|
21
|
-
"@wibly/internal-manifest": "0.1.1",
|
|
22
|
-
"@wibly/internal-protocol": "0.1.1",
|
|
23
|
-
"@wibly/internal-shared": "0.1.1",
|
|
21
|
+
"@wibly/internal-manifest": "0.1.3",
|
|
22
|
+
"@wibly/internal-protocol": "0.1.3",
|
|
23
|
+
"@wibly/internal-shared": "0.1.3",
|
|
24
24
|
"zod": "^3.25.76"
|
|
25
25
|
},
|
|
26
26
|
"peerDependencies": {
|
package/src/client.ts
CHANGED
|
@@ -77,6 +77,11 @@ import {
|
|
|
77
77
|
createServerTimeSource,
|
|
78
78
|
type ServerTimeSource,
|
|
79
79
|
} from './time.js';
|
|
80
|
+
import {
|
|
81
|
+
createTtsPlaybackStore,
|
|
82
|
+
registerTtsPlaybackStore,
|
|
83
|
+
type TtsPlaybackTelemetry,
|
|
84
|
+
} from './tts-playback.js';
|
|
80
85
|
import {
|
|
81
86
|
createTransport,
|
|
82
87
|
type Transport,
|
|
@@ -181,10 +186,36 @@ export type Session = {
|
|
|
181
186
|
readonly advancePhase: (
|
|
182
187
|
detail?: unknown,
|
|
183
188
|
) => Result<{ readonly id: string }, SdkError>;
|
|
189
|
+
/**
|
|
190
|
+
* Universal mid-game abort. The Runtime force-jumps to the
|
|
191
|
+
* manifest's abort/terminal phase (`workflow.abortPhaseId`, else
|
|
192
|
+
* the last declared phase) from wherever the session currently is.
|
|
193
|
+
* No per-phase wiring required.
|
|
194
|
+
*/
|
|
195
|
+
readonly abort: () => Result<{ readonly id: string }, SdkError>;
|
|
196
|
+
readonly reportTtsPlayback: (detail: {
|
|
197
|
+
readonly state: 'active' | 'idle';
|
|
198
|
+
}) => Result<{ readonly id: string }, SdkError>;
|
|
199
|
+
readonly reportTtsBeat: (detail: {
|
|
200
|
+
readonly beatId: string;
|
|
201
|
+
readonly event: 'clip_start' | 'clip_end';
|
|
202
|
+
readonly clipId?: string;
|
|
203
|
+
readonly reveal?: {
|
|
204
|
+
readonly kind: 'none' | 'player_submission';
|
|
205
|
+
readonly playerId?: string;
|
|
206
|
+
};
|
|
207
|
+
}) => Result<{ readonly id: string }, SdkError>;
|
|
184
208
|
readonly reclaim: () => Result<{ readonly id: string }, SdkError>;
|
|
185
209
|
};
|
|
186
210
|
readonly inference: SessionInference;
|
|
187
211
|
readonly voice: SessionVoice;
|
|
212
|
+
/**
|
|
213
|
+
* Client-side TTS playback telemetry (speech energy + playing
|
|
214
|
+
* flag). Populated by the Host shell from the local audio element;
|
|
215
|
+
* read by Experience bundles to drive avatar animation. Never
|
|
216
|
+
* crosses the wire. See `tts-playback.ts`.
|
|
217
|
+
*/
|
|
218
|
+
readonly ttsPlayback: TtsPlaybackTelemetry;
|
|
188
219
|
readonly events: Pick<EventBus, 'onEvent' | 'onAnyEvent'>;
|
|
189
220
|
readonly lifecycle: LifecycleBindings;
|
|
190
221
|
/** Server-time helper. See `time.ts`. */
|
|
@@ -221,6 +252,7 @@ const DEFAULT_VOICE_SPEAK_TIMEOUT_MS = 30_000;
|
|
|
221
252
|
export const createSession = (config: SessionConfig): Session => {
|
|
222
253
|
const store = createSessionStore();
|
|
223
254
|
const bus = createEventBus();
|
|
255
|
+
const ttsPlaybackStore = createTtsPlaybackStore();
|
|
224
256
|
const time = createServerTimeSource({ now: config.now });
|
|
225
257
|
const lifecycle = createLifecycleBindings(bus);
|
|
226
258
|
let isPreviewFlag: boolean = config.isPreview ?? false;
|
|
@@ -310,9 +342,11 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
310
342
|
? 'host'
|
|
311
343
|
: 'system';
|
|
312
344
|
store.setSessionPaused(true, via);
|
|
345
|
+
time.freeze();
|
|
313
346
|
}
|
|
314
347
|
if (payload.transition === 'continued') {
|
|
315
348
|
store.setSessionPaused(false, null);
|
|
349
|
+
time.unfreeze();
|
|
316
350
|
}
|
|
317
351
|
if (payload.transition === 'seat.recovery_code_issued') {
|
|
318
352
|
const detail = payload.detail;
|
|
@@ -481,6 +515,8 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
481
515
|
audioBase64: string;
|
|
482
516
|
contentType?: unknown;
|
|
483
517
|
durationMs?: unknown;
|
|
518
|
+
beat?: unknown;
|
|
519
|
+
cues?: unknown;
|
|
484
520
|
},
|
|
485
521
|
caption: string | null | undefined,
|
|
486
522
|
): void => {
|
|
@@ -499,6 +535,11 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
499
535
|
? data.durationMs
|
|
500
536
|
: estimateAudioDurationMs(data.audioBase64),
|
|
501
537
|
caption: caption === undefined ? null : caption,
|
|
538
|
+
// Carry in-phase beat + lip-sync cues through to the Host shell.
|
|
539
|
+
// These drive Experience reveal choreography via `reportTtsBeat`
|
|
540
|
+
// → `onTtsBeat`; dropping them here silently breaks beat reveals.
|
|
541
|
+
...(data.beat !== undefined ? { beat: data.beat } : {}),
|
|
542
|
+
...(data.cues !== undefined ? { cues: data.cues } : {}),
|
|
502
543
|
},
|
|
503
544
|
});
|
|
504
545
|
};
|
|
@@ -510,37 +551,57 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
510
551
|
contentType?: string;
|
|
511
552
|
durationMs?: number;
|
|
512
553
|
kind?: string;
|
|
554
|
+
beat?: unknown;
|
|
555
|
+
cues?: unknown;
|
|
513
556
|
} | null;
|
|
514
|
-
if (!data
|
|
515
|
-
const causeId = data.causeMessageId as unknown as MessageId;
|
|
516
|
-
const pending = pendingVoice.get(causeId);
|
|
517
|
-
const caption = pending?.caption;
|
|
518
|
-
const audioBase64 = data.audioBase64;
|
|
557
|
+
if (!data) return;
|
|
519
558
|
|
|
520
|
-
if (
|
559
|
+
const audioBase64 = data.audioBase64;
|
|
560
|
+
const isSpeakResult =
|
|
521
561
|
payload.eventType === 'voice.speak.result' &&
|
|
522
|
-
typeof audioBase64 === 'string'
|
|
523
|
-
) {
|
562
|
+
typeof audioBase64 === 'string';
|
|
563
|
+
|
|
564
|
+
const causeId = (() => {
|
|
565
|
+
if (typeof data.causeMessageId === 'string' && data.causeMessageId.length > 0) {
|
|
566
|
+
return data.causeMessageId as MessageId;
|
|
567
|
+
}
|
|
568
|
+
if (isSpeakResult) {
|
|
569
|
+
return `voice-server-${audioBase64.length}-${audioBase64.slice(0, 12)}` as MessageId;
|
|
570
|
+
}
|
|
571
|
+
return null;
|
|
572
|
+
})();
|
|
573
|
+
|
|
574
|
+
if (isSpeakResult && causeId !== null) {
|
|
575
|
+
const pending =
|
|
576
|
+
typeof data.causeMessageId === 'string' && data.causeMessageId.length > 0
|
|
577
|
+
? pendingVoice.get(data.causeMessageId as MessageId)
|
|
578
|
+
: undefined;
|
|
524
579
|
dispatchVoiceAudio(
|
|
525
580
|
causeId,
|
|
526
581
|
{
|
|
527
582
|
audioBase64,
|
|
528
583
|
contentType: data.contentType,
|
|
529
584
|
durationMs: data.durationMs,
|
|
585
|
+
beat: data.beat,
|
|
586
|
+
cues: data.cues,
|
|
530
587
|
},
|
|
531
|
-
caption ?? null,
|
|
588
|
+
pending?.caption ?? null,
|
|
532
589
|
);
|
|
533
590
|
}
|
|
534
591
|
|
|
592
|
+
if (typeof data.causeMessageId !== 'string' || data.causeMessageId.length === 0) {
|
|
593
|
+
return;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
const causeMessageId = data.causeMessageId as MessageId;
|
|
597
|
+
const pending = pendingVoice.get(causeMessageId);
|
|
598
|
+
|
|
535
599
|
if (pending === undefined) return;
|
|
536
600
|
pending.cancelTimeout();
|
|
537
|
-
pendingVoice.delete(causeId);
|
|
538
|
-
transport.confirmSend(causeId);
|
|
601
|
+
pendingVoice.delete(causeMessageId);
|
|
602
|
+
transport.confirmSend(causeMessageId);
|
|
539
603
|
|
|
540
|
-
if (
|
|
541
|
-
payload.eventType === 'voice.speak.result' &&
|
|
542
|
-
typeof audioBase64 === 'string'
|
|
543
|
-
) {
|
|
604
|
+
if (isSpeakResult) {
|
|
544
605
|
pending.resolve(
|
|
545
606
|
ok({
|
|
546
607
|
audioBase64,
|
|
@@ -711,6 +772,32 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
711
772
|
const sent = transport.send('emit', payload);
|
|
712
773
|
return ok({ id: sent.id as unknown as string });
|
|
713
774
|
},
|
|
775
|
+
abort: () => {
|
|
776
|
+
const payload = buildHostEmitPayload(
|
|
777
|
+
config.sessionId,
|
|
778
|
+
HOST_EVENT_TYPES.abort,
|
|
779
|
+
);
|
|
780
|
+
const sent = transport.send('emit', payload);
|
|
781
|
+
return ok({ id: sent.id as unknown as string });
|
|
782
|
+
},
|
|
783
|
+
reportTtsPlayback: (detail) => {
|
|
784
|
+
const payload = buildHostEmitPayload(
|
|
785
|
+
config.sessionId,
|
|
786
|
+
HOST_EVENT_TYPES.ttsPlayback,
|
|
787
|
+
detail,
|
|
788
|
+
);
|
|
789
|
+
const sent = transport.send('emit', payload);
|
|
790
|
+
return ok({ id: sent.id as unknown as string });
|
|
791
|
+
},
|
|
792
|
+
reportTtsBeat: (detail) => {
|
|
793
|
+
const payload = buildHostEmitPayload(
|
|
794
|
+
config.sessionId,
|
|
795
|
+
HOST_EVENT_TYPES.ttsBeat,
|
|
796
|
+
detail,
|
|
797
|
+
);
|
|
798
|
+
const sent = transport.send('emit', payload);
|
|
799
|
+
return ok({ id: sent.id as unknown as string });
|
|
800
|
+
},
|
|
714
801
|
reclaim: () => {
|
|
715
802
|
const payload = buildHostEmitPayload(
|
|
716
803
|
config.sessionId,
|
|
@@ -732,6 +819,10 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
732
819
|
scheduleTimer,
|
|
733
820
|
voiceSpeakTimeoutMs,
|
|
734
821
|
),
|
|
822
|
+
ttsPlayback: {
|
|
823
|
+
subscribe: ttsPlaybackStore.subscribe,
|
|
824
|
+
getSnapshot: ttsPlaybackStore.getSnapshot,
|
|
825
|
+
},
|
|
735
826
|
events: { onEvent: bus.onEvent, onAnyEvent: bus.onAnyEvent },
|
|
736
827
|
lifecycle,
|
|
737
828
|
time: { serverNow: time.serverNow, recordEvent: time.recordEvent },
|
|
@@ -762,6 +853,8 @@ export const createSession = (config: SessionConfig): Session => {
|
|
|
762
853
|
},
|
|
763
854
|
};
|
|
764
855
|
|
|
856
|
+
registerTtsPlaybackStore(session, ttsPlaybackStore);
|
|
857
|
+
|
|
765
858
|
transport.start();
|
|
766
859
|
return session;
|
|
767
860
|
};
|
|
@@ -805,7 +898,7 @@ const buildInferenceVerbs = (
|
|
|
805
898
|
});
|
|
806
899
|
const sent = transport.send('emit', {
|
|
807
900
|
sessionId,
|
|
808
|
-
eventType: `${INFERENCE_EVENT_PREFIX}${input.callKind}`,
|
|
901
|
+
eventType: `${INFERENCE_EVENT_PREFIX}${input.templateId}`,
|
|
809
902
|
data: serialised,
|
|
810
903
|
});
|
|
811
904
|
pending.set(sent.id, resolveFn);
|
|
@@ -824,16 +917,6 @@ const buildInferenceVerbs = (
|
|
|
824
917
|
};
|
|
825
918
|
return {
|
|
826
919
|
call,
|
|
827
|
-
// Convenience wrappers map to the most-common manifest `CallKind`s
|
|
828
|
-
// (per `@platform/manifest`'s `CallKindSchema` and
|
|
829
|
-
// `docs/conventions/prompt-composition.md`). Bundles that need a
|
|
830
|
-
// less-common kind (`host_resolve`, `host_recap`, `judge_funniness`,
|
|
831
|
-
// `compose_clue`, `narrate_event`) call `inference.call({ callKind,
|
|
832
|
-
// ... })` directly — the wrappers are a comfort for the dominant
|
|
833
|
-
// open-phase + judge paths, not an exhaustive cover.
|
|
834
|
-
host: (input) => call({ ...input, callKind: 'host_open_phase' }),
|
|
835
|
-
judge: (input) => call({ ...input, callKind: 'host_judge' }),
|
|
836
|
-
classify: (input) => call({ ...input, callKind: 'classify' }),
|
|
837
920
|
};
|
|
838
921
|
};
|
|
839
922
|
|
package/src/control.ts
CHANGED
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
* - `host.resume` — resume from pause.
|
|
16
16
|
* - `host.advancePhase` — request the next phase. The Runtime
|
|
17
17
|
* may reject if no transition matches.
|
|
18
|
+
* - `host.abort` — universal mid-game abort. The Runtime
|
|
19
|
+
* force-jumps to the manifest's abort/
|
|
20
|
+
* terminal phase (no per-phase wiring).
|
|
18
21
|
* - `host.reclaim` — reclaim the host slot from a hung
|
|
19
22
|
* host (the player who fires this
|
|
20
23
|
* becomes the new host if allowed).
|
|
@@ -26,6 +29,9 @@ export const HOST_EVENT_TYPES = {
|
|
|
26
29
|
pause: 'host.pause',
|
|
27
30
|
resume: 'host.resume',
|
|
28
31
|
advancePhase: 'host.advancePhase',
|
|
32
|
+
abort: 'host.abort',
|
|
33
|
+
ttsPlayback: 'host.ttsPlayback',
|
|
34
|
+
ttsBeat: 'host.ttsBeat',
|
|
29
35
|
reclaim: 'host.reclaim',
|
|
30
36
|
} as const;
|
|
31
37
|
|
package/src/index.ts
CHANGED
|
@@ -67,7 +67,6 @@ export {
|
|
|
67
67
|
buildInferenceRequest,
|
|
68
68
|
type InferenceCallInput,
|
|
69
69
|
type InferenceCallSuccess,
|
|
70
|
-
type SdkCallKind,
|
|
71
70
|
type SdkQualityTier,
|
|
72
71
|
type SerialisedInferenceRequest,
|
|
73
72
|
type SessionInference,
|
|
@@ -79,8 +78,20 @@ export {
|
|
|
79
78
|
type SessionVoice,
|
|
80
79
|
type SpeakInput,
|
|
81
80
|
type SpeakSuccess,
|
|
81
|
+
type TtsBeatMeta,
|
|
82
|
+
type TtsBeatReveal,
|
|
83
|
+
type VoiceAudioPayload,
|
|
82
84
|
} from './voice.js';
|
|
83
85
|
|
|
86
|
+
export {
|
|
87
|
+
createTtsPlaybackStore,
|
|
88
|
+
registerTtsPlaybackStore,
|
|
89
|
+
setTtsPlaybackTelemetry,
|
|
90
|
+
type TtsPlaybackSnapshot,
|
|
91
|
+
type TtsPlaybackStore,
|
|
92
|
+
type TtsPlaybackTelemetry,
|
|
93
|
+
} from './tts-playback.js';
|
|
94
|
+
|
|
84
95
|
export {
|
|
85
96
|
type EventBus,
|
|
86
97
|
type EventHandler,
|
package/src/inference.ts
CHANGED
|
@@ -1,98 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Inference verbs
|
|
3
|
-
* helpers: session.inference.host(slots), .judge(slots),
|
|
4
|
-
* .classify(slots), etc. — calls the Gateway through the Runtime
|
|
5
|
-
* (the SDK never holds the gateway-auth key).").
|
|
6
|
-
*
|
|
7
|
-
* The chunk-B7 contract is:
|
|
8
|
-
*
|
|
9
|
-
* 1. The caller invokes `session.inference.<verb>({ slots, output,
|
|
10
|
-
* qualityTier? })`.
|
|
11
|
-
* 2. The SDK serialises any caller-declared Zod output schema to
|
|
12
|
-
* JSON Schema (via `@platform/shared/json-schema.ts`).
|
|
13
|
-
* 3. The SDK sends an `emit` frame with a reserved `inference.*`
|
|
14
|
-
* event type. The Runtime (chunk B8a) receives it, signs the
|
|
15
|
-
* request, forwards to the Gateway, and emits a follow-up
|
|
16
|
-
* `event` frame with the result keyed by the original message
|
|
17
|
-
* id.
|
|
18
|
-
* 4. The SDK matches the inbound event against the pending call's
|
|
19
|
-
* id and resolves the typed result.
|
|
20
|
-
*
|
|
21
|
-
* **Why `emit` and not a new wire kind?** The protocol already
|
|
22
|
-
* carries `emit` for asynchronous client → server work; reserving an
|
|
23
|
-
* `inference.*` namespace on `eventType` keeps the surface compact
|
|
24
|
-
* and avoids a `PROTOCOL_VERSION` bump. The chunk-B8a Runtime is
|
|
25
|
-
* what enforces the gating + signing.
|
|
26
|
-
*
|
|
27
|
-
* Until chunk B8a wires the Runtime-side handler, the SDK's
|
|
28
|
-
* inference verbs return `Err({ kind: 'runtime_not_wired' })`. The
|
|
29
|
-
* SDK still serialises the schema + payload so the chunk-B7 surface
|
|
30
|
-
* is testable (the JSON Schema + outbound `emit` shape are
|
|
31
|
-
* regression-protected); the runtime-side roundtrip lights up with
|
|
32
|
-
* B8a.
|
|
2
|
+
* Inference verbs — game code calls by manifest `templateId`.
|
|
33
3
|
*/
|
|
34
4
|
|
|
5
|
+
import type { QualityTier } from '@wibly/internal-manifest';
|
|
35
6
|
import type { z, ZodTypeAny } from 'zod';
|
|
36
7
|
|
|
37
8
|
import { zodToJsonSchema, type JsonSchema } from '@wibly/internal-shared';
|
|
38
9
|
import type { Result } from '@wibly/internal-shared';
|
|
39
|
-
import type { CallKind, QualityTier } from '@wibly/internal-manifest';
|
|
40
10
|
|
|
41
11
|
import type { SdkError } from './errors.js';
|
|
42
12
|
|
|
43
|
-
/**
|
|
44
|
-
* Quality tier surfaced on the SDK boundary. Type-only re-export of
|
|
45
|
-
* `@platform/manifest`'s `QualityTier` so the manifest stays the
|
|
46
|
-
* single source of truth for the on-the-wire enum. The `import type`
|
|
47
|
-
* is erased at compile time, so the SDK still does not pull in the
|
|
48
|
-
* manifest's Zod runtime — only the type literals.
|
|
49
|
-
*
|
|
50
|
-
* The B7 close note ("kept as a string literal here so the SDK
|
|
51
|
-
* doesn't pull in the manifest's full Zod runtime") was right about
|
|
52
|
-
* the runtime concern but wrong to duplicate the literals — the
|
|
53
|
-
* duplication had already drifted from the manifest's enum at chunk
|
|
54
|
-
* close. Type-only re-export gives the same runtime weight (zero)
|
|
55
|
-
* with the contract honoured at compile time.
|
|
56
|
-
*/
|
|
57
|
-
export type SdkQualityTier = QualityTier;
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* Recognised call kinds. Type-only re-export of `@platform/manifest`'s
|
|
61
|
-
* `CallKind`; the manifest's `CallKindSchema` (enumerating
|
|
62
|
-
* `host_open_phase`, `host_judge`, `host_resolve`, `host_recap`,
|
|
63
|
-
* `judge_funniness`, `narrate_event`, `classify`, `compose_clue`) is
|
|
64
|
-
* the canonical wire-side set. New `callKind`s land in
|
|
65
|
-
* `@platform/manifest` and `docs/conventions/prompt-composition.md`
|
|
66
|
-
* in the same commit per chunk B4's convention; the SDK's enum
|
|
67
|
-
* follows automatically because it's a type alias.
|
|
68
|
-
*
|
|
69
|
-
* The trailing `(string & {})` widening preserves the chunk-B7
|
|
70
|
-
* authoring-aid behaviour ("the SDK accepts any string at runtime")
|
|
71
|
-
* while keeping the literal set as IntelliSense-discoverable
|
|
72
|
-
* autocompletes for in-tree game bundles.
|
|
73
|
-
*/
|
|
74
|
-
export type SdkCallKind = CallKind | (string & {});
|
|
75
|
-
|
|
76
13
|
export type InferenceCallInput<TOutput extends ZodTypeAny> = {
|
|
77
|
-
readonly callKind: SdkCallKind;
|
|
14
|
+
readonly templateId: string;
|
|
78
15
|
readonly slots: Readonly<Record<string, unknown>>;
|
|
79
16
|
readonly output?: TOutput;
|
|
80
|
-
readonly qualityTier?: SdkQualityTier;
|
|
81
|
-
/**
|
|
82
|
-
* Optional idempotency key. The Runtime forwards it to the
|
|
83
|
-
* Gateway's `metadata.idempotencyKey`. Use case: a host that
|
|
84
|
-
* wants to retry without double-billing.
|
|
85
|
-
*/
|
|
86
17
|
readonly idempotencyKey?: string;
|
|
87
18
|
};
|
|
88
19
|
|
|
89
20
|
export type InferenceCallSuccess<TOutput extends ZodTypeAny> = {
|
|
90
|
-
/** Raw model output (the Gateway's `output`). */
|
|
91
21
|
readonly output: string;
|
|
92
|
-
/** Parsed structured response. `null` if no schema was provided. */
|
|
93
22
|
readonly structured: TOutput extends ZodTypeAny ? z.infer<TOutput> | null : null;
|
|
94
|
-
/** Gateway usage block. Surface for debug; the operator-side
|
|
95
|
-
* dashboards use the audit ledger instead. */
|
|
96
23
|
readonly usage: {
|
|
97
24
|
readonly model: string;
|
|
98
25
|
readonly tokensIn: number;
|
|
@@ -103,8 +30,7 @@ export type InferenceCallSuccess<TOutput extends ZodTypeAny> = {
|
|
|
103
30
|
};
|
|
104
31
|
|
|
105
32
|
export type SerialisedInferenceRequest = {
|
|
106
|
-
readonly
|
|
107
|
-
readonly qualityTier: SdkQualityTier;
|
|
33
|
+
readonly templateId: string;
|
|
108
34
|
readonly slots: Readonly<Record<string, unknown>>;
|
|
109
35
|
readonly outputSchema: JsonSchema | undefined;
|
|
110
36
|
readonly idempotencyKey: string | undefined;
|
|
@@ -112,37 +38,19 @@ export type SerialisedInferenceRequest = {
|
|
|
112
38
|
|
|
113
39
|
export const INFERENCE_EVENT_PREFIX = 'inference.' as const;
|
|
114
40
|
|
|
115
|
-
/**
|
|
116
|
-
* Build the wire payload for an inference call. Surfaced as a pure
|
|
117
|
-
* helper so the testkit can assert the serialised shape without
|
|
118
|
-
* spinning a transport.
|
|
119
|
-
*/
|
|
120
41
|
export const buildInferenceRequest = <TOutput extends ZodTypeAny>(
|
|
121
42
|
input: InferenceCallInput<TOutput>,
|
|
122
43
|
): SerialisedInferenceRequest => ({
|
|
123
|
-
callKind: input.callKind,
|
124
|
-
qualityTier: input.qualityTier ?? 'standard',
|
|
44
|
+
templateId: input.templateId,
|
|
125
45
|
slots: input.slots,
|
|
126
46
|
outputSchema: input.output ? zodToJsonSchema(input.output) : undefined,
|
|
127
47
|
idempotencyKey: input.idempotencyKey,
|
|
128
48
|
});
|
|
129
49
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
* `client.ts` to the live transport; the Runtime wires up the
|
|
133
|
-
* server-side response under chunk B8a.
|
|
134
|
-
*/
|
|
50
|
+
export type SdkQualityTier = QualityTier;
|
|
51
|
+
|
|
135
52
|
export type SessionInference = {
|
|
136
53
|
readonly call: <TOutput extends ZodTypeAny = ZodTypeAny>(
|
|
137
54
|
input: InferenceCallInput<TOutput>,
|
|
138
55
|
) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
|
|
139
|
-
readonly host: <TOutput extends ZodTypeAny = ZodTypeAny>(
|
|
140
|
-
input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
|
|
141
|
-
) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
|
|
142
|
-
readonly judge: <TOutput extends ZodTypeAny = ZodTypeAny>(
|
|
143
|
-
input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
|
|
144
|
-
) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
|
|
145
|
-
readonly classify: <TOutput extends ZodTypeAny = ZodTypeAny>(
|
|
146
|
-
input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
|
|
147
|
-
) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
|
|
148
56
|
};
|
package/src/react.ts
CHANGED
|
@@ -37,6 +37,7 @@ import type { EventPayload, LifecyclePayload } from '@wibly/internal-protocol';
|
|
|
37
37
|
import type { ConsentDecision, ConsentRequiredPayload } from './consent.js';
|
|
38
38
|
import type { Session, SessionConfig, SessionState } from './client.js';
|
|
39
39
|
import { createSession } from './client.js';
|
|
40
|
+
import type { TtsPlaybackSnapshot } from './tts-playback.js';
|
|
40
41
|
|
|
41
42
|
const SessionContext = createContext<Session | null>(null);
|
|
42
43
|
|
|
@@ -121,6 +122,20 @@ export const useConnectionState = (): SessionState['connectionState'] => {
|
|
|
121
122
|
return state.connectionState;
|
|
122
123
|
};
|
|
123
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Subscribe to client-side TTS playback telemetry (speech energy +
|
|
127
|
+
* playing flag). Populated by the Host shell's audio sampler; idle
|
|
128
|
+
* until the first clip plays.
|
|
129
|
+
*/
|
|
130
|
+
export const useTtsPlayback = (): TtsPlaybackSnapshot => {
|
|
131
|
+
const session = useSession();
|
|
132
|
+
return useSyncExternalStore(
|
|
133
|
+
session.ttsPlayback.subscribe,
|
|
134
|
+
session.ttsPlayback.getSnapshot,
|
|
135
|
+
session.ttsPlayback.getSnapshot,
|
|
136
|
+
);
|
|
137
|
+
};
|
|
138
|
+
|
|
124
139
|
/**
|
|
125
140
|
* Subscribe to a server-emitted event by `eventType`. The handler
|
|
126
141
|
* fires on every matching `event` frame. Unsubscribes on unmount.
|
package/src/time.ts
CHANGED
|
@@ -24,6 +24,10 @@ export type ServerTimeSource = {
|
|
|
24
24
|
readonly recordedEvents: () => readonly RecordedEvent[];
|
|
25
25
|
/** Internal: update the measured skew on a fresh pong. */
|
|
26
26
|
readonly updateSkew: (skewMs: number) => void;
|
|
27
|
+
/** Freeze `serverNow()` while the session is paused. */
|
|
28
|
+
readonly freeze: () => void;
|
|
29
|
+
/** Resume advancing `serverNow()` after a pause. */
|
|
30
|
+
readonly unfreeze: () => void;
|
|
27
31
|
};
|
|
28
32
|
|
|
29
33
|
export type RecordedEvent = {
|
|
@@ -40,10 +44,14 @@ export const createServerTimeSource = (
|
|
|
40
44
|
): ServerTimeSource => {
|
|
41
45
|
const nowFn = opts.now ?? (() => Date.now());
|
|
42
46
|
let skew = 0;
|
|
47
|
+
let frozenServerNow: number | null = null;
|
|
43
48
|
const events: RecordedEvent[] = [];
|
|
44
49
|
|
|
45
50
|
return {
|
|
46
|
-
serverNow: () => nowFn() + skew,
|
|
51
|
+
serverNow: () => {
|
|
52
|
+
if (frozenServerNow !== null) return frozenServerNow;
|
|
53
|
+
return nowFn() + skew;
|
|
54
|
+
},
|
|
47
55
|
recordEvent: (eventId) => {
|
|
48
56
|
const clientTs = nowFn();
|
|
49
57
|
events.push({ eventId, clientTs });
|
|
@@ -54,5 +62,13 @@ export const createServerTimeSource = (
|
|
|
54
62
|
updateSkew: (newSkew) => {
|
|
55
63
|
skew = newSkew;
|
|
56
64
|
},
|
|
65
|
+
freeze: () => {
|
|
66
|
+
if (frozenServerNow === null) {
|
|
67
|
+
frozenServerNow = nowFn() + skew;
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
unfreeze: () => {
|
|
71
|
+
frozenServerNow = null;
|
|
72
|
+
},
|
|
57
73
|
};
|
|
58
74
|
};
|
package/src/tts-playback.test.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
createTtsPlaybackStore,
|
|
5
|
+
registerTtsPlaybackStore,
|
|
6
|
+
setTtsPlaybackTelemetry,
|
|
7
|
+
} from './tts-playback.js';
|
|
8
|
+
|
|
9
|
+
describe('createTtsPlaybackStore', () => {
|
|
10
|
+
it('starts idle', () => {
|
|
11
|
+
const store = createTtsPlaybackStore();
|
|
12
|
+
expect(store.getSnapshot()).toEqual({ isPlaying: false, speechLevel: 0 });
|
|
13
|
+
});
|
|
14
|
+
|
|
15
|
+
it('notifies subscribers when the snapshot changes', () => {
|
|
16
|
+
const store = createTtsPlaybackStore();
|
|
17
|
+
const listener = vi.fn();
|
|
18
|
+
const unsubscribe = store.subscribe(listener);
|
|
19
|
+
|
|
20
|
+
store.set({ isPlaying: true, speechLevel: 0.5 });
|
|
21
|
+
|
|
22
|
+
expect(listener).toHaveBeenCalledTimes(1);
|
|
23
|
+
expect(store.getSnapshot()).toEqual({ isPlaying: true, speechLevel: 0.5 });
|
|
24
|
+
|
|
25
|
+
unsubscribe();
|
|
26
|
+
store.set({ isPlaying: false, speechLevel: 0 });
|
|
27
|
+
expect(listener).toHaveBeenCalledTimes(1);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it('treats an identical set as a no-op and keeps a stable reference', () => {
|
|
31
|
+
const store = createTtsPlaybackStore();
|
|
32
|
+
const listener = vi.fn();
|
|
33
|
+
store.subscribe(listener);
|
|
34
|
+
|
|
35
|
+
const before = store.getSnapshot();
|
|
36
|
+
store.set({ isPlaying: false, speechLevel: 0 });
|
|
37
|
+
|
|
38
|
+
// useSyncExternalStore depends on a stable reference when nothing
|
|
39
|
+
// changed — otherwise it re-renders (or warns) every read.
|
|
40
|
+
expect(store.getSnapshot()).toBe(before);
|
|
41
|
+
expect(listener).not.toHaveBeenCalled();
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
describe('setTtsPlaybackTelemetry', () => {
|
|
46
|
+
it('routes a sample to the registered store', () => {
|
|
47
|
+
const session = {};
|
|
48
|
+
const store = createTtsPlaybackStore();
|
|
49
|
+
registerTtsPlaybackStore(session, store);
|
|
50
|
+
|
|
51
|
+
setTtsPlaybackTelemetry(session, { isPlaying: true, speechLevel: 0.8 });
|
|
52
|
+
|
|
53
|
+
expect(store.getSnapshot()).toEqual({ isPlaying: true, speechLevel: 0.8 });
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it('is a no-op for a session with no registered store', () => {
|
|
57
|
+
expect(() =>
|
|
58
|
+
setTtsPlaybackTelemetry({}, { isPlaying: true, speechLevel: 1 }),
|
|
59
|
+
).not.toThrow();
|
|
60
|
+
});
|
|
61
|
+
});
|
package/src/tts-playback.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Client-side TTS playback telemetry.
|
|
3
|
+
*
|
|
4
|
+
* This is a purely local, ephemeral signal — it never touches the
|
|
5
|
+
* WebSocket. The Host shell samples speech energy from the single
|
|
6
|
+
* `<audio>` element (Web Audio `AnalyserNode`) and pushes a smoothed
|
|
7
|
+
* `speechLevel` here at animation-frame cadence; Experience bundles
|
|
8
|
+
* read it through `session.ttsPlayback` to drive avatar animation
|
|
9
|
+
* (the amplitude-driven fallback the Platform Spec §3.5 describes for
|
|
10
|
+
* providers without phoneme metadata).
|
|
11
|
+
*
|
|
12
|
+
* Why a dedicated store and not a `voice.*` event: the signal updates
|
|
13
|
+
* ~60×/sec. Pumping that through `session.events` would flood every
|
|
14
|
+
* listener and the wire-shaped bus machinery. A tiny
|
|
15
|
+
* `useSyncExternalStore`-friendly snapshot store keeps the hot path
|
|
16
|
+
* cheap and matches the SDK's existing one-store-per-concern pattern.
|
|
17
|
+
*
|
|
18
|
+
* The read surface (`subscribe` / `getSnapshot`) is public on
|
|
19
|
+
* `Session.ttsPlayback`. The write surface lives behind
|
|
20
|
+
* `setTtsPlaybackTelemetry(session, snapshot)` so only the shell that
|
|
21
|
+
* owns the audio element mutates it — bundles consume, never produce.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
export type TtsPlaybackSnapshot = {
|
|
25
|
+
/** True while a TTS clip is actively playing. */
|
|
26
|
+
readonly isPlaying: boolean;
|
|
27
|
+
/** Smoothed speech energy, normalised to 0..1. */
|
|
28
|
+
readonly speechLevel: number;
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
/** Read surface exposed on `Session.ttsPlayback`. */
|
|
32
|
+
export type TtsPlaybackTelemetry = {
|
|
33
|
+
readonly subscribe: (listener: () => void) => () => void;
|
|
34
|
+
readonly getSnapshot: () => TtsPlaybackSnapshot;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
/** Full store — the read surface plus the shell-only setter. */
|
|
38
|
+
export type TtsPlaybackStore = TtsPlaybackTelemetry & {
|
|
39
|
+
readonly set: (next: TtsPlaybackSnapshot) => void;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
const IDLE: TtsPlaybackSnapshot = { isPlaying: false, speechLevel: 0 };
|
|
43
|
+
|
|
44
|
+
export const createTtsPlaybackStore = (): TtsPlaybackStore => {
|
|
45
|
+
let snapshot: TtsPlaybackSnapshot = IDLE;
|
|
46
|
+
const listeners = new Set<() => void>();
|
|
47
|
+
|
|
48
|
+
return {
|
|
49
|
+
subscribe: (listener) => {
|
|
50
|
+
listeners.add(listener);
|
|
51
|
+
return () => {
|
|
52
|
+
listeners.delete(listener);
|
|
53
|
+
};
|
|
54
|
+
},
|
|
55
|
+
getSnapshot: () => snapshot,
|
|
56
|
+
set: (next) => {
|
|
57
|
+
if (
|
|
58
|
+
next.isPlaying === snapshot.isPlaying &&
|
|
59
|
+
next.speechLevel === snapshot.speechLevel
|
|
60
|
+
) {
|
|
61
|
+
return;
|
|
62
|
+
}
|
|
63
|
+
snapshot = next;
|
|
64
|
+
for (const listener of Array.from(listeners)) {
|
|
65
|
+
try {
|
|
66
|
+
listener();
|
|
67
|
+
} catch {
|
|
68
|
+
// Listeners must not throw; swallow to avoid cascading.
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
};
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Registry mapping a `Session` to its writable telemetry store. The
|
|
77
|
+
* `Session` only exposes the read surface, so the shell pushes
|
|
78
|
+
* samples through this side channel keyed by the session reference.
|
|
79
|
+
*/
|
|
80
|
+
const storeRegistry = new WeakMap<object, TtsPlaybackStore>();
|
|
81
|
+
|
|
82
|
+
export const registerTtsPlaybackStore = (
|
|
83
|
+
session: object,
|
|
84
|
+
store: TtsPlaybackStore,
|
|
85
|
+
): void => {
|
|
86
|
+
storeRegistry.set(session, store);
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Push a telemetry sample for the given session. No-op if the session
|
|
91
|
+
* has no registered store (e.g. a stub session in a dev harness that
|
|
92
|
+
* supplies its own `ttsPlayback`).
|
|
93
|
+
*/
|
|
94
|
+
export const setTtsPlaybackTelemetry = (
|
|
95
|
+
session: object,
|
|
96
|
+
snapshot: TtsPlaybackSnapshot,
|
|
97
|
+
): void => {
|
|
98
|
+
storeRegistry.get(session)?.set(snapshot);
|
|
99
|
+
};
|
package/src/voice.ts
CHANGED
|
@@ -47,6 +47,27 @@ export type SpeakSuccess = {
|
|
|
47
47
|
readonly durationMs: number;
|
|
48
48
|
};
|
|
49
49
|
|
|
50
|
+
/** Optional beat metadata for in-phase TTS choreography (chunk B26). */
|
|
51
|
+
export type TtsBeatReveal = {
|
|
52
|
+
readonly kind: 'none' | 'player_submission';
|
|
53
|
+
readonly playerId?: string;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
export type TtsBeatMeta = {
|
|
57
|
+
readonly beatId: string;
|
|
58
|
+
readonly sequence: number;
|
|
59
|
+
readonly reveal?: TtsBeatReveal;
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
/** Wire shape on `voice.audio` events (Host + Experience bundles). */
|
|
63
|
+
export type VoiceAudioPayload = SpeakSuccess & {
|
|
64
|
+
readonly id?: string;
|
|
65
|
+
readonly causeMessageId?: string;
|
|
66
|
+
readonly caption?: string | null;
|
|
67
|
+
readonly beat?: TtsBeatMeta;
|
|
68
|
+
readonly cues?: ReadonlyArray<{ readonly at: number; readonly kind: string }>;
|
|
69
|
+
};
|
|
70
|
+
|
|
50
71
|
export type SessionVoice = {
|
|
51
72
|
readonly speak: (input: SpeakInput) => Promise<Result<SpeakSuccess, SdkError>>;
|
|
52
73
|
};
|