@wibly/sdk 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # `@wibly/sdk` — Changelog
2
2
 
3
+ ## 0.1.3 — 2026-06-08
4
+
7
+ ### Added
8
+
9
+ - `session.ttsPlayback` — client-side TTS playback telemetry
10
+ (`subscribe` / `getSnapshot`) exposing `{ isPlaying, speechLevel }`.
11
+ The Host shell samples speech energy from its single `<audio>` element
12
+ and pushes smoothed levels via `setTtsPlaybackTelemetry(session, …)` so
13
+ Experience bundles can drive amplitude-synced avatar animation without
14
+ high-frequency `voice.*` events on the wire.
15
+ - `@wibly/sdk/react` — `useTtsPlayback()` hook wrapping the telemetry
16
+ store for shell and bundle React trees.
17
+
18
+ ## 0.1.2 — 2026-06-08
19
+
20
+ ### Added
21
+
22
+ - `session.host.abort()` — a universal mid-game abort. Emits the new
23
+ `host.abort` control verb; the Runtime force-jumps to the manifest's
24
+ `workflow.abortPhaseId` (or the last declared phase when omitted) from
25
+ any phase. Games no longer need per-phase abort transitions.
26
+
3
27
  ## 0.1.1 — 2026-05-30
4
28
 
5
29
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wibly/sdk",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Wibly @wibly/sdk",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -18,9 +18,9 @@
18
18
  "access": "public"
19
19
  },
20
20
  "dependencies": {
21
- "@wibly/internal-manifest": "0.1.1",
22
- "@wibly/internal-protocol": "0.1.1",
23
- "@wibly/internal-shared": "0.1.1",
21
+ "@wibly/internal-manifest": "0.1.3",
22
+ "@wibly/internal-protocol": "0.1.3",
23
+ "@wibly/internal-shared": "0.1.3",
24
24
  "zod": "^3.25.76"
25
25
  },
26
26
  "peerDependencies": {
package/src/client.ts CHANGED
@@ -77,6 +77,11 @@ import {
77
77
  createServerTimeSource,
78
78
  type ServerTimeSource,
79
79
  } from './time.js';
80
+ import {
81
+ createTtsPlaybackStore,
82
+ registerTtsPlaybackStore,
83
+ type TtsPlaybackTelemetry,
84
+ } from './tts-playback.js';
80
85
  import {
81
86
  createTransport,
82
87
  type Transport,
@@ -181,10 +186,36 @@ export type Session = {
181
186
  readonly advancePhase: (
182
187
  detail?: unknown,
183
188
  ) => Result<{ readonly id: string }, SdkError>;
189
+ /**
190
+ * Universal mid-game abort. The Runtime force-jumps to the
191
+ * manifest's abort/terminal phase (`workflow.abortPhaseId`, else
192
+ * the last declared phase) from wherever the session currently is.
193
+ * No per-phase wiring required.
194
+ */
195
+ readonly abort: () => Result<{ readonly id: string }, SdkError>;
196
+ readonly reportTtsPlayback: (detail: {
197
+ readonly state: 'active' | 'idle';
198
+ }) => Result<{ readonly id: string }, SdkError>;
199
+ readonly reportTtsBeat: (detail: {
200
+ readonly beatId: string;
201
+ readonly event: 'clip_start' | 'clip_end';
202
+ readonly clipId?: string;
203
+ readonly reveal?: {
204
+ readonly kind: 'none' | 'player_submission';
205
+ readonly playerId?: string;
206
+ };
207
+ }) => Result<{ readonly id: string }, SdkError>;
184
208
  readonly reclaim: () => Result<{ readonly id: string }, SdkError>;
185
209
  };
186
210
  readonly inference: SessionInference;
187
211
  readonly voice: SessionVoice;
212
+ /**
213
+ * Client-side TTS playback telemetry (speech energy + playing
214
+ * flag). Populated by the Host shell from the local audio element;
215
+ * read by Experience bundles to drive avatar animation. Never
216
+ * crosses the wire. See `tts-playback.ts`.
217
+ */
218
+ readonly ttsPlayback: TtsPlaybackTelemetry;
188
219
  readonly events: Pick<EventBus, 'onEvent' | 'onAnyEvent'>;
189
220
  readonly lifecycle: LifecycleBindings;
190
221
  /** Server-time helper. See `time.ts`. */
@@ -221,6 +252,7 @@ const DEFAULT_VOICE_SPEAK_TIMEOUT_MS = 30_000;
221
252
  export const createSession = (config: SessionConfig): Session => {
222
253
  const store = createSessionStore();
223
254
  const bus = createEventBus();
255
+ const ttsPlaybackStore = createTtsPlaybackStore();
224
256
  const time = createServerTimeSource({ now: config.now });
225
257
  const lifecycle = createLifecycleBindings(bus);
226
258
  let isPreviewFlag: boolean = config.isPreview ?? false;
@@ -310,9 +342,11 @@ export const createSession = (config: SessionConfig): Session => {
310
342
  ? 'host'
311
343
  : 'system';
312
344
  store.setSessionPaused(true, via);
345
+ time.freeze();
313
346
  }
314
347
  if (payload.transition === 'continued') {
315
348
  store.setSessionPaused(false, null);
349
+ time.unfreeze();
316
350
  }
317
351
  if (payload.transition === 'seat.recovery_code_issued') {
318
352
  const detail = payload.detail;
@@ -481,6 +515,8 @@ export const createSession = (config: SessionConfig): Session => {
481
515
  audioBase64: string;
482
516
  contentType?: unknown;
483
517
  durationMs?: unknown;
518
+ beat?: unknown;
519
+ cues?: unknown;
484
520
  },
485
521
  caption: string | null | undefined,
486
522
  ): void => {
@@ -499,6 +535,11 @@ export const createSession = (config: SessionConfig): Session => {
499
535
  ? data.durationMs
500
536
  : estimateAudioDurationMs(data.audioBase64),
501
537
  caption: caption === undefined ? null : caption,
538
+ // Carry in-phase beat + lip-sync cues through to the Host shell.
539
+ // These drive Experience reveal choreography via `reportTtsBeat`
540
+ // → `onTtsBeat`; dropping them here silently breaks beat reveals.
541
+ ...(data.beat !== undefined ? { beat: data.beat } : {}),
542
+ ...(data.cues !== undefined ? { cues: data.cues } : {}),
502
543
  },
503
544
  });
504
545
  };
@@ -510,37 +551,57 @@ export const createSession = (config: SessionConfig): Session => {
510
551
  contentType?: string;
511
552
  durationMs?: number;
512
553
  kind?: string;
554
+ beat?: unknown;
555
+ cues?: unknown;
513
556
  } | null;
514
- if (!data || typeof data.causeMessageId !== 'string') return;
515
- const causeId = data.causeMessageId as unknown as MessageId;
516
- const pending = pendingVoice.get(causeId);
517
- const caption = pending?.caption;
518
- const audioBase64 = data.audioBase64;
557
+ if (!data) return;
519
558
 
520
- if (
559
+ const audioBase64 = data.audioBase64;
560
+ const isSpeakResult =
521
561
  payload.eventType === 'voice.speak.result' &&
522
- typeof audioBase64 === 'string'
523
- ) {
562
+ typeof audioBase64 === 'string';
563
+
564
+ const causeId = (() => {
565
+ if (typeof data.causeMessageId === 'string' && data.causeMessageId.length > 0) {
566
+ return data.causeMessageId as MessageId;
567
+ }
568
+ if (isSpeakResult) {
569
+ return `voice-server-${audioBase64.length}-${audioBase64.slice(0, 12)}` as MessageId;
570
+ }
571
+ return null;
572
+ })();
573
+
574
+ if (isSpeakResult && causeId !== null) {
575
+ const pending =
576
+ typeof data.causeMessageId === 'string' && data.causeMessageId.length > 0
577
+ ? pendingVoice.get(data.causeMessageId as MessageId)
578
+ : undefined;
524
579
  dispatchVoiceAudio(
525
580
  causeId,
526
581
  {
527
582
  audioBase64,
528
583
  contentType: data.contentType,
529
584
  durationMs: data.durationMs,
585
+ beat: data.beat,
586
+ cues: data.cues,
530
587
  },
531
- caption ?? null,
588
+ pending?.caption ?? null,
532
589
  );
533
590
  }
534
591
 
592
+ if (typeof data.causeMessageId !== 'string' || data.causeMessageId.length === 0) {
593
+ return;
594
+ }
595
+
596
+ const causeMessageId = data.causeMessageId as MessageId;
597
+ const pending = pendingVoice.get(causeMessageId);
598
+
535
599
  if (pending === undefined) return;
536
600
  pending.cancelTimeout();
537
- pendingVoice.delete(causeId);
538
- transport.confirmSend(causeId);
601
+ pendingVoice.delete(causeMessageId);
602
+ transport.confirmSend(causeMessageId);
539
603
 
540
- if (
541
- payload.eventType === 'voice.speak.result' &&
542
- typeof audioBase64 === 'string'
543
- ) {
604
+ if (isSpeakResult) {
544
605
  pending.resolve(
545
606
  ok({
546
607
  audioBase64,
@@ -711,6 +772,32 @@ export const createSession = (config: SessionConfig): Session => {
711
772
  const sent = transport.send('emit', payload);
712
773
  return ok({ id: sent.id as unknown as string });
713
774
  },
775
+ abort: () => {
776
+ const payload = buildHostEmitPayload(
777
+ config.sessionId,
778
+ HOST_EVENT_TYPES.abort,
779
+ );
780
+ const sent = transport.send('emit', payload);
781
+ return ok({ id: sent.id as unknown as string });
782
+ },
783
+ reportTtsPlayback: (detail) => {
784
+ const payload = buildHostEmitPayload(
785
+ config.sessionId,
786
+ HOST_EVENT_TYPES.ttsPlayback,
787
+ detail,
788
+ );
789
+ const sent = transport.send('emit', payload);
790
+ return ok({ id: sent.id as unknown as string });
791
+ },
792
+ reportTtsBeat: (detail) => {
793
+ const payload = buildHostEmitPayload(
794
+ config.sessionId,
795
+ HOST_EVENT_TYPES.ttsBeat,
796
+ detail,
797
+ );
798
+ const sent = transport.send('emit', payload);
799
+ return ok({ id: sent.id as unknown as string });
800
+ },
714
801
  reclaim: () => {
715
802
  const payload = buildHostEmitPayload(
716
803
  config.sessionId,
@@ -732,6 +819,10 @@ export const createSession = (config: SessionConfig): Session => {
732
819
  scheduleTimer,
733
820
  voiceSpeakTimeoutMs,
734
821
  ),
822
+ ttsPlayback: {
823
+ subscribe: ttsPlaybackStore.subscribe,
824
+ getSnapshot: ttsPlaybackStore.getSnapshot,
825
+ },
735
826
  events: { onEvent: bus.onEvent, onAnyEvent: bus.onAnyEvent },
736
827
  lifecycle,
737
828
  time: { serverNow: time.serverNow, recordEvent: time.recordEvent },
@@ -762,6 +853,8 @@ export const createSession = (config: SessionConfig): Session => {
762
853
  },
763
854
  };
764
855
 
856
+ registerTtsPlaybackStore(session, ttsPlaybackStore);
857
+
765
858
  transport.start();
766
859
  return session;
767
860
  };
@@ -805,7 +898,7 @@ const buildInferenceVerbs = (
805
898
  });
806
899
  const sent = transport.send('emit', {
807
900
  sessionId,
808
- eventType: `${INFERENCE_EVENT_PREFIX}${input.callKind}`,
901
+ eventType: `${INFERENCE_EVENT_PREFIX}${input.templateId}`,
809
902
  data: serialised,
810
903
  });
811
904
  pending.set(sent.id, resolveFn);
@@ -824,16 +917,6 @@ const buildInferenceVerbs = (
824
917
  };
825
918
  return {
826
919
  call,
827
- // Convenience wrappers map to the most-common manifest `CallKind`s
828
- // (per `@platform/manifest`'s `CallKindSchema` and
829
- // `docs/conventions/prompt-composition.md`). Bundles that need a
830
- // less-common kind (`host_resolve`, `host_recap`, `judge_funniness`,
831
- // `compose_clue`, `narrate_event`) call `inference.call({ callKind,
832
- // ... })` directly — the wrappers are a comfort for the dominant
833
- // open-phase + judge paths, not an exhaustive cover.
834
- host: (input) => call({ ...input, callKind: 'host_open_phase' }),
835
- judge: (input) => call({ ...input, callKind: 'host_judge' }),
836
- classify: (input) => call({ ...input, callKind: 'classify' }),
837
920
  };
838
921
  };
839
922
 
package/src/control.ts CHANGED
@@ -15,6 +15,9 @@
15
15
  * - `host.resume` — resume from pause.
16
16
  * - `host.advancePhase` — request the next phase. The Runtime
17
17
  * may reject if no transition matches.
18
+ * - `host.abort` — universal mid-game abort. The Runtime
19
+ * force-jumps to the manifest's abort/
20
+ * terminal phase (no per-phase wiring).
18
21
  * - `host.reclaim` — reclaim the host slot from a hung
19
22
  * host (the player who fires this
20
23
  * becomes the new host if allowed).
@@ -26,6 +29,9 @@ export const HOST_EVENT_TYPES = {
26
29
  pause: 'host.pause',
27
30
  resume: 'host.resume',
28
31
  advancePhase: 'host.advancePhase',
32
+ abort: 'host.abort',
33
+ ttsPlayback: 'host.ttsPlayback',
34
+ ttsBeat: 'host.ttsBeat',
29
35
  reclaim: 'host.reclaim',
30
36
  } as const;
31
37
 
package/src/index.ts CHANGED
@@ -67,7 +67,6 @@ export {
67
67
  buildInferenceRequest,
68
68
  type InferenceCallInput,
69
69
  type InferenceCallSuccess,
70
- type SdkCallKind,
71
70
  type SdkQualityTier,
72
71
  type SerialisedInferenceRequest,
73
72
  type SessionInference,
@@ -79,8 +78,20 @@ export {
79
78
  type SessionVoice,
80
79
  type SpeakInput,
81
80
  type SpeakSuccess,
81
+ type TtsBeatMeta,
82
+ type TtsBeatReveal,
83
+ type VoiceAudioPayload,
82
84
  } from './voice.js';
83
85
 
86
+ export {
87
+ createTtsPlaybackStore,
88
+ registerTtsPlaybackStore,
89
+ setTtsPlaybackTelemetry,
90
+ type TtsPlaybackSnapshot,
91
+ type TtsPlaybackStore,
92
+ type TtsPlaybackTelemetry,
93
+ } from './tts-playback.js';
94
+
84
95
  export {
85
96
  type EventBus,
86
97
  type EventHandler,
package/src/inference.ts CHANGED
@@ -1,98 +1,25 @@
1
1
  /**
2
- * Inference verbs (per chunk B7 build: "inference.ts typed call
3
- * helpers: session.inference.host(slots), .judge(slots),
4
- * .classify(slots), etc. — calls the Gateway through the Runtime
5
- * (the SDK never holds the gateway-auth key).").
6
- *
7
- * The chunk-B7 contract is:
8
- *
9
- * 1. The caller invokes `session.inference.<verb>({ slots, output,
10
- * qualityTier? })`.
11
- * 2. The SDK serialises any caller-declared Zod output schema to
12
- * JSON Schema (via `@platform/shared/json-schema.ts`).
13
- * 3. The SDK sends an `emit` frame with a reserved `inference.*`
14
- * event type. The Runtime (chunk B8a) receives it, signs the
15
- * request, forwards to the Gateway, and emits a follow-up
16
- * `event` frame with the result keyed by the original message
17
- * id.
18
- * 4. The SDK matches the inbound event against the pending call's
19
- * id and resolves the typed result.
20
- *
21
- * **Why `emit` and not a new wire kind?** The protocol already
22
- * carries `emit` for asynchronous client → server work; reserving an
23
- * `inference.*` namespace on `eventType` keeps the surface compact
24
- * and avoids a `PROTOCOL_VERSION` bump. The chunk-B8a Runtime is
25
- * what enforces the gating + signing.
26
- *
27
- * Until chunk B8a wires the Runtime-side handler, the SDK's
28
- * inference verbs return `Err({ kind: 'runtime_not_wired' })`. The
29
- * SDK still serialises the schema + payload so the chunk-B7 surface
30
- * is testable (the JSON Schema + outbound `emit` shape are
31
- * regression-protected); the runtime-side roundtrip lights up with
32
- * B8a.
2
+ * Inference verbs that game code calls by manifest `templateId`.
33
3
  */
34
4
 
5
+ import type { QualityTier } from '@wibly/internal-manifest';
35
6
  import type { z, ZodTypeAny } from 'zod';
36
7
 
37
8
  import { zodToJsonSchema, type JsonSchema } from '@wibly/internal-shared';
38
9
  import type { Result } from '@wibly/internal-shared';
39
- import type { CallKind, QualityTier } from '@wibly/internal-manifest';
40
10
 
41
11
  import type { SdkError } from './errors.js';
42
12
 
43
- /**
44
- * Quality tier surfaced on the SDK boundary. Type-only re-export of
45
- * `@platform/manifest`'s `QualityTier` so the manifest stays the
46
- * single source of truth for the on-the-wire enum. The `import type`
47
- * is erased at compile time, so the SDK still does not pull in the
48
- * manifest's Zod runtime — only the type literals.
49
- *
50
- * The B7 close note ("kept as a string literal here so the SDK
51
- * doesn't pull in the manifest's full Zod runtime") was right about
52
- * the runtime concern but wrong to duplicate the literals — the
53
- * duplication had already drifted from the manifest's enum at chunk
54
- * close. Type-only re-export gives the same runtime weight (zero)
55
- * with the contract honoured at compile time.
56
- */
57
- export type SdkQualityTier = QualityTier;
58
-
59
- /**
60
- * Recognised call kinds. Type-only re-export of `@platform/manifest`'s
61
- * `CallKind`; the manifest's `CallKindSchema` (enumerating
62
- * `host_open_phase`, `host_judge`, `host_resolve`, `host_recap`,
63
- * `judge_funniness`, `narrate_event`, `classify`, `compose_clue`) is
64
- * the canonical wire-side set. New `callKind`s land in
65
- * `@platform/manifest` and `docs/conventions/prompt-composition.md`
66
- * in the same commit per chunk B4's convention; the SDK's enum
67
- * follows automatically because it's a type alias.
68
- *
69
- * The trailing `(string & {})` widening preserves the chunk-B7
70
- * authoring-aid behaviour ("the SDK accepts any string at runtime")
71
- * while keeping the literal set as IntelliSense-discoverable
72
- * autocompletes for in-tree game bundles.
73
- */
74
- export type SdkCallKind = CallKind | (string & {});
75
-
76
13
  export type InferenceCallInput<TOutput extends ZodTypeAny> = {
77
- readonly callKind: SdkCallKind;
14
+ readonly templateId: string;
78
15
  readonly slots: Readonly<Record<string, unknown>>;
79
16
  readonly output?: TOutput;
80
- readonly qualityTier?: SdkQualityTier;
81
- /**
82
- * Optional idempotency key. The Runtime forwards it to the
83
- * Gateway's `metadata.idempotencyKey`. Use case: a host that
84
- * wants to retry without double-billing.
85
- */
86
17
  readonly idempotencyKey?: string;
87
18
  };
88
19
 
89
20
  export type InferenceCallSuccess<TOutput extends ZodTypeAny> = {
90
- /** Raw model output (the Gateway's `output`). */
91
21
  readonly output: string;
92
- /** Parsed structured response. `null` if no schema was provided. */
93
22
  readonly structured: TOutput extends ZodTypeAny ? z.infer<TOutput> | null : null;
94
- /** Gateway usage block. Surface for debug; the operator-side
95
- * dashboards use the audit ledger instead. */
96
23
  readonly usage: {
97
24
  readonly model: string;
98
25
  readonly tokensIn: number;
@@ -103,8 +30,7 @@ export type InferenceCallSuccess<TOutput extends ZodTypeAny> = {
103
30
  };
104
31
 
105
32
  export type SerialisedInferenceRequest = {
106
- readonly callKind: SdkCallKind;
107
- readonly qualityTier: SdkQualityTier;
33
+ readonly templateId: string;
108
34
  readonly slots: Readonly<Record<string, unknown>>;
109
35
  readonly outputSchema: JsonSchema | undefined;
110
36
  readonly idempotencyKey: string | undefined;
@@ -112,37 +38,19 @@ export type SerialisedInferenceRequest = {
112
38
 
113
39
  export const INFERENCE_EVENT_PREFIX = 'inference.' as const;
114
40
 
115
- /**
116
- * Build the wire payload for an inference call. Surfaced as a pure
117
- * helper so the testkit can assert the serialised shape without
118
- * spinning a transport.
119
- */
120
41
  export const buildInferenceRequest = <TOutput extends ZodTypeAny>(
121
42
  input: InferenceCallInput<TOutput>,
122
43
  ): SerialisedInferenceRequest => ({
123
- callKind: input.callKind,
124
- qualityTier: input.qualityTier ?? 'standard',
44
+ templateId: input.templateId,
125
45
  slots: input.slots,
126
46
  outputSchema: input.output ? zodToJsonSchema(input.output) : undefined,
127
47
  idempotencyKey: input.idempotencyKey,
128
48
  });
129
49
 
130
- /**
131
- * The async `Session.inference` namespace. Each verb is bound by
132
- * `client.ts` to the live transport; the Runtime wires up the
133
- * server-side response under chunk B8a.
134
- */
50
+ export type SdkQualityTier = QualityTier;
51
+
135
52
  export type SessionInference = {
136
53
  readonly call: <TOutput extends ZodTypeAny = ZodTypeAny>(
137
54
  input: InferenceCallInput<TOutput>,
138
55
  ) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
139
- readonly host: <TOutput extends ZodTypeAny = ZodTypeAny>(
140
- input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
141
- ) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
142
- readonly judge: <TOutput extends ZodTypeAny = ZodTypeAny>(
143
- input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
144
- ) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
145
- readonly classify: <TOutput extends ZodTypeAny = ZodTypeAny>(
146
- input: Omit<InferenceCallInput<TOutput>, 'callKind'>,
147
- ) => Promise<Result<InferenceCallSuccess<TOutput>, SdkError>>;
148
56
  };
package/src/react.ts CHANGED
@@ -37,6 +37,7 @@ import type { EventPayload, LifecyclePayload } from '@wibly/internal-protocol';
37
37
  import type { ConsentDecision, ConsentRequiredPayload } from './consent.js';
38
38
  import type { Session, SessionConfig, SessionState } from './client.js';
39
39
  import { createSession } from './client.js';
40
+ import type { TtsPlaybackSnapshot } from './tts-playback.js';
40
41
 
41
42
  const SessionContext = createContext<Session | null>(null);
42
43
 
@@ -121,6 +122,20 @@ export const useConnectionState = (): SessionState['connectionState'] => {
121
122
  return state.connectionState;
122
123
  };
123
124
 
125
+ /**
126
+ * Subscribe to client-side TTS playback telemetry (speech energy +
127
+ * playing flag). Populated by the Host shell's audio sampler; idle
128
+ * until the first clip plays.
129
+ */
130
+ export const useTtsPlayback = (): TtsPlaybackSnapshot => {
131
+ const session = useSession();
132
+ return useSyncExternalStore(
133
+ session.ttsPlayback.subscribe,
134
+ session.ttsPlayback.getSnapshot,
135
+ session.ttsPlayback.getSnapshot,
136
+ );
137
+ };
138
+
124
139
  /**
125
140
  * Subscribe to a server-emitted event by `eventType`. The handler
126
141
  * fires on every matching `event` frame. Unsubscribes on unmount.
package/src/time.ts CHANGED
@@ -24,6 +24,10 @@ export type ServerTimeSource = {
24
24
  readonly recordedEvents: () => readonly RecordedEvent[];
25
25
  /** Internal: update the measured skew on a fresh pong. */
26
26
  readonly updateSkew: (skewMs: number) => void;
27
+ /** Freeze `serverNow()` while the session is paused. */
28
+ readonly freeze: () => void;
29
+ /** Resume advancing `serverNow()` after a pause. */
30
+ readonly unfreeze: () => void;
27
31
  };
28
32
 
29
33
  export type RecordedEvent = {
@@ -40,10 +44,14 @@ export const createServerTimeSource = (
40
44
  ): ServerTimeSource => {
41
45
  const nowFn = opts.now ?? (() => Date.now());
42
46
  let skew = 0;
47
+ let frozenServerNow: number | null = null;
43
48
  const events: RecordedEvent[] = [];
44
49
 
45
50
  return {
46
- serverNow: () => nowFn() + skew,
51
+ serverNow: () => {
52
+ if (frozenServerNow !== null) return frozenServerNow;
53
+ return nowFn() + skew;
54
+ },
47
55
  recordEvent: (eventId) => {
48
56
  const clientTs = nowFn();
49
57
  events.push({ eventId, clientTs });
@@ -54,5 +62,13 @@ export const createServerTimeSource = (
54
62
  updateSkew: (newSkew) => {
55
63
  skew = newSkew;
56
64
  },
65
+ freeze: () => {
66
+ if (frozenServerNow === null) {
67
+ frozenServerNow = nowFn() + skew;
68
+ }
69
+ },
70
+ unfreeze: () => {
71
+ frozenServerNow = null;
72
+ },
57
73
  };
58
74
  };
package/src/tts-playback.test.ts ADDED
@@ -0,0 +1,61 @@
1
+ import { describe, expect, it, vi } from 'vitest';
2
+
3
+ import {
4
+ createTtsPlaybackStore,
5
+ registerTtsPlaybackStore,
6
+ setTtsPlaybackTelemetry,
7
+ } from './tts-playback.js';
8
+
9
+ describe('createTtsPlaybackStore', () => {
10
+ it('starts idle', () => {
11
+ const store = createTtsPlaybackStore();
12
+ expect(store.getSnapshot()).toEqual({ isPlaying: false, speechLevel: 0 });
13
+ });
14
+
15
+ it('notifies subscribers when the snapshot changes', () => {
16
+ const store = createTtsPlaybackStore();
17
+ const listener = vi.fn();
18
+ const unsubscribe = store.subscribe(listener);
19
+
20
+ store.set({ isPlaying: true, speechLevel: 0.5 });
21
+
22
+ expect(listener).toHaveBeenCalledTimes(1);
23
+ expect(store.getSnapshot()).toEqual({ isPlaying: true, speechLevel: 0.5 });
24
+
25
+ unsubscribe();
26
+ store.set({ isPlaying: false, speechLevel: 0 });
27
+ expect(listener).toHaveBeenCalledTimes(1);
28
+ });
29
+
30
+ it('treats an identical set as a no-op and keeps a stable reference', () => {
31
+ const store = createTtsPlaybackStore();
32
+ const listener = vi.fn();
33
+ store.subscribe(listener);
34
+
35
+ const before = store.getSnapshot();
36
+ store.set({ isPlaying: false, speechLevel: 0 });
37
+
38
+ // useSyncExternalStore depends on a stable reference when nothing
39
+ // changed — otherwise it re-renders (or warns) every read.
40
+ expect(store.getSnapshot()).toBe(before);
41
+ expect(listener).not.toHaveBeenCalled();
42
+ });
43
+ });
44
+
45
+ describe('setTtsPlaybackTelemetry', () => {
46
+ it('routes a sample to the registered store', () => {
47
+ const session = {};
48
+ const store = createTtsPlaybackStore();
49
+ registerTtsPlaybackStore(session, store);
50
+
51
+ setTtsPlaybackTelemetry(session, { isPlaying: true, speechLevel: 0.8 });
52
+
53
+ expect(store.getSnapshot()).toEqual({ isPlaying: true, speechLevel: 0.8 });
54
+ });
55
+
56
+ it('is a no-op for a session with no registered store', () => {
57
+ expect(() =>
58
+ setTtsPlaybackTelemetry({}, { isPlaying: true, speechLevel: 1 }),
59
+ ).not.toThrow();
60
+ });
61
+ });
package/src/tts-playback.ts ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Client-side TTS playback telemetry.
3
+ *
4
+ * This is a purely local, ephemeral signal — it never touches the
5
+ * WebSocket. The Host shell samples speech energy from the single
6
+ * `<audio>` element (Web Audio `AnalyserNode`) and pushes a smoothed
7
+ * `speechLevel` here at animation-frame cadence; Experience bundles
8
+ * read it through `session.ttsPlayback` to drive avatar animation
9
+ * (the amplitude-driven fallback the Platform Spec §3.5 describes for
10
+ * providers without phoneme metadata).
11
+ *
12
+ * Why a dedicated store and not a `voice.*` event: the signal updates
13
+ * ~60×/sec. Pumping that through `session.events` would flood every
14
+ * listener and the wire-shaped bus machinery. A tiny
15
+ * `useSyncExternalStore`-friendly snapshot store keeps the hot path
16
+ * cheap and matches the SDK's existing one-store-per-concern pattern.
17
+ *
18
+ * The read surface (`subscribe` / `getSnapshot`) is public on
19
+ * `Session.ttsPlayback`. The write surface lives behind
20
+ * `setTtsPlaybackTelemetry(session, snapshot)` so only the shell that
21
+ * owns the audio element mutates it — bundles consume, never produce.
22
+ */
23
+
24
+ export type TtsPlaybackSnapshot = {
25
+ /** True while a TTS clip is actively playing. */
26
+ readonly isPlaying: boolean;
27
+ /** Smoothed speech energy, normalised to 0..1. */
28
+ readonly speechLevel: number;
29
+ };
30
+
31
+ /** Read surface exposed on `Session.ttsPlayback`. */
32
+ export type TtsPlaybackTelemetry = {
33
+ readonly subscribe: (listener: () => void) => () => void;
34
+ readonly getSnapshot: () => TtsPlaybackSnapshot;
35
+ };
36
+
37
+ /** Full store — the read surface plus the shell-only setter. */
38
+ export type TtsPlaybackStore = TtsPlaybackTelemetry & {
39
+ readonly set: (next: TtsPlaybackSnapshot) => void;
40
+ };
41
+
42
+ const IDLE: TtsPlaybackSnapshot = { isPlaying: false, speechLevel: 0 };
43
+
44
+ export const createTtsPlaybackStore = (): TtsPlaybackStore => {
45
+ let snapshot: TtsPlaybackSnapshot = IDLE;
46
+ const listeners = new Set<() => void>();
47
+
48
+ return {
49
+ subscribe: (listener) => {
50
+ listeners.add(listener);
51
+ return () => {
52
+ listeners.delete(listener);
53
+ };
54
+ },
55
+ getSnapshot: () => snapshot,
56
+ set: (next) => {
57
+ if (
58
+ next.isPlaying === snapshot.isPlaying &&
59
+ next.speechLevel === snapshot.speechLevel
60
+ ) {
61
+ return;
62
+ }
63
+ snapshot = next;
64
+ for (const listener of Array.from(listeners)) {
65
+ try {
66
+ listener();
67
+ } catch {
68
+ // Listeners must not throw; swallow to avoid cascading.
69
+ }
70
+ }
71
+ },
72
+ };
73
+ };
74
+
75
+ /**
76
+ * Registry mapping a `Session` to its writable telemetry store. The
77
+ * `Session` only exposes the read surface, so the shell pushes
78
+ * samples through this side channel keyed by the session reference.
79
+ */
80
+ const storeRegistry = new WeakMap<object, TtsPlaybackStore>();
81
+
82
+ export const registerTtsPlaybackStore = (
83
+ session: object,
84
+ store: TtsPlaybackStore,
85
+ ): void => {
86
+ storeRegistry.set(session, store);
87
+ };
88
+
89
+ /**
90
+ * Push a telemetry sample for the given session. No-op if the session
91
+ * has no registered store (e.g. a stub session in a dev harness that
92
+ * supplies its own `ttsPlayback`).
93
+ */
94
+ export const setTtsPlaybackTelemetry = (
95
+ session: object,
96
+ snapshot: TtsPlaybackSnapshot,
97
+ ): void => {
98
+ storeRegistry.get(session)?.set(snapshot);
99
+ };
package/src/voice.ts CHANGED
@@ -47,6 +47,27 @@ export type SpeakSuccess = {
47
47
  readonly durationMs: number;
48
48
  };
49
49
 
50
+ /** Optional beat metadata for in-phase TTS choreography (chunk B26). */
51
+ export type TtsBeatReveal = {
52
+ readonly kind: 'none' | 'player_submission';
53
+ readonly playerId?: string;
54
+ };
55
+
56
+ export type TtsBeatMeta = {
57
+ readonly beatId: string;
58
+ readonly sequence: number;
59
+ readonly reveal?: TtsBeatReveal;
60
+ };
61
+
62
+ /** Wire shape on `voice.audio` events (Host + Experience bundles). */
63
+ export type VoiceAudioPayload = SpeakSuccess & {
64
+ readonly id?: string;
65
+ readonly causeMessageId?: string;
66
+ readonly caption?: string | null;
67
+ readonly beat?: TtsBeatMeta;
68
+ readonly cues?: ReadonlyArray<{ readonly at: number; readonly kind: string }>;
69
+ };
70
+
50
71
  export type SessionVoice = {
51
72
  readonly speak: (input: SpeakInput) => Promise<Result<SpeakSuccess, SdkError>>;
52
73
  };