@elizaos/plugin-xr 2.0.3-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/AGENTS.md +151 -0
  2. package/CLAUDE.md +151 -0
  3. package/LICENSE +21 -0
  4. package/README.md +106 -0
  5. package/package.json +57 -0
  6. package/simulator/bun.lock +159 -0
  7. package/simulator/package.json +28 -0
  8. package/simulator/src/emulator.ts +174 -0
  9. package/simulator/src/mock-agent.ts +233 -0
  10. package/simulator/src/node.ts +9 -0
  11. package/simulator/src/playwright-fixture.ts +169 -0
  12. package/simulator/src/types.ts +51 -0
  13. package/simulator/tsconfig.json +13 -0
  14. package/simulator/vite.config.ts +25 -0
  15. package/src/__tests__/audio-pipeline.test.ts +129 -0
  16. package/src/__tests__/protocol.test.ts +53 -0
  17. package/src/__tests__/routes-e2e.test.ts +276 -0
  18. package/src/__tests__/vision-pipeline.test.ts +73 -0
  19. package/src/__tests__/xr-bundle-coverage.test.ts +303 -0
  20. package/src/__tests__/xr-feature-parity.test.ts +524 -0
  21. package/src/__tests__/xr-functional-parity.test.ts +522 -0
  22. package/src/__tests__/xr-view-host-http.test.ts +239 -0
  23. package/src/__tests__/xr-view-host.test.ts +174 -0
  24. package/src/actions/xr-query-vision.ts +64 -0
  25. package/src/actions/xr-view-actions.ts +386 -0
  26. package/src/index.ts +55 -0
  27. package/src/protocol.ts +126 -0
  28. package/src/providers/xr-context.ts +49 -0
  29. package/src/routes/xr-connect.ts +89 -0
  30. package/src/routes/xr-simulator-route.ts +37 -0
  31. package/src/routes/xr-status.ts +36 -0
  32. package/src/routes/xr-view-host.ts +359 -0
  33. package/src/routes/xr-views.ts +43 -0
  34. package/src/services/audio-pipeline.ts +120 -0
  35. package/src/services/vision-pipeline.ts +57 -0
  36. package/src/services/xr-session-service.ts +388 -0
  37. package/tsconfig.build.json +9 -0
  38. package/tsconfig.json +30 -0
  39. package/vitest.config.ts +21 -0
@@ -0,0 +1,57 @@
1
+ import type { IAgentRuntime } from "@elizaos/core";
2
+ import { ModelType } from "@elizaos/core";
3
+ import type { XRFrameHeader } from "../protocol.ts";
4
+
5
/**
 * Snapshot of the most recent camera frame received on one XR connection.
 */
export interface LatestFrame {
  /** Raw image bytes exactly as they arrived in the binary frame payload. */
  data: Buffer;
  /** Header decoded alongside the payload; its `format` is used to build the data URL. */
  header: XRFrameHeader;
  /** `Date.now()` timestamp (ms) taken when the frame was stored. */
  receivedAt: number;
}
10
+
11
// A frame older than this is considered stale and won't be described
const FRAME_MAX_AGE_MS = 10_000;

/**
 * Normalizes the result of an image-description model call to plain text.
 *
 * Accepts either a bare string or an object carrying a string `description`
 * field (NOTE(review): the object shape is what elizaOS image-description
 * handlers are documented to return — confirm against the installed core
 * version). Anything else yields `null`.
 */
function descriptionFromModelResult(result: unknown): string | null {
  if (typeof result === "string") return result;
  if (typeof result === "object" && result !== null) {
    const description = (result as { description?: unknown }).description;
    if (typeof description === "string") return description;
  }
  return null;
}

/**
 * Keeps the most recent camera frame for each XR connection and, on demand,
 * asks the runtime's image-description model to describe it.
 */
export class VisionPipeline {
  /** Latest frame per connection id; each `storeFrame` call overwrites the previous one. */
  private latest = new Map<string, LatestFrame>();

  /**
   * Records `data` as the newest frame for `connectionId`, stamped with the
   * current time.
   */
  storeFrame(connectionId: string, header: XRFrameHeader, data: Buffer): void {
    this.latest.set(connectionId, { data, header, receivedAt: Date.now() });
  }

  /**
   * Returns the newest frame for `connectionId`, or `undefined` when none was
   * stored or the stored one is older than `FRAME_MAX_AGE_MS`.
   *
   * A stale frame is evicted on the spot: it can never become fresh again, so
   * keeping it would only pin its image buffer in memory until disconnect.
   */
  getLatestFrame(connectionId: string): LatestFrame | undefined {
    const frame = this.latest.get(connectionId);
    if (!frame) return undefined;
    if (Date.now() - frame.receivedAt > FRAME_MAX_AGE_MS) {
      this.latest.delete(connectionId);
      return undefined;
    }
    return frame;
  }

  /** True when a non-stale frame is available for `connectionId`. */
  hasRecentFrame(connectionId: string): boolean {
    return this.getLatestFrame(connectionId) !== undefined;
  }

  /**
   * Describes the newest frame of `connectionId` with the runtime's
   * image-description model.
   *
   * @param runtime Agent runtime whose `useModel` is invoked.
   * @param connectionId Connection whose latest frame should be described.
   * @param prompt Optional instruction for the model; a generic "describe
   *   concisely" prompt is used when omitted.
   * @returns The description text, or `null` when there is no recent frame,
   *   the model call fails, or the result carries no text.
   */
  async describeFrame(
    runtime: IAgentRuntime,
    connectionId: string,
    prompt?: string,
  ): Promise<string | null> {
    const frame = this.getLatestFrame(connectionId);
    if (!frame) return null;

    // The model receives the image inline as a base64 data URL.
    const dataUrl = `data:image/${frame.header.format};base64,${frame.data.toString("base64")}`;

    try {
      const result: unknown = await runtime.useModel(
        ModelType.IMAGE_DESCRIPTION,
        {
          imageUrl: dataUrl,
          prompt: prompt ?? "Describe what you see in this image concisely.",
        },
      );
      return descriptionFromModelResult(result);
    } catch (err) {
      // Best effort: a vision failure must not break the caller's flow.
      console.error("[plugin-xr] vision error:", err);
      return null;
    }
  }

  /** Drops any stored frame for `connectionId` (called on disconnect/shutdown). */
  clear(connectionId: string): void {
    this.latest.delete(connectionId);
  }
}
@@ -0,0 +1,388 @@
1
+ import type {
2
+ Content,
3
+ HandlerCallback,
4
+ IAgentRuntime,
5
+ Memory,
6
+ UUID,
7
+ } from "@elizaos/core";
8
+ import {
9
+ ChannelType,
10
+ createMessageMemory,
11
+ ModelType,
12
+ Service,
13
+ } from "@elizaos/core";
14
+ import { type WebSocket, WebSocketServer } from "ws";
15
+ import {
16
+ decodeBinaryFrame,
17
+ encodeBinaryFrame,
18
+ type XRClientControl,
19
+ type XRDeviceType,
20
+ type XRPanelConfig,
21
+ type XRServerControl,
22
+ type XRTTSAudioHeader,
23
+ } from "../protocol.ts";
24
+ import { AudioPipeline } from "./audio-pipeline.ts";
25
+ import { VisionPipeline } from "./vision-pipeline.ts";
26
+
27
/** Service type under which `XRSessionService` registers itself. */
export const XR_SERVICE_TYPE = "xr-session";

/** Port the XR WebSocket server binds to when no setting overrides it. */
export const XR_WS_PORT_DEFAULT = 31338;

/** Name of the runtime setting that overrides the WebSocket port. */
export const XR_WS_PORT_ENV = "XR_WS_PORT";
30
+
31
/**
 * One registered XR device session: the socket plus the agent-side
 * identities created for it.
 */
export interface XRConnection {
  /** Connection id; also reported to the device as `sessionId` in the `ready` message. */
  id: string;
  /** Underlying WebSocket for this device. */
  ws: WebSocket;
  /** Device type the client announced in its `hello` message. */
  deviceType: XRDeviceType;
  /** Entity id representing the device/user in the agent's memory. */
  entityId: UUID;
  /** Room id holding this session's conversation. */
  roomId: UUID;
  /** When the `hello` message was processed. */
  connectedAt: Date;
}
39
+
40
/**
 * Agent service that runs a WebSocket server for XR headsets.
 *
 * Devices register with a `hello` control message, then stream binary audio
 * and camera frames. Finished transcripts are routed through the runtime's
 * message service, and the agent's replies go back as text plus synthesized
 * speech.
 */
export class XRSessionService extends Service {
  static override serviceType = XR_SERVICE_TYPE;

  readonly capabilityDescription =
    "Streams audio and camera from XR headsets (Quest 3, XReal) to the agent and returns voice responses.";

  /** Undefined until `initialize` has run. */
  private wss: WebSocketServer | undefined;
  /** Devices that completed the `hello` handshake, keyed by connection id. */
  private connections = new Map<string, XRConnection>();
  /** Pending/finished entity+room bootstrap per connection id. */
  private entitySetup = new Map<string, Promise<void>>();
  private audioPipeline!: AudioPipeline;
  private visionPipeline!: VisionPipeline;

  /** Creates the service and starts its WebSocket server. */
  static override async start(runtime: IAgentRuntime): Promise<Service> {
    const svc = new XRSessionService(runtime);
    await svc.initialize(runtime);
    return svc;
  }

  /**
   * Turns the raw port setting into a usable port number.
   *
   * Missing, empty, non-numeric or out-of-range values fall back to
   * `XR_WS_PORT_DEFAULT` instead of reaching the socket layer as `NaN` or an
   * accidental port. `0` stays valid so callers can request an ephemeral port.
   */
  private static resolvePort(raw: unknown): number {
    if (raw === undefined || raw === null || raw === "") {
      return XR_WS_PORT_DEFAULT;
    }
    if (typeof raw === "string" || typeof raw === "number") {
      const port = Number(raw);
      if (Number.isInteger(port) && port >= 0 && port <= 65535) return port;
    }
    console.warn(
      `[plugin-xr] invalid ${XR_WS_PORT_ENV} setting (${String(raw)}); using default port ${XR_WS_PORT_DEFAULT}`,
    );
    return XR_WS_PORT_DEFAULT;
  }

  /** Builds the pipelines and opens the WebSocket server. */
  private async initialize(runtime: IAgentRuntime): Promise<void> {
    this.visionPipeline = new VisionPipeline();
    this.audioPipeline = new AudioPipeline(
      runtime,
      (connectionId, transcript) =>
        this.handleTranscript(connectionId, transcript),
    );

    const port = XRSessionService.resolvePort(
      runtime.getSetting(XR_WS_PORT_ENV),
    );
    const wss = new WebSocketServer({ port });
    this.wss = wss;

    wss.on("connection", (ws: WebSocket) => this.onConnect(runtime, ws));
    wss.on("error", (err: Error) =>
      console.error("[plugin-xr] WebSocket server error:", err),
    );
    // Announce only once the bind has succeeded, so a failed bind
    // (reported through the "error" handler) is not also logged as listening.
    wss.on("listening", () =>
      console.info(
        `[plugin-xr] WebSocket server listening on ws://localhost:${port}`,
      ),
    );
  }

  /**
   * Releases per-connection state, closes every client socket and shuts the
   * server down. Safe to call when the server was never started.
   */
  override async stop(): Promise<void> {
    for (const conn of this.connections.values()) {
      this.audioPipeline.clear(conn.id);
      this.visionPipeline.clear(conn.id);
      conn.ws.close();
    }
    this.connections.clear();
    this.entitySetup.clear();

    const wss = this.wss;
    if (!wss) return;

    // Sockets that connected but never sent `hello` are not in `connections`;
    // close them too so they cannot hold up the server's close callback.
    for (const client of wss.clients ?? []) {
      client.close();
    }
    await new Promise<void>((resolve) => wss.close(() => resolve()));
  }

  // ── Public accessors used by provider / action ──────────────────────────

  /** Snapshot of all registered connections. */
  getConnections(): XRConnection[] {
    return [...this.connections.values()];
  }

  /** True when at least one device has completed the handshake. */
  hasActiveConnections(): boolean {
    return this.connections.size > 0;
  }

  /** The vision pipeline holding the latest camera frames. */
  getVisionPipeline(): VisionPipeline {
    return this.visionPipeline;
  }

  /**
   * Serializes `payload` and sends it to `connectionId` if that connection is
   * registered and its socket is open.
   *
   * @returns Whether the message was handed to the socket.
   */
  private sendJson(connectionId: string, payload: unknown): boolean {
    const conn = this.connections.get(connectionId);
    if (!conn || conn.ws.readyState !== conn.ws.OPEN) return false;
    conn.ws.send(JSON.stringify(payload));
    return true;
  }

  /** Sends an `agent_text` message; a no-op when the connection is gone or closed. */
  sendText(connectionId: string, text: string): void {
    this.sendJson(connectionId, { type: "agent_text", text });
  }

  /** Sends a control message; a no-op when the connection is gone or closed. */
  sendControl(connectionId: string, msg: XRServerControl): void {
    this.sendJson(connectionId, msg);
  }

  /** Sends a control message to every connection whose socket is open. */
  broadcastControl(msg: XRServerControl): void {
    // Serialize once; the payload is identical for every recipient.
    const serialized = JSON.stringify(msg);
    for (const conn of this.connections.values()) {
      if (conn.ws.readyState === conn.ws.OPEN) {
        conn.ws.send(serialized);
      }
    }
  }

  /** Asks the device to open view `viewId`, served from `agentBaseUrl`. */
  openView(
    connectionId: string,
    viewId: string,
    agentBaseUrl: string,
    config?: XRPanelConfig,
  ): void {
    this.sendControl(connectionId, {
      type: "view_open",
      viewId,
      agentBaseUrl,
      config,
    });
  }

  /** Asks the device to close view `viewId`. */
  closeView(connectionId: string, viewId: string): void {
    this.sendControl(connectionId, { type: "view_close", viewId });
  }

  /** Asks the device to switch to view `viewId`. */
  switchView(connectionId: string, viewId: string): void {
    this.sendControl(connectionId, { type: "view_switch", viewId });
  }

  /** Asks the device to apply a new panel `config` to view `viewId`. */
  resizeView(
    connectionId: string,
    viewId: string,
    config: XRPanelConfig,
  ): void {
    this.sendControl(connectionId, { type: "view_resize", viewId, config });
  }

  /** Sends the list of available views to the device. */
  sendViewsCatalog(
    connectionId: string,
    views: Array<{
      id: string;
      label: string;
      icon?: string;
      description?: string;
    }>,
  ): void {
    this.sendControl(connectionId, { type: "views_catalog", views });
  }

  /**
   * Sends synthesized speech as a binary `tts_audio` frame (mono, declared as
   * mp3). A no-op when the connection is gone or closed.
   */
  sendAudio(connectionId: string, audio: Buffer, sampleRate = 24000): void {
    const conn = this.connections.get(connectionId);
    if (!conn || conn.ws.readyState !== conn.ws.OPEN) return;
    const header: XRTTSAudioHeader = {
      type: "tts_audio",
      sampleRate,
      channels: 1,
      encoding: "mp3",
    };
    conn.ws.send(encodeBinaryFrame(header, audio), { binary: true });
  }

  // ── WebSocket connection lifecycle ──────────────────────────────────────

  /**
   * Wires message/close/error handlers for a freshly accepted socket. The
   * socket only becomes a registered connection once it sends `hello`.
   */
  private onConnect(runtime: IAgentRuntime, ws: WebSocket): void {
    const connId = crypto.randomUUID();

    ws.on("message", (data: unknown, isBinary: boolean) => {
      try {
        if (isBinary) {
          this.handleBinaryMessage(connId, data as Buffer);
          return;
        }
        const raw =
          typeof data === "string"
            ? data
            : Buffer.isBuffer(data)
              ? data.toString("utf8")
              : String(data);
        this.handleTextMessage(runtime, connId, ws, raw);
      } catch (err) {
        // Malformed input from one device must not take the server down.
        console.error("[plugin-xr] message handler error:", err);
      }
    });

    ws.on("close", () => {
      this.audioPipeline.flush(connId);
      this.audioPipeline.clear(connId);
      this.visionPipeline.clear(connId);
      this.connections.delete(connId);
      this.entitySetup.delete(connId);
      console.info(`[plugin-xr] device disconnected: ${connId}`);
    });

    ws.on("error", (err: Error) =>
      console.error(`[plugin-xr] ws error on ${connId}:`, err),
    );
  }

  /**
   * Handles one JSON control message from a device.
   *
   * Throws if `raw` is not valid JSON; the caller logs that. Message types
   * not listed here are ignored.
   */
  private handleTextMessage(
    runtime: IAgentRuntime,
    connId: string,
    ws: WebSocket,
    raw: string,
  ): void {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed !== "object" || parsed === null) {
      // Valid JSON that is not an object (e.g. `null`, `42`) carries no `type`.
      console.warn(
        `[plugin-xr] ignoring non-object control message on ${connId}`,
      );
      return;
    }
    const msg = parsed as XRClientControl;

    switch (msg.type) {
      case "hello": {
        const conn: XRConnection = {
          id: connId,
          ws,
          deviceType: msg.deviceType,
          entityId: crypto.randomUUID() as UUID,
          roomId: crypto.randomUUID() as UUID,
          connectedAt: new Date(),
        };
        this.connections.set(connId, conn);
        ws.send(JSON.stringify({ type: "ready", sessionId: connId }));
        // Bootstrap runs in the background; the promise is kept so transcript
        // handling can wait for the entity and room before writing memories.
        this.entitySetup.set(connId, this.ensureEntities(runtime, conn));
        console.info(`[plugin-xr] ${msg.deviceType} connected: ${connId}`);
        return;
      }
      case "ping":
        ws.send(JSON.stringify({ type: "pong" }));
        return;
      case "view_ready":
        console.info(`[plugin-xr] view ready on ${connId}: ${msg.viewId}`);
        return;
      case "view_closed":
        console.info(`[plugin-xr] view closed on ${connId}: ${msg.viewId}`);
        return;
      case "view_event":
        console.info(
          `[plugin-xr] view event on ${connId}:`,
          msg.viewId,
          msg.event,
        );
        return;
      default:
        return;
    }
  }

  /**
   * Routes one binary frame to the audio or vision pipeline. Frames from
   * sockets that have not sent `hello` are dropped.
   */
  private handleBinaryMessage(connId: string, data: Buffer): void {
    if (!this.connections.has(connId)) return;

    const { header, payload } = decodeBinaryFrame(data);

    if (header.type === "audio") {
      this.audioPipeline.push(connId, header, payload);
      return;
    }

    if (header.type === "frame") {
      this.visionPipeline.storeFrame(connId, header, payload);
      return;
    }
  }

  // ── Message routing ─────────────────────────────────────────────────────

  /**
   * Turns a finished transcript into an agent message: echoes it to the
   * device, attaches the latest camera frame if one is recent, persists it,
   * and hands it to the runtime's message service. Replies are sent back as
   * text and speech and persisted as memories.
   */
  private async handleTranscript(
    connectionId: string,
    transcript: string,
  ): Promise<void> {
    const conn = this.connections.get(connectionId);
    if (!conn) return;
    const runtime = this.runtime;

    // Echo transcript back so the UI can show it
    this.sendJson(connectionId, {
      type: "transcript",
      text: transcript,
      final: true,
    });

    // Attach latest camera frame as image attachment if available
    const latestFrame = this.visionPipeline.getLatestFrame(connectionId);
    const attachments: Content["attachments"] = latestFrame
      ? [
          {
            id: crypto.randomUUID(),
            url: `data:image/${latestFrame.header.format};base64,${latestFrame.data.toString("base64")}`,
            title: "XR camera frame",
            contentType: "image",
            source: "xr-camera",
          },
        ]
      : [];

    // The entity and room are created in the background after `hello`; make
    // sure that has settled before writing memories that reference them.
    await this.entitySetup.get(connectionId);

    const memory = createMessageMemory({
      entityId: conn.entityId,
      agentId: runtime.agentId,
      roomId: conn.roomId,
      content: {
        text: transcript,
        source: `xr-${conn.deviceType}`,
        attachments,
      },
    });

    await runtime.createMemory(memory, "messages");

    const callback: HandlerCallback = async (
      response: Content,
    ): Promise<Memory[]> => {
      const text = response.text?.trim() ?? "";
      if (text.length === 0) return [];

      // Send text response immediately
      this.sendText(connectionId, text);

      // Generate TTS and send audio — skipped when the device has already
      // disconnected, since nobody would receive it.
      if (this.connections.has(connectionId)) {
        try {
          const audio = await runtime.useModel(ModelType.TEXT_TO_SPEECH, text);
          const audioBuf = Buffer.isBuffer(audio)
            ? audio
            : Buffer.from(audio as ArrayBuffer);
          this.sendAudio(connectionId, audioBuf);
        } catch (err) {
          console.error("[plugin-xr] TTS error:", err);
        }
      }

      // Persist agent response as memory
      const responseMemory = createMessageMemory({
        entityId: runtime.agentId,
        agentId: runtime.agentId,
        roomId: conn.roomId,
        content: { text, source: `xr-agent`, inReplyTo: memory.id },
      });
      await runtime.createMemory(responseMemory, "messages");
      return [responseMemory];
    };

    if (runtime.messageService) {
      await runtime.messageService.handleMessage(runtime, memory, callback);
    } else {
      console.warn(
        "[plugin-xr] runtime has no messageService; transcript stored without a response",
      );
    }
  }

  // ── Entity / room bootstrap ─────────────────────────────────────────────

  /**
   * Creates the entity and room for a connection and adds both the device
   * entity and the agent as participants. Failures are logged, never thrown.
   */
  private async ensureEntities(
    runtime: IAgentRuntime,
    conn: XRConnection,
  ): Promise<void> {
    try {
      await runtime.createEntity({
        id: conn.entityId,
        agentId: runtime.agentId,
        names: [`XR-${conn.deviceType}`],
        metadata: { source: `xr-${conn.deviceType}` },
      });
      await runtime.createRoom({
        id: conn.roomId,
        name: `XR session (${conn.deviceType} ${conn.id.slice(0, 8)})`,
        source: "xr",
        type: ChannelType.GROUP,
        worldId: undefined,
        channelId: undefined,
        messageServerId: undefined,
      });
      await runtime.addParticipant(conn.entityId, conn.roomId);
      await runtime.addParticipant(runtime.agentId, conn.roomId);
    } catch (err) {
      console.error("[plugin-xr] entity setup error:", err);
    }
  }
}
@@ -0,0 +1,9 @@
1
+ {
2
+ "extends": "../tsconfig.build.shared.json",
3
+ "compilerOptions": {
4
+ "outDir": "dist",
5
+ "rootDir": "./src"
6
+ },
7
+ "include": ["src"],
8
+ "exclude": ["src/**/*.test.ts", "src/**/__tests__/**"]
9
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "extends": "../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "composite": false,
5
+ "moduleResolution": "bundler",
6
+ "strict": true,
7
+ "strictNullChecks": true,
8
+ "skipLibCheck": true,
9
+ "types": ["node", "bun"],
10
+ "allowImportingTsExtensions": true,
11
+ "noEmit": true,
12
+ "declaration": false,
13
+ "declarationMap": false,
14
+ "sourceMap": false,
15
+ "paths": {
16
+ "@elizaos/core": ["../../packages/core/src/index.node.ts"],
17
+ "@elizaos/logger": ["../../packages/logger/src/index.ts"],
18
+ "@elizaos/shared": ["../../packages/shared/src/index.ts"],
19
+ "@elizaos/shared/*": ["../../packages/shared/src/*"],
20
+ "@elizaos/*": ["../../node_modules/@elizaos/*"]
21
+ }
22
+ },
23
+ "include": ["src/**/*.ts", "src/**/*.d.ts"],
24
+ "exclude": [
25
+ "src/**/*.stories.ts",
26
+ "src/**/__tests__/**",
27
+ "src/**/*.test.ts",
28
+ "src/**/*.test.tsx"
29
+ ]
30
+ }
@@ -0,0 +1,21 @@
1
+ import { fileURLToPath } from "node:url";
2
+ import { defineConfig } from "vitest/config";
3
+
4
// Absolute filesystem path of the agent package's API sources, resolved
// relative to this config file.
const agentApiSourceDir = fileURLToPath(
  new URL("../../packages/agent/src/api/", import.meta.url),
);

// Rewrites `@elizaos/agent/api/<name>` imports to `<agentApiSourceDir><name>.ts`
// so tests resolve those imports to the TypeScript sources.
const agentApiAlias = {
  find: /^@elizaos\/agent\/api\/(.+)$/,
  replacement: agentApiSourceDir + "$1.ts",
};

// Test files to collect.
const testFileGlobs = ["src/**/*.test.ts", "src/**/__tests__/**/*.test.ts"];

export default defineConfig({
  resolve: {
    alias: [agentApiAlias],
  },
  test: {
    include: testFileGlobs,
    environment: "node",
  },
});