@elizaos/plugin-xr 2.0.3-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/AGENTS.md +151 -0
  2. package/CLAUDE.md +151 -0
  3. package/LICENSE +21 -0
  4. package/README.md +106 -0
  5. package/package.json +57 -0
  6. package/simulator/bun.lock +159 -0
  7. package/simulator/package.json +28 -0
  8. package/simulator/src/emulator.ts +174 -0
  9. package/simulator/src/mock-agent.ts +233 -0
  10. package/simulator/src/node.ts +9 -0
  11. package/simulator/src/playwright-fixture.ts +169 -0
  12. package/simulator/src/types.ts +51 -0
  13. package/simulator/tsconfig.json +13 -0
  14. package/simulator/vite.config.ts +25 -0
  15. package/src/__tests__/audio-pipeline.test.ts +129 -0
  16. package/src/__tests__/protocol.test.ts +53 -0
  17. package/src/__tests__/routes-e2e.test.ts +276 -0
  18. package/src/__tests__/vision-pipeline.test.ts +73 -0
  19. package/src/__tests__/xr-bundle-coverage.test.ts +303 -0
  20. package/src/__tests__/xr-feature-parity.test.ts +524 -0
  21. package/src/__tests__/xr-functional-parity.test.ts +522 -0
  22. package/src/__tests__/xr-view-host-http.test.ts +239 -0
  23. package/src/__tests__/xr-view-host.test.ts +174 -0
  24. package/src/actions/xr-query-vision.ts +64 -0
  25. package/src/actions/xr-view-actions.ts +386 -0
  26. package/src/index.ts +55 -0
  27. package/src/protocol.ts +126 -0
  28. package/src/providers/xr-context.ts +49 -0
  29. package/src/routes/xr-connect.ts +89 -0
  30. package/src/routes/xr-simulator-route.ts +37 -0
  31. package/src/routes/xr-status.ts +36 -0
  32. package/src/routes/xr-view-host.ts +359 -0
  33. package/src/routes/xr-views.ts +43 -0
  34. package/src/services/audio-pipeline.ts +120 -0
  35. package/src/services/vision-pipeline.ts +57 -0
  36. package/src/services/xr-session-service.ts +388 -0
  37. package/tsconfig.build.json +9 -0
  38. package/tsconfig.json +30 -0
  39. package/vitest.config.ts +21 -0
@@ -0,0 +1,169 @@
1
+ /**
2
+ * Playwright fixture and page wrapper for XR emulator testing.
3
+ *
4
+ * Usage:
5
+ * import { test, expect } from './playwright-fixture.ts';
6
+ *
7
+ * test('full roundtrip', async ({ xrPage, mockAgent }) => {
8
+ * await xrPage.goto('/');
9
+ * await mockAgent.waitForConnection();
10
+ * await xrPage.injectCameraFrame('./fixtures/desk.jpg');
11
+ * const frame = await mockAgent.waitForCameraFrame();
12
+ * expect(frame.payload.length).toBeGreaterThan(100);
13
+ * });
14
+ */
15
+
16
+ import { existsSync, readFileSync } from "node:fs";
17
+ import { dirname, resolve } from "node:path";
18
+ import { fileURLToPath } from "node:url";
19
+ import { test as base, type Page } from "@playwright/test";
20
+ import { MockAgentServer } from "./mock-agent.ts";
21
+ import type { EmulatorStats, XRPose } from "./types.ts";
22
+
23
+ const __dirname = dirname(fileURLToPath(import.meta.url));
24
+ const EMULATOR_DIST = resolve(__dirname, "../dist/emulator.js");
25
+
26
+ // ── XREmulatorPage ────────────────────────────────────────────────────────
27
+
28
/**
 * Page-object wrapper around a Playwright `Page` that drives the in-page XR
 * emulator (`window.__XREmulator`) and, when the app exposes them, its test
 * hooks (`window.__xrTestHooks`).
 */
export class XREmulatorPage {
  constructor(readonly page: Page) {}

  /**
   * Register the emulator bundle as an init script. Call before `goto()`.
   *
   * @throws Error when the bundle at `EMULATOR_DIST` does not exist on disk.
   */
  async inject(): Promise<void> {
    if (!existsSync(EMULATOR_DIST)) {
      throw new Error(
        `Emulator bundle not found at ${EMULATOR_DIST}. Run: cd eliza/plugins/plugin-xr/simulator && bun run build`,
      );
    }
    await this.page.addInitScript({ path: EMULATOR_DIST });
  }

  /**
   * Navigate to `url`, then wait (up to 5 s) until `window.__XREmulator`
   * has been defined in the page.
   */
  async goto(url: string): Promise<void> {
    await this.page.goto(url);
    await this.page.waitForFunction(
      () => typeof window.__XREmulator !== "undefined",
      {
        timeout: 5000,
      },
    );
  }

  /** Set the emulated headset pose. */
  async setPose(pose: Partial<XRPose>): Promise<void> {
    await this.page.evaluate((p) => window.__XREmulator.setPose(p), pose);
  }

  /**
   * Inject a camera frame from a local image file (JPEG or PNG).
   *
   * The file is read synchronously and forwarded as a base64 data URL. The
   * MIME type is `image/png` when the path ends in `.png` (any letter case)
   * and `image/jpeg` otherwise.
   */
  async injectCameraFrame(imagePath: string): Promise<void> {
    const abs = resolve(imagePath);
    const data = readFileSync(abs);
    // Compare case-insensitively so "FRAME.PNG" is not mislabelled as JPEG.
    const isPng = imagePath.toLowerCase().endsWith(".png");
    const mime = isPng ? "image/png" : "image/jpeg";
    const dataUrl = `data:${mime};base64,${data.toString("base64")}`;
    await this.page.evaluate(
      (url) => window.__XREmulator.injectCameraFrame(url),
      dataUrl,
    );
  }

  /** Inject a camera frame from an inline data URL. */
  async injectCameraDataUrl(dataUrl: string): Promise<void> {
    await this.page.evaluate(
      (url) => window.__XREmulator.injectCameraFrame(url),
      dataUrl,
    );
  }

  /**
   * Send a synthetic audio chunk through `window.__xrTestHooks`.
   *
   * The in-page callback throws when the hooks are not installed.
   */
  async sendAudioChunk(
    base64: string,
    sampleRate = 48000,
    encoding = "webm-opus",
  ): Promise<void> {
    await this.page.evaluate(
      ({ b64, sr, enc }) => {
        if (!window.__xrTestHooks)
          throw new Error("__xrTestHooks not available — is VITE_TEST=true?");
        window.__xrTestHooks.sendAudioChunk(b64, sr, enc);
      },
      { b64: base64, sr: sampleRate, enc: encoding },
    );
  }

  /** Get emulator stats. */
  async getStats(): Promise<EmulatorStats> {
    return this.page.evaluate(() => window.__XREmulator.getStats());
  }

  /**
   * Get the WebSocket readyState from the test hooks, or `"UNAVAILABLE"`
   * when the hooks are not installed.
   */
  async getSocketState(): Promise<string> {
    return this.page.evaluate(() => {
      if (!window.__xrTestHooks) return "UNAVAILABLE";
      return window.__xrTestHooks.getSocketState();
    });
  }

  /** Wait for the `#status-text` element to be visible with matching text. */
  async waitForStatus(pattern: string | RegExp, timeout = 8000): Promise<void> {
    await this.page
      .locator("#status-text")
      .filter({ hasText: pattern })
      .waitFor({
        state: "visible",
        timeout,
      });
  }

  /** Wait for `#agent-response` to contain text, then return its inner text. */
  async waitForAgentText(timeout = 10000): Promise<string> {
    return this.waitForNonEmptyText("#agent-response", timeout);
  }

  /** Wait for `#transcript` to contain text, then return its inner text. */
  async waitForTranscript(timeout = 10000): Promise<string> {
    return this.waitForNonEmptyText("#transcript", timeout);
  }

  /** Force-disconnect the WebSocket (tests reconnect logic). */
  async simulateDisconnect(): Promise<void> {
    await this.page.evaluate(() => window.__XREmulator.simulateDisconnect());
  }

  /**
   * Wait until the element is visible and holds at least one non-whitespace
   * character, then return its inner text.
   *
   * `filter({ hasNotText: "" })` cannot express this: Playwright ignores a
   * falsy filter value, so the locator is left unfiltered and the wait
   * resolves as soon as the (possibly empty) element is visible.
   */
  private async waitForNonEmptyText(
    selector: string,
    timeout: number,
  ): Promise<string> {
    const el = this.page.locator(selector);
    await el.waitFor({ state: "visible", timeout });
    await el.filter({ hasText: /\S/ }).waitFor({ state: "visible", timeout });
    return el.innerText();
  }
}
139
+
140
+ // ── Playwright fixture extensions ─────────────────────────────────────────
141
+
142
/** Fixtures added to the base Playwright `test` by this module. */
interface XRFixtures {
  /** Emulator page wrapper built around the built-in `page` fixture. */
  xrPage: XREmulatorPage;
  /** Mock agent server instance handed to the test. */
  mockAgent: MockAgentServer;
}
146
+
147
/**
 * Playwright `test` extended with the XR fixtures:
 *  - `mockAgent`: a MockAgentServer started before the test and stopped after it.
 *  - `xrPage`: an XREmulatorPage with the emulator init script already registered.
 */
export const test = base.extend<XRFixtures>({
  // Playwright parses the fixture function's source and rejects a first
  // parameter that is not an object destructuring pattern ("First argument
  // must use the object destructuring pattern"), so a fixture with no
  // dependencies must use `{}` here, not a named placeholder.
  mockAgent: async ({}, use, testInfo) => {
    // Use a unique port per worker to allow parallel test runs
    const port = 31338 + testInfo.workerIndex;
    const server = new MockAgentServer({ port });
    await server.start();
    await use(server);
    await server.stop();
  },

  xrPage: async ({ page }, use) => {
    const xrp = new XREmulatorPage(page);
    await xrp.inject();
    await use(xrp);
  },
});
163
+
164
+ export { expect } from "@playwright/test";
165
+
166
+ // ── Node-side exports ─────────────────────────────────────────────────────
167
+
168
+ export { MockAgentServer } from "./mock-agent.ts";
169
+ export type { EmulatorStats, XRPose } from "./types.ts";
@@ -0,0 +1,51 @@
1
/** Three numeric components: `x`, `y`, `z`. */
export interface Vec3 {
  x: number; y: number; z: number;
}
6
+
7
/** Four numeric components: `x`, `y`, `z`, `w`. */
export interface Quat {
  x: number; y: number; z: number;
  w: number;
}
13
+
14
/** A pose: a `Vec3` position paired with a `Quat` orientation. */
export interface XRPose {
  /** Position component. */
  position: Vec3;
  /** Orientation component. */
  orientation: Quat;
}
18
+
19
/** Snapshot returned by `XREmulatorAPI.getStats()`. */
export interface EmulatorStats {
  // Boolean status flags.
  sessionActive: boolean;
  cameraStreamActive: boolean;
  wsConnected: boolean;
  // Numeric counter.
  framesInjected: number;
}
25
+
26
/**
 * Shape of `window.__XREmulator` — set by emulator.ts, consumed by Playwright
 * via page.evaluate().
 */
export interface XREmulatorAPI {
  /** Apply a full or partial pose. */
  setPose(pose: Partial<XRPose>): void;

  /** Inject one camera frame supplied as a data URL. */
  injectCameraFrame(jpegDataUrl: string): Promise<void>;

  /** Return the current emulator stats. */
  getStats(): EmulatorStats;

  /** Simulate device disconnection (closes WebSocket) */
  simulateDisconnect(): void;

  /** Simulate reconnect after a disconnect */
  simulateReconnect(): void;
}
36
+
37
/** Ready-state names that `getSocketState()` may report. */
type XRSocketState = "CONNECTING" | "OPEN" | "CLOSING" | "CLOSED";

/** Shape of `window.__xrTestHooks`. */
interface XRTestHooks {
  sendAudioChunk(base64: string, sampleRate: number, encoding: string): void;
  getSocketState(): XRSocketState;
  sendPing?(): void;
}

// Augment the browser `Window` type with the two globals used by the fixtures.
declare global {
  interface Window {
    __XREmulator: XREmulatorAPI;
    /** Set by app-xr/src/main.ts in VITE_TEST mode */
    __xrTestHooks: XRTestHooks;
  }
}
@@ -0,0 +1,13 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "strict": true,
7
+ "noEmit": true,
8
+ "allowImportingTsExtensions": true,
9
+ "skipLibCheck": true,
10
+ "lib": ["ES2022", "DOM"]
11
+ },
12
+ "include": ["src/**/*.ts"]
13
+ }
@@ -0,0 +1,25 @@
1
+ import { resolve } from "node:path";
2
+ import { defineConfig } from "vite";
3
+
4
+ // Builds the browser-side emulator as a self-contained IIFE.
5
+ // Output: dist/emulator.js — injected into pages by Playwright via addInitScript.
6
export default defineConfig({
  build: {
    outDir: "dist",
    sourcemap: true,
    // keep readable for debugging
    minify: false,
    lib: {
      name: "XREmulator",
      entry: resolve(__dirname, "src/emulator.ts"),
      formats: ["iife"],
      fileName: "emulator",
    },
    rollupOptions: {
      output: {
        // Force the output filename to emulator.js (not emulator.iife.js)
        entryFileNames: "[name].js",
        inlineDynamicImports: true,
      },
    },
  },
});
@@ -0,0 +1,129 @@
1
+ import type { IAgentRuntime } from "@elizaos/core";
2
+ import { ModelType } from "@elizaos/core";
3
+ import { beforeEach, describe, expect, it, vi } from "vitest";
4
+ import { AudioPipeline } from "../services/audio-pipeline.ts";
5
+
6
/**
 * Build a minimal runtime stub whose only member is a mocked `useModel`
 * that resolves to `transcriptResult`.
 */
function makeRuntime(transcriptResult = "hello world"): IAgentRuntime {
  const useModel = vi.fn().mockResolvedValue(transcriptResult);
  const stub = { useModel };
  return stub as unknown as IAgentRuntime;
}
11
+
12
describe("AudioPipeline", () => {
  let onTranscript: ReturnType<typeof vi.fn>;
  let runtime: IAgentRuntime;
  let pipeline: AudioPipeline;

  /** Build a 16 kHz webm-opus audio header stamped with `ts`. */
  const opusHeader = (ts = 0) => ({
    type: "audio" as const,
    ts,
    sampleRate: 16000,
    encoding: "webm-opus" as const,
  });

  beforeEach(() => {
    onTranscript = vi.fn().mockResolvedValue(undefined);
    runtime = makeRuntime("test transcript");
    pipeline = new AudioPipeline(
      runtime,
      onTranscript as (id: string, text: string) => Promise<void>,
    );
  });

  it("calls onTranscript after flush with enough audio", async () => {
    const chunk = Buffer.alloc(1024, 0x55);

    pipeline.push("conn1", opusHeader(), chunk);
    await pipeline.flush("conn1");

    expect(runtime.useModel).toHaveBeenCalledWith(
      ModelType.TRANSCRIPTION,
      expect.any(Buffer),
    );
    expect(onTranscript).toHaveBeenCalledWith("conn1", "test transcript");
  });

  it("does not call onTranscript for tiny chunks", async () => {
    pipeline.push("conn1", opusHeader(), Buffer.alloc(64));
    await pipeline.flush("conn1");

    expect(runtime.useModel).not.toHaveBeenCalled();
    expect(onTranscript).not.toHaveBeenCalled();
  });

  it("ignores empty transcription results", async () => {
    (runtime.useModel as ReturnType<typeof vi.fn>).mockResolvedValue(" ");
    pipeline.push("conn1", opusHeader(), Buffer.alloc(1024));
    await pipeline.flush("conn1");

    expect(onTranscript).not.toHaveBeenCalled();
  });

  it("clears pending state on clear()", async () => {
    // Same payload as the "enough audio" test above, so without clear() the
    // flush below would reach the model. Flushing after clear() is what makes
    // the assertions meaningful; without it they pass trivially.
    pipeline.push("conn1", opusHeader(), Buffer.alloc(1024, 0x55));
    pipeline.clear("conn1");
    await pipeline.flush("conn1");

    expect(runtime.useModel).not.toHaveBeenCalled();
    expect(onTranscript).not.toHaveBeenCalled();
  });

  it("wraps pcm-f32 chunks with a WAV header before transcription", async () => {
    // pcm-f32: 1024 float32 samples = 4096 bytes of raw audio (> 512-byte floor)
    const float32Samples = new Float32Array(1024).fill(0.1);
    const chunk = Buffer.from(float32Samples.buffer);

    const header = {
      type: "audio" as const,
      ts: 0,
      sampleRate: 44100,
      encoding: "pcm-f32" as const,
    };
    pipeline.push("conn1", header, chunk);
    await pipeline.flush("conn1");

    const callArg = (runtime.useModel as ReturnType<typeof vi.fn>).mock
      .calls[0]?.[1] as Buffer;

    // Fail with a clear message if the model was never called, instead of a
    // TypeError on `undefined.subarray`.
    expect(Buffer.isBuffer(callArg)).toBe(true);

    // WAV header starts with "RIFF"
    expect(callArg.subarray(0, 4).toString("ascii")).toBe("RIFF");
    // IEEE_FLOAT format tag = 3 at offset 20 (little-endian uint16)
    expect(callArg.readUInt16LE(20)).toBe(3);
    // sample rate at offset 24
    expect(callArg.readUInt32LE(24)).toBe(44100);
    // data chunk starts at offset 44 — matches original pcm bytes
    expect(callArg.subarray(44)).toEqual(chunk);
  });

  it("auto-flushes after FLUSH_AFTER_MS duration", async () => {
    pipeline.push("conn1", opusHeader(), Buffer.alloc(1024));
    // Advance ts by 2100ms to trigger auto-flush
    pipeline.push("conn1", opusHeader(2100), Buffer.alloc(1024));

    // flush was called internally; useModel should have been called
    await new Promise((r) => setTimeout(r, 10));
    expect(runtime.useModel).toHaveBeenCalled();
  });
});
@@ -0,0 +1,53 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { decodeBinaryFrame, encodeBinaryFrame } from "../protocol.ts";
3
+
4
describe("binary frame codec", () => {
  /** Encode a header + payload into a frame and decode it straight back. */
  const roundTrip = (
    header: Parameters<typeof encodeBinaryFrame>[0],
    payload: Buffer,
  ) => decodeBinaryFrame(encodeBinaryFrame(header, payload));

  it("round-trips an audio frame", () => {
    const audioHeader = {
      type: "audio" as const,
      ts: 1234567890,
      sampleRate: 48000,
      encoding: "webm-opus" as const,
    };
    const bytes = Buffer.from([1, 2, 3, 4, 5]);

    const result = roundTrip(audioHeader, bytes);

    expect(result.header).toEqual(audioHeader);
    expect(result.payload).toEqual(bytes);
  });

  it("round-trips a frame with pose data", () => {
    const frameHeader = {
      type: "frame" as const,
      ts: 999,
      width: 1280,
      height: 720,
      format: "jpeg" as const,
      pose: {
        position: { x: 1, y: 2, z: 3 },
        orientation: { x: 0, y: 0, z: 0, w: 1 },
      },
    };
    const bytes = Buffer.alloc(100, 0xab);

    const result = roundTrip(frameHeader, bytes);

    expect(result.header).toEqual(frameHeader);
    expect(result.payload).toEqual(bytes);
  });

  it("handles zero-length payload", () => {
    const ttsHeader = {
      type: "tts_audio" as const,
      sampleRate: 24000,
      channels: 1,
      encoding: "mp3" as const,
    };

    const result = roundTrip(ttsHeader, Buffer.alloc(0));

    expect(result.header).toEqual(ttsHeader);
    expect(result.payload.length).toBe(0);
  });
});