xrblocks 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,18 +2,22 @@
 
 [![NPM Package](https://img.shields.io/npm/v/xrblocks)](https://www.npmjs.com/package/xrblocks)
 [![Build Size](https://badgen.net/bundlephobia/minzip/xrblocks)](https://bundlephobia.com/result?p=xrblocks)
-[![NPM Downloads](https://img.shields.io/npm/dw/xrblocks)](https://www.npmtrends.com/xrblocks)
+![jsDelivr hits (GitHub)](https://img.shields.io/jsdelivr/gh/hw/google/xrblocks)
 
 #### JavaScript library for rapid XR and AI prototyping
 
+[Site](https://xrblocks.github.io/) —
 [Manual](https://xrblocks.github.io/docs/) —
-[Templates](https://xrblocks.github.io/docs/templates/Basic) —
-[Samples](https://xrblocks.github.io/docs/samples/ModelViewer)
+[Templates](https://xrblocks.github.io/docs/templates/Basic/) —
+[Demos](https://xrblocks.github.io/docs/samples/ModelViewer/) —
+[YouTube](https://www.youtube.com/watch?v=75QJHTsAoB8) —
+[arXiv](https://arxiv.org/abs/2509.25504) —
+[Blog](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
 
 <p align="center">
-  <a href="https://xrblocks.github.io/docs/samples/Ballpit" target="_blank"><img width="32.3%" src="assets/ballpit-demo.webp" alt="Ballpit" /></a>
-  <a href="https://xrblocks.github.io/docs/samples/XR-Emoji" target="_blank"><img width="32.3%" src="assets/xremoji-demo.webp" alt="XR Emoji" /></a>
-  <a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers" target="_blank"><img width="32.3%" src="assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
+  <a href="https://xrblocks.github.io/docs/samples/Ballpit/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/ballpit-demo.webp" alt="Ballpit" /></a>
+  <a href="https://xrblocks.github.io/docs/samples/XR-Emoji/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/xremoji-demo.webp" alt="XR Emoji" /></a>
+  <a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
 </p>
 
 ### Description
@@ -25,21 +29,27 @@ powerful desktop simulator for development. The framework emphasizes a
 user-centric, developer-friendly SDK designed to simplify the creation of
 immersive applications with features like:
 
-- **Hand Tracking & Gestures:** Access advanced hand tracking, custom
-  gestures with TensorFlow Lite / LiteRT models, and interaction events.
-- **World Understanding:** Present samples with depth sensing, geometry-aware
-  physics, and object recognition with Gemini in both XR and desktop.
-- **AI Integration:** Seamlessly connect to Gemini for multimodal
-  understanding and live conversational experiences.
-- **Cross-Platform:** Write once and deploy to both XR devices and desktop
-  browsers.
+- **Hand Tracking & Gestures:** Access advanced hand tracking, custom
+  gestures with TensorFlow Lite / PyTorch models, and interaction events.
+- **World Understanding:** Present samples with depth sensing, geometry-aware
+  physics, and object recognition with Gemini in both XR and desktop simulator.
+- **AI Integration:** Seamlessly connect to Gemini for multimodal
+  understanding and live conversational experiences.
+- **Cross-Platform:** Write once and deploy to both XR devices and desktop
+  Chrome browsers.
+
+We welcome all contributors to foster an AI + XR community! Read our
+[blog post](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
+and [white paper](https://arxiv.org/abs/2509.25504) for a visionary roadmap.
 
 ### Usage
 
 XR Blocks can be imported directly into a webpage using an importmap. This code
 creates a basic XR scene containing a cylinder. When you view the scene, you can
 pinch your fingers (in XR) or click (in the desktop simulator) to change the
-cylinder's color.
+cylinder's color. Check out
+[this live demo](https://xrblocks.github.io/docs/templates/Basic/) with simple
+code below:
 
 ```html
 <!DOCTYPE html>
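The README's full HTML example continues beyond this hunk. For orientation, the module side of such a page might look roughly like this — a sketch only: the `xb.add`/`xb.init` entry points, the `onSelectStart` handler name, and the assumption that `xb.Script` is a `THREE.Object3D` subclass are illustrative, not the package's verbatim sample code:

```javascript
import * as THREE from 'three';
import * as xb from 'xrblocks';

class CylinderScene extends xb.Script {
  init() {
    // Place a cylinder in front of the user.
    this.cylinder = new THREE.Mesh(
        new THREE.CylinderGeometry(0.1, 0.1, 0.3),
        new THREE.MeshPhongMaterial({color: 0x00ff00}));
    this.cylinder.position.set(0, 1.2, -1.0);
    this.add(this.cylinder);  // assumes Script extends THREE.Object3D
  }
  onSelectStart() {  // hypothetical handler name; see the Basic template
    // Pinch (in XR) or click (in the simulator) recolors the cylinder.
    this.cylinder.material.color.setHex(Math.floor(Math.random() * 0xffffff));
  }
}

xb.add(new CylinderScene());
xb.init();
```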
@@ -132,8 +142,9 @@ npm ci
 npm run build
 ```
 
-This is not an officially supported Google product. This project is not eligible
-for the
+This is not an officially supported Google product, but will be actively
+maintained by the XR Labs team and external collaborators. This project is not
+eligible for the
 [Google Open Source Software Vulnerability Rewards Program](https://bughunters.google.com/open-source-security).
 
 ### User Data & Permissions
@@ -183,11 +194,85 @@ To remove XR Blocks from your code, simply remove the lines from your `<script
 type="importmap">` tag in HTML, or `import * from xrblocks` in JavaScript, or
 use `npm uninstall xrblocks` from your project directory.
 
+### References
+
+If you find XR Blocks inspiring or useful in your research, please reference it
+as:
+
+```bibtex
+@misc{Li2025XR,
+  title={{XR Blocks: Accelerating Human-centered AI + XR Innovation}},
+  author={Li, David and Numan, Nels and Qian, Xun and Chen, Yanhe and Zhou, Zhongyi and Alekseev, Evgenii and Lee, Geonsun and Cooper, Alex and Xia, Min and Chung, Scott and Nelson, Jeremy and Yuan, Xiuxiu and Dias, Jolica and Bettridge, Tim and Hersh, Benjamin and Huynh, Michelle and Piascik, Konrad and Cabello, Ricardo and Kim, David and Du, Ruofei},
+  year={2025},
+  eprint={2509.25504},
+  archivePrefix={arXiv},
+  primaryClass={cs.HC},
+  url={https://arxiv.org/abs/2509.25504},
+}
+```
+
+#### Key Works Built with XR Blocks
+
+These references are built with XR Blocks:
+
+```bibtex
+@inproceedings{Lee2025Sensible,
+  title = {{Sensible Agent: A Framework for Unobtrusive Interaction with Proactive AR Agent}},
+  author = {Lee, Geonsun and Xia, Min and Numan, Nels and Qian, Xun and Li, David and Chen, Yanhe and Kulshrestha, Achin and Chatterjee, Ishan and Zhang, Yinda and Manocha, Dinesh and Kim, David and Du, Ruofei},
+  booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
+  year = {2025},
+  publisher = {ACM},
+  numpages = {22},
+  series = {UIST},
+  doi = {10.1145/3746059.3747748},
+}
+```
+
+#### Inspiring Related Works
+
+We call for contributors to integrate our prior art into XR Blocks to enhance
+reproducibility and knowledge sharing:
+
+E.g., integrating models from https://visualblocks.withgoogle.com to XR Blocks:
+
+```bibtex
+@inproceedings{Du2023Rapsai,
+  title = {{Rapsai: Accelerating Machine Learning Prototyping of Multimedia Applications Through Visual Programming}},
+  author = {Du, Ruofei and Li, Na and Jin, Jing and Carney, Michelle and Miles, Scott and Kleiner, Maria and Yuan, Xiuxiu and Zhang, Yinda and Kulkarni, Anuva and Liu, Xingyu Bruce and Sabie, Ahmed and Orts-Escolano, Sergio and Kar, Abhishek and Yu, Ping and Iyengar, Ram and Kowdle, Adarsh and Olwal, Alex},
+  booktitle = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
+  year = {2023},
+  publisher = {ACM},
+  month = {Apr.},
+  day = {22-29},
+  number = {125},
+  pages = {1--23},
+  series = {CHI},
+  doi = {10.1145/3544548.3581338},
+}
+```
+
+Extending XR Blocks to XR communication:
+
+```bibtex
+@inproceedings{Hu2025DialogLab,
+  title = {{DialogLab: Authoring, Simulating, and Testing Dynamic Group Conversations in Hybrid Human-AI Conversations}},
+  author = {Hu, Erzhen and Chen, Yanhe and Li, Mingyi and Phadnis, Vrushank and Xu, Pingmei and Qian, Xun and Olwal, Alex and Kim, David and Heo, Seongkook and Du, Ruofei},
+  booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
+  year = {2025},
+  number = {210},
+  publisher = {ACM},
+  pages = {1--20},
+  series = {UIST},
+  doi = {10.1145/3746059.3747696},
+}
+```
+
 ### Terms of Service
 
-- Please follow
-  [Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
-  using this SDK.
+- Please follow
+  [Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
+  using this SDK.
 
-- When using AI features in this SDK, please follow
-  [Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
+- When using AI features in this SDK, please follow
+  [Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
@@ -19,23 +19,26 @@ export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
     processorNode: AudioWorkletNode | null;
     isAIRunning: boolean;
     audioQueue: AudioBuffer[];
-    isPlayingAudio: boolean;
+    nextAudioStartTime: number;
     private screenshotInterval?;
     currentInputText: string;
     currentOutputText: string;
+    tools: xb.Tool[];
     constructor();
     init(): void;
-    startGeminiLive(): Promise<void>;
+    startGeminiLive({ liveParams }?: {
+        liveParams?: xb.GeminiStartLiveSessionParams;
+    }): Promise<void>;
     stopGeminiLive(): Promise<void>;
     setupAudioCapture(): Promise<void>;
-    startLiveAI(): Promise<void>;
+    startLiveAI(params: xb.GeminiStartLiveSessionParams): Promise<void>;
     startScreenshotCapture(intervalMs?: number): void;
     captureAndSendScreenshot(): void;
     sendAudioData(audioBuffer: ArrayBuffer): void;
     sendVideoFrame(base64Image: string): void;
     initializeAudioContext(): Promise<void>;
     playAudioChunk(audioData: string): Promise<void>;
-    playNextAudioBuffer(): void;
+    scheduleAudioBuffers(): void;
     cleanup(): void;
     handleAIMessage(message: GoogleGenAITypes.LiveServerMessage): void;
     arrayBufferToBase64(buffer: ArrayBuffer): string;
@@ -12,23 +12,29 @@ class GeminiManager extends xb.Script {
         this.isAIRunning = false;
         // Audio playback setup
         this.audioQueue = [];
-        this.isPlayingAudio = false;
+        this.nextAudioStartTime = 0;
         // Transcription state
         this.currentInputText = '';
         this.currentOutputText = '';
+        this.tools = [];
     }
     init() {
         this.xrDeviceCamera = xb.core.deviceCamera;
         this.ai = xb.core.ai;
     }
-    async startGeminiLive() {
+    async startGeminiLive({ liveParams } = {}) {
         if (this.isAIRunning || !this.ai) {
             console.warn('AI already running or not available');
             return;
         }
+        liveParams = liveParams || {};
+        liveParams.tools = liveParams.tools || [];
+        for (const tool of this.tools) {
+            liveParams.tools.push(tool.toJSON());
+        }
         try {
             await this.setupAudioCapture();
-            await this.startLiveAI();
+            await this.startLiveAI(liveParams);
             this.startScreenshotCapture();
             this.isAIRunning = true;
         }
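For context, the new tool plumbing can be driven like so — a minimal sketch; the `set_timer` tool, its `description` field, and the `startTimer` helper are illustrative, while `tools`, `toJSON()`, and `startGeminiLive({ liveParams })` come from the declarations above:

```javascript
// Sketch: register a tool, then start the live session. Each tool.toJSON()
// is merged into liveParams.tools by startGeminiLive, per the change above.
const gemini = new GeminiManager();
gemini.tools.push(new xb.Tool({
  name: 'set_timer',                        // illustrative tool
  description: 'Starts a countdown timer.',
  parameters: {type: 'OBJECT', properties: {seconds: {type: 'NUMBER'}}},
  onTriggered: async ({seconds}) => startTimer(seconds),  // app-defined helper
}));

await gemini.startGeminiLive({
  liveParams: {systemInstruction: 'You are a concise XR assistant.'},
});
```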
@@ -82,7 +88,7 @@ class GeminiManager extends xb.Script {
         this.sourceNode.connect(this.processorNode);
         this.processorNode.connect(this.audioContext.destination);
     }
-    async startLiveAI() {
+    async startLiveAI(params) {
         return new Promise((resolve, reject) => {
             this.ai.setLiveCallbacks({
                 onopen: () => {
@@ -99,7 +105,7 @@ class GeminiManager extends xb.Script {
                     this.isAIRunning = false;
                 }
             });
-            this.ai.startLiveSession().catch(reject);
+            this.ai.startLiveSession(params).catch(reject);
         });
     }
     startScreenshotCapture(intervalMs = 1000) {
@@ -174,28 +180,28 @@ class GeminiManager extends xb.Script {
                 channelData[i] = int16View[i] / 32768.0;
             }
             this.audioQueue.push(audioBuffer);
-            if (!this.isPlayingAudio) {
-                this.playNextAudioBuffer();
-            }
+            this.scheduleAudioBuffers();
         }
         catch (error) {
             console.error('Error playing audio chunk:', error);
         }
     }
-    playNextAudioBuffer() {
-        if (this.audioQueue.length === 0) {
-            this.isPlayingAudio = false;
-            return;
+    scheduleAudioBuffers() {
+        const SCHEDULE_AHEAD_TIME = 0.2;
+        while (this.audioQueue.length > 0 &&
+            this.nextAudioStartTime <=
+                this.audioContext.currentTime + SCHEDULE_AHEAD_TIME) {
+            const audioBuffer = this.audioQueue.shift();
+            const source = this.audioContext.createBufferSource();
+            source.buffer = audioBuffer;
+            source.connect(this.audioContext.destination);
+            source.onended = () => {
+                this.scheduleAudioBuffers();
+            };
+            const startTime = Math.max(this.nextAudioStartTime, this.audioContext.currentTime);
+            source.start(startTime);
+            this.nextAudioStartTime = startTime + audioBuffer.duration;
         }
-        this.isPlayingAudio = true;
-        const audioBuffer = this.audioQueue.shift();
-        const source = this.audioContext.createBufferSource();
-        source.buffer = audioBuffer;
-        source.connect(this.audioContext.destination);
-        source.onended = () => {
-            this.playNextAudioBuffer();
-        };
-        source.start();
     }
     cleanup() {
         if (this.screenshotInterval) {
@@ -204,7 +210,6 @@ class GeminiManager extends xb.Script {
         }
         // Clear audio queue and stop playback
         this.audioQueue = [];
-        this.isPlayingAudio = false;
         if (this.processorNode) {
             this.processorNode.disconnect();
             this.processorNode = null;
@@ -226,6 +231,22 @@ class GeminiManager extends xb.Script {
         if (message.data) {
             this.playAudioChunk(message.data);
         }
+        for (const functionCall of message.toolCall?.functionCalls ?? []) {
+            const tool = this.tools.find(tool => tool.name == functionCall.name);
+            if (tool) {
+                const exec = tool.execute(functionCall.args);
+                exec.then(result => {
+                    this.ai.sendToolResponse({
+                        functionResponses: {
+                            id: functionCall.id,
+                            name: functionCall.name,
+                            response: { 'output': result }
+                        }
+                    });
+                })
+                    .catch((error) => console.error('Tool error:', error));
+            }
+        }
         if (message.serverContent) {
             if (message.serverContent.inputTranscription) {
                 const text = message.serverContent.inputTranscription.text;
@@ -2,6 +2,19 @@ import { AI } from '../ai/AI';
 import { Context } from './Context';
 import { Memory } from './Memory';
 import { Tool } from './Tool';
+/**
+ * Lifecycle callbacks for agent events.
+ */
+export interface AgentLifecycleCallbacks {
+    /** Called when a session starts */
+    onSessionStart?: () => void | Promise<void>;
+    /** Called when a session ends */
+    onSessionEnd?: () => void | Promise<void>;
+    /** Called after a tool is executed */
+    onToolExecuted?: (toolName: string, result: unknown) => void;
+    /** Called when an error occurs */
+    onError?: (error: Error) => void;
+}
 /**
  * An agent that can use an AI to reason and execute tools.
  */
@@ -11,7 +24,9 @@ export declare class Agent {
     tools: Tool[];
     memory: Memory;
     contextBuilder: Context;
-    constructor(ai: AI, tools?: Tool[], instruction?: string);
+    lifecycleCallbacks?: AgentLifecycleCallbacks;
+    isSessionActive: boolean;
+    constructor(ai: AI, tools?: Tool[], instruction?: string, callbacks?: AgentLifecycleCallbacks);
     /**
      * Starts the agent's reasoning loop with an initial prompt.
      * @param prompt - The initial prompt from the user.
@@ -25,4 +40,13 @@ export declare class Agent {
      */
     private run;
     findTool(name: string): Tool | undefined;
+    /**
+     * Get the current session state.
+     * @returns Object containing session information
+     */
+    getSessionState(): {
+        isActive: boolean;
+        toolCount: number;
+        memorySize: number;
+    };
 }
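For context, the expanded constructor and session-state accessor can be used like this — a sketch; the `get_time` tool and log bodies are illustrative, and `xb.Agent`/`xb.Tool` assume the agents module is re-exported on the `xb` namespace:

```javascript
const agent = new xb.Agent(
    xb.core.ai,
    [new xb.Tool({
      name: 'get_time',  // illustrative tool
      description: 'Returns the current wall-clock time.',
      onTriggered: () => new Date().toISOString(),
    })],
    'You are a concise assistant.',
    {
      // New in 0.2.0: lifecycle callbacks from AgentLifecycleCallbacks.
      onSessionStart: () => console.log('session started'),
      onToolExecuted: (toolName, result) => console.log(toolName, result),
      onError: (error) => console.error(error),
    });

console.log(agent.getSessionState());  // {isActive, toolCount, memorySize}
```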
@@ -2,11 +2,127 @@ import type * as GoogleGenAITypes from '@google/genai';
 import * as THREE from 'three';
 import { AI } from '../ai/AI';
 import { CoreSound } from '../sound/CoreSound';
-import { Agent } from './Agent';
+import { Agent, AgentLifecycleCallbacks } from './Agent';
+import { ToolResult } from './Tool';
+/**
+ * State information for a live session.
+ */
+export interface LiveSessionState {
+    /** Whether the session is currently active */
+    isActive: boolean;
+    /** Timestamp when session started */
+    startTime?: number;
+    /** Timestamp when session ended */
+    endTime?: number;
+    /** Number of messages received */
+    messageCount: number;
+    /** Number of tool calls executed */
+    toolCallCount: number;
+    /** Last error message if any */
+    lastError?: string;
+}
+/**
+ * Skybox Agent for generating 360-degree equirectangular backgrounds through conversation.
+ *
+ * @example Basic usage
+ * ```typescript
+ * // 1. Enable audio (required for live sessions)
+ * await xb.core.sound.enableAudio();
+ *
+ * // 2. Create agent
+ * const agent = new xb.SkyboxAgent(xb.core.ai, xb.core.sound, xb.core.scene);
+ *
+ * // 3. Start session
+ * await agent.startLiveSession({
+ *     onopen: () => console.log('Session ready'),
+ *     onmessage: (msg) => handleMessage(msg),
+ *     onclose: () => console.log('Session closed')
+ * });
+ *
+ * // 4. Clean up when done
+ * await agent.stopLiveSession();
+ * xb.core.sound.disableAudio();
+ * ```
+ *
+ * @example With lifecycle callbacks
+ * ```typescript
+ * const agent = new xb.SkyboxAgent(
+ *     xb.core.ai,
+ *     xb.core.sound,
+ *     xb.core.scene,
+ *     {
+ *         onSessionStart: () => updateUI('active'),
+ *         onSessionEnd: () => updateUI('inactive'),
+ *         onError: (error) => showError(error)
+ *     }
+ * );
+ * ```
+ *
+ * @remarks
+ * - Audio must be enabled BEFORE starting live session using `xb.core.sound.enableAudio()`
+ * - Users are responsible for managing audio lifecycle
+ * - Always call `stopLiveSession()` before disabling audio
+ * - Session state can be checked using `getSessionState()` and `getLiveSessionState()`
+ */
 export declare class SkyboxAgent extends Agent {
     private sound;
-    constructor(ai: AI, sound: CoreSound, scene: THREE.Scene);
-    startLiveSession(callbacks: GoogleGenAITypes.LiveCallbacks): Promise<void>;
+    private sessionState;
+    constructor(ai: AI, sound: CoreSound, scene: THREE.Scene, callbacks?: AgentLifecycleCallbacks);
+    /**
+     * Starts a live AI session for real-time conversation.
+     *
+     * @param callbacks - Optional callbacks for session events. Can also be set using ai.setLiveCallbacks()
+     * @throws If AI model is not initialized or live session is not available
+     *
+     * @remarks
+     * Audio must be enabled separately using `xb.core.sound.enableAudio()` before starting the session.
+     * This gives users control over when microphone permissions are requested.
+     */
+    startLiveSession(callbacks?: GoogleGenAITypes.LiveCallbacks): Promise<void>;
+    /**
+     * Stops the live AI session.
+     *
+     * @remarks
+     * Audio must be disabled separately using `xb.core.sound.disableAudio()` after stopping the session.
+     */
     stopLiveSession(): Promise<void>;
+    /**
+     * Wraps user callbacks to track session state and trigger lifecycle events.
+     * @param callbacks - The callbacks to wrap.
+     * @returns The wrapped callbacks.
+     */
+    private wrapCallbacks;
+    /**
+     * Sends tool execution results back to the AI.
+     *
+     * @param response - The tool response containing function results
+     */
     sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): Promise<void>;
+    /**
+     * Validates that a tool response has the correct format.
+     * @param response - The tool response to validate.
+     * @returns True if the response is valid, false otherwise.
+     */
+    private validateToolResponse;
+    /**
+     * Helper to create a properly formatted tool response from a ToolResult.
+     *
+     * @param id - The function call ID
+     * @param name - The function name
+     * @param result - The ToolResult from tool execution
+     * @returns A properly formatted FunctionResponse
+     */
+    static createToolResponse(id: string, name: string, result: ToolResult): GoogleGenAITypes.FunctionResponse;
+    /**
+     * Gets the current live session state.
+     *
+     * @returns Read-only session state information
+     */
+    getLiveSessionState(): Readonly<LiveSessionState>;
+    /**
+     * Gets the duration of the session in milliseconds.
+     *
+     * @returns Duration in ms, or null if session hasn't started
+     */
+    getSessionDuration(): number | null;
 }
@@ -3,6 +3,20 @@ export interface ToolCall {
     name: string;
     args: unknown;
 }
+/**
+ * Standardized result type for tool execution.
+ * @typeParam T - The type of data returned on success.
+ */
+export interface ToolResult<T = unknown> {
+    /** Whether the tool execution succeeded */
+    success: boolean;
+    /** The result data if successful */
+    data?: T;
+    /** Error message if execution failed */
+    error?: string;
+    /** Additional metadata about the execution */
+    metadata?: Record<string, unknown>;
+}
 export type ToolSchema = Omit<GoogleGenAITypes.Schema, 'type' | 'properties'> & {
     properties?: Record<string, ToolSchema>;
     type?: keyof typeof GoogleGenAITypes.Type;
@@ -15,7 +29,7 @@ export type ToolOptions = {
     /** The parameters of the tool */
     parameters?: ToolSchema;
     /** A callback to execute when the tool is triggered */
-    onTriggered?: (args: unknown) => unknown;
+    onTriggered?: (args: unknown) => unknown | Promise<unknown>;
 };
 /**
  * A base class for tools that the agent can use.
@@ -30,11 +44,11 @@ export declare class Tool {
      */
     constructor(options: ToolOptions);
     /**
-     * Executes the tool's action.
+     * Executes the tool's action with standardized error handling.
      * @param args - The arguments for the tool.
-     * @returns The result of the tool's action.
+     * @returns A promise that resolves with a ToolResult containing success/error information.
      */
-    execute(args: unknown): unknown;
+    execute(args: unknown): Promise<ToolResult>;
     /**
      * Returns a JSON representation of the tool.
      * @returns A valid FunctionDeclaration object.
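With `execute` now resolving to a `ToolResult`, callers branch on the standardized result instead of wrapping the call in try/catch. A sketch, with an illustrative `echo` tool:

```javascript
const echo = new xb.Tool({
  name: 'echo',
  description: 'Echoes its input back.',
  parameters: {type: 'OBJECT', properties: {text: {type: 'STRING'}}},
  onTriggered: async ({text}) => text,  // sync or async both allowed now
});

const result = await echo.execute({text: 'hello'});
if (result.success) {
  console.log(result.data);     // 'hello'
} else {
  console.error(result.error);  // failures surface here instead of throwing
}
```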
@@ -1,4 +1,5 @@
 export * from './Agent';
 export * from './Memory';
+export * from './SkyboxAgent';
 export * from './Tool';
 export * from './tools/index';
@@ -1,6 +1,6 @@
 import * as THREE from 'three';
 import { AI } from '../../ai/AI';
-import { Tool } from '../Tool';
+import { Tool, ToolResult } from '../Tool';
 /**
  * A tool that generates a 360-degree equirectangular skybox image
  * based on a given prompt using an AI service.
@@ -12,9 +12,9 @@ export declare class GenerateSkyboxTool extends Tool {
     /**
      * Executes the tool's action.
      * @param args - The prompt to use to generate the skybox.
-     * @returns A promise that resolves with the result of the skybox generation.
+     * @returns A promise that resolves with a ToolResult containing success/error information.
      */
     execute(args: {
         prompt: string;
-    }): Promise<string>;
+    }): Promise<ToolResult<string>>;
 }
@@ -1,13 +1,12 @@
-import { Tool } from '../Tool';
+import { Tool, ToolResult } from '../Tool';
 export interface GetWeatherArgs {
     latitude: number;
     longitude: number;
 }
-export type GetWeatherToolResults = {
-    error?: string;
-    temperature?: number;
-    weathercode?: number;
-};
+export interface WeatherData {
+    temperature: number;
+    weathercode: number;
+}
 /**
  * A tool that gets the current weather for a specific location.
  */
@@ -16,7 +15,7 @@ export declare class GetWeatherTool extends Tool {
     /**
      * Executes the tool's action.
     * @param args - The arguments for the tool.
-     * @returns A promise that resolves with the weather information.
+     * @returns A promise that resolves with a ToolResult containing weather information.
     */
-    execute(args: GetWeatherArgs): Promise<GetWeatherToolResults>;
+    execute(args: GetWeatherArgs): Promise<ToolResult<WeatherData>>;
 }
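The practical effect of this change, sketched below — the zero-argument constructor and the coordinate values are assumptions for illustration:

```javascript
const weather = new GetWeatherTool();
const result = await weather.execute({latitude: 37.42, longitude: -122.08});
if (result.success && result.data) {
  const {temperature, weathercode} = result.data;  // typed WeatherData
  console.log(`Current temperature: ${temperature} (code ${weathercode})`);
} else {
  // Errors moved off the data type and onto ToolResult.error.
  console.error('Weather lookup failed:', result.error);
}
```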
package/build/ai/AI.d.ts CHANGED
@@ -2,7 +2,7 @@ import type * as GoogleGenAITypes from '@google/genai';
 import { Script } from '../core/Script';
 import { AIOptions, GeminiOptions, OpenAIOptions } from './AIOptions';
 import { GeminiResponse } from './AITypes';
-import { Gemini } from './Gemini';
+import { Gemini, GeminiStartLiveSessionParams } from './Gemini';
 import { OpenAI } from './OpenAI';
 export type ModelClass = Gemini | OpenAI;
 export type ModelOptions = GeminiOptions | OpenAIOptions;
@@ -60,7 +60,7 @@ export declare class AI extends Script {
     query(input: {
         prompt: string;
     }, tools?: never[]): Promise<GeminiResponse | string | null>;
-    startLiveSession(config?: {}): Promise<GoogleGenAITypes.Session>;
+    startLiveSession(config?: GeminiStartLiveSessionParams): Promise<GoogleGenAITypes.Session>;
     stopLiveSession(): Promise<void>;
     setLiveCallbacks(callbacks: GoogleGenAITypes.LiveCallbacks): Promise<void>;
     sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): void;
@@ -14,6 +14,10 @@ export interface GeminiQueryInput {
     config?: GoogleGenAITypes.LiveConnectConfig;
     data?: GoogleGenAITypes.LiveSendRealtimeInputParameters;
 }
+export type GeminiStartLiveSessionParams = {
+    tools?: GoogleGenAITypes.FunctionDeclaration[];
+    systemInstruction?: GoogleGenAITypes.ContentUnion | string;
+};
 export declare class Gemini extends BaseAIModel {
     protected options: GeminiOptions;
     inited: boolean;
@@ -25,10 +29,7 @@ export declare class Gemini extends BaseAIModel {
     init(): Promise<void>;
     isAvailable(): boolean;
     isLiveAvailable(): false | typeof GoogleGenAITypes.Modality | undefined;
-    startLiveSession(params?: {
-        tools?: GoogleGenAITypes.FunctionDeclaration[];
-        systemInstruction?: GoogleGenAITypes.ContentUnion | string;
-    }): Promise<GoogleGenAITypes.Session>;
+    startLiveSession(params?: GeminiStartLiveSessionParams): Promise<GoogleGenAITypes.Session>;
     stopLiveSession(): Promise<void>;
     setLiveCallbacks(callbacks: GoogleGenAITypes.LiveCallbacks): void;
     sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): void;
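Since the params type is now exported and accepted by `AI.startLiveSession`, a live session can be configured directly — a sketch; the instruction text and `myTool` are placeholders:

```javascript
const session = await xb.core.ai.startLiveSession({
  systemInstruction: 'Answer in one sentence.',
  tools: [myTool.toJSON()],  // FunctionDeclaration[], e.g. from Tool.toJSON()
});
// ... interact with the session ...
await xb.core.ai.stopLiveSession();
```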
@@ -56,6 +56,7 @@ export declare class Core {
     ui: UI;
     /** Manages all (spatial) audio playback. */
     sound: CoreSound;
+    private renderSceneBound;
     /** Manages the desktop XR simulator. */
     simulator: Simulator;
     /** Manages drag-and-drop interactions. */