xrblocks 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +131 -30
  2. package/build/addons/ai/AudioCaptureProcessorCode.d.ts +1 -0
  3. package/build/addons/ai/AudioCaptureProcessorCode.js +27 -0
  4. package/build/addons/ai/GeminiManager.d.ts +11 -4
  5. package/build/addons/ai/GeminiManager.js +80 -34
  6. package/build/addons/objects/SimpleDecalGeometry.js +9 -5
  7. package/build/addons/simulator/instructions/CustomInstruction.js +8 -9
  8. package/build/addons/simulator/instructions/HandsInstructions.js +17 -10
  9. package/build/addons/simulator/instructions/NavigationInstructions.js +10 -9
  10. package/build/addons/simulator/instructions/SimulatorInstructions.js +17 -18
  11. package/build/addons/simulator/instructions/SimulatorInstructionsCard.js +69 -75
  12. package/build/addons/simulator/instructions/SimulatorInstructionsEvents.js +4 -1
  13. package/build/addons/simulator/instructions/UserInstructions.js +18 -15
  14. package/build/addons/simulator/ui/EnterXRButton.js +17 -17
  15. package/build/addons/simulator/ui/GeminiLiveApiKeyInput.js +45 -39
  16. package/build/addons/simulator/ui/HandPosePanel.js +20 -10
  17. package/build/addons/simulator/ui/MicButton.js +23 -18
  18. package/build/addons/simulator/ui/ModeIndicator.js +17 -17
  19. package/build/addons/ui/TextBillboard.js +1 -1
  20. package/build/addons/utils/Palette.js +3 -15
  21. package/build/addons/virtualkeyboard/Keyboard.js +24 -21
  22. package/build/addons/volumes/VolumetricCloud.glsl.js +1 -1
  23. package/build/addons/volumes/VolumetricCloud.js +8 -5
  24. package/build/agent/Agent.d.ts +25 -1
  25. package/build/agent/SkyboxAgent.d.ts +119 -3
  26. package/build/agent/Tool.d.ts +21 -5
  27. package/build/agent/index.d.ts +1 -0
  28. package/build/agent/tools/GenerateSkyboxTool.d.ts +3 -3
  29. package/build/agent/tools/GetWeatherTool.d.ts +7 -8
  30. package/build/ai/AI.d.ts +1 -1
  31. package/build/ai/Gemini.d.ts +1 -4
  32. package/build/camera/XRDeviceCamera.d.ts +1 -1
  33. package/build/core/Core.d.ts +4 -1
  34. package/build/core/Options.d.ts +17 -0
  35. package/build/core/components/ScreenshotSynthesizer.d.ts +6 -4
  36. package/build/core/components/XRTransition.d.ts +1 -1
  37. package/build/depth/Depth.d.ts +5 -3
  38. package/build/depth/DepthMesh.d.ts +8 -1
  39. package/build/depth/DepthTextures.d.ts +6 -4
  40. package/build/depth/occlusion/OcclusionPass.d.ts +6 -5
  41. package/build/input/Hands.d.ts +1 -1
  42. package/build/input/Input.d.ts +1 -1
  43. package/build/input/gestures/GestureEvents.d.ts +23 -0
  44. package/build/input/gestures/GestureRecognition.d.ts +43 -0
  45. package/build/input/gestures/GestureRecognitionOptions.d.ts +43 -0
  46. package/build/input/gestures/GestureTypes.d.ts +16 -0
  47. package/build/input/gestures/providers/HeuristicGestureDetectors.d.ts +2 -0
  48. package/build/simulator/Simulator.d.ts +2 -0
  49. package/build/simulator/SimulatorControls.d.ts +1 -1
  50. package/build/simulator/SimulatorOptions.d.ts +1 -0
  51. package/build/simulator/controlModes/SimulatorControlMode.d.ts +1 -1
  52. package/build/simulator/handPoses/HandPoseJoints.d.ts +2 -2
  53. package/build/simulator/userActions/PinchOnButtonAction.d.ts +2 -2
  54. package/build/simulator/userActions/WalkTowardsPanelAction.d.ts +1 -1
  55. package/build/singletons.d.ts +2 -2
  56. package/build/sound/AudioListener.d.ts +16 -1
  57. package/build/sound/AudioPlayer.d.ts +21 -2
  58. package/build/sound/CoreSound.d.ts +26 -1
  59. package/build/stereo/utils.d.ts +1 -1
  60. package/build/ui/components/IconButton.d.ts +6 -2
  61. package/build/ui/components/MaterialSymbolsView.d.ts +1 -1
  62. package/build/ui/components/ScrollingTroikaTextView.d.ts +1 -1
  63. package/build/ui/components/TextButton.d.ts +0 -1
  64. package/build/ui/interaction/ModelViewer.d.ts +6 -2
  65. package/build/utils/ModelLoader.d.ts +1 -1
  66. package/build/utils/SparkRendererHolder.d.ts +5 -0
  67. package/build/utils/Types.d.ts +2 -2
  68. package/build/video/VideoStream.d.ts +1 -1
  69. package/build/world/World.d.ts +1 -1
  70. package/build/world/objects/ObjectDetector.d.ts +1 -1
  71. package/build/world/planes/PlaneDetector.d.ts +1 -1
  72. package/build/xrblocks.d.ts +3 -0
  73. package/build/xrblocks.js +7268 -5884
  74. package/build/xrblocks.js.map +1 -1
  75. package/build/xrblocks.min.js +1 -1
  76. package/build/xrblocks.min.js.map +1 -1
  77. package/package.json +14 -9
package/README.md CHANGED
@@ -2,44 +2,59 @@
2
2
 
3
3
  [![NPM Package](https://img.shields.io/npm/v/xrblocks)](https://www.npmjs.com/package/xrblocks)
4
4
  [![Build Size](https://badgen.net/bundlephobia/minzip/xrblocks)](https://bundlephobia.com/result?p=xrblocks)
5
- [![NPM Downloads](https://img.shields.io/npm/dw/xrblocks)](https://www.npmtrends.com/xrblocks)
5
+ ![jsDelivr hits (GitHub)](https://img.shields.io/jsdelivr/gh/hw/google/xrblocks)
6
6
 
7
7
  #### JavaScript library for rapid XR and AI prototyping
8
8
 
9
+ [Site](https://xrblocks.github.io/) —
9
10
  [Manual](https://xrblocks.github.io/docs/) —
10
- [Templates](https://xrblocks.github.io/docs/templates/Basic) —
11
- [Samples](https://xrblocks.github.io/docs/samples/ModelViewer)
11
+ [Templates](https://xrblocks.github.io/docs/templates/Basic/) —
12
+ [Demos](https://xrblocks.github.io/docs/samples/ModelViewer/) —
13
+ [YouTube](https://www.youtube.com/watch?v=75QJHTsAoB8) —
14
+ [arXiv](https://arxiv.org/abs/2509.25504) —
15
+ [Blog](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
12
16
 
13
17
  <p align="center">
14
- <a href="https://xrblocks.github.io/docs/samples/Ballpit" target="_blank"><img width="32.3%" src="assets/ballpit-demo.webp" alt="Ballpit" /></a>
15
- <a href="https://xrblocks.github.io/docs/samples/XR-Emoji" target="_blank"><img width="32.3%" src="assets/xremoji-demo.webp" alt="XR Emoji" /></a>
16
- <a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers" target="_blank"><img width="32.3%" src="assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
18
+ <a href="https://xrblocks.github.io/docs/samples/Ballpit/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/ballpit-demo.webp" alt="Ballpit" /></a>
19
+ <a href="https://xrblocks.github.io/docs/samples/XR-Emoji/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/xremoji-demo.webp" alt="XR Emoji" /></a>
20
+ <a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
17
21
  </p>
18
22
 
19
23
  ### Description
20
24
 
21
25
  **XR Blocks** is a lightweight, cross-platform library for rapidly prototyping
22
26
  advanced XR and AI experiences. Built upon [three.js](https://threejs.org), it
23
- targets Chrome v136+ with WebXR support on Android XR and also includes a
24
- powerful desktop simulator for development. The framework emphasizes a
25
- user-centric, developer-friendly SDK designed to simplify the creation of
27
+ targets Chrome v136+ with WebXR support on Android XR (e.g.,
28
+ [Galaxy XR](https://www.samsung.com/us/xr/galaxy-xr/galaxy-xr/)) and also
29
+ includes a powerful desktop simulator for development. The framework emphasizes
30
+ a user-centric, developer-friendly SDK designed to simplify the creation of
26
31
  immersive applications with features like:
27
32
 
28
- - **Hand Tracking & Gestures:** Access advanced hand tracking, custom
29
- gestures with TensorFlow Lite / LiteRT models, and interaction events.
30
- - **World Understanding:** Present samples with depth sensing, geometry-aware
31
- physics, and object recognition with Gemini in both XR and desktop.
32
- - **AI Integration:** Seamlessly connect to Gemini for multimodal
33
- understanding and live conversational experiences.
34
- - **Cross-Platform:** Write once and deploy to both XR devices and desktop
35
- browsers.
33
+ - **Hand Tracking & Gestures:** Access advanced hand tracking, custom
34
+ gestures with TensorFlow Lite / PyTorch models, and interaction events.
35
+ - **Gesture Recognition:** Opt into pinch, open-palm, fist, thumbs-up, point,
36
+ and spread detection with `options.enableGestures()`, tune providers or
37
+ thresholds, and subscribe to `gesturestart`/`gestureupdate`/`gestureend`
38
+ events from the shared subsystem.
39
+ - **World Understanding:** Present samples with depth sensing, geometry-aware
40
+ physics, and object recognition with Gemini in both XR and desktop simulator.
41
+ - **AI Integration:** Seamlessly connect to Gemini for multimodal
42
+ understanding and live conversational experiences.
43
+ - **Cross-Platform:** Write once and deploy to both XR devices and desktop
44
+ Chrome browsers.
45
+
46
+ We welcome all contributors to foster an AI + XR community! Read our
47
+ [blog post](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
48
+ and [white paper](https://arxiv.org/abs/2509.25504) for a visionary roadmap.
36
49
 
37
50
  ### Usage
38
51
 
39
52
  XR Blocks can be imported directly into a webpage using an importmap. This code
40
53
  creates a basic XR scene containing a cylinder. When you view the scene, you can
41
54
  pinch your fingers (in XR) or click (in the desktop simulator) to change the
42
- cylinder's color.
55
+ cylinder's color. Check out
56
+ [this live demo](https://xrblocks.github.io/docs/templates/Basic/) with simple
57
+ code below:
43
58
 
44
59
  ```html
45
60
  <!DOCTYPE html>
@@ -59,8 +74,8 @@ cylinder's color.
59
74
  <script type="importmap">
60
75
  {
61
76
  "imports": {
62
- "three": "https://cdn.jsdelivr.net/npm/three@0.180.0/build/three.module.js",
63
- "three/addons/": "https://cdn.jsdelivr.net/npm/three@0.180.0/examples/jsm/",
77
+ "three": "https://cdn.jsdelivr.net/npm/three@0.181.0/build/three.module.js",
78
+ "three/addons/": "https://cdn.jsdelivr.net/npm/three@0.181.0/examples/jsm/",
64
79
  "xrblocks": "https://cdn.jsdelivr.net/gh/google/xrblocks@build/xrblocks.js",
65
80
  "xrblocks/addons/": "https://cdn.jsdelivr.net/gh/google/xrblocks@build/addons/"
66
81
  }
@@ -69,8 +84,8 @@ cylinder's color.
69
84
  </head>
70
85
  <body>
71
86
  <script type="module">
72
- import * as THREE from "three";
73
- import * as xb from "xrblocks";
87
+ import * as THREE from 'three';
88
+ import * as xb from 'xrblocks';
74
89
 
75
90
  /**
76
91
  * A basic example of XRBlocks to render a cylinder and pinch to change its color.
@@ -107,7 +122,7 @@ cylinder's color.
107
122
  }
108
123
 
109
124
  // When the page content is loaded, add our script and initialize XR Blocks.
110
- document.addEventListener("DOMContentLoaded", function () {
125
+ document.addEventListener('DOMContentLoaded', function () {
111
126
  xb.add(new MainScript());
112
127
  xb.init(new xb.Options());
113
128
  });
@@ -130,10 +145,18 @@ npm ci
130
145
 
131
146
  # Build xrblocks.js.
132
147
  npm run build
148
+
149
+ # After making changes, check ESLint and run Prettier
150
+ npm run lint # ESLint check
151
+ npm run format # Prettier format
133
152
  ```
134
153
 
135
- This is not an officially supported Google product. This project is not eligible
136
- for the
154
+ XR Blocks uses ESLint for linting and Prettier for formatting.
155
+ If coding in VSCode, make sure to install the [ESLint extension](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint) and the [Prettier extension](https://marketplace.visualstudio.com/items?itemName=esbenp.prettier-vscode). Then set Prettier as your default formatter.
156
+
157
+ This is not an officially supported Google product, but it will be actively
158
+ maintained by the XR Labs team and external collaborators. This project is not
159
+ eligible for the
137
160
  [Google Open Source Software Vulnerability Rewards Program](https://bughunters.google.com/open-source-security).
138
161
 
139
162
  ### User Data & Permissions
@@ -183,11 +206,89 @@ To remove XR Blocks from your code, simply remove the lines from your `<script
183
206
  type="importmap">` tag in HTML, or `import * from xrblocks` in JavaScript, or
184
207
  use `npm uninstall xrblocks` from your project directory.
185
208
 
209
+ ### References
210
+
211
+ If you find XR Blocks inspiring or useful in your research, please reference it
212
+ as:
213
+
214
+ ```bibtex
215
+ @misc{Li2025XR,
216
+ title={{XR Blocks: Accelerating Human-centered AI + XR Innovation}},
217
+ author={Li, David and Numan, Nels and Qian, Xun and Chen, Yanhe and Zhou, Zhongyi and Alekseev, Evgenii and Lee, Geonsun and Cooper, Alex and Xia, Min and Chung, Scott and Nelson, Jeremy and Yuan, Xiuxiu and Dias, Jolica and Bettridge, Tim and Hersh, Benjamin and Huynh, Michelle and Piascik, Konrad and Cabello, Ricardo and Kim, David and Du, Ruofei},
218
+ year={2025},
219
+ eprint={2509.25504},
220
+ archivePrefix={arXiv},
221
+ primaryClass={cs.HC},
222
+ url={https://arxiv.org/abs/2509.25504},
223
+ }
224
+ ```
225
+
226
+ #### Key Works Built with XR Blocks
227
+
228
+ These references are built with XR Blocks:
229
+
230
+ ```bibtex
231
+ @inproceedings{Lee2025Sensible,
232
+ title = {{Sensible Agent: A Framework for Unobtrusive Interaction with Proactive AR Agent}},
233
+ author = {Lee, Geonsun and Xia, Min and Numan, Nels and Qian, Xun and Li, David and Chen, Yanhe and Kulshrestha, Achin and Chatterjee, Ishan and Zhang, Yinda and Manocha, Dinesh and Kim, David and Du, Ruofei},
234
+ booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
235
+ year = {2025},
236
+ publisher = {ACM},
237
+ numpages = {22},
238
+ series = {UIST},
239
+ doi = {10.1145/3746059.3747748},
240
+ }
241
+ ```
242
+
243
+ #### Inspiring Related Works
244
+
245
+ We call for contributors to integrate our prior art into XR Blocks to enhance
246
+ reproducibility and knowledge sharing:
247
+
248
+ For example: integrating models from <https://visualblocks.withgoogle.com> and [Transformers.js](https://huggingface.co/docs/transformers.js/en/index)
249
+ into XR Blocks; bringing more
250
+ [depth-based interaction](https://augmentedperception.github.io/depthlab/) to
251
+ XR Blocks; and adding more samples and demos. For large commits, feel free to open
252
+ an issue before working on them so that your work doesn't duplicate others'.
253
+
254
+ ```bibtex
255
+ @inproceedings{Du2023Rapsai,
256
+ title = {{Rapsai: Accelerating Machine Learning Prototyping of Multimedia Applications Through Visual Programming}},
257
+ author = {Du, Ruofei and Li, Na and Jin, Jing and Carney, Michelle and Miles, Scott and Kleiner, Maria and Yuan, Xiuxiu and Zhang, Yinda and Kulkarni, Anuva and Liu, XingyuBruce and Sabie, Ahmed and Orts-Escolano, Sergio and Kar, Abhishek and Yu, Ping and Iyengar, Ram and Kowdle, Adarsh and Olwal, Alex},
258
+ booktitle = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
259
+ year = {2023},
260
+ publisher = {ACM},
261
+ month = {Apr.},
262
+ day = {22-29},
263
+ number = {125},
264
+ pages = {1--23},
265
+ series = {CHI},
266
+ doi = {10.1145/3544548.3581338},
267
+ }
268
+ ```
269
+
270
+ Extending XR Blocks to XR communication:
271
+
272
+ ```bibtex
273
+ @inproceedings{Hu2025DialogLab,
274
+ title = {{DialogLab: Authoring, Simulating, and Testing Dynamic Group Conversations in Hybrid Human-AI Conversations}},
275
+ author = {Hu, Erzhen and Chen, Yanhe and Li, Mingyi and Phadnis, Vrushank and Xu, Pingmei and Qian, Xun and Olwal, Alex and Kim, David and Heo, Seongkook and Du, Ruofei},
276
+ booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
277
+ year = {2025},
278
+ number = {210},
279
+ publisher = {ACM},
280
+ number = {210},
281
+ pages = {1--20},
282
+ series = {UIST},
283
+ doi = {10.1145/3746059.3747696},
284
+ }
285
+ ```
286
+
186
287
  ### Terms of Service
187
288
 
188
- - Please follow
189
- [Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
190
- using this SDK.
289
+ - Please follow
290
+ [Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
291
+ using this SDK.
191
292
 
192
- - When using AI features in this SDK, please follow
193
- [Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
293
+ - When using AI features in this SDK, please follow
294
+ [Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
@@ -0,0 +1 @@
1
+ export declare const AUDIO_CAPTURE_PROCESSOR_CODE = "\n // Audio worklet processor for capturing audio data\n class AudioCaptureProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n }\n\n process(inputs, outputs, parameters) {\n const input = inputs[0];\n\n if (input && input[0]) {\n const inputData = input[0];\n const pcmData = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n pcmData[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));\n }\n this.port.postMessage({type: 'audioData', data: pcmData.buffer});\n }\n\n return true;\n }\n }\n\n registerProcessor('audio-capture-processor', AudioCaptureProcessor);\n";
@@ -0,0 +1,27 @@
1
+ const AUDIO_CAPTURE_PROCESSOR_CODE = `
2
+ // Audio worklet processor for capturing audio data
3
+ class AudioCaptureProcessor extends AudioWorkletProcessor {
4
+ constructor() {
5
+ super();
6
+ }
7
+
8
+ process(inputs, outputs, parameters) {
9
+ const input = inputs[0];
10
+
11
+ if (input && input[0]) {
12
+ const inputData = input[0];
13
+ const pcmData = new Int16Array(inputData.length);
14
+ for (let i = 0; i < inputData.length; i++) {
15
+ pcmData[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));
16
+ }
17
+ this.port.postMessage({type: 'audioData', data: pcmData.buffer});
18
+ }
19
+
20
+ return true;
21
+ }
22
+ }
23
+
24
+ registerProcessor('audio-capture-processor', AudioCaptureProcessor);
25
+ `;
26
+
27
+ export { AUDIO_CAPTURE_PROCESSOR_CODE };
@@ -9,6 +9,7 @@ export interface GeminiManagerEventMap extends THREE.Object3DEventMap {
9
9
  message: string;
10
10
  };
11
11
  turnComplete: object;
12
+ interrupted: object;
12
13
  }
13
14
  export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
14
15
  xrDeviceCamera?: xb.XRDeviceCamera;
@@ -17,25 +18,31 @@ export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
17
18
  audioContext: AudioContext | null;
18
19
  sourceNode: MediaStreamAudioSourceNode | null;
19
20
  processorNode: AudioWorkletNode | null;
21
+ queuedSourceNodes: Set<AudioScheduledSourceNode>;
20
22
  isAIRunning: boolean;
21
23
  audioQueue: AudioBuffer[];
22
- isPlayingAudio: boolean;
24
+ nextAudioStartTime: number;
23
25
  private screenshotInterval?;
24
26
  currentInputText: string;
25
27
  currentOutputText: string;
28
+ tools: xb.Tool[];
26
29
  constructor();
27
30
  init(): void;
28
- startGeminiLive(): Promise<void>;
31
+ startGeminiLive({ liveParams, model, }?: {
32
+ liveParams?: GoogleGenAITypes.LiveConnectConfig;
33
+ model?: string;
34
+ }): Promise<void>;
29
35
  stopGeminiLive(): Promise<void>;
30
36
  setupAudioCapture(): Promise<void>;
31
- startLiveAI(): Promise<void>;
37
+ startLiveAI(params: GoogleGenAITypes.LiveConnectConfig, model?: string): Promise<void>;
32
38
  startScreenshotCapture(intervalMs?: number): void;
33
39
  captureAndSendScreenshot(): void;
34
40
  sendAudioData(audioBuffer: ArrayBuffer): void;
35
41
  sendVideoFrame(base64Image: string): void;
36
42
  initializeAudioContext(): Promise<void>;
37
43
  playAudioChunk(audioData: string): Promise<void>;
38
- playNextAudioBuffer(): void;
44
+ scheduleAudioBuffers(): void;
45
+ stopPlayingAudio(): void;
39
46
  cleanup(): void;
40
47
  handleAIMessage(message: GoogleGenAITypes.LiveServerMessage): void;
41
48
  arrayBufferToBase64(buffer: ArrayBuffer): string;
@@ -1,4 +1,5 @@
1
1
  import * as xb from 'xrblocks';
2
+ import { AUDIO_CAPTURE_PROCESSOR_CODE } from './AudioCaptureProcessorCode.js';
2
3
 
3
4
  class GeminiManager extends xb.Script {
4
5
  constructor() {
@@ -8,27 +9,34 @@ class GeminiManager extends xb.Script {
8
9
  this.audioContext = null;
9
10
  this.sourceNode = null;
10
11
  this.processorNode = null;
12
+ this.queuedSourceNodes = new Set();
11
13
  // AI state
12
14
  this.isAIRunning = false;
13
15
  // Audio playback setup
14
16
  this.audioQueue = [];
15
- this.isPlayingAudio = false;
17
+ this.nextAudioStartTime = 0;
16
18
  // Transcription state
17
19
  this.currentInputText = '';
18
20
  this.currentOutputText = '';
21
+ this.tools = [];
19
22
  }
20
23
  init() {
21
24
  this.xrDeviceCamera = xb.core.deviceCamera;
22
25
  this.ai = xb.core.ai;
23
26
  }
24
- async startGeminiLive() {
27
+ async startGeminiLive({ liveParams, model, } = {}) {
25
28
  if (this.isAIRunning || !this.ai) {
26
29
  console.warn('AI already running or not available');
27
30
  return;
28
31
  }
32
+ liveParams = liveParams || {};
33
+ liveParams.tools = liveParams.tools || [];
34
+ liveParams.tools.push({
35
+ functionDeclarations: this.tools.map((tool) => tool.toJSON()),
36
+ });
29
37
  try {
30
38
  await this.setupAudioCapture();
31
- await this.startLiveAI();
39
+ await this.startLiveAI(liveParams, model);
32
40
  this.startScreenshotCapture();
33
41
  this.isAIRunning = true;
34
42
  }
@@ -61,19 +69,21 @@ class GeminiManager extends xb.Script {
61
69
  sampleRate: 16000,
62
70
  channelCount: 1,
63
71
  echoCancellation: true,
64
- noiseSuppression: true
65
- }
72
+ noiseSuppression: true,
73
+ },
66
74
  });
67
75
  const audioTracks = this.audioStream.getAudioTracks();
68
76
  if (audioTracks.length === 0) {
69
77
  throw new Error('No audio tracks found.');
70
78
  }
71
79
  this.audioContext = new AudioContext({ sampleRate: 16000 });
72
- await this.audioContext.audioWorklet.addModule('./AudioCaptureProcessor.js');
73
- this.sourceNode =
74
- this.audioContext.createMediaStreamSource(this.audioStream);
75
- this.processorNode =
76
- new AudioWorkletNode(this.audioContext, 'audio-capture-processor');
80
+ const blob = new Blob([AUDIO_CAPTURE_PROCESSOR_CODE], {
81
+ type: 'text/javascript',
82
+ });
83
+ const blobUrl = URL.createObjectURL(blob);
84
+ await this.audioContext.audioWorklet.addModule(blobUrl);
85
+ this.sourceNode = this.audioContext.createMediaStreamSource(this.audioStream);
86
+ this.processorNode = new AudioWorkletNode(this.audioContext, 'audio-capture-processor');
77
87
  this.processorNode.port.onmessage = (event) => {
78
88
  if (event.data.type === 'audioData' && this.isAIRunning) {
79
89
  this.sendAudioData(event.data.data);
@@ -82,7 +92,7 @@ class GeminiManager extends xb.Script {
82
92
  this.sourceNode.connect(this.processorNode);
83
93
  this.processorNode.connect(this.audioContext.destination);
84
94
  }
85
- async startLiveAI() {
95
+ async startLiveAI(params, model) {
86
96
  return new Promise((resolve, reject) => {
87
97
  this.ai.setLiveCallbacks({
88
98
  onopen: () => {
@@ -97,9 +107,9 @@ class GeminiManager extends xb.Script {
97
107
  },
98
108
  onclose: () => {
99
109
  this.isAIRunning = false;
100
- }
110
+ },
101
111
  });
102
- this.ai.startLiveSession().catch(reject);
112
+ this.ai.startLiveSession(params, model).catch(reject);
103
113
  });
104
114
  }
105
115
  startScreenshotCapture(intervalMs = 1000) {
@@ -120,9 +130,9 @@ class GeminiManager extends xb.Script {
120
130
  });
121
131
  if (typeof base64Image == 'string') {
122
132
  // Strip the data URL prefix if present
123
- const base64Data = base64Image.startsWith('data:') ?
124
- base64Image.split(',')[1] :
125
- base64Image;
133
+ const base64Data = base64Image.startsWith('data:')
134
+ ? base64Image.split(',')[1]
135
+ : base64Image;
126
136
  this.sendVideoFrame(base64Data);
127
137
  }
128
138
  }
@@ -174,28 +184,40 @@ class GeminiManager extends xb.Script {
174
184
  channelData[i] = int16View[i] / 32768.0;
175
185
  }
176
186
  this.audioQueue.push(audioBuffer);
177
- if (!this.isPlayingAudio) {
178
- this.playNextAudioBuffer();
179
- }
187
+ this.scheduleAudioBuffers();
180
188
  }
181
189
  catch (error) {
182
190
  console.error('Error playing audio chunk:', error);
183
191
  }
184
192
  }
185
- playNextAudioBuffer() {
186
- if (this.audioQueue.length === 0) {
187
- this.isPlayingAudio = false;
188
- return;
193
+ scheduleAudioBuffers() {
194
+ const SCHEDULE_AHEAD_TIME = 0.2;
195
+ while (this.audioQueue.length > 0 &&
196
+ this.nextAudioStartTime <=
197
+ this.audioContext.currentTime + SCHEDULE_AHEAD_TIME) {
198
+ const audioBuffer = this.audioQueue.shift();
199
+ const source = this.audioContext.createBufferSource();
200
+ source.buffer = audioBuffer;
201
+ source.connect(this.audioContext.destination);
202
+ source.onended = () => {
203
+ source.disconnect();
204
+ this.queuedSourceNodes.delete(source);
205
+ this.scheduleAudioBuffers();
206
+ };
207
+ const startTime = Math.max(this.nextAudioStartTime, this.audioContext.currentTime);
208
+ source.start(startTime);
209
+ this.queuedSourceNodes.add(source);
210
+ this.nextAudioStartTime = startTime + audioBuffer.duration;
189
211
  }
190
- this.isPlayingAudio = true;
191
- const audioBuffer = this.audioQueue.shift();
192
- const source = this.audioContext.createBufferSource();
193
- source.buffer = audioBuffer;
194
- source.connect(this.audioContext.destination);
195
- source.onended = () => {
196
- this.playNextAudioBuffer();
197
- };
198
- source.start();
212
+ }
213
+ stopPlayingAudio() {
214
+ this.audioQueue = [];
215
+ this.nextAudioStartTime = 0;
216
+ for (const source of this.queuedSourceNodes) {
217
+ source.stop();
218
+ source.disconnect();
219
+ }
220
+ this.queuedSourceNodes.clear();
199
221
  }
200
222
  cleanup() {
201
223
  if (this.screenshotInterval) {
@@ -204,7 +226,6 @@ class GeminiManager extends xb.Script {
204
226
  }
205
227
  // Clear audio queue and stop playback
206
228
  this.audioQueue = [];
207
- this.isPlayingAudio = false;
208
229
  if (this.processorNode) {
209
230
  this.processorNode.disconnect();
210
231
  this.processorNode = null;
@@ -218,7 +239,7 @@ class GeminiManager extends xb.Script {
218
239
  this.audioContext = null;
219
240
  }
220
241
  if (this.audioStream) {
221
- this.audioStream.getTracks().forEach(track => track.stop());
242
+ this.audioStream.getTracks().forEach((track) => track.stop());
222
243
  this.audioStream = null;
223
244
  }
224
245
  }
@@ -226,6 +247,27 @@ class GeminiManager extends xb.Script {
226
247
  if (message.data) {
227
248
  this.playAudioChunk(message.data);
228
249
  }
250
+ for (const functionCall of message.toolCall?.functionCalls ?? []) {
251
+ const tool = this.tools.find((tool) => tool.name == functionCall.name);
252
+ if (tool) {
253
+ const exec = tool.execute(functionCall.args);
254
+ exec
255
+ .then((result) => {
256
+ this.ai.sendToolResponse({
257
+ functionResponses: {
258
+ id: functionCall.id,
259
+ name: functionCall.name,
260
+ response: {
261
+ output: result.data,
262
+ error: result.error,
263
+ ...result.metadata,
264
+ },
265
+ },
266
+ });
267
+ })
268
+ .catch((error) => console.error('Tool error:', error));
269
+ }
270
+ }
229
271
  if (message.serverContent) {
230
272
  if (message.serverContent.inputTranscription) {
231
273
  const text = message.serverContent.inputTranscription.text;
@@ -239,6 +281,10 @@ class GeminiManager extends xb.Script {
239
281
  this.dispatchEvent({ type: 'outputTranscription', message: text });
240
282
  }
241
283
  }
284
+ if (message.serverContent.interrupted) {
285
+ this.stopPlayingAudio();
286
+ this.dispatchEvent({ type: 'interrupted' });
287
+ }
242
288
  if (message.serverContent.turnComplete) {
243
289
  this.dispatchEvent({ type: 'turnComplete' });
244
290
  }
@@ -24,8 +24,7 @@ class SimpleDecalGeometry extends THREE.BufferGeometry {
24
24
  projectorMatrix.makeRotationFromQuaternion(orientation);
25
25
  projectorMatrix.setPosition(position);
26
26
  projectorMatrix.scale(scale);
27
- projectorMatrix
28
- .invert(); // Inverts the matrix for projection calculations.
27
+ projectorMatrix.invert(); // Inverts the matrix for projection calculations.
29
28
  // Accesses the vertices, UVs, and indices from the geometry attributes.
30
29
  const vertices = this.attributes.position.array;
31
30
  const uvs = this.attributes.uv.array;
@@ -46,15 +45,20 @@ class SimpleDecalGeometry extends THREE.BufferGeometry {
46
45
  uvs[2 * i] = vector4.x + 0.5;
47
46
  uvs[2 * i + 1] = vector4.y + 0.5;
48
47
  // Checks if the vertex is within the -0.5 to 0.5 range in all dimensions.
49
- vertexBounded[i] = Number(vector4.x >= -0.5 && vector4.x <= 0.5 && vector4.y >= -0.5 &&
50
- vector4.y <= 0.5 && vector4.z >= -0.5 && vector4.z <= 0.5);
48
+ vertexBounded[i] = Number(vector4.x >= -0.5 &&
49
+ vector4.x <= 0.5 &&
50
+ vector4.y >= -0.5 &&
51
+ vector4.y <= 0.5 &&
52
+ vector4.z >= -0.5 &&
53
+ vector4.z <= 0.5);
51
54
  }
52
55
  // Creates a list of indices that correspond to bounded vertices only.
53
56
  const goodIndices = [];
54
57
  for (let i = 0; i < indices.length / 3; ++i) {
55
58
  // Adds the triangle indices if any of its vertices are inside the
56
59
  // bounding box.
57
- if (vertexBounded[indices[3 * i]] || vertexBounded[indices[3 * i + 1]] ||
60
+ if (vertexBounded[indices[3 * i]] ||
61
+ vertexBounded[indices[3 * i + 1]] ||
58
62
  vertexBounded[indices[3 * i + 2]]) {
59
63
  goodIndices.push(indices[3 * i]);
60
64
  goodIndices.push(indices[3 * i + 1]);
@@ -10,15 +10,14 @@ let CustomInstruction = class CustomInstruction extends SimulatorInstructionsCar
10
10
  return html `${this.customInstruction.header}`;
11
11
  }
12
12
  getImageContents() {
13
- return this.customInstruction.videoSrc ? html `
14
- <video playsinline autoplay muted loop>
15
- <source
16
- src=${this.customInstruction.videoSrc}
17
- type="video/webm">
18
- Your browser does not support the video tag.
19
- </video>
20
- ` :
21
- html ``;
13
+ return this.customInstruction.videoSrc
14
+ ? html `
15
+ <video playsinline autoplay muted loop>
16
+ <source src=${this.customInstruction.videoSrc} type="video/webm" />
17
+ Your browser does not support the video tag.
18
+ </video>
19
+ `
20
+ : html ``;
22
21
  }
23
22
  getDescriptionContents() {
24
23
  return html `${this.customInstruction.description}`;
@@ -11,10 +11,8 @@ let HandsInstructions = class HandsInstructions extends SimulatorInstructionsCar
11
11
  getImageContents() {
12
12
  return html `
13
13
  <video playsinline autoplay muted loop>
14
- <source
15
- src="${SIMULATOR_HANDS_VIDEO_PATH}"
16
- type="video/webm">
17
- Your browser does not support the video tag.
14
+ <source src="${SIMULATOR_HANDS_VIDEO_PATH}" type="video/webm" />
15
+ Your browser does not support the video tag.
18
16
  </video>
19
17
  `;
20
18
  }
@@ -22,14 +20,23 @@ let HandsInstructions = class HandsInstructions extends SimulatorInstructionsCar
22
20
  return html `
23
21
  <h2>Hands Mode</h2>
24
22
  <p>
25
- From Navigation Mode, press <strong>Left Shift</strong> to enter <strong>Hands Mode</strong>.
26
- This mode allows for precise manipulation of virtual hands.
23
+ From Navigation Mode, press <strong>Left Shift</strong> to enter
24
+ <strong>Hands Mode</strong>. This mode allows for precise manipulation
25
+ of virtual hands.
27
26
  </p>
28
27
  <ul>
29
- <li><strong>Move Hand:</strong> Use the W, A, S, D keys to move it forward, left, backward, and right.</li>
30
- <li><strong>Elevate Hand:</strong> Use the Q (up) and E (down) keys.</li>
31
- <li><strong>Switch Active Hand:</strong> Press the T key to toggle between hands.</li>
32
- <li><strong>Simulate Pinch:</strong> Press the Spacebar.</li>
28
+ <li>
29
+ <strong>Move Hand:</strong> Use the W, A, S, D keys to move it
30
+ forward, left, backward, and right.
31
+ </li>
32
+ <li>
33
+ <strong>Elevate Hand:</strong> Use the Q (up) and E (down) keys.
34
+ </li>
35
+ <li>
36
+ <strong>Switch Active Hand:</strong> Press the T key to toggle between
37
+ hands.
38
+ </li>
39
+ <li><strong>Simulate Pinch:</strong> Press the Spacebar.</li>
33
40
  </ul>
34
41
  `;
35
42
  }
@@ -11,10 +11,8 @@ let NavigationInstructions = class NavigationInstructions extends SimulatorInstr
11
11
  getImageContents() {
12
12
  return html `
13
13
  <video playsinline autoplay muted loop>
14
- <source
15
- src=${SIMULATOR_NAVIGATION_VIDEO_PATH}
16
- type="video/webm">
17
- Your browser does not support the video tag.
14
+ <source src=${SIMULATOR_NAVIGATION_VIDEO_PATH} type="video/webm" />
15
+ Your browser does not support the video tag.
18
16
  </video>
19
17
  `;
20
18
  }
@@ -22,13 +20,16 @@ let NavigationInstructions = class NavigationInstructions extends SimulatorInstr
22
20
  return html `
23
21
  <h2>Navigation Mode</h2>
24
22
  <p>
25
- Press <strong>Left Shift</strong> to toggle Navigation Mode.
26
- In this mode, virtual hands appear and the mouse controls the camera view.
23
+ Press <strong>Left Shift</strong> to toggle Navigation Mode. In this
24
+ mode, virtual hands appear and the mouse controls the camera view.
27
25
  </p>
28
26
  <ul>
29
- <li><strong>Move Forward/Backward/Sideways:</strong> Use the W, A, S, D keys.</li>
30
- <li><strong>Move Up/Down:</strong> Use the Q and E keys.</li>
31
- <li><strong>Rotate Camera:</strong> Click and drag the mouse.</li>
27
+ <li>
28
+ <strong>Move Forward/Backward/Sideways:</strong> Use the W, A, S, D
29
+ keys.
30
+ </li>
31
+ <li><strong>Move Up/Down:</strong> Use the Q and E keys.</li>
32
+ <li><strong>Rotate Camera:</strong> Click and drag the mouse.</li>
32
33
  </ul>
33
34
  `;
34
35
  }