xrblocks 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -30
- package/build/addons/ai/AudioCaptureProcessorCode.d.ts +1 -0
- package/build/addons/ai/AudioCaptureProcessorCode.js +27 -0
- package/build/addons/ai/GeminiManager.d.ts +11 -4
- package/build/addons/ai/GeminiManager.js +80 -34
- package/build/addons/objects/SimpleDecalGeometry.js +9 -5
- package/build/addons/simulator/instructions/CustomInstruction.js +8 -9
- package/build/addons/simulator/instructions/HandsInstructions.js +17 -10
- package/build/addons/simulator/instructions/NavigationInstructions.js +10 -9
- package/build/addons/simulator/instructions/SimulatorInstructions.js +17 -18
- package/build/addons/simulator/instructions/SimulatorInstructionsCard.js +69 -75
- package/build/addons/simulator/instructions/SimulatorInstructionsEvents.js +4 -1
- package/build/addons/simulator/instructions/UserInstructions.js +18 -15
- package/build/addons/simulator/ui/EnterXRButton.js +17 -17
- package/build/addons/simulator/ui/GeminiLiveApiKeyInput.js +45 -39
- package/build/addons/simulator/ui/HandPosePanel.js +20 -10
- package/build/addons/simulator/ui/MicButton.js +23 -18
- package/build/addons/simulator/ui/ModeIndicator.js +17 -17
- package/build/addons/ui/TextBillboard.js +1 -1
- package/build/addons/utils/Palette.js +3 -15
- package/build/addons/virtualkeyboard/Keyboard.js +24 -21
- package/build/addons/volumes/VolumetricCloud.glsl.js +1 -1
- package/build/addons/volumes/VolumetricCloud.js +8 -5
- package/build/agent/Agent.d.ts +25 -1
- package/build/agent/SkyboxAgent.d.ts +119 -3
- package/build/agent/Tool.d.ts +21 -5
- package/build/agent/index.d.ts +1 -0
- package/build/agent/tools/GenerateSkyboxTool.d.ts +3 -3
- package/build/agent/tools/GetWeatherTool.d.ts +7 -8
- package/build/ai/AI.d.ts +1 -1
- package/build/ai/Gemini.d.ts +1 -4
- package/build/camera/XRDeviceCamera.d.ts +1 -1
- package/build/core/Core.d.ts +4 -1
- package/build/core/Options.d.ts +17 -0
- package/build/core/components/ScreenshotSynthesizer.d.ts +6 -4
- package/build/core/components/XRTransition.d.ts +1 -1
- package/build/depth/Depth.d.ts +5 -3
- package/build/depth/DepthMesh.d.ts +8 -1
- package/build/depth/DepthTextures.d.ts +6 -4
- package/build/depth/occlusion/OcclusionPass.d.ts +6 -5
- package/build/input/Hands.d.ts +1 -1
- package/build/input/Input.d.ts +1 -1
- package/build/input/gestures/GestureEvents.d.ts +23 -0
- package/build/input/gestures/GestureRecognition.d.ts +43 -0
- package/build/input/gestures/GestureRecognitionOptions.d.ts +43 -0
- package/build/input/gestures/GestureTypes.d.ts +16 -0
- package/build/input/gestures/providers/HeuristicGestureDetectors.d.ts +2 -0
- package/build/simulator/Simulator.d.ts +2 -0
- package/build/simulator/SimulatorControls.d.ts +1 -1
- package/build/simulator/SimulatorOptions.d.ts +1 -0
- package/build/simulator/controlModes/SimulatorControlMode.d.ts +1 -1
- package/build/simulator/handPoses/HandPoseJoints.d.ts +2 -2
- package/build/simulator/userActions/PinchOnButtonAction.d.ts +2 -2
- package/build/simulator/userActions/WalkTowardsPanelAction.d.ts +1 -1
- package/build/singletons.d.ts +2 -2
- package/build/sound/AudioListener.d.ts +16 -1
- package/build/sound/AudioPlayer.d.ts +21 -2
- package/build/sound/CoreSound.d.ts +26 -1
- package/build/stereo/utils.d.ts +1 -1
- package/build/ui/components/IconButton.d.ts +6 -2
- package/build/ui/components/MaterialSymbolsView.d.ts +1 -1
- package/build/ui/components/ScrollingTroikaTextView.d.ts +1 -1
- package/build/ui/components/TextButton.d.ts +0 -1
- package/build/ui/interaction/ModelViewer.d.ts +6 -2
- package/build/utils/ModelLoader.d.ts +1 -1
- package/build/utils/SparkRendererHolder.d.ts +5 -0
- package/build/utils/Types.d.ts +2 -2
- package/build/video/VideoStream.d.ts +1 -1
- package/build/world/World.d.ts +1 -1
- package/build/world/objects/ObjectDetector.d.ts +1 -1
- package/build/world/planes/PlaneDetector.d.ts +1 -1
- package/build/xrblocks.d.ts +3 -0
- package/build/xrblocks.js +7268 -5884
- package/build/xrblocks.js.map +1 -1
- package/build/xrblocks.min.js +1 -1
- package/build/xrblocks.min.js.map +1 -1
- package/package.json +14 -9
package/README.md
CHANGED
|
@@ -2,44 +2,59 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/xrblocks)
|
|
4
4
|
[](https://bundlephobia.com/result?p=xrblocks)
|
|
5
|
-
|
|
5
|
+

|
|
6
6
|
|
|
7
7
|
#### JavaScript library for rapid XR and AI prototyping
|
|
8
8
|
|
|
9
|
+
[Site](https://xrblocks.github.io/) —
|
|
9
10
|
[Manual](https://xrblocks.github.io/docs/) —
|
|
10
|
-
[Templates](https://xrblocks.github.io/docs/templates/Basic) —
|
|
11
|
-
[
|
|
11
|
+
[Templates](https://xrblocks.github.io/docs/templates/Basic/) —
|
|
12
|
+
[Demos](https://xrblocks.github.io/docs/samples/ModelViewer/) —
|
|
13
|
+
[YouTube](https://www.youtube.com/watch?v=75QJHTsAoB8) —
|
|
14
|
+
[arXiv](https://arxiv.org/abs/2509.25504) —
|
|
15
|
+
[Blog](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
|
|
12
16
|
|
|
13
17
|
<p align="center">
|
|
14
|
-
|
|
15
|
-
<a href="https://xrblocks.github.io/docs/samples/XR-Emoji" target="_blank"><img width="32.3%" src="assets/xremoji-demo.webp" alt="XR Emoji" /></a>
|
|
16
|
-
|
|
18
|
+
<a href="https://xrblocks.github.io/docs/samples/Ballpit/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/ballpit-demo.webp" alt="Ballpit" /></a>
|
|
19
|
+
<a href="https://xrblocks.github.io/docs/samples/XR-Emoji/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/xremoji-demo.webp" alt="XR Emoji" /></a>
|
|
20
|
+
<a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
|
|
17
21
|
</p>
|
|
18
22
|
|
|
19
23
|
### Description
|
|
20
24
|
|
|
21
25
|
**XR Blocks** is a lightweight, cross-platform library for rapidly prototyping
|
|
22
26
|
advanced XR and AI experiences. Built upon [three.js](https://threejs.org), it
|
|
23
|
-
targets Chrome v136+ with WebXR support on Android XR
|
|
24
|
-
|
|
25
|
-
|
|
27
|
+
targets Chrome v136+ with WebXR support on Android XR (e.g.,
|
|
28
|
+
[Galaxy XR](https://www.samsung.com/us/xr/galaxy-xr/galaxy-xr/)) and also
|
|
29
|
+
includes a powerful desktop simulator for development. The framework emphasizes
|
|
30
|
+
a user-centric, developer-friendly SDK designed to simplify the creation of
|
|
26
31
|
immersive applications with features like:
|
|
27
32
|
|
|
28
|
-
-
|
|
29
|
-
|
|
30
|
-
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
-
|
|
35
|
-
|
|
33
|
+
- **Hand Tracking & Gestures:** Access advanced hand tracking, custom
|
|
34
|
+
gestures with TensorFlow Lite / PyTorch models, and interaction events.
|
|
35
|
+
- **Gesture Recognition:** Opt into pinch, open-palm, fist, thumbs-up, point,
|
|
36
|
+
and spread detection with `options.enableGestures()`, tune providers or
|
|
37
|
+
thresholds, and subscribe to `gesturestart`/`gestureupdate`/`gestureend`
|
|
38
|
+
events from the shared subsystem.
|
|
39
|
+
- **World Understanding:** Present samples with depth sensing, geometry-aware
|
|
40
|
+
physics, and object recognition with Gemini in both XR and desktop simulator.
|
|
41
|
+
- **AI Integration:** Seamlessly connect to Gemini for multimodal
|
|
42
|
+
understanding and live conversational experiences.
|
|
43
|
+
- **Cross-Platform:** Write once and deploy to both XR devices and desktop
|
|
44
|
+
Chrome browsers.
|
|
45
|
+
|
|
46
|
+
We welcome all contributors to foster an AI + XR community! Read our
|
|
47
|
+
[blog post](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
|
|
48
|
+
and [white paper](https://arxiv.org/abs/2509.25504) for a visionary roadmap.
|
|
36
49
|
|
|
37
50
|
### Usage
|
|
38
51
|
|
|
39
52
|
XR Blocks can be imported directly into a webpage using an importmap. This code
|
|
40
53
|
creates a basic XR scene containing a cylinder. When you view the scene, you can
|
|
41
54
|
pinch your fingers (in XR) or click (in the desktop simulator) to change the
|
|
42
|
-
cylinder's color.
|
|
55
|
+
cylinder's color. Check out
|
|
56
|
+
[this live demo](https://xrblocks.github.io/docs/templates/Basic/) with simple
|
|
57
|
+
code below:
|
|
43
58
|
|
|
44
59
|
```html
|
|
45
60
|
<!DOCTYPE html>
|
|
@@ -59,8 +74,8 @@ cylinder's color.
|
|
|
59
74
|
<script type="importmap">
|
|
60
75
|
{
|
|
61
76
|
"imports": {
|
|
62
|
-
"three": "https://cdn.jsdelivr.net/npm/three@0.
|
|
63
|
-
"three/addons/": "https://cdn.jsdelivr.net/npm/three@0.
|
|
77
|
+
"three": "https://cdn.jsdelivr.net/npm/three@0.181.0/build/three.module.js",
|
|
78
|
+
"three/addons/": "https://cdn.jsdelivr.net/npm/three@0.181.0/examples/jsm/",
|
|
64
79
|
"xrblocks": "https://cdn.jsdelivr.net/gh/google/xrblocks@build/xrblocks.js",
|
|
65
80
|
"xrblocks/addons/": "https://cdn.jsdelivr.net/gh/google/xrblocks@build/addons/"
|
|
66
81
|
}
|
|
@@ -69,8 +84,8 @@ cylinder's color.
|
|
|
69
84
|
</head>
|
|
70
85
|
<body>
|
|
71
86
|
<script type="module">
|
|
72
|
-
import * as THREE from
|
|
73
|
-
import * as xb from
|
|
87
|
+
import * as THREE from 'three';
|
|
88
|
+
import * as xb from 'xrblocks';
|
|
74
89
|
|
|
75
90
|
/**
|
|
76
91
|
* A basic example of XRBlocks to render a cylinder and pinch to change its color.
|
|
@@ -107,7 +122,7 @@ cylinder's color.
|
|
|
107
122
|
}
|
|
108
123
|
|
|
109
124
|
// When the page content is loaded, add our script and initialize XR Blocks.
|
|
110
|
-
document.addEventListener(
|
|
125
|
+
document.addEventListener('DOMContentLoaded', function () {
|
|
111
126
|
xb.add(new MainScript());
|
|
112
127
|
xb.init(new xb.Options());
|
|
113
128
|
});
|
|
@@ -130,10 +145,18 @@ npm ci
|
|
|
130
145
|
|
|
131
146
|
# Build xrblocks.js.
|
|
132
147
|
npm run build
|
|
148
|
+
|
|
149
|
+
# After making changes, check ESLint and run Prettier
|
|
150
|
+
npm run lint # ESLint check
|
|
151
|
+
npm run format # Prettier format
|
|
133
152
|
```
|
|
134
153
|
|
|
135
|
-
|
|
136
|
-
|
|
154
|
+
XR Blocks uses ESLint for linting and Prettier for formatting.
|
|
155
|
+
If coding in VSCode, make sure to install the [ESLint extension](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint) and the [Prettier extension](https://marketplace.visualstudio.com/items?itemName=esbenp.prettier-vscode). Then set Prettier as your default formatter.
|
|
156
|
+
|
|
157
|
+
This is not an officially supported Google product, but will be actively
|
|
158
|
+
maintained by the XR Labs team and external collaborators. This project is not
|
|
159
|
+
eligible for the
|
|
137
160
|
[Google Open Source Software Vulnerability Rewards Program](https://bughunters.google.com/open-source-security).
|
|
138
161
|
|
|
139
162
|
### User Data & Permissions
|
|
@@ -183,11 +206,89 @@ To remove XR Blocks from your code, simple remove the lines from your `<script
|
|
|
183
206
|
type="importmap">` tag in HTML, or `import * from xrblocks` in JavaScript, or
|
|
184
207
|
use `npm uninstall xrblocks` from your project directory.
|
|
185
208
|
|
|
209
|
+
### References
|
|
210
|
+
|
|
211
|
+
If you find XR Blocks inspiring or useful in your research, please reference it
|
|
212
|
+
as:
|
|
213
|
+
|
|
214
|
+
```bibtex
|
|
215
|
+
@misc{Li2025XR,
|
|
216
|
+
title={{XR Blocks: Accelerating Human-centered AI + XR Innovation}},
|
|
217
|
+
author={Li, David and Numan, Nels and Qian, Xun and Chen, Yanhe and Zhou, Zhongyi and Alekseev, Evgenii and Lee, Geonsun and Cooper, Alex and Xia, Min and Chung, Scott and Nelson, Jeremy and Yuan, Xiuxiu and Dias, Jolica and Bettridge, Tim and Hersh, Benjamin and Huynh, Michelle and Piascik, Konrad and Cabello, Ricardo and Kim, David and Du, Ruofei},
|
|
218
|
+
year={2025},
|
|
219
|
+
eprint={2509.25504},
|
|
220
|
+
archivePrefix={arXiv},
|
|
221
|
+
primaryClass={cs.HC},
|
|
222
|
+
url={https://arxiv.org/abs/2509.25504},
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
#### Key Works Built with XR Blocks
|
|
227
|
+
|
|
228
|
+
These references are built with XR Blocks:
|
|
229
|
+
|
|
230
|
+
```bibtex
|
|
231
|
+
@inproceedings{Lee2025Sensible,
|
|
232
|
+
title = {{Sensible Agent: A Framework for Unobtrusive Interaction with Proactive AR Agent}},
|
|
233
|
+
author = {Lee, Geonsun and Xia, Min and Numan, Nels and Qian, Xun and Li, David and Chen, Yanhe and Kulshrestha, Achin and Chatterjee, Ishan and Zhang, Yinda and Manocha, Dinesh and Kim, David and Du, Ruofei},
|
|
234
|
+
booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
|
|
235
|
+
year = {2025},
|
|
236
|
+
publisher = {ACM},
|
|
237
|
+
numpages = {22},
|
|
238
|
+
series = {UIST},
|
|
239
|
+
doi = {10.1145/3746059.3747748},
|
|
240
|
+
}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
#### Inspiring Related Works
|
|
244
|
+
|
|
245
|
+
We call for contributors to integrate our prior art into XR Blocks to enhance
|
|
246
|
+
reproducibility and knowledge sharing:
|
|
247
|
+
|
|
248
|
+
E.g., integrating models from <https://visualblocks.withgoogle.com> and [Transformers.js](https://huggingface.co/docs/transformers.js/en/index)
|
|
249
|
+
to XR Blocks; bringing more
|
|
250
|
+
[depth-based interaction](https://augmentedperception.github.io/depthlab/) to
|
|
251
|
+
XR Blocks; and add more samples and demos. For large commits, feel free to add
|
|
252
|
+
an issue before working on it so that your work won't be duplicated with others.
|
|
253
|
+
|
|
254
|
+
```bibtex
|
|
255
|
+
@inproceedings{Du2023Rapsai,
|
|
256
|
+
title = {{Rapsai: Accelerating Machine Learning Prototyping of Multimedia Applications Through Visual Programming}},
|
|
257
|
+
author = {Du, Ruofei and Li, Na and Jin, Jing and Carney, Michelle and Miles, Scott and Kleiner, Maria and Yuan, Xiuxiu and Zhang, Yinda and Kulkarni, Anuva and Liu, XingyuBruce and Sabie, Ahmed and Orts-Escolano, Sergio and Kar, Abhishek and Yu, Ping and Iyengar, Ram and Kowdle, Adarsh and Olwal, Alex},
|
|
258
|
+
booktitle = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
|
|
259
|
+
year = {2023},
|
|
260
|
+
publisher = {ACM},
|
|
261
|
+
month = {Apr.},
|
|
262
|
+
day = {22-29},
|
|
263
|
+
number = {125},
|
|
264
|
+
pages = {1--23},
|
|
265
|
+
series = {CHI},
|
|
266
|
+
doi = {10.1145/3544548.3581338},
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Extending XR Blocks to XR communication:
|
|
271
|
+
|
|
272
|
+
```bibtex
|
|
273
|
+
@inproceedings{Hu2025DialogLab,
|
|
274
|
+
title = {{DialogLab: Authoring, Simulating, and Testing Dynamic Group Conversations in Hybrid Human-AI Conversations}},
|
|
275
|
+
author = {Hu, Erzhen and Chen, Yanhe and Li, Mingyi and Phadnis, Vrushank and Xu, Pingmei and Qian, Xun and Olwal, Alex and Kim, David and Heo, Seongkook and Du, Ruofei},
|
|
276
|
+
booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
|
|
277
|
+
year = {2025},
|
|
278
|
+
number = {210},
|
|
279
|
+
publisher = {ACM},
|
|
280
|
+
number = {210},
|
|
281
|
+
pages = {1--20},
|
|
282
|
+
series = {UIST},
|
|
283
|
+
doi = {10.1145/3746059.3747696},
|
|
284
|
+
}
|
|
285
|
+
```
|
|
286
|
+
|
|
186
287
|
### Terms of Service
|
|
187
288
|
|
|
188
|
-
-
|
|
189
|
-
|
|
190
|
-
|
|
289
|
+
- Please follow
|
|
290
|
+
[Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
|
|
291
|
+
using this SDK.
|
|
191
292
|
|
|
192
|
-
-
|
|
193
|
-
|
|
293
|
+
- When using AI features in this SDK, please follow
|
|
294
|
+
[Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const AUDIO_CAPTURE_PROCESSOR_CODE = "\n // Audio worklet processor for capturing audio data\n class AudioCaptureProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n }\n\n process(inputs, outputs, parameters) {\n const input = inputs[0];\n\n if (input && input[0]) {\n const inputData = input[0];\n const pcmData = new Int16Array(inputData.length);\n for (let i = 0; i < inputData.length; i++) {\n pcmData[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));\n }\n this.port.postMessage({type: 'audioData', data: pcmData.buffer});\n }\n\n return true;\n }\n }\n\n registerProcessor('audio-capture-processor', AudioCaptureProcessor);\n";
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const AUDIO_CAPTURE_PROCESSOR_CODE = `
|
|
2
|
+
// Audio worklet processor for capturing audio data
|
|
3
|
+
class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
4
|
+
constructor() {
|
|
5
|
+
super();
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
process(inputs, outputs, parameters) {
|
|
9
|
+
const input = inputs[0];
|
|
10
|
+
|
|
11
|
+
if (input && input[0]) {
|
|
12
|
+
const inputData = input[0];
|
|
13
|
+
const pcmData = new Int16Array(inputData.length);
|
|
14
|
+
for (let i = 0; i < inputData.length; i++) {
|
|
15
|
+
pcmData[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));
|
|
16
|
+
}
|
|
17
|
+
this.port.postMessage({type: 'audioData', data: pcmData.buffer});
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
return true;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
registerProcessor('audio-capture-processor', AudioCaptureProcessor);
|
|
25
|
+
`;
|
|
26
|
+
|
|
27
|
+
export { AUDIO_CAPTURE_PROCESSOR_CODE };
|
|
@@ -9,6 +9,7 @@ export interface GeminiManagerEventMap extends THREE.Object3DEventMap {
|
|
|
9
9
|
message: string;
|
|
10
10
|
};
|
|
11
11
|
turnComplete: object;
|
|
12
|
+
interrupted: object;
|
|
12
13
|
}
|
|
13
14
|
export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
|
|
14
15
|
xrDeviceCamera?: xb.XRDeviceCamera;
|
|
@@ -17,25 +18,31 @@ export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
|
|
|
17
18
|
audioContext: AudioContext | null;
|
|
18
19
|
sourceNode: MediaStreamAudioSourceNode | null;
|
|
19
20
|
processorNode: AudioWorkletNode | null;
|
|
21
|
+
queuedSourceNodes: Set<AudioScheduledSourceNode>;
|
|
20
22
|
isAIRunning: boolean;
|
|
21
23
|
audioQueue: AudioBuffer[];
|
|
22
|
-
|
|
24
|
+
nextAudioStartTime: number;
|
|
23
25
|
private screenshotInterval?;
|
|
24
26
|
currentInputText: string;
|
|
25
27
|
currentOutputText: string;
|
|
28
|
+
tools: xb.Tool[];
|
|
26
29
|
constructor();
|
|
27
30
|
init(): void;
|
|
28
|
-
startGeminiLive(
|
|
31
|
+
startGeminiLive({ liveParams, model, }?: {
|
|
32
|
+
liveParams?: GoogleGenAITypes.LiveConnectConfig;
|
|
33
|
+
model?: string;
|
|
34
|
+
}): Promise<void>;
|
|
29
35
|
stopGeminiLive(): Promise<void>;
|
|
30
36
|
setupAudioCapture(): Promise<void>;
|
|
31
|
-
startLiveAI(): Promise<void>;
|
|
37
|
+
startLiveAI(params: GoogleGenAITypes.LiveConnectConfig, model?: string): Promise<void>;
|
|
32
38
|
startScreenshotCapture(intervalMs?: number): void;
|
|
33
39
|
captureAndSendScreenshot(): void;
|
|
34
40
|
sendAudioData(audioBuffer: ArrayBuffer): void;
|
|
35
41
|
sendVideoFrame(base64Image: string): void;
|
|
36
42
|
initializeAudioContext(): Promise<void>;
|
|
37
43
|
playAudioChunk(audioData: string): Promise<void>;
|
|
38
|
-
|
|
44
|
+
scheduleAudioBuffers(): void;
|
|
45
|
+
stopPlayingAudio(): void;
|
|
39
46
|
cleanup(): void;
|
|
40
47
|
handleAIMessage(message: GoogleGenAITypes.LiveServerMessage): void;
|
|
41
48
|
arrayBufferToBase64(buffer: ArrayBuffer): string;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as xb from 'xrblocks';
|
|
2
|
+
import { AUDIO_CAPTURE_PROCESSOR_CODE } from './AudioCaptureProcessorCode.js';
|
|
2
3
|
|
|
3
4
|
class GeminiManager extends xb.Script {
|
|
4
5
|
constructor() {
|
|
@@ -8,27 +9,34 @@ class GeminiManager extends xb.Script {
|
|
|
8
9
|
this.audioContext = null;
|
|
9
10
|
this.sourceNode = null;
|
|
10
11
|
this.processorNode = null;
|
|
12
|
+
this.queuedSourceNodes = new Set();
|
|
11
13
|
// AI state
|
|
12
14
|
this.isAIRunning = false;
|
|
13
15
|
// Audio playback setup
|
|
14
16
|
this.audioQueue = [];
|
|
15
|
-
this.
|
|
17
|
+
this.nextAudioStartTime = 0;
|
|
16
18
|
// Transcription state
|
|
17
19
|
this.currentInputText = '';
|
|
18
20
|
this.currentOutputText = '';
|
|
21
|
+
this.tools = [];
|
|
19
22
|
}
|
|
20
23
|
init() {
|
|
21
24
|
this.xrDeviceCamera = xb.core.deviceCamera;
|
|
22
25
|
this.ai = xb.core.ai;
|
|
23
26
|
}
|
|
24
|
-
async startGeminiLive() {
|
|
27
|
+
async startGeminiLive({ liveParams, model, } = {}) {
|
|
25
28
|
if (this.isAIRunning || !this.ai) {
|
|
26
29
|
console.warn('AI already running or not available');
|
|
27
30
|
return;
|
|
28
31
|
}
|
|
32
|
+
liveParams = liveParams || {};
|
|
33
|
+
liveParams.tools = liveParams.tools || [];
|
|
34
|
+
liveParams.tools.push({
|
|
35
|
+
functionDeclarations: this.tools.map((tool) => tool.toJSON()),
|
|
36
|
+
});
|
|
29
37
|
try {
|
|
30
38
|
await this.setupAudioCapture();
|
|
31
|
-
await this.startLiveAI();
|
|
39
|
+
await this.startLiveAI(liveParams, model);
|
|
32
40
|
this.startScreenshotCapture();
|
|
33
41
|
this.isAIRunning = true;
|
|
34
42
|
}
|
|
@@ -61,19 +69,21 @@ class GeminiManager extends xb.Script {
|
|
|
61
69
|
sampleRate: 16000,
|
|
62
70
|
channelCount: 1,
|
|
63
71
|
echoCancellation: true,
|
|
64
|
-
noiseSuppression: true
|
|
65
|
-
}
|
|
72
|
+
noiseSuppression: true,
|
|
73
|
+
},
|
|
66
74
|
});
|
|
67
75
|
const audioTracks = this.audioStream.getAudioTracks();
|
|
68
76
|
if (audioTracks.length === 0) {
|
|
69
77
|
throw new Error('No audio tracks found.');
|
|
70
78
|
}
|
|
71
79
|
this.audioContext = new AudioContext({ sampleRate: 16000 });
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
80
|
+
const blob = new Blob([AUDIO_CAPTURE_PROCESSOR_CODE], {
|
|
81
|
+
type: 'text/javascript',
|
|
82
|
+
});
|
|
83
|
+
const blobUrl = URL.createObjectURL(blob);
|
|
84
|
+
await this.audioContext.audioWorklet.addModule(blobUrl);
|
|
85
|
+
this.sourceNode = this.audioContext.createMediaStreamSource(this.audioStream);
|
|
86
|
+
this.processorNode = new AudioWorkletNode(this.audioContext, 'audio-capture-processor');
|
|
77
87
|
this.processorNode.port.onmessage = (event) => {
|
|
78
88
|
if (event.data.type === 'audioData' && this.isAIRunning) {
|
|
79
89
|
this.sendAudioData(event.data.data);
|
|
@@ -82,7 +92,7 @@ class GeminiManager extends xb.Script {
|
|
|
82
92
|
this.sourceNode.connect(this.processorNode);
|
|
83
93
|
this.processorNode.connect(this.audioContext.destination);
|
|
84
94
|
}
|
|
85
|
-
async startLiveAI() {
|
|
95
|
+
async startLiveAI(params, model) {
|
|
86
96
|
return new Promise((resolve, reject) => {
|
|
87
97
|
this.ai.setLiveCallbacks({
|
|
88
98
|
onopen: () => {
|
|
@@ -97,9 +107,9 @@ class GeminiManager extends xb.Script {
|
|
|
97
107
|
},
|
|
98
108
|
onclose: () => {
|
|
99
109
|
this.isAIRunning = false;
|
|
100
|
-
}
|
|
110
|
+
},
|
|
101
111
|
});
|
|
102
|
-
this.ai.startLiveSession().catch(reject);
|
|
112
|
+
this.ai.startLiveSession(params, model).catch(reject);
|
|
103
113
|
});
|
|
104
114
|
}
|
|
105
115
|
startScreenshotCapture(intervalMs = 1000) {
|
|
@@ -120,9 +130,9 @@ class GeminiManager extends xb.Script {
|
|
|
120
130
|
});
|
|
121
131
|
if (typeof base64Image == 'string') {
|
|
122
132
|
// Strip the data URL prefix if present
|
|
123
|
-
const base64Data = base64Image.startsWith('data:')
|
|
124
|
-
base64Image.split(',')[1]
|
|
125
|
-
base64Image;
|
|
133
|
+
const base64Data = base64Image.startsWith('data:')
|
|
134
|
+
? base64Image.split(',')[1]
|
|
135
|
+
: base64Image;
|
|
126
136
|
this.sendVideoFrame(base64Data);
|
|
127
137
|
}
|
|
128
138
|
}
|
|
@@ -174,28 +184,40 @@ class GeminiManager extends xb.Script {
|
|
|
174
184
|
channelData[i] = int16View[i] / 32768.0;
|
|
175
185
|
}
|
|
176
186
|
this.audioQueue.push(audioBuffer);
|
|
177
|
-
|
|
178
|
-
this.playNextAudioBuffer();
|
|
179
|
-
}
|
|
187
|
+
this.scheduleAudioBuffers();
|
|
180
188
|
}
|
|
181
189
|
catch (error) {
|
|
182
190
|
console.error('Error playing audio chunk:', error);
|
|
183
191
|
}
|
|
184
192
|
}
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
193
|
+
scheduleAudioBuffers() {
|
|
194
|
+
const SCHEDULE_AHEAD_TIME = 0.2;
|
|
195
|
+
while (this.audioQueue.length > 0 &&
|
|
196
|
+
this.nextAudioStartTime <=
|
|
197
|
+
this.audioContext.currentTime + SCHEDULE_AHEAD_TIME) {
|
|
198
|
+
const audioBuffer = this.audioQueue.shift();
|
|
199
|
+
const source = this.audioContext.createBufferSource();
|
|
200
|
+
source.buffer = audioBuffer;
|
|
201
|
+
source.connect(this.audioContext.destination);
|
|
202
|
+
source.onended = () => {
|
|
203
|
+
source.disconnect();
|
|
204
|
+
this.queuedSourceNodes.delete(source);
|
|
205
|
+
this.scheduleAudioBuffers();
|
|
206
|
+
};
|
|
207
|
+
const startTime = Math.max(this.nextAudioStartTime, this.audioContext.currentTime);
|
|
208
|
+
source.start(startTime);
|
|
209
|
+
this.queuedSourceNodes.add(source);
|
|
210
|
+
this.nextAudioStartTime = startTime + audioBuffer.duration;
|
|
189
211
|
}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
source
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
}
|
|
198
|
-
|
|
212
|
+
}
|
|
213
|
+
stopPlayingAudio() {
|
|
214
|
+
this.audioQueue = [];
|
|
215
|
+
this.nextAudioStartTime = 0;
|
|
216
|
+
for (const source of this.queuedSourceNodes) {
|
|
217
|
+
source.stop();
|
|
218
|
+
source.disconnect();
|
|
219
|
+
}
|
|
220
|
+
this.queuedSourceNodes.clear();
|
|
199
221
|
}
|
|
200
222
|
cleanup() {
|
|
201
223
|
if (this.screenshotInterval) {
|
|
@@ -204,7 +226,6 @@ class GeminiManager extends xb.Script {
|
|
|
204
226
|
}
|
|
205
227
|
// Clear audio queue and stop playback
|
|
206
228
|
this.audioQueue = [];
|
|
207
|
-
this.isPlayingAudio = false;
|
|
208
229
|
if (this.processorNode) {
|
|
209
230
|
this.processorNode.disconnect();
|
|
210
231
|
this.processorNode = null;
|
|
@@ -218,7 +239,7 @@ class GeminiManager extends xb.Script {
|
|
|
218
239
|
this.audioContext = null;
|
|
219
240
|
}
|
|
220
241
|
if (this.audioStream) {
|
|
221
|
-
this.audioStream.getTracks().forEach(track => track.stop());
|
|
242
|
+
this.audioStream.getTracks().forEach((track) => track.stop());
|
|
222
243
|
this.audioStream = null;
|
|
223
244
|
}
|
|
224
245
|
}
|
|
@@ -226,6 +247,27 @@ class GeminiManager extends xb.Script {
|
|
|
226
247
|
if (message.data) {
|
|
227
248
|
this.playAudioChunk(message.data);
|
|
228
249
|
}
|
|
250
|
+
for (const functionCall of message.toolCall?.functionCalls ?? []) {
|
|
251
|
+
const tool = this.tools.find((tool) => tool.name == functionCall.name);
|
|
252
|
+
if (tool) {
|
|
253
|
+
const exec = tool.execute(functionCall.args);
|
|
254
|
+
exec
|
|
255
|
+
.then((result) => {
|
|
256
|
+
this.ai.sendToolResponse({
|
|
257
|
+
functionResponses: {
|
|
258
|
+
id: functionCall.id,
|
|
259
|
+
name: functionCall.name,
|
|
260
|
+
response: {
|
|
261
|
+
output: result.data,
|
|
262
|
+
error: result.error,
|
|
263
|
+
...result.metadata,
|
|
264
|
+
},
|
|
265
|
+
},
|
|
266
|
+
});
|
|
267
|
+
})
|
|
268
|
+
.catch((error) => console.error('Tool error:', error));
|
|
269
|
+
}
|
|
270
|
+
}
|
|
229
271
|
if (message.serverContent) {
|
|
230
272
|
if (message.serverContent.inputTranscription) {
|
|
231
273
|
const text = message.serverContent.inputTranscription.text;
|
|
@@ -239,6 +281,10 @@ class GeminiManager extends xb.Script {
|
|
|
239
281
|
this.dispatchEvent({ type: 'outputTranscription', message: text });
|
|
240
282
|
}
|
|
241
283
|
}
|
|
284
|
+
if (message.serverContent.interrupted) {
|
|
285
|
+
this.stopPlayingAudio();
|
|
286
|
+
this.dispatchEvent({ type: 'interrupted' });
|
|
287
|
+
}
|
|
242
288
|
if (message.serverContent.turnComplete) {
|
|
243
289
|
this.dispatchEvent({ type: 'turnComplete' });
|
|
244
290
|
}
|
|
@@ -24,8 +24,7 @@ class SimpleDecalGeometry extends THREE.BufferGeometry {
|
|
|
24
24
|
projectorMatrix.makeRotationFromQuaternion(orientation);
|
|
25
25
|
projectorMatrix.setPosition(position);
|
|
26
26
|
projectorMatrix.scale(scale);
|
|
27
|
-
projectorMatrix
|
|
28
|
-
.invert(); // Inverts the matrix for projection calculations.
|
|
27
|
+
projectorMatrix.invert(); // Inverts the matrix for projection calculations.
|
|
29
28
|
// Accesses the vertices, UVs, and indices from the geometry attributes.
|
|
30
29
|
const vertices = this.attributes.position.array;
|
|
31
30
|
const uvs = this.attributes.uv.array;
|
|
@@ -46,15 +45,20 @@ class SimpleDecalGeometry extends THREE.BufferGeometry {
|
|
|
46
45
|
uvs[2 * i] = vector4.x + 0.5;
|
|
47
46
|
uvs[2 * i + 1] = vector4.y + 0.5;
|
|
48
47
|
// Checks if the vertex is within the -0.5 to 0.5 range in all dimensions.
|
|
49
|
-
vertexBounded[i] = Number(vector4.x >= -0.5 &&
|
|
50
|
-
vector4.
|
|
48
|
+
vertexBounded[i] = Number(vector4.x >= -0.5 &&
|
|
49
|
+
vector4.x <= 0.5 &&
|
|
50
|
+
vector4.y >= -0.5 &&
|
|
51
|
+
vector4.y <= 0.5 &&
|
|
52
|
+
vector4.z >= -0.5 &&
|
|
53
|
+
vector4.z <= 0.5);
|
|
51
54
|
}
|
|
52
55
|
// Creates a list of indices that correspond to bounded vertices only.
|
|
53
56
|
const goodIndices = [];
|
|
54
57
|
for (let i = 0; i < indices.length / 3; ++i) {
|
|
55
58
|
// Adds the triangle indices if any of its vertices are inside the
|
|
56
59
|
// bounding box.
|
|
57
|
-
if (vertexBounded[indices[3 * i]] ||
|
|
60
|
+
if (vertexBounded[indices[3 * i]] ||
|
|
61
|
+
vertexBounded[indices[3 * i + 1]] ||
|
|
58
62
|
vertexBounded[indices[3 * i + 2]]) {
|
|
59
63
|
goodIndices.push(indices[3 * i]);
|
|
60
64
|
goodIndices.push(indices[3 * i + 1]);
|
|
@@ -10,15 +10,14 @@ let CustomInstruction = class CustomInstruction extends SimulatorInstructionsCar
|
|
|
10
10
|
return html `${this.customInstruction.header}`;
|
|
11
11
|
}
|
|
12
12
|
getImageContents() {
|
|
13
|
-
return this.customInstruction.videoSrc
|
|
14
|
-
|
|
15
|
-
<
|
|
16
|
-
src=${this.customInstruction.videoSrc}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
html ``;
|
|
13
|
+
return this.customInstruction.videoSrc
|
|
14
|
+
? html `
|
|
15
|
+
<video playsinline autoplay muted loop>
|
|
16
|
+
<source src=${this.customInstruction.videoSrc} type="video/webm" />
|
|
17
|
+
Your browser does not support the video tag.
|
|
18
|
+
</video>
|
|
19
|
+
`
|
|
20
|
+
: html ``;
|
|
22
21
|
}
|
|
23
22
|
getDescriptionContents() {
|
|
24
23
|
return html `${this.customInstruction.description}`;
|
|
@@ -11,10 +11,8 @@ let HandsInstructions = class HandsInstructions extends SimulatorInstructionsCar
|
|
|
11
11
|
getImageContents() {
|
|
12
12
|
return html `
|
|
13
13
|
<video playsinline autoplay muted loop>
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
type="video/webm">
|
|
17
|
-
Your browser does not support the video tag.
|
|
14
|
+
<source src="${SIMULATOR_HANDS_VIDEO_PATH}" type="video/webm" />
|
|
15
|
+
Your browser does not support the video tag.
|
|
18
16
|
</video>
|
|
19
17
|
`;
|
|
20
18
|
}
|
|
@@ -22,14 +20,23 @@ let HandsInstructions = class HandsInstructions extends SimulatorInstructionsCar
|
|
|
22
20
|
return html `
|
|
23
21
|
<h2>Hands Mode</h2>
|
|
24
22
|
<p>
|
|
25
|
-
From Navigation Mode, press <strong>Left Shift</strong> to enter
|
|
26
|
-
This mode allows for precise manipulation
|
|
23
|
+
From Navigation Mode, press <strong>Left Shift</strong> to enter
|
|
24
|
+
<strong>Hands Mode</strong>. This mode allows for precise manipulation
|
|
25
|
+
of virtual hands.
|
|
27
26
|
</p>
|
|
28
27
|
<ul>
|
|
29
|
-
|
|
30
|
-
<
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
<li>
|
|
29
|
+
<strong>Move Hand:</strong> Use the W, A, S, D keys to move it
|
|
30
|
+
forward, left, backward, and right.
|
|
31
|
+
</li>
|
|
32
|
+
<li>
|
|
33
|
+
<strong>Elevate Hand:</strong> Use the Q (up) and E (down) keys.
|
|
34
|
+
</li>
|
|
35
|
+
<li>
|
|
36
|
+
<strong>Switch Active Hand:</strong> Press the T key to toggle between
|
|
37
|
+
hands.
|
|
38
|
+
</li>
|
|
39
|
+
<li><strong>Simulate Pinch:</strong> Press the Spacebar.</li>
|
|
33
40
|
</ul>
|
|
34
41
|
`;
|
|
35
42
|
}
|
|
@@ -11,10 +11,8 @@ let NavigationInstructions = class NavigationInstructions extends SimulatorInstr
|
|
|
11
11
|
getImageContents() {
|
|
12
12
|
return html `
|
|
13
13
|
<video playsinline autoplay muted loop>
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
type="video/webm">
|
|
17
|
-
Your browser does not support the video tag.
|
|
14
|
+
<source src=${SIMULATOR_NAVIGATION_VIDEO_PATH} type="video/webm" />
|
|
15
|
+
Your browser does not support the video tag.
|
|
18
16
|
</video>
|
|
19
17
|
`;
|
|
20
18
|
}
|
|
@@ -22,13 +20,16 @@ let NavigationInstructions = class NavigationInstructions extends SimulatorInstr
|
|
|
22
20
|
return html `
|
|
23
21
|
<h2>Navigation Mode</h2>
|
|
24
22
|
<p>
|
|
25
|
-
Press <strong>Left Shift</strong> to toggle Navigation Mode.
|
|
26
|
-
|
|
23
|
+
Press <strong>Left Shift</strong> to toggle Navigation Mode. In this
|
|
24
|
+
mode, virtual hands appear and the mouse controls the camera view.
|
|
27
25
|
</p>
|
|
28
26
|
<ul>
|
|
29
|
-
|
|
30
|
-
<
|
|
31
|
-
|
|
27
|
+
<li>
|
|
28
|
+
<strong>Move Forward/Backward/Sideways:</strong> Use the W, A, S, D
|
|
29
|
+
keys.
|
|
30
|
+
</li>
|
|
31
|
+
<li><strong>Move Up/Down:</strong> Use the Q and E keys.</li>
|
|
32
|
+
<li><strong>Rotate Camera:</strong> Click and drag the mouse.</li>
|
|
32
33
|
</ul>
|
|
33
34
|
`;
|
|
34
35
|
}
|