xrblocks 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -22
- package/build/addons/ai/GeminiManager.d.ts +7 -4
- package/build/addons/ai/GeminiManager.js +43 -22
- package/build/agent/Agent.d.ts +25 -1
- package/build/agent/SkyboxAgent.d.ts +119 -3
- package/build/agent/Tool.d.ts +18 -4
- package/build/agent/index.d.ts +1 -0
- package/build/agent/tools/GenerateSkyboxTool.d.ts +3 -3
- package/build/agent/tools/GetWeatherTool.d.ts +7 -8
- package/build/ai/AI.d.ts +2 -2
- package/build/ai/Gemini.d.ts +5 -4
- package/build/core/Core.d.ts +1 -0
- package/build/core/Options.d.ts +10 -0
- package/build/core/components/ScreenshotSynthesizer.d.ts +4 -2
- package/build/depth/Depth.d.ts +5 -3
- package/build/depth/DepthMesh.d.ts +7 -0
- package/build/depth/DepthTextures.d.ts +6 -4
- package/build/depth/occlusion/OcclusionPass.d.ts +6 -5
- package/build/simulator/SimulatorOptions.d.ts +1 -0
- package/build/sound/AudioListener.d.ts +16 -1
- package/build/sound/AudioPlayer.d.ts +21 -2
- package/build/sound/CoreSound.d.ts +25 -0
- package/build/ui/components/IconButton.d.ts +6 -2
- package/build/ui/components/TextButton.d.ts +0 -1
- package/build/xrblocks.js +773 -151
- package/build/xrblocks.js.map +1 -1
- package/build/xrblocks.min.js +1 -1
- package/build/xrblocks.min.js.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -2,18 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/xrblocks)
|
|
4
4
|
[](https://bundlephobia.com/result?p=xrblocks)
|
|
5
|
-
|
|
5
|
+

|
|
6
6
|
|
|
7
7
|
#### JavaScript library for rapid XR and AI prototyping
|
|
8
8
|
|
|
9
|
+
[Site](https://xrblocks.github.io/) —
|
|
9
10
|
[Manual](https://xrblocks.github.io/docs/) —
|
|
10
|
-
[Templates](https://xrblocks.github.io/docs/templates/Basic) —
|
|
11
|
-
[
|
|
11
|
+
[Templates](https://xrblocks.github.io/docs/templates/Basic/) —
|
|
12
|
+
[Demos](https://xrblocks.github.io/docs/samples/ModelViewer/) —
|
|
13
|
+
[YouTube](https://www.youtube.com/watch?v=75QJHTsAoB8) —
|
|
14
|
+
[arXiv](https://arxiv.org/abs/2509.25504) —
|
|
15
|
+
[Blog](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
|
|
12
16
|
|
|
13
17
|
<p align="center">
|
|
14
|
-
|
|
15
|
-
<a href="https://xrblocks.github.io/docs/samples/XR-Emoji" target="_blank"><img width="32.3%" src="assets/xremoji-demo.webp" alt="XR Emoji" /></a>
|
|
16
|
-
|
|
18
|
+
<a href="https://xrblocks.github.io/docs/samples/Ballpit/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/ballpit-demo.webp" alt="Ballpit" /></a>
|
|
19
|
+
<a href="https://xrblocks.github.io/docs/samples/XR-Emoji/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/xremoji-demo.webp" alt="XR Emoji" /></a>
|
|
20
|
+
<a href="https://xrblocks.github.io/docs/samples/Gemini-Icebreakers/" target="_blank"><img width="32.3%" src="https://cdn.jsdelivr.net/gh/google/xrblocks@main/assets/gemini-icebreakers-demo.webp" alt="Gemini Icebreakers" /></a>
|
|
17
21
|
</p>
|
|
18
22
|
|
|
19
23
|
### Description
|
|
@@ -25,21 +29,27 @@ powerful desktop simulator for development. The framework emphasizes a
|
|
|
25
29
|
user-centric, developer-friendly SDK designed to simplify the creation of
|
|
26
30
|
immersive applications with features like:
|
|
27
31
|
|
|
28
|
-
-
|
|
29
|
-
|
|
30
|
-
-
|
|
31
|
-
|
|
32
|
-
-
|
|
33
|
-
|
|
34
|
-
-
|
|
35
|
-
|
|
32
|
+
- **Hand Tracking & Gestures:** Access advanced hand tracking, custom
|
|
33
|
+
gestures with TensorFlow Lite / PyTorch models, and interaction events.
|
|
34
|
+
- **World Understanding:** Present samples with depth sensing, geometry-aware
|
|
35
|
+
physics, and object recognition with Gemini in both XR and desktop simulator.
|
|
36
|
+
- **AI Integration:** Seamlessly connect to Gemini for multimodal
|
|
37
|
+
understanding and live conversational experiences.
|
|
38
|
+
- **Cross-Platform:** Write once and deploy to both XR devices and desktop
|
|
39
|
+
Chrome browsers.
|
|
40
|
+
|
|
41
|
+
We welcome all contributors to foster an AI + XR community! Read our
|
|
42
|
+
[blog post](https://research.google/blog/xr-blocks-accelerating-ai-xr-innovation/)
|
|
43
|
+
and [white paper](https://arxiv.org/abs/2509.25504) for a visionary roadmap.
|
|
36
44
|
|
|
37
45
|
### Usage
|
|
38
46
|
|
|
39
47
|
XR Blocks can be imported directly into a webpage using an importmap. This code
|
|
40
48
|
creates a basic XR scene containing a cylinder. When you view the scene, you can
|
|
41
49
|
pinch your fingers (in XR) or click (in the desktop simulator) to change the
|
|
42
|
-
cylinder's color.
|
|
50
|
+
cylinder's color. Check out
|
|
51
|
+
[this live demo](https://xrblocks.github.io/docs/templates/Basic/) with simple
|
|
52
|
+
code below:
|
|
43
53
|
|
|
44
54
|
```html
|
|
45
55
|
<!DOCTYPE html>
|
|
@@ -132,8 +142,9 @@ npm ci
|
|
|
132
142
|
npm run build
|
|
133
143
|
```
|
|
134
144
|
|
|
135
|
-
This is not an officially supported Google product
|
|
136
|
-
|
|
145
|
+
This is not an officially supported Google product, but will be actively
|
|
146
|
+
maintained by the XR Labs team and external collaborators. This project is not
|
|
147
|
+
eligible for the
|
|
137
148
|
[Google Open Source Software Vulnerability Rewards Program](https://bughunters.google.com/open-source-security).
|
|
138
149
|
|
|
139
150
|
### User Data & Permissions
|
|
@@ -183,11 +194,85 @@ To remove XR Blocks from your code, simply remove the lines from your `<script
|
|
|
183
194
|
type="importmap">` tag in HTML, or `import * from xrblocks` in JavaScript, or
|
|
184
195
|
use `npm uninstall xrblocks` from your project directory.
|
|
185
196
|
|
|
197
|
+
### References
|
|
198
|
+
|
|
199
|
+
If you find XR Blocks inspiring or useful in your research, please reference it
|
|
200
|
+
as:
|
|
201
|
+
|
|
202
|
+
```bibtex
|
|
203
|
+
@misc{Li2025XR,
|
|
204
|
+
title={{XR Blocks: Accelerating Human-centered AI + XR Innovation}},
|
|
205
|
+
author={Li, David and Numan, Nels and Qian, Xun and Chen, Yanhe and Zhou, Zhongyi and Alekseev, Evgenii and Lee, Geonsun and Cooper, Alex and Xia, Min and Chung, Scott and Nelson, Jeremy and Yuan, Xiuxiu and Dias, Jolica and Bettridge, Tim and Hersh, Benjamin and Huynh, Michelle and Piascik, Konrad and Cabello, Ricardo and Kim, David and Du, Ruofei},
|
|
206
|
+
year={2025},
|
|
207
|
+
eprint={2509.25504},
|
|
208
|
+
archivePrefix={arXiv},
|
|
209
|
+
primaryClass={cs.HC},
|
|
210
|
+
url={https://arxiv.org/abs/2509.25504},
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### Key Works Built with XR Blocks
|
|
215
|
+
|
|
216
|
+
These references are built with XR Blocks:
|
|
217
|
+
|
|
218
|
+
```bibtex
|
|
219
|
+
@inproceedings{Lee2025Sensible,
|
|
220
|
+
title = {{Sensible Agent: A Framework for Unobtrusive Interaction with Proactive AR Agent}},
|
|
221
|
+
author = {Lee, Geonsun and Xia, Min and Numan, Nels and Qian, Xun and Li, David and Chen, Yanhe and Kulshrestha, Achin and Chatterjee, Ishan and Zhang, Yinda and Manocha, Dinesh and Kim, David and Du, Ruofei},
|
|
222
|
+
booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
|
|
223
|
+
year = {2025},
|
|
224
|
+
publisher = {ACM},
|
|
225
|
+
numpages = {22},
|
|
226
|
+
series = {UIST},
|
|
227
|
+
doi = {10.1145/3746059.3747748},
|
|
228
|
+
}
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
#### Inspiring Related Works
|
|
232
|
+
|
|
233
|
+
We call for contributors to integrate our prior art into XR Blocks to enhance
|
|
234
|
+
reproducibility and knowledge sharing:
|
|
235
|
+
|
|
236
|
+
E.g., integrating models from https://visualblocks.withgoogle.com into XR Blocks:
|
|
237
|
+
|
|
238
|
+
```bibtex
|
|
239
|
+
@inproceedings{Du2023Rapsai,
|
|
240
|
+
title = {{Rapsai: Accelerating Machine Learning Prototyping of Multimedia Applications Through Visual Programming}},
|
|
241
|
+
author = {Du, Ruofei and Li, Na and Jin, Jing and Carney, Michelle and Miles, Scott and Kleiner, Maria and Yuan, Xiuxiu and Zhang, Yinda and Kulkarni, Anuva and Liu, XingyuBruce and Sabie, Ahmed and Orts-Escolano, Sergio and Kar, Abhishek and Yu, Ping and Iyengar, Ram and Kowdle, Adarsh and Olwal, Alex},
|
|
242
|
+
booktitle = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
|
|
243
|
+
year = {2023},
|
|
244
|
+
publisher = {ACM},
|
|
245
|
+
month = {Apr.},
|
|
246
|
+
day = {22-29},
|
|
247
|
+
number = {125},
|
|
248
|
+
pages = {1--23},
|
|
249
|
+
series = {CHI},
|
|
250
|
+
doi = {10.1145/3544548.3581338},
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Extending XR Blocks to XR communication:
|
|
255
|
+
|
|
256
|
+
```bibtex
|
|
257
|
+
@inproceedings{Hu2025DialogLab,
|
|
258
|
+
title = {{DialogLab: Authoring, Simulating, and Testing Dynamic Group Conversations in Hybrid Human-AI Conversations}},
|
|
259
|
+
author = {Hu, Erzhen and Chen, Yanhe and Li, Mingyi and Phadnis, Vrushank and Xu, Pingmei and Qian, Xun and Olwal, Alex and Kim, David and Heo, Seongkook and Du, Ruofei},
|
|
260
|
+
booktitle = {Proceedings of the 39th Annual ACM Symposium on User Interface Software and Technology},
|
|
261
|
+
year = {2025},
|
|
262
|
+
number = {210},
|
|
263
|
+
publisher = {ACM},
|
|
264
|
+
number = {210},
|
|
265
|
+
pages = {1--20},
|
|
266
|
+
series = {UIST},
|
|
267
|
+
doi = {10.1145/3746059.3747696},
|
|
268
|
+
}
|
|
269
|
+
```
|
|
270
|
+
|
|
186
271
|
### Terms of Service
|
|
187
272
|
|
|
188
|
-
-
|
|
189
|
-
|
|
190
|
-
|
|
273
|
+
- Please follow
|
|
274
|
+
[Google's Privacy & Terms](https://ai.google.dev/gemini-api/terms) when
|
|
275
|
+
using this SDK.
|
|
191
276
|
|
|
192
|
-
-
|
|
193
|
-
|
|
277
|
+
- When using AI features in this SDK, please follow
|
|
278
|
+
[Gemini's Privacy & Terms](https://ai.google.dev/gemini-api/terms).
|
|
@@ -19,23 +19,26 @@ export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
|
|
|
19
19
|
processorNode: AudioWorkletNode | null;
|
|
20
20
|
isAIRunning: boolean;
|
|
21
21
|
audioQueue: AudioBuffer[];
|
|
22
|
-
|
|
22
|
+
nextAudioStartTime: number;
|
|
23
23
|
private screenshotInterval?;
|
|
24
24
|
currentInputText: string;
|
|
25
25
|
currentOutputText: string;
|
|
26
|
+
tools: xb.Tool[];
|
|
26
27
|
constructor();
|
|
27
28
|
init(): void;
|
|
28
|
-
startGeminiLive(
|
|
29
|
+
startGeminiLive({ liveParams }?: {
|
|
30
|
+
liveParams?: xb.GeminiStartLiveSessionParams;
|
|
31
|
+
}): Promise<void>;
|
|
29
32
|
stopGeminiLive(): Promise<void>;
|
|
30
33
|
setupAudioCapture(): Promise<void>;
|
|
31
|
-
startLiveAI(): Promise<void>;
|
|
34
|
+
startLiveAI(params: xb.GeminiStartLiveSessionParams): Promise<void>;
|
|
32
35
|
startScreenshotCapture(intervalMs?: number): void;
|
|
33
36
|
captureAndSendScreenshot(): void;
|
|
34
37
|
sendAudioData(audioBuffer: ArrayBuffer): void;
|
|
35
38
|
sendVideoFrame(base64Image: string): void;
|
|
36
39
|
initializeAudioContext(): Promise<void>;
|
|
37
40
|
playAudioChunk(audioData: string): Promise<void>;
|
|
38
|
-
|
|
41
|
+
scheduleAudioBuffers(): void;
|
|
39
42
|
cleanup(): void;
|
|
40
43
|
handleAIMessage(message: GoogleGenAITypes.LiveServerMessage): void;
|
|
41
44
|
arrayBufferToBase64(buffer: ArrayBuffer): string;
|
|
@@ -12,23 +12,29 @@ class GeminiManager extends xb.Script {
|
|
|
12
12
|
this.isAIRunning = false;
|
|
13
13
|
// Audio playback setup
|
|
14
14
|
this.audioQueue = [];
|
|
15
|
-
this.
|
|
15
|
+
this.nextAudioStartTime = 0;
|
|
16
16
|
// Transcription state
|
|
17
17
|
this.currentInputText = '';
|
|
18
18
|
this.currentOutputText = '';
|
|
19
|
+
this.tools = [];
|
|
19
20
|
}
|
|
20
21
|
init() {
|
|
21
22
|
this.xrDeviceCamera = xb.core.deviceCamera;
|
|
22
23
|
this.ai = xb.core.ai;
|
|
23
24
|
}
|
|
24
|
-
async startGeminiLive() {
|
|
25
|
+
async startGeminiLive({ liveParams } = {}) {
|
|
25
26
|
if (this.isAIRunning || !this.ai) {
|
|
26
27
|
console.warn('AI already running or not available');
|
|
27
28
|
return;
|
|
28
29
|
}
|
|
30
|
+
liveParams = liveParams || {};
|
|
31
|
+
liveParams.tools = liveParams.tools || [];
|
|
32
|
+
for (const tool of this.tools) {
|
|
33
|
+
liveParams.tools.push(tool.toJSON());
|
|
34
|
+
}
|
|
29
35
|
try {
|
|
30
36
|
await this.setupAudioCapture();
|
|
31
|
-
await this.startLiveAI();
|
|
37
|
+
await this.startLiveAI(liveParams);
|
|
32
38
|
this.startScreenshotCapture();
|
|
33
39
|
this.isAIRunning = true;
|
|
34
40
|
}
|
|
@@ -82,7 +88,7 @@ class GeminiManager extends xb.Script {
|
|
|
82
88
|
this.sourceNode.connect(this.processorNode);
|
|
83
89
|
this.processorNode.connect(this.audioContext.destination);
|
|
84
90
|
}
|
|
85
|
-
async startLiveAI() {
|
|
91
|
+
async startLiveAI(params) {
|
|
86
92
|
return new Promise((resolve, reject) => {
|
|
87
93
|
this.ai.setLiveCallbacks({
|
|
88
94
|
onopen: () => {
|
|
@@ -99,7 +105,7 @@ class GeminiManager extends xb.Script {
|
|
|
99
105
|
this.isAIRunning = false;
|
|
100
106
|
}
|
|
101
107
|
});
|
|
102
|
-
this.ai.startLiveSession().catch(reject);
|
|
108
|
+
this.ai.startLiveSession(params).catch(reject);
|
|
103
109
|
});
|
|
104
110
|
}
|
|
105
111
|
startScreenshotCapture(intervalMs = 1000) {
|
|
@@ -174,28 +180,28 @@ class GeminiManager extends xb.Script {
|
|
|
174
180
|
channelData[i] = int16View[i] / 32768.0;
|
|
175
181
|
}
|
|
176
182
|
this.audioQueue.push(audioBuffer);
|
|
177
|
-
|
|
178
|
-
this.playNextAudioBuffer();
|
|
179
|
-
}
|
|
183
|
+
this.scheduleAudioBuffers();
|
|
180
184
|
}
|
|
181
185
|
catch (error) {
|
|
182
186
|
console.error('Error playing audio chunk:', error);
|
|
183
187
|
}
|
|
184
188
|
}
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
+
scheduleAudioBuffers() {
|
|
190
|
+
const SCHEDULE_AHEAD_TIME = 0.2;
|
|
191
|
+
while (this.audioQueue.length > 0 &&
|
|
192
|
+
this.nextAudioStartTime <=
|
|
193
|
+
this.audioContext.currentTime + SCHEDULE_AHEAD_TIME) {
|
|
194
|
+
const audioBuffer = this.audioQueue.shift();
|
|
195
|
+
const source = this.audioContext.createBufferSource();
|
|
196
|
+
source.buffer = audioBuffer;
|
|
197
|
+
source.connect(this.audioContext.destination);
|
|
198
|
+
source.onended = () => {
|
|
199
|
+
this.scheduleAudioBuffers();
|
|
200
|
+
};
|
|
201
|
+
const startTime = Math.max(this.nextAudioStartTime, this.audioContext.currentTime);
|
|
202
|
+
source.start(startTime);
|
|
203
|
+
this.nextAudioStartTime = startTime + audioBuffer.duration;
|
|
189
204
|
}
|
|
190
|
-
this.isPlayingAudio = true;
|
|
191
|
-
const audioBuffer = this.audioQueue.shift();
|
|
192
|
-
const source = this.audioContext.createBufferSource();
|
|
193
|
-
source.buffer = audioBuffer;
|
|
194
|
-
source.connect(this.audioContext.destination);
|
|
195
|
-
source.onended = () => {
|
|
196
|
-
this.playNextAudioBuffer();
|
|
197
|
-
};
|
|
198
|
-
source.start();
|
|
199
205
|
}
|
|
200
206
|
cleanup() {
|
|
201
207
|
if (this.screenshotInterval) {
|
|
@@ -204,7 +210,6 @@ class GeminiManager extends xb.Script {
|
|
|
204
210
|
}
|
|
205
211
|
// Clear audio queue and stop playback
|
|
206
212
|
this.audioQueue = [];
|
|
207
|
-
this.isPlayingAudio = false;
|
|
208
213
|
if (this.processorNode) {
|
|
209
214
|
this.processorNode.disconnect();
|
|
210
215
|
this.processorNode = null;
|
|
@@ -226,6 +231,22 @@ class GeminiManager extends xb.Script {
|
|
|
226
231
|
if (message.data) {
|
|
227
232
|
this.playAudioChunk(message.data);
|
|
228
233
|
}
|
|
234
|
+
for (const functionCall of message.toolCall?.functionCalls ?? []) {
|
|
235
|
+
const tool = this.tools.find(tool => tool.name == functionCall.name);
|
|
236
|
+
if (tool) {
|
|
237
|
+
const exec = tool.execute(functionCall.args);
|
|
238
|
+
exec.then(result => {
|
|
239
|
+
this.ai.sendToolResponse({
|
|
240
|
+
functionResponses: {
|
|
241
|
+
id: functionCall.id,
|
|
242
|
+
name: functionCall.name,
|
|
243
|
+
response: { 'output': result }
|
|
244
|
+
}
|
|
245
|
+
});
|
|
246
|
+
})
|
|
247
|
+
.catch((error) => console.error('Tool error:', error));
|
|
248
|
+
}
|
|
249
|
+
}
|
|
229
250
|
if (message.serverContent) {
|
|
230
251
|
if (message.serverContent.inputTranscription) {
|
|
231
252
|
const text = message.serverContent.inputTranscription.text;
|
package/build/agent/Agent.d.ts
CHANGED
|
@@ -2,6 +2,19 @@ import { AI } from '../ai/AI';
|
|
|
2
2
|
import { Context } from './Context';
|
|
3
3
|
import { Memory } from './Memory';
|
|
4
4
|
import { Tool } from './Tool';
|
|
5
|
+
/**
|
|
6
|
+
* Lifecycle callbacks for agent events.
|
|
7
|
+
*/
|
|
8
|
+
export interface AgentLifecycleCallbacks {
|
|
9
|
+
/** Called when a session starts */
|
|
10
|
+
onSessionStart?: () => void | Promise<void>;
|
|
11
|
+
/** Called when a session ends */
|
|
12
|
+
onSessionEnd?: () => void | Promise<void>;
|
|
13
|
+
/** Called after a tool is executed */
|
|
14
|
+
onToolExecuted?: (toolName: string, result: unknown) => void;
|
|
15
|
+
/** Called when an error occurs */
|
|
16
|
+
onError?: (error: Error) => void;
|
|
17
|
+
}
|
|
5
18
|
/**
|
|
6
19
|
* An agent that can use an AI to reason and execute tools.
|
|
7
20
|
*/
|
|
@@ -11,7 +24,9 @@ export declare class Agent {
|
|
|
11
24
|
tools: Tool[];
|
|
12
25
|
memory: Memory;
|
|
13
26
|
contextBuilder: Context;
|
|
14
|
-
|
|
27
|
+
lifecycleCallbacks?: AgentLifecycleCallbacks;
|
|
28
|
+
isSessionActive: boolean;
|
|
29
|
+
constructor(ai: AI, tools?: Tool[], instruction?: string, callbacks?: AgentLifecycleCallbacks);
|
|
15
30
|
/**
|
|
16
31
|
* Starts the agent's reasoning loop with an initial prompt.
|
|
17
32
|
* @param prompt - The initial prompt from the user.
|
|
@@ -25,4 +40,13 @@ export declare class Agent {
|
|
|
25
40
|
*/
|
|
26
41
|
private run;
|
|
27
42
|
findTool(name: string): Tool | undefined;
|
|
43
|
+
/**
|
|
44
|
+
* Get the current session state.
|
|
45
|
+
* @returns Object containing session information
|
|
46
|
+
*/
|
|
47
|
+
getSessionState(): {
|
|
48
|
+
isActive: boolean;
|
|
49
|
+
toolCount: number;
|
|
50
|
+
memorySize: number;
|
|
51
|
+
};
|
|
28
52
|
}
|
|
@@ -2,11 +2,127 @@ import type * as GoogleGenAITypes from '@google/genai';
|
|
|
2
2
|
import * as THREE from 'three';
|
|
3
3
|
import { AI } from '../ai/AI';
|
|
4
4
|
import { CoreSound } from '../sound/CoreSound';
|
|
5
|
-
import { Agent } from './Agent';
|
|
5
|
+
import { Agent, AgentLifecycleCallbacks } from './Agent';
|
|
6
|
+
import { ToolResult } from './Tool';
|
|
7
|
+
/**
|
|
8
|
+
* State information for a live session.
|
|
9
|
+
*/
|
|
10
|
+
export interface LiveSessionState {
|
|
11
|
+
/** Whether the session is currently active */
|
|
12
|
+
isActive: boolean;
|
|
13
|
+
/** Timestamp when session started */
|
|
14
|
+
startTime?: number;
|
|
15
|
+
/** Timestamp when session ended */
|
|
16
|
+
endTime?: number;
|
|
17
|
+
/** Number of messages received */
|
|
18
|
+
messageCount: number;
|
|
19
|
+
/** Number of tool calls executed */
|
|
20
|
+
toolCallCount: number;
|
|
21
|
+
/** Last error message if any */
|
|
22
|
+
lastError?: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Skybox Agent for generating 360-degree equirectangular backgrounds through conversation.
|
|
26
|
+
*
|
|
27
|
+
* @example Basic usage
|
|
28
|
+
* ```typescript
|
|
29
|
+
* // 1. Enable audio (required for live sessions)
|
|
30
|
+
* await xb.core.sound.enableAudio();
|
|
31
|
+
*
|
|
32
|
+
* // 2. Create agent
|
|
33
|
+
* const agent = new xb.SkyboxAgent(xb.core.ai, xb.core.sound, xb.core.scene);
|
|
34
|
+
*
|
|
35
|
+
* // 3. Start session
|
|
36
|
+
* await agent.startLiveSession({
|
|
37
|
+
* onopen: () => console.log('Session ready'),
|
|
38
|
+
* onmessage: (msg) => handleMessage(msg),
|
|
39
|
+
* onclose: () => console.log('Session closed')
|
|
40
|
+
* });
|
|
41
|
+
*
|
|
42
|
+
* // 4. Clean up when done
|
|
43
|
+
* await agent.stopLiveSession();
|
|
44
|
+
* xb.core.sound.disableAudio();
|
|
45
|
+
* ```
|
|
46
|
+
*
|
|
47
|
+
* @example With lifecycle callbacks
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const agent = new xb.SkyboxAgent(
|
|
50
|
+
* xb.core.ai,
|
|
51
|
+
* xb.core.sound,
|
|
52
|
+
* xb.core.scene,
|
|
53
|
+
* {
|
|
54
|
+
* onSessionStart: () => updateUI('active'),
|
|
55
|
+
* onSessionEnd: () => updateUI('inactive'),
|
|
56
|
+
* onError: (error) => showError(error)
|
|
57
|
+
* }
|
|
58
|
+
* );
|
|
59
|
+
* ```
|
|
60
|
+
*
|
|
61
|
+
* @remarks
|
|
62
|
+
* - Audio must be enabled BEFORE starting live session using `xb.core.sound.enableAudio()`
|
|
63
|
+
* - Users are responsible for managing audio lifecycle
|
|
64
|
+
* - Always call `stopLiveSession()` before disabling audio
|
|
65
|
+
* - Session state can be checked using `getSessionState()` and `getLiveSessionState()`
|
|
66
|
+
*/
|
|
6
67
|
export declare class SkyboxAgent extends Agent {
|
|
7
68
|
private sound;
|
|
8
|
-
|
|
9
|
-
|
|
69
|
+
private sessionState;
|
|
70
|
+
constructor(ai: AI, sound: CoreSound, scene: THREE.Scene, callbacks?: AgentLifecycleCallbacks);
|
|
71
|
+
/**
|
|
72
|
+
* Starts a live AI session for real-time conversation.
|
|
73
|
+
*
|
|
74
|
+
* @param callbacks - Optional callbacks for session events. Can also be set using ai.setLiveCallbacks()
|
|
75
|
+
* @throws If AI model is not initialized or live session is not available
|
|
76
|
+
*
|
|
77
|
+
* @remarks
|
|
78
|
+
* Audio must be enabled separately using `xb.core.sound.enableAudio()` before starting the session.
|
|
79
|
+
* This gives users control over when microphone permissions are requested.
|
|
80
|
+
*/
|
|
81
|
+
startLiveSession(callbacks?: GoogleGenAITypes.LiveCallbacks): Promise<void>;
|
|
82
|
+
/**
|
|
83
|
+
* Stops the live AI session.
|
|
84
|
+
*
|
|
85
|
+
* @remarks
|
|
86
|
+
* Audio must be disabled separately using `xb.core.sound.disableAudio()` after stopping the session.
|
|
87
|
+
*/
|
|
10
88
|
stopLiveSession(): Promise<void>;
|
|
89
|
+
/**
|
|
90
|
+
* Wraps user callbacks to track session state and trigger lifecycle events.
|
|
91
|
+
* @param callbacks - The callbacks to wrap.
|
|
92
|
+
* @returns The wrapped callbacks.
|
|
93
|
+
*/
|
|
94
|
+
private wrapCallbacks;
|
|
95
|
+
/**
|
|
96
|
+
* Sends tool execution results back to the AI.
|
|
97
|
+
*
|
|
98
|
+
* @param response - The tool response containing function results
|
|
99
|
+
*/
|
|
11
100
|
sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): Promise<void>;
|
|
101
|
+
/**
|
|
102
|
+
* Validates that a tool response has the correct format.
|
|
103
|
+
* @param response - The tool response to validate.
|
|
104
|
+
* @returns True if the response is valid, false otherwise.
|
|
105
|
+
*/
|
|
106
|
+
private validateToolResponse;
|
|
107
|
+
/**
|
|
108
|
+
* Helper to create a properly formatted tool response from a ToolResult.
|
|
109
|
+
*
|
|
110
|
+
* @param id - The function call ID
|
|
111
|
+
* @param name - The function name
|
|
112
|
+
* @param result - The ToolResult from tool execution
|
|
113
|
+
* @returns A properly formatted FunctionResponse
|
|
114
|
+
*/
|
|
115
|
+
static createToolResponse(id: string, name: string, result: ToolResult): GoogleGenAITypes.FunctionResponse;
|
|
116
|
+
/**
|
|
117
|
+
* Gets the current live session state.
|
|
118
|
+
*
|
|
119
|
+
* @returns Read-only session state information
|
|
120
|
+
*/
|
|
121
|
+
getLiveSessionState(): Readonly<LiveSessionState>;
|
|
122
|
+
/**
|
|
123
|
+
* Gets the duration of the session in milliseconds.
|
|
124
|
+
*
|
|
125
|
+
* @returns Duration in ms, or null if session hasn't started
|
|
126
|
+
*/
|
|
127
|
+
getSessionDuration(): number | null;
|
|
12
128
|
}
|
package/build/agent/Tool.d.ts
CHANGED
|
@@ -3,6 +3,20 @@ export interface ToolCall {
|
|
|
3
3
|
name: string;
|
|
4
4
|
args: unknown;
|
|
5
5
|
}
|
|
6
|
+
/**
|
|
7
|
+
* Standardized result type for tool execution.
|
|
8
|
+
* @typeParam T - The type of data returned on success.
|
|
9
|
+
*/
|
|
10
|
+
export interface ToolResult<T = unknown> {
|
|
11
|
+
/** Whether the tool execution succeeded */
|
|
12
|
+
success: boolean;
|
|
13
|
+
/** The result data if successful */
|
|
14
|
+
data?: T;
|
|
15
|
+
/** Error message if execution failed */
|
|
16
|
+
error?: string;
|
|
17
|
+
/** Additional metadata about the execution */
|
|
18
|
+
metadata?: Record<string, unknown>;
|
|
19
|
+
}
|
|
6
20
|
export type ToolSchema = Omit<GoogleGenAITypes.Schema, 'type' | 'properties'> & {
|
|
7
21
|
properties?: Record<string, ToolSchema>;
|
|
8
22
|
type?: keyof typeof GoogleGenAITypes.Type;
|
|
@@ -15,7 +29,7 @@ export type ToolOptions = {
|
|
|
15
29
|
/** The parameters of the tool */
|
|
16
30
|
parameters?: ToolSchema;
|
|
17
31
|
/** A callback to execute when the tool is triggered */
|
|
18
|
-
onTriggered?: (args: unknown) => unknown
|
|
32
|
+
onTriggered?: (args: unknown) => unknown | Promise<unknown>;
|
|
19
33
|
};
|
|
20
34
|
/**
|
|
21
35
|
* A base class for tools that the agent can use.
|
|
@@ -30,11 +44,11 @@ export declare class Tool {
|
|
|
30
44
|
*/
|
|
31
45
|
constructor(options: ToolOptions);
|
|
32
46
|
/**
|
|
33
|
-
* Executes the tool's action.
|
|
47
|
+
* Executes the tool's action with standardized error handling.
|
|
34
48
|
* @param args - The arguments for the tool.
|
|
35
|
-
* @returns
|
|
49
|
+
* @returns A promise that resolves with a ToolResult containing success/error information.
|
|
36
50
|
*/
|
|
37
|
-
execute(args: unknown):
|
|
51
|
+
execute(args: unknown): Promise<ToolResult>;
|
|
38
52
|
/**
|
|
39
53
|
* Returns a JSON representation of the tool.
|
|
40
54
|
* @returns A valid FunctionDeclaration object.
|
package/build/agent/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as THREE from 'three';
|
|
2
2
|
import { AI } from '../../ai/AI';
|
|
3
|
-
import { Tool } from '../Tool';
|
|
3
|
+
import { Tool, ToolResult } from '../Tool';
|
|
4
4
|
/**
|
|
5
5
|
* A tool that generates a 360-degree equirectangular skybox image
|
|
6
6
|
* based on a given prompt using an AI service.
|
|
@@ -12,9 +12,9 @@ export declare class GenerateSkyboxTool extends Tool {
|
|
|
12
12
|
/**
|
|
13
13
|
* Executes the tool's action.
|
|
14
14
|
* @param args - The prompt to use to generate the skybox.
|
|
15
|
-
* @returns A promise that resolves with
|
|
15
|
+
* @returns A promise that resolves with a ToolResult containing success/error information.
|
|
16
16
|
*/
|
|
17
17
|
execute(args: {
|
|
18
18
|
prompt: string;
|
|
19
|
-
}): Promise<string
|
|
19
|
+
}): Promise<ToolResult<string>>;
|
|
20
20
|
}
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
import { Tool } from '../Tool';
|
|
1
|
+
import { Tool, ToolResult } from '../Tool';
|
|
2
2
|
export interface GetWeatherArgs {
|
|
3
3
|
latitude: number;
|
|
4
4
|
longitude: number;
|
|
5
5
|
}
|
|
6
|
-
export
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
};
|
|
6
|
+
export interface WeatherData {
|
|
7
|
+
temperature: number;
|
|
8
|
+
weathercode: number;
|
|
9
|
+
}
|
|
11
10
|
/**
|
|
12
11
|
* A tool that gets the current weather for a specific location.
|
|
13
12
|
*/
|
|
@@ -16,7 +15,7 @@ export declare class GetWeatherTool extends Tool {
|
|
|
16
15
|
/**
|
|
17
16
|
* Executes the tool's action.
|
|
18
17
|
* @param args - The arguments for the tool.
|
|
19
|
-
* @returns A promise that resolves with
|
|
18
|
+
* @returns A promise that resolves with a ToolResult containing weather information.
|
|
20
19
|
*/
|
|
21
|
-
execute(args: GetWeatherArgs): Promise<
|
|
20
|
+
execute(args: GetWeatherArgs): Promise<ToolResult<WeatherData>>;
|
|
22
21
|
}
|
package/build/ai/AI.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type * as GoogleGenAITypes from '@google/genai';
|
|
|
2
2
|
import { Script } from '../core/Script';
|
|
3
3
|
import { AIOptions, GeminiOptions, OpenAIOptions } from './AIOptions';
|
|
4
4
|
import { GeminiResponse } from './AITypes';
|
|
5
|
-
import { Gemini } from './Gemini';
|
|
5
|
+
import { Gemini, GeminiStartLiveSessionParams } from './Gemini';
|
|
6
6
|
import { OpenAI } from './OpenAI';
|
|
7
7
|
export type ModelClass = Gemini | OpenAI;
|
|
8
8
|
export type ModelOptions = GeminiOptions | OpenAIOptions;
|
|
@@ -60,7 +60,7 @@ export declare class AI extends Script {
|
|
|
60
60
|
query(input: {
|
|
61
61
|
prompt: string;
|
|
62
62
|
}, tools?: never[]): Promise<GeminiResponse | string | null>;
|
|
63
|
-
startLiveSession(config?:
|
|
63
|
+
startLiveSession(config?: GeminiStartLiveSessionParams): Promise<GoogleGenAITypes.Session>;
|
|
64
64
|
stopLiveSession(): Promise<void>;
|
|
65
65
|
setLiveCallbacks(callbacks: GoogleGenAITypes.LiveCallbacks): Promise<void>;
|
|
66
66
|
sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): void;
|
package/build/ai/Gemini.d.ts
CHANGED
|
@@ -14,6 +14,10 @@ export interface GeminiQueryInput {
|
|
|
14
14
|
config?: GoogleGenAITypes.LiveConnectConfig;
|
|
15
15
|
data?: GoogleGenAITypes.LiveSendRealtimeInputParameters;
|
|
16
16
|
}
|
|
17
|
+
export type GeminiStartLiveSessionParams = {
|
|
18
|
+
tools?: GoogleGenAITypes.FunctionDeclaration[];
|
|
19
|
+
systemInstruction?: GoogleGenAITypes.ContentUnion | string;
|
|
20
|
+
};
|
|
17
21
|
export declare class Gemini extends BaseAIModel {
|
|
18
22
|
protected options: GeminiOptions;
|
|
19
23
|
inited: boolean;
|
|
@@ -25,10 +29,7 @@ export declare class Gemini extends BaseAIModel {
|
|
|
25
29
|
init(): Promise<void>;
|
|
26
30
|
isAvailable(): boolean;
|
|
27
31
|
isLiveAvailable(): false | typeof GoogleGenAITypes.Modality | undefined;
|
|
28
|
-
startLiveSession(params?:
|
|
29
|
-
tools?: GoogleGenAITypes.FunctionDeclaration[];
|
|
30
|
-
systemInstruction?: GoogleGenAITypes.ContentUnion | string;
|
|
31
|
-
}): Promise<GoogleGenAITypes.Session>;
|
|
32
|
+
startLiveSession(params?: GeminiStartLiveSessionParams): Promise<GoogleGenAITypes.Session>;
|
|
32
33
|
stopLiveSession(): Promise<void>;
|
|
33
34
|
setLiveCallbacks(callbacks: GoogleGenAITypes.LiveCallbacks): void;
|
|
34
35
|
sendToolResponse(response: GoogleGenAITypes.LiveSendToolResponseParameters): void;
|
package/build/core/Core.d.ts
CHANGED