xrblocks 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![NPM Package](https://img.shields.io/npm/v/xrblocks)](https://www.npmjs.com/package/xrblocks)
4
4
  [![Build Size](https://badgen.net/bundlephobia/minzip/xrblocks)](https://bundlephobia.com/result?p=xrblocks)
5
5
  ![jsDelivr hits (GitHub)](https://img.shields.io/jsdelivr/gh/hw/google/xrblocks)
6
- [![arXiv](https://img.shields.io/badge/arXiv-1234.56789-b31b1b.svg?style=plastic)](https://arxiv.org/abs/2509.25504)
6
+ [![arXiv](https://img.shields.io/badge/arXiv-2509.25504-b31b1b.svg)](https://arxiv.org/abs/2509.25504)
7
7
 
8
8
  #### JavaScript library for rapid XR and AI prototyping
9
9
 
@@ -37,7 +37,7 @@ export declare class GeminiManager extends xb.Script<GeminiManagerEventMap> {
37
37
  setupAudioCapture(): Promise<void>;
38
38
  startLiveAI(params: GoogleGenAITypes.LiveConnectConfig, model?: string): Promise<void>;
39
39
  startScreenshotCapture(intervalMs?: number): void;
40
- captureAndSendScreenshot(): void;
40
+ captureAndSendScreenshot(): Promise<void>;
41
41
  sendAudioData(audioBuffer: ArrayBuffer): void;
42
42
  sendVideoFrame(base64Image: string): void;
43
43
  initializeAudioContext(): Promise<void>;
@@ -123,9 +123,9 @@ class GeminiManager extends xb.Script {
123
123
  this.captureAndSendScreenshot();
124
124
  }, intervalMs);
125
125
  }
126
- captureAndSendScreenshot() {
126
+ async captureAndSendScreenshot() {
127
127
  try {
128
- const base64Image = this.xrDeviceCamera.getSnapshot({
128
+ const base64Image = await this.xrDeviceCamera.getSnapshot({
129
129
  outputFormat: 'base64',
130
130
  mimeType: 'image/jpeg',
131
131
  quality: 1,
@@ -20,18 +20,29 @@ export interface VideoStreamEventMap<T> extends THREE.Object3DEventMap {
20
20
  details?: T;
21
21
  };
22
22
  }
23
- export type VideoStreamGetSnapshotOptions = {
23
+ type VideoStreamGetSnapshotImageDataOptionsBase = {
24
24
  /** The target width, defaults to the video width. */
25
25
  width?: number;
26
26
  /** The target height, defaults to the video height. */
27
27
  height?: number;
28
- /** The output format, defaults to 'texture'. */
29
- outputFormat?: 'texture' | 'base64' | 'imageData';
30
- /** The MIME type for base64 output. */
28
+ };
29
+ export type VideoStreamGetSnapshotImageDataOptions = VideoStreamGetSnapshotImageDataOptionsBase & {
30
+ outputFormat: 'imageData';
31
+ };
32
+ export type VideoStreamGetSnapshotBase64Options = VideoStreamGetSnapshotImageDataOptionsBase & {
33
+ outputFormat: 'base64';
34
+ mimeType?: string;
35
+ quality?: number;
36
+ };
37
+ export type VideoStreamGetSnapshotBlobOptions = VideoStreamGetSnapshotImageDataOptionsBase & {
38
+ outputFormat: 'blob';
31
39
  mimeType?: string;
32
- /** The quality for base64 output. */
33
40
  quality?: number;
34
41
  };
42
+ export type VideoStreamGetSnapshotTextureOptions = VideoStreamGetSnapshotImageDataOptionsBase & {
43
+ outputFormat?: 'texture';
44
+ };
45
+ export type VideoStreamGetSnapshotOptions = VideoStreamGetSnapshotImageDataOptions | VideoStreamGetSnapshotBase64Options | VideoStreamGetSnapshotTextureOptions | VideoStreamGetSnapshotBlobOptions;
35
46
  export type VideoStreamOptions = {
36
47
  /** Hint for performance optimization for frequent captures. */
37
48
  willCaptureFrequently?: boolean;
@@ -49,6 +60,7 @@ export declare class VideoStream<T extends VideoStreamDetails = VideoStreamDetai
49
60
  state: StreamState;
50
61
  protected stream_: MediaStream | null;
51
62
  protected video_: HTMLVideoElement;
63
+ get video(): HTMLVideoElement;
52
64
  private willCaptureFrequently_;
53
65
  private frozenTexture_;
54
66
  private canvas_;
@@ -75,7 +87,10 @@ export declare class VideoStream<T extends VideoStreamDetails = VideoStreamDetai
75
87
  * @param options - The options for the snapshot.
76
88
  * @returns The captured data.
77
89
  */
78
- getSnapshot({ width, height, outputFormat, mimeType, quality, }?: VideoStreamGetSnapshotOptions): string | THREE.Texture<unknown> | ImageData | null;
90
+ getSnapshot(_: VideoStreamGetSnapshotImageDataOptions): ImageData;
91
+ getSnapshot(_: VideoStreamGetSnapshotBase64Options): Promise<string | null>;
92
+ getSnapshot(_: VideoStreamGetSnapshotTextureOptions): THREE.Texture;
93
+ getSnapshot(_: VideoStreamGetSnapshotBlobOptions): Promise<Blob | null>;
79
94
  /**
80
95
  * Stops the current video stream tracks.
81
96
  */
@@ -85,3 +100,4 @@ export declare class VideoStream<T extends VideoStreamDetails = VideoStreamDetai
85
100
  */
86
101
  dispose(): void;
87
102
  }
103
+ export {};
@@ -6,6 +6,7 @@ export declare class DetectedMesh extends THREE.Mesh {
6
6
  private collider?;
7
7
  private blendedWorld?;
8
8
  private lastChangedTime;
9
+ semanticLabel?: string;
9
10
  constructor(xrMesh: XRMesh, material: THREE.Material);
10
11
  initRapierPhysics(RAPIER: typeof RAPIER_NS, blendedWorld: RAPIER_NS.World): void;
11
12
  updateVertices(mesh: XRMesh): void;
@@ -8,11 +8,13 @@ export declare class MeshDetector extends Script {
8
8
  options: typeof MeshDetectionOptions;
9
9
  renderer: typeof THREE.WebGLRenderer;
10
10
  };
11
- private _debugMaterial;
11
+ private debugMaterials;
12
+ private fallbackDebugMaterial;
12
13
  xrMeshToThreeMesh: Map<XRMesh, DetectedMesh>;
13
14
  threeMeshToXrMesh: Map<DetectedMesh, XRMesh>;
14
15
  private renderer;
15
16
  private physics?;
17
+ private defaultMaterial;
16
18
  init({ options, renderer, }: {
17
19
  options: MeshDetectionOptions;
18
20
  renderer: THREE.WebGLRenderer;
@@ -4,36 +4,20 @@ import * as THREE from 'three';
4
4
  * about the object's properties. Note: 3D object position is stored in the
5
5
  * position property of `Three.Object3D`.
6
6
  */
7
- export declare class DetectedObject extends THREE.Object3D {
8
- /**
9
- * A semantic label for the object (e.g., 'chair', 'table').
10
- */
7
+ export declare class DetectedObject<T> extends THREE.Object3D {
11
8
  label: string;
12
- /**
13
- * The cropped part of the image that contains the object, as a base64 Data
14
- * URL.
15
- */
16
9
  image: string | null;
17
- /**
18
- * The 2D bounding box of the detected object in normalized screen
19
- * coordinates. Values are between 0 and 1. Centerpoint of this bounding is
20
- * used for backproject to obtain 3D object position (i.e., this.position).
21
- */
22
10
  detection2DBoundingBox: THREE.Box2;
23
- /**
24
- * Allows for additional, dynamic properties to be added to the object, as
25
- * defined in the schema in `ObjectsOptions`.
26
- */
27
- [key: string]: any;
11
+ data: T;
28
12
  /**
29
13
  * @param label - The semantic label of the object.
30
14
  * @param image - The base64 encoded cropped image of the object.
31
- * @param boundingBox - The 2D bounding box.
32
- * @param additionalData - A key-value map of additional properties from the
33
- * detector. This includes any object proparties that is requested through the
15
+ * @param detection2DBoundingBox - The 2D bounding box of the detected object in normalized screen
16
+ * coordinates. Values are between 0 and 1. The center point of this bounding box is
17
+ * used for backprojection to obtain 3D object position (i.e., this.position).
18
+ * @param data - Additional properties from the detector.
19
+ * This includes any object properties that are requested through the
34
20
  * schema but is not assigned a class property by default (e.g., color, size).
35
21
  */
36
- constructor(label: string, image: string | null, boundingBox: THREE.Box2, additionalData?: {
37
- [key: string]: any;
38
- });
22
+ constructor(label: string, image: string | null, detection2DBoundingBox: THREE.Box2, data: T);
39
23
  }
@@ -52,7 +52,7 @@ export declare class ObjectDetector extends Script {
52
52
  * @returns A promise that resolves with an
53
53
  * array of detected `DetectedObject` instances.
54
54
  */
55
- runDetection(): Promise<(DetectedObject | null | undefined)[]>;
55
+ runDetection<T = null>(): Promise<DetectedObject<T>[]>;
56
56
  /**
57
57
  * Runs object detection using the Gemini backend.
58
58
  */
@@ -64,7 +64,7 @@ export declare class ObjectDetector extends Script {
64
64
  * all objects are returned.
65
65
  * @returns An array of `Object` instances.
66
66
  */
67
- get(label?: null): DetectedObject[];
67
+ get<T = null>(label?: null): DetectedObject<T>[];
68
68
  /**
69
69
  * Removes all currently detected objects from the scene and internal
70
70
  * tracking.
package/build/xrblocks.js CHANGED
@@ -14,9 +14,9 @@
14
14
  * limitations under the License.
15
15
  *
16
16
  * @file xrblocks.js
17
- * @version v0.8.2
18
- * @commitid f15488f
19
- * @builddate 2026-01-17T01:42:44.623Z
17
+ * @version v0.9.0
18
+ * @commitid a54974b
19
+ * @builddate 2026-01-30T18:28:09.762Z
20
20
  * @description XR Blocks SDK, built from source with the above commit ID.
21
21
  * @agent When using with Gemini to create XR apps, use **Gemini Canvas** mode,
22
22
  * and follow rules below:
@@ -3534,11 +3534,22 @@ var StreamState;
3534
3534
  StreamState["ERROR"] = "error";
3535
3535
  StreamState["NO_DEVICES_FOUND"] = "no_devices_found";
3536
3536
  })(StreamState || (StreamState = {}));
3537
+ function blobToBase64(blob) {
3538
+ return new Promise((resolve, reject) => {
3539
+ const reader = new FileReader();
3540
+ reader.onloadend = () => resolve(reader.result);
3541
+ reader.readAsDataURL(blob);
3542
+ reader.onerror = () => reject(reader.error);
3543
+ });
3544
+ }
3537
3545
  /**
3538
3546
  * The base class for handling video streams (from camera or file), managing
3539
3547
  * the underlying <video> element, streaming state, and snapshot logic.
3540
3548
  */
3541
3549
  class VideoStream extends Script {
3550
+ get video() {
3551
+ return this.video_;
3552
+ }
3542
3553
  /**
3543
3554
  * @param options - The configuration options.
3544
3555
  */
@@ -3605,12 +3616,7 @@ class VideoStream extends Script {
3605
3616
  }
3606
3617
  }
3607
3618
  }
3608
- /**
3609
- * Captures the current video frame.
3610
- * @param options - The options for the snapshot.
3611
- * @returns The captured data.
3612
- */
3613
- getSnapshot({ width = this.width, height = this.height, outputFormat = 'texture', mimeType = 'image/jpeg', quality = 0.9, } = {}) {
3619
+ getSnapshot({ width = this.width, height = this.height, outputFormat = 'texture', ...rest } = {}) {
3614
3620
  if (!this.loaded ||
3615
3621
  !width ||
3616
3622
  !height ||
@@ -3620,6 +3626,8 @@ class VideoStream extends Script {
3620
3626
  if (width > this.width || height > this.height) {
3621
3627
  console.warn(`The requested snapshot width (${width}px x ${height}px) is larger than the source video width (${this.width}px x ${this.height}px). The snapshot will be upscaled.`);
3622
3628
  }
3629
+ const mimeType = ('mimeType' in rest ? rest.mimeType : undefined) ?? 'image/jpeg';
3630
+ const quality = ('quality' in rest ? rest.quality : undefined) ?? 0.9;
3623
3631
  try {
3624
3632
  // Re-initialize canvas only if dimensions have changed.
3625
3633
  if (!this.canvas_ ||
@@ -3637,7 +3645,9 @@ class VideoStream extends Script {
3637
3645
  case 'imageData':
3638
3646
  return this.context_.getImageData(0, 0, width, height);
3639
3647
  case 'base64':
3640
- return this.canvas_.toDataURL(mimeType, quality);
3648
+ return new Promise((resolve) => this.canvas_.toBlob(resolve, mimeType, quality)).then((blob) => (blob ? blobToBase64(blob) : null));
3649
+ case 'blob':
3650
+ return new Promise((resolve) => this.canvas_.toBlob(resolve, mimeType, quality));
3641
3651
  case 'texture':
3642
3652
  default: {
3643
3653
  const frozenTexture = new THREE.Texture(this.canvas_);
@@ -6188,7 +6198,12 @@ class User extends Script {
6188
6198
  const currentlyTouchedMeshes = [];
6189
6199
  this.scene.traverse((object) => {
6190
6200
  if (object.isMesh && object.visible) {
6191
- tempBox.setFromObject(object);
6201
+ try {
6202
+ tempBox.setFromObject(object);
6203
+ }
6204
+ catch (_) {
6205
+ return;
6206
+ }
6192
6207
  if (tempBox.containsPoint(indexTipPosition)) {
6193
6208
  currentlyTouchedMeshes.push(object);
6194
6209
  }
@@ -10242,20 +10257,19 @@ class DetectedObject extends THREE.Object3D {
10242
10257
  /**
10243
10258
  * @param label - The semantic label of the object.
10244
10259
  * @param image - The base64 encoded cropped image of the object.
10245
- * @param boundingBox - The 2D bounding box.
10246
- * @param additionalData - A key-value map of additional properties from the
10247
- * detector. This includes any object proparties that is requested through the
10260
+ * @param detection2DBoundingBox - The 2D bounding box of the detected object in normalized screen
10261
+ * coordinates. Values are between 0 and 1. The center point of this bounding box is
10262
+ * used for backprojection to obtain 3D object position (i.e., this.position).
10263
+ * @param data - Additional properties from the detector.
10264
+ * This includes any object properties that are requested through the
10248
10265
  * schema but is not assigned a class property by default (e.g., color, size).
10249
10266
  */
10250
- constructor(label, image, boundingBox,
10251
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
10252
- additionalData = {}) {
10267
+ constructor(label, image, detection2DBoundingBox, data) {
10253
10268
  super();
10254
10269
  this.label = label;
10255
10270
  this.image = image;
10256
- this.detection2DBoundingBox = boundingBox;
10257
- // Assign any additional properties to this object.
10258
- Object.assign(this, additionalData);
10271
+ this.detection2DBoundingBox = detection2DBoundingBox;
10272
+ this.data = data;
10259
10273
  }
10260
10274
  }
10261
10275
 
@@ -10325,7 +10339,10 @@ class ObjectDetector extends Script {
10325
10339
  console.error('Gemini is unavailable for object detection.');
10326
10340
  return [];
10327
10341
  }
10328
- const base64Image = this.deviceCamera.getSnapshot({
10342
+ // Cache depth and camera data to align with the captured image frame.
10343
+ const cachedDepthArray = this.depth.depthArray[0].slice(0);
10344
+ const cachedMatrixWorld = this.camera.matrixWorld.clone();
10345
+ const base64Image = await this.deviceCamera.getSnapshot({
10329
10346
  outputFormat: 'base64',
10330
10347
  });
10331
10348
  if (!base64Image) {
@@ -10333,9 +10350,6 @@ class ObjectDetector extends Script {
10333
10350
  return [];
10334
10351
  }
10335
10352
  const { mimeType, strippedBase64 } = parseBase64DataURL(base64Image);
10336
- // Cache depth and camera data to align with the captured image frame.
10337
- const cachedDepthArray = this.depth.depthArray[0].slice(0);
10338
- const cachedMatrixWorld = this.camera.matrixWorld.clone();
10339
10353
  // Temporarily set the Gemini config for this specific query type.
10340
10354
  const originalGeminiConfig = this.aiOptions.gemini.config;
10341
10355
  this.aiOptions.gemini.config = this._geminiConfig;
@@ -10401,7 +10415,7 @@ class ObjectDetector extends Script {
10401
10415
  return object;
10402
10416
  }
10403
10417
  });
10404
- const detectedObjects = (await Promise.all(detectionPromises)).filter(Boolean);
10418
+ const detectedObjects = (await Promise.all(detectionPromises)).filter((obj) => obj !== null && obj !== undefined);
10405
10419
  return detectedObjects;
10406
10420
  }
10407
10421
  catch (error) {
@@ -10813,17 +10827,25 @@ class PlaneDetector extends Script {
10813
10827
  }
10814
10828
  }
10815
10829
 
10830
+ function toFlatArray(array) {
10831
+ if (!Array.isArray(array))
10832
+ return array;
10833
+ const result = new Float32Array(array.reduce((sum, arr) => sum + arr.length, 0));
10834
+ array.reduce((offset, arr) => (result.set(arr, offset), offset + arr.length), 0);
10835
+ return result;
10836
+ }
10816
10837
  class DetectedMesh extends THREE.Mesh {
10817
10838
  constructor(xrMesh, material) {
10818
10839
  const geometry = new THREE.BufferGeometry();
10819
- const vertices = new Float32Array(xrMesh.vertices);
10820
- const indices = new Uint32Array(xrMesh.indices);
10840
+ const vertices = toFlatArray(xrMesh.vertices);
10841
+ const indices = xrMesh.indices;
10821
10842
  geometry.setAttribute('position', new THREE.BufferAttribute(vertices, 3));
10822
10843
  geometry.setIndex(new THREE.BufferAttribute(indices, 1));
10823
10844
  geometry.computeVertexNormals();
10824
10845
  super(geometry, material);
10825
10846
  this.lastChangedTime = 0;
10826
10847
  this.lastChangedTime = xrMesh.lastChangedTime;
10848
+ this.semanticLabel = xrMesh.semanticLabel;
10827
10849
  }
10828
10850
  initRapierPhysics(RAPIER, blendedWorld) {
10829
10851
  this.RAPIER = RAPIER;
@@ -10842,8 +10864,8 @@ class DetectedMesh extends THREE.Mesh {
10842
10864
  return;
10843
10865
  this.lastChangedTime = mesh.lastChangedTime;
10844
10866
  const geometry = new THREE.BufferGeometry();
10845
- const vertices = new Float32Array(mesh.vertices);
10846
- const indices = new Uint32Array(mesh.indices);
10867
+ const vertices = toFlatArray(mesh.vertices);
10868
+ const indices = mesh.indices;
10847
10869
  geometry.setAttribute('position', new THREE.BufferAttribute(vertices, 3));
10848
10870
  geometry.setIndex(new THREE.BufferAttribute(indices, 1));
10849
10871
  geometry.computeVertexNormals();
@@ -10858,14 +10880,18 @@ class DetectedMesh extends THREE.Mesh {
10858
10880
  }
10859
10881
  }
10860
10882
 
10883
+ const SEMANTIC_LABELS = ['Floor', 'Ceiling', 'Wall', 'Table'];
10884
+ const SEMANTIC_COLORS = [0x00ff00, 0xff0000, 0x0000ff, 0xffff00];
10861
10885
  // Wrapper around WebXR Mesh Detection API
10862
10886
  // https://immersive-web.github.io/real-world-meshing/
10863
10887
  class MeshDetector extends Script {
10864
10888
  constructor() {
10865
10889
  super(...arguments);
10866
- this._debugMaterial = null;
10890
+ this.debugMaterials = new Map();
10891
+ this.fallbackDebugMaterial = null;
10867
10892
  this.xrMeshToThreeMesh = new Map();
10868
10893
  this.threeMeshToXrMesh = new Map();
10894
+ this.defaultMaterial = new THREE.MeshBasicMaterial({ visible: false });
10869
10895
  }
10870
10896
  static { this.dependencies = {
10871
10897
  options: MeshDetectionOptions,
@@ -10874,11 +10900,18 @@ class MeshDetector extends Script {
10874
10900
  init({ options, renderer, }) {
10875
10901
  this.renderer = renderer;
10876
10902
  if (options.showDebugVisualizations) {
10877
- this._debugMaterial = new THREE.MeshBasicMaterial({
10878
- color: 0xffff00,
10903
+ this.fallbackDebugMaterial = new THREE.MeshBasicMaterial({
10904
+ color: 0x000000,
10879
10905
  wireframe: true,
10880
10906
  side: THREE.DoubleSide,
10881
10907
  });
10908
+ for (let i = 0; i < SEMANTIC_LABELS.length; i++) {
10909
+ this.debugMaterials.set(SEMANTIC_LABELS[i], new THREE.MeshBasicMaterial({
10910
+ color: SEMANTIC_COLORS[i],
10911
+ wireframe: true,
10912
+ side: THREE.DoubleSide,
10913
+ }));
10914
+ }
10882
10915
  }
10883
10916
  }
10884
10917
  initPhysics(physics) {
@@ -10896,6 +10929,7 @@ class MeshDetector extends Script {
10896
10929
  if (!meshes.has(xrMesh)) {
10897
10930
  this.xrMeshToThreeMesh.delete(xrMesh);
10898
10931
  this.threeMeshToXrMesh.delete(threeMesh);
10932
+ threeMesh.geometry.dispose();
10899
10933
  this.remove(threeMesh);
10900
10934
  }
10901
10935
  }
@@ -10918,7 +10952,10 @@ class MeshDetector extends Script {
10918
10952
  }
10919
10953
  }
10920
10954
  createMesh(frame, xrMesh) {
10921
- const material = this._debugMaterial || new THREE.MeshBasicMaterial({ visible: false });
10955
+ const semanticLabel = xrMesh.semanticLabel;
10956
+ const material = (semanticLabel && this.debugMaterials.get(semanticLabel)) ||
10957
+ this.fallbackDebugMaterial ||
10958
+ this.defaultMaterial;
10922
10959
  const mesh = new DetectedMesh(xrMesh, material);
10923
10960
  this.updateMeshPose(frame, xrMesh, mesh);
10924
10961
  return mesh;