mujoco-react 10.0.1 → 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ /**
2
+ * @license
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Project detector/image coordinates from a camera view into the rendered MuJoCo scene.
6
+ */
7
+
8
+ import * as THREE from 'three';
9
+ import type {
10
+ CameraFrameCaptureOptions,
11
+ CameraFrameCaptureSource,
12
+ ImagePointCoordinateSpace,
13
+ ImagePointProjectionOptions,
14
+ ImagePointProjectionResult,
15
+ } from '../types';
16
+ import { CAPTURE_EXCLUDE_KEY } from './cameraFrameCapture';
17
+
18
+ const _raycaster = new THREE.Raycaster(); // module-level raycaster reused by every projectImagePointTo3D call
19
+ const _ndc = new THREE.Vector2(); // scratch vector holding the NDC point handed to the raycaster
20
+
21
/**
 * Build a fresh `THREE.Vector3` from an optional vector-like value.
 *
 * @param value - Either a `THREE.Vector3` or an indexable `[x, y, z]` triple; may be omitted.
 * @param fallback - Vector that is cloned when `value` is falsy.
 * @returns A newly allocated vector; neither argument is returned by reference.
 */
function toVector3(
  value: CameraFrameCaptureOptions['position'] | undefined,
  fallback: THREE.Vector3
) {
  if (value instanceof THREE.Vector3) {
    return value.clone();
  }
  if (!value) {
    return fallback.clone();
  }
  // Remaining case: an indexable triple of numbers.
  return new THREE.Vector3(value[0], value[1], value[2]);
}
30
+
31
/**
 * Write a pose (position, up vector, orientation) onto `camera`.
 *
 * Position and up come from `options` when present, otherwise from `fallbackCamera`.
 * Orientation is resolved in priority order: explicit `quaternion`, then `lookAt`,
 * then the fallback camera's quaternion. The camera's world matrix is refreshed
 * before returning.
 *
 * @param camera - Camera to mutate in place.
 * @param options - Pose overrides.
 * @param fallbackCamera - Source of any pose component that `options` leaves unset.
 */
function applyCameraPose(
  camera: THREE.Camera,
  options: CameraFrameCaptureOptions,
  fallbackCamera: THREE.Camera
) {
  const { position, up, quaternion, lookAt } = options;

  camera.position.copy(toVector3(position, fallbackCamera.position));
  camera.up.copy(toVector3(up, fallbackCamera.up));

  if (quaternion instanceof THREE.Quaternion) {
    camera.quaternion.copy(quaternion);
  } else if (quaternion) {
    // Array form: components are passed to Quaternion.set in index order.
    camera.quaternion.set(quaternion[0], quaternion[1], quaternion[2], quaternion[3]);
  } else if (lookAt) {
    // lookAt relies on the position and up vector assigned above.
    camera.lookAt(toVector3(lookAt, new THREE.Vector3()));
  } else {
    camera.quaternion.copy(fallbackCamera.quaternion);
  }

  camera.updateMatrixWorld();
}
58
+
59
/**
 * Create the throwaway camera used to cast the projection ray.
 *
 * The camera is a clone of `options.camera` when provided, otherwise a clone of
 * `fallbackCamera` if that is a perspective camera, otherwise a new perspective
 * camera (45° fov, near 0.01, far 100). Perspective cameras get their aspect set
 * to `width / height` and optional fov/near/far overrides applied. The pose is
 * then resolved with `applyCameraPose`.
 *
 * @param fallbackCamera - Camera used when `options` does not fully specify one.
 * @param options - Camera selection, intrinsics and pose overrides.
 * @param width - Image width used for the aspect ratio.
 * @param height - Image height used for the aspect ratio.
 * @returns A camera that is independent of both inputs.
 */
function createProjectionCamera(
  fallbackCamera: THREE.Camera,
  options: CameraFrameCaptureOptions,
  width: number,
  height: number
) {
  const aspect = width / height;

  // Pick the camera to clone, if any; only perspective fallbacks are reused.
  let template: THREE.Camera | undefined;
  if (options.camera) {
    template = options.camera;
  } else if (fallbackCamera instanceof THREE.PerspectiveCamera) {
    template = fallbackCamera;
  }

  const camera = template
    ? template.clone()
    : new THREE.PerspectiveCamera(45, aspect, 0.01, 100);

  if (camera instanceof THREE.PerspectiveCamera) {
    camera.aspect = aspect;
    if (options.fov != null) camera.fov = options.fov;
    if (options.near != null) camera.near = options.near;
    if (options.far != null) camera.far = options.far;
    camera.updateProjectionMatrix();
  }

  applyCameraPose(camera, options, fallbackCamera);
  return camera;
}
82
+
83
/**
 * Describe where the projection camera pose came from.
 *
 * An explicit `options.source` wins. Otherwise the first truthy option in this
 * order decides the kind: `cameraName`, `siteName`, `bodyName`, `camera`, then
 * any of `position` / `lookAt` / `quaternion`. With none of them set the
 * fallback camera is reported.
 *
 * @param options - Capture options to inspect.
 * @returns The provenance descriptor for the pose.
 */
function getProjectionSource(options: CameraFrameCaptureOptions): CameraFrameCaptureSource {
  const { source, cameraName, siteName, bodyName, camera } = options;

  if (source) {
    return source;
  }
  if (cameraName) {
    return { kind: 'mujoco-camera', cameraName };
  }
  if (siteName) {
    return { kind: 'mujoco-site', siteName };
  }
  if (bodyName) {
    return { kind: 'mujoco-body', bodyName };
  }
  if (camera) {
    return { kind: 'custom-camera' };
  }

  const hasExplicitPose = Boolean(options.position || options.lookAt || options.quaternion);
  return hasExplicitPose ? { kind: 'explicit-pose' } : { kind: 'fallback-camera' };
}
92
+
93
/**
 * Resolve the image dimensions used to interpret image-point coordinates.
 *
 * Each dimension takes the first defined value of `imageWidth`/`imageHeight`,
 * `width`/`height`, then the renderer canvas size. The value is floored and
 * clamped to at least 1.
 *
 * @param renderer - Renderer whose canvas size is the last-resort fallback.
 * @param options - Projection options carrying optional size overrides.
 * @returns `[width, height]` as integers of at least 1.
 */
function imageSize(
  renderer: THREE.WebGLRenderer,
  options: ImagePointProjectionOptions
): [number, number] {
  const toDimension = (value: number) => Math.max(1, Math.floor(value));

  // The canvas is only consulted when no explicit size was supplied.
  const widthPx = toDimension(options.imageWidth ?? options.width ?? renderer.domElement.width);
  const heightPx = toDimension(options.imageHeight ?? options.height ?? renderer.domElement.height);

  return [widthPx, heightPx];
}
102
+
103
/**
 * Convert an image point into normalized device coordinates (x right, y up, -1..1).
 *
 * Supported coordinate spaces:
 * - `ndc`: returned unchanged.
 * - `normalized-1000`: 0..1000 on both axes, top-left origin.
 * - `pixel`: 0..`width` / 0..`height`, top-left origin.
 * - anything else (including the default `normalized`): 0..1, top-left origin.
 *
 * @param x - Horizontal coordinate in `coordinateSpace`.
 * @param y - Vertical coordinate in `coordinateSpace`.
 * @param coordinateSpace - Convention used by `x`/`y`. Defaults to `normalized`.
 * @param width - Image width, used only for `pixel`. Defaults to 1.
 * @param height - Image height, used only for `pixel`. Defaults to 1.
 * @returns `[ndcX, ndcY]`.
 */
export function imagePointToNdc(
  x: number,
  y: number,
  coordinateSpace: ImagePointCoordinateSpace = 'normalized',
  width = 1,
  height = 1
): [number, number] {
  switch (coordinateSpace) {
    case 'ndc':
      return [x, y];
    case 'normalized-1000':
      return [(x / 1000) * 2 - 1, 1 - (y / 1000) * 2];
    case 'pixel': {
      // A zero, negative or NaN dimension would turn the division into
      // NaN/Infinity; fall back to the parameter default of 1 instead.
      const safeWidth = width > 0 ? width : 1;
      const safeHeight = height > 0 ? height : 1;
      return [(x / safeWidth) * 2 - 1, 1 - (y / safeHeight) * 2];
    }
    default:
      return [x * 2 - 1, 1 - y * 2];
  }
}
119
+
120
/**
 * Decide whether an object may be hit by the projection ray.
 *
 * An object is rejected when it is not visible, when its `userData` carries the
 * capture-exclude flag, when its string `geomName` is listed in
 * `hiddenGeomNames`, or when its numeric `geomGroup` is listed in
 * `hiddenGeomGroups` or missing from a provided `visibleGeomGroups`.
 * Objects without a string name / numeric group are unaffected by those filters.
 *
 * @param object - Scene object to test.
 * @param options - Projection options carrying the geom filters.
 * @returns `true` when the object should take part in raycasting.
 */
function isProjectionCandidate(object: THREE.Object3D, options: ImagePointProjectionOptions) {
  const { userData } = object;
  if (!object.visible || userData[CAPTURE_EXCLUDE_KEY]) {
    return false;
  }

  const { hiddenGeomNames, hiddenGeomGroups, visibleGeomGroups } = options;
  const name = userData.geomName;
  const group = userData.geomGroup;

  if (typeof name === 'string' && hiddenGeomNames?.includes(name)) {
    return false;
  }

  if (typeof group === 'number') {
    if (hiddenGeomGroups?.includes(group)) {
      return false;
    }
    if (visibleGeomGroups && !visibleGeomGroups.includes(group)) {
      return false;
    }
  }

  return true;
}
137
+
138
/**
 * Find the body id attached to an object or its nearest ancestor.
 *
 * Walks from `object` towards the root and stops at the first node whose
 * `userData.bodyID` is not `undefined`.
 *
 * @param object - Object where the search starts.
 * @returns That node's `bodyID` when it is a number, otherwise -1
 *   (including when no node in the chain defines one).
 */
function findBodyId(object: THREE.Object3D) {
  for (let node: THREE.Object3D | null = object; node; node = node.parent) {
    const id = node.userData.bodyID;
    if (id !== undefined) {
      return typeof id === 'number' ? id : -1;
    }
  }
  return -1;
}
145
+
146
/**
 * Cast a ray from a camera through an image point and return the nearest hit
 * on the scene's meshes.
 *
 * The image point is converted to NDC with `imagePointToNdc`, using the
 * dimensions resolved by `imageSize`. The ray is cast from a temporary camera
 * built by `createProjectionCamera`, so the caller's cameras are not modified.
 *
 * @param renderer - Renderer whose canvas size is the fallback image size.
 * @param scene - Scene to intersect; its world matrices are refreshed first.
 * @param fallbackCamera - Camera used when `options` does not specify a camera or pose.
 * @param options - Image point, coordinate space, camera and geom-filter options.
 * @returns The closest hit with its body/geom ids, distance, the NDC point and
 *   image size used, and the camera pose provenance; `null` when nothing is hit
 *   within `options.maxDistance`.
 */
export function projectImagePointTo3D(
  renderer: THREE.WebGLRenderer,
  scene: THREE.Scene,
  fallbackCamera: THREE.Camera,
  options: ImagePointProjectionOptions
): ImagePointProjectionResult | null {
  const [width, height] = imageSize(renderer, options);
  const [ndcX, ndcY] = imagePointToNdc(
    options.x,
    options.y,
    options.coordinateSpace,
    width,
    height
  );
  const projectionCamera = createProjectionCamera(fallbackCamera, options, width, height);

  scene.updateMatrixWorld(true);
  _ndc.set(ndcX, ndcY);
  _raycaster.setFromCamera(_ndc, projectionCamera);
  // The raycaster is shared, so `far` is reset on every call.
  _raycaster.far = options.maxDistance ?? Infinity;

  // traverseVisible skips subtrees under an invisible ancestor, which the
  // renderer does not draw either; plain traverse would still offer them.
  const objects: THREE.Object3D[] = [];
  scene.traverseVisible((object) => {
    if ((object as THREE.Mesh).isMesh && isProjectionCandidate(object, options)) {
      objects.push(object);
    }
  });

  // `objects` already holds every candidate mesh at every depth. Recursing
  // would re-test nested meshes and let filtered-out descendants be hit.
  const [hit] = _raycaster.intersectObjects(objects, false);
  if (!hit) return null;

  return {
    point: hit.point.clone(),
    bodyId: findBodyId(hit.object),
    geomId: typeof hit.object.userData.geomID === 'number' ? hit.object.userData.geomID : -1,
    distance: hit.distance,
    ndc: [ndcX, ndcY],
    imageSize: [width, height],
    source: getProjectionSource(options),
  };
}
package/src/types.ts CHANGED
@@ -266,8 +266,18 @@ export interface MujocoModel {
266
266
  geom_friction: Float64Array;
267
267
 
268
268
  // Material
269
+ mat_texid: Int32Array;
270
+ mat_texrepeat: Float32Array;
271
+ mat_texuniform: Uint8Array;
269
272
  mat_rgba: Float32Array;
270
273
 
274
+ // Texture
275
+ tex_adr: Int32Array;
276
+ tex_data: Uint8Array;
277
+ tex_height: Int32Array;
278
+ tex_nchannel: Int32Array;
279
+ tex_width: Int32Array;
280
+
271
281
  // Mesh
272
282
  mesh_vertadr: Int32Array;
273
283
  mesh_vertnum: Int32Array;
@@ -434,6 +444,8 @@ export interface MujocoModule {
434
444
 
435
445
  export interface SceneObject {
436
446
  name: string;
447
+ /** MuJoCo geom name. Defaults to `${name}_geom` for generated objects. */
448
+ geomName?: string;
437
449
  type: 'box' | 'sphere' | 'cylinder';
438
450
  size: [number, number, number];
439
451
  position: [number, number, number];
@@ -735,6 +747,42 @@ export interface RayHit {
735
747
  distance: number;
736
748
  }
737
749
 
750
+ export type ImagePointCoordinateSpace = // coordinate conventions accepted for image points
751
+ | 'normalized' // 0..1 on both axes, top-left origin
752
+ | 'normalized-1000' // 0..1000 on both axes, top-left origin
753
+ | 'pixel' // 0..image width / height, top-left origin
754
+ | 'ndc'; // already normalized device coordinates; used unchanged
755
+
756
+ export interface ImagePointProjectionOptions extends CameraFrameCaptureOptions {
757
+ /** X coordinate of the image point, interpreted according to `coordinateSpace` (normalized 0..1 when that is omitted). */
758
+ x: number;
759
+ /** Y coordinate of the image point, interpreted according to `coordinateSpace` (normalized 0..1, top-left origin, when that is omitted). */
760
+ y: number;
761
+ /**
762
+ * Coordinate convention for x/y (defaults to `normalized` when omitted):
763
+ * - normalized: 0..1 image coordinates, top-left origin
764
+ * - normalized-1000: 0..1000 detector coordinates, top-left origin
765
+ * - pixel: pixel coordinates, top-left origin
766
+ * - ndc: Three.js normalized device coordinates, -1..1, used unchanged
767
+ */
768
+ coordinateSpace?: ImagePointCoordinateSpace;
769
+ /** Image width for pixel coordinates and the projection aspect ratio. Falls back to `width`, then the renderer canvas width; floored and clamped to at least 1. */
770
+ imageWidth?: number;
771
+ /** Image height for pixel coordinates and the projection aspect ratio. Falls back to `height`, then the renderer canvas height; floored and clamped to at least 1. */
772
+ imageHeight?: number;
773
+ /** Ignore hits farther than this distance from the camera ray origin. No limit when omitted. */
774
+ maxDistance?: number;
775
+ }
776
+
777
+ export interface ImagePointProjectionResult extends RayHit {
778
+ /** NDC coordinates `[x, y]` used for raycasting. */
779
+ ndc: [number, number];
780
+ /** Image dimensions `[width, height]` used when interpreting pixel coordinates and setting the projection camera aspect. */
781
+ imageSize: [number, number];
782
+ /** Camera pose provenance, matching camera-frame capture results: `options.source` when set, otherwise inferred from which camera/pose options were supplied. */
783
+ source: CameraFrameCaptureSource;
784
+ }
785
+
738
786
  // ---- Model Options (spec 5.3) ----
739
787
 
740
788
  export interface ModelOptions {
@@ -787,17 +835,141 @@ export interface PolicyInferenceInput extends PolicyObservationInput {
787
835
  observation: PolicyVector;
788
836
  }
789
837
 
838
+ export type PolicyActionChunk = readonly PolicyVector[];
839
+ export type PolicyInferenceOutput = PolicyVector | PolicyActionChunk;
840
+ export type PolicyInferenceResult = PolicyInferenceOutput | Promise<PolicyInferenceOutput>;
841
+
790
842
  export interface PolicyActionInput extends PolicyInferenceInput {
791
843
  action: PolicyVector;
792
844
  }
793
845
 
846
+ export interface PolicyAPI {
847
+ readonly isRunning: boolean;
848
+ start: () => void;
849
+ stop: () => void;
850
+ clearQueue: () => void;
851
+ reset: () => void;
852
+ readonly inFlight: boolean;
853
+ readonly queuedActions: number;
854
+ readonly lastObservation: PolicyVector | null;
855
+ readonly lastAction: PolicyVector | null;
856
+ readonly lastError: unknown;
857
+ }
858
+
794
859
  export interface PolicyConfig {
795
860
  frequency: number;
796
861
  enabled?: boolean;
862
+ /** Start async inference while this many queued actions remain. Defaults to 0. */
863
+ prefetchThreshold?: number;
864
+ /**
865
+ * How async action chunks update the queue.
866
+ * - append preserves legacy FIFO behavior.
867
+ * - replace is useful for receding-horizon policies where a fresh chunk should supersede stale queued actions.
868
+ */
869
+ queueStrategy?: 'append' | 'replace';
870
+ /**
871
+ * Clear queued actions and ignore in-flight async results when `stop()` is called.
872
+ * Defaults to false so callers can choose pause/resume behavior explicitly.
873
+ */
874
+ clearQueueOnStop?: boolean;
797
875
  onObservation: (input: PolicyObservationInput) => PolicyVector;
798
876
  /** Run policy inference. Omit to pass observations directly to `onAction` for custom inline controllers. */
799
- infer?: (input: PolicyInferenceInput) => PolicyVector;
877
+ infer?: (input: PolicyInferenceInput) => PolicyInferenceResult;
800
878
  onAction: (input: PolicyActionInput) => void;
879
+ /** Called when async inference rejects. */
880
+ onError?: (error: unknown) => void;
881
+ }
882
+
883
+ export interface RemotePolicyRequestInput extends PolicyInferenceInput {
884
+ /** True for the first request after hook construction or `reset()`. */
885
+ reset: boolean;
886
+ /** Zero-based request index since construction or `reset()`. */
887
+ requestIndex: number;
888
+ /** Aborts when the request is no longer needed, e.g. after pause/reset. */
889
+ signal: AbortSignal;
890
+ }
891
+
892
+ export interface RemotePolicyRequestInfo extends RemotePolicyRequestInput {
893
+ body: unknown;
894
+ requestStartedAt: number;
895
+ }
896
+
897
+ export interface RemotePolicyResponseInfo extends RemotePolicyRequestInfo {
898
+ response: Response;
899
+ responseBody: unknown;
900
+ responseFinishedAt: number;
901
+ requestMs: number;
902
+ }
903
+
904
+ export type RemotePolicyStatus = 'idle' | 'requesting' | 'ready' | 'error' | 'aborted';
905
+
906
+ export interface RemotePolicyConfig extends Omit<PolicyConfig, 'infer'> {
907
+ endpoint: string | URL;
908
+ method?: string;
909
+ headers?: HeadersInit;
910
+ credentials?: RequestCredentials;
911
+ /** Additional external cancellation signal for remote inference requests. */
912
+ signal?: AbortSignal;
913
+ /**
914
+ * Abort the active HTTP request when `stop()` or `reset()` is called.
915
+ * Defaults to true so paused policies stop consuming server work.
916
+ */
917
+ abortOnStop?: boolean;
918
+ fetcher?: typeof fetch;
919
+ requestInit?: Omit<RequestInit, 'body' | 'headers' | 'method' | 'credentials' | 'signal'>;
920
+ buildRequest?: (input: RemotePolicyRequestInput) => unknown | Promise<unknown>;
921
+ readResponse?: (response: Response) => unknown | Promise<unknown>;
922
+ parseResponse?: (
923
+ responseBody: unknown,
924
+ info: RemotePolicyResponseInfo
925
+ ) => PolicyInferenceResult;
926
+ onRequest?: (info: RemotePolicyRequestInfo) => void;
927
+ onResponse?: (info: RemotePolicyResponseInfo) => void;
928
+ }
929
+
930
+ export interface RemotePolicyAPI extends PolicyAPI {
931
+ abort: (reason?: unknown) => void;
932
+ readonly remoteStatus: RemotePolicyStatus;
933
+ readonly requestCount: number;
934
+ readonly responseCount: number;
935
+ readonly lastRequestBody: unknown;
936
+ readonly lastResponseBody: unknown;
937
+ readonly lastHttpStatus: number | null;
938
+ readonly lastRequestMs: number | null;
939
+ }
940
+
941
+ export interface PolicyCameraFrameStream extends CameraFrameCaptureOptions {
942
+ /** Image key used in policy payloads, e.g. `image`, `front`, or `wrist_cam`. */
943
+ key: string;
944
+ /** Additional payload keys that should receive the same data URL. */
945
+ aliases?: readonly string[];
946
+ }
947
+
948
+ export interface PolicyCameraFrameCaptureOptions {
949
+ streams: readonly PolicyCameraFrameStream[];
950
+ /**
951
+ * Include `observation.images.${key}` for every captured stream.
952
+ * Defaults to true because LeRobot-style policies usually use these names.
953
+ */
954
+ includeObservationImageAliases?: boolean;
955
+ }
956
+
957
+ export interface PolicyCameraFrameCaptureResult {
958
+ frames: Record<string, CameraFrameCaptureResult>;
959
+ images: Record<string, string>;
960
+ /** Human-readable source summary for UI/debug telemetry. */
961
+ sourceSummary: string;
962
+ capturedAt: number;
963
+ }
964
+
965
+ export interface PolicyCameraFrameCaptureAPI {
966
+ status: FrameCaptureStatus;
967
+ error: Error | null;
968
+ isCapturing: boolean;
969
+ capture: (
970
+ options?: Partial<PolicyCameraFrameCaptureOptions>
971
+ ) => Promise<PolicyCameraFrameCaptureResult>;
972
+ reset: () => void;
801
973
  }
802
974
 
803
975
  // ---- Observation Builder ----
@@ -1268,6 +1440,7 @@ export interface MujocoSimAPI {
1268
1440
  cameraPos: THREE.Vector3,
1269
1441
  lookAt: THREE.Vector3
1270
1442
  ): { point: THREE.Vector3; bodyId: number; geomId: number } | null;
1443
+ projectImagePointTo3D(options: ImagePointProjectionOptions): ImagePointProjectionResult | null;
1271
1444
 
1272
1445
  // Domain randomization (spec 10.3)
1273
1446
  setBodyMass(name: Bodies, mass: number): void;
@@ -1343,6 +1516,10 @@ export interface CameraFrameCaptureOptions {
1343
1516
  lookAt?: CameraFrameCaptureVector3;
1344
1517
  quaternion?: CameraFrameCaptureQuaternion;
1345
1518
  up?: CameraFrameCaptureVector3;
1519
+ /** Local-space offset applied after resolving a mounted MuJoCo camera/site/body pose. */
1520
+ positionOffset?: CameraFrameCaptureVector3;
1521
+ /** Local-space rotation applied after resolving a mounted MuJoCo camera/site/body pose. Array values use Three.js order: [x, y, z, w]. */
1522
+ quaternionOffset?: CameraFrameCaptureQuaternion;
1346
1523
  width?: number;
1347
1524
  height?: number;
1348
1525
  type?: string;
@@ -1352,6 +1529,16 @@ export interface CameraFrameCaptureOptions {
1352
1529
  far?: number;
1353
1530
  /** Provenance for the camera pose used by the capture. Usually set by the MuJoCo provider. */
1354
1531
  source?: CameraFrameCaptureSource;
1532
+ /** Hide rendered Three objects whose MuJoCo geom group is in this list. */
1533
+ hiddenGeomGroups?: readonly number[];
1534
+ /** When provided, only rendered Three objects whose MuJoCo geom group is in this list are visible. */
1535
+ visibleGeomGroups?: readonly number[];
1536
+ /** Hide rendered Three objects whose MuJoCo geom name is in this list. */
1537
+ hiddenGeomNames?: readonly string[];
1538
+ /** Optional clear color for this capture only. Defaults to the renderer's current clear color. */
1539
+ background?: THREE.ColorRepresentation;
1540
+ /** Optional clear alpha for this capture only. Defaults to the renderer's current clear alpha. */
1541
+ backgroundAlpha?: number;
1355
1542
  }
1356
1543
 
1357
1544
  export type CameraFrameCaptureSource =