mujoco-react 10.1.0 → 10.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/spark.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import * as react_jsx_runtime from 'react/jsx-runtime';
2
2
  import * as _sparkjsdev_spark from '@sparkjsdev/spark';
3
- import { n as SplatEnvironmentProps, q as PairedSplatEnvironmentConfig, S as SceneConfig, t as SplatEnvironmentReadiness, o as VisualScenarioConfig } from './types-BHBNJubg.js';
3
+ import { n as SplatEnvironmentProps, q as PairedSplatEnvironmentConfig, S as SceneConfig, t as SplatEnvironmentReadiness, o as VisualScenarioConfig } from './types-B-J8fpPP.js';
4
4
  import 'react';
5
5
  import '@react-three/fiber';
6
6
  import 'three';
package/dist/spark.js CHANGED
@@ -1,4 +1,4 @@
1
- import { useSplatSceneConfig, useSplatEnvironment, SplatEnvironment, CAMERA_FRAME_CAPTURE_RENDER_USER_DATA_KEY } from './chunk-FEKBKHEN.js';
1
+ import { useSplatSceneConfig, useSplatEnvironment, SplatEnvironment, CAMERA_FRAME_CAPTURE_RENDER_USER_DATA_KEY } from './chunk-CYDGWNKQ.js';
2
2
  import { useThree } from '@react-three/fiber';
3
3
  import { useMemo, useState, useEffect, useCallback, useRef } from 'react';
4
4
  import * as THREE from 'three';
@@ -573,6 +573,35 @@ interface RayHit {
573
573
  geomId: number;
574
574
  distance: number;
575
575
  }
576
+ type ImagePointCoordinateSpace = 'normalized' | 'normalized-1000' | 'pixel' | 'ndc';
577
+ interface ImagePointProjectionOptions extends CameraFrameCaptureOptions {
578
+ /** X coordinate in the selected coordinate space. Defaults to normalized 0..1. */
579
+ x: number;
580
+ /** Y coordinate in the selected coordinate space. Defaults to normalized 0..1 with origin at top-left. */
581
+ y: number;
582
+ /**
583
+ * Coordinate convention for x/y:
584
+ * - normalized: 0..1 image coordinates, top-left origin
585
+ * - normalized-1000: 0..1000 detector coordinates, top-left origin
586
+ * - pixel: pixel coordinates, top-left origin
587
+ * - ndc: Three.js normalized device coordinates, -1..1
588
+ */
589
+ coordinateSpace?: ImagePointCoordinateSpace;
590
+ /** Image width for pixel coordinates. Falls back to `width` or renderer canvas width. */
591
+ imageWidth?: number;
592
+ /** Image height for pixel coordinates. Falls back to `height` or renderer canvas height. */
593
+ imageHeight?: number;
594
+ /** Ignore hits farther than this distance from the camera ray origin. */
595
+ maxDistance?: number;
596
+ }
597
+ interface ImagePointProjectionResult extends RayHit {
598
+ /** NDC coordinates used for raycasting. */
599
+ ndc: [number, number];
600
+ /** Image dimensions used when interpreting pixel coordinates. */
601
+ imageSize: [number, number];
602
+ /** Camera pose provenance, matching camera-frame capture results. */
603
+ source: CameraFrameCaptureSource;
604
+ }
576
605
  interface ModelOptions {
577
606
  timestep: number;
578
607
  gravity: [number, number, number];
@@ -607,6 +636,8 @@ interface PolicyObservationInput {
607
636
  }
608
637
  interface PolicyInferenceInput extends PolicyObservationInput {
609
638
  observation: PolicyVector;
639
+ /** Number of actions still queued locally when inference is requested. */
640
+ queuedActions?: number;
610
641
  }
611
642
  type PolicyActionChunk = readonly PolicyVector[];
612
643
  type PolicyInferenceOutput = PolicyVector | PolicyActionChunk;
@@ -1094,6 +1125,7 @@ interface MujocoSimAPI {
1094
1125
  bodyId: number;
1095
1126
  geomId: number;
1096
1127
  } | null;
1128
+ projectImagePointTo3D(options: ImagePointProjectionOptions): ImagePointProjectionResult | null;
1097
1129
  setBodyMass(name: Bodies, mass: number): void;
1098
1130
  setGeomFriction(name: Geoms, friction: [number, number, number]): void;
1099
1131
  setGeomSize(name: Geoms, size: [number, number, number]): void;
@@ -1166,6 +1198,8 @@ interface CameraFrameCaptureOptions {
1166
1198
  background?: THREE.ColorRepresentation;
1167
1199
  /** Optional clear alpha for this capture only. Defaults to the renderer's current clear alpha. */
1168
1200
  backgroundAlpha?: number;
1201
+ /** Mirror the captured image horizontally after rendering. Useful when matching policy datasets with mirrored camera frames. */
1202
+ flipX?: boolean;
1169
1203
  }
1170
1204
  type CameraFrameCaptureSource = {
1171
1205
  kind: 'mujoco-camera';
@@ -1350,4 +1384,4 @@ interface ArrayJointStateResult {
1350
1384
  velocity: React__default.RefObject<Float64Array>;
1351
1385
  }
1352
1386
 
1353
- export { type ArrayJointStateResult as $, type ActuatedJointInfo as A, type BodyProps as B, type ControlGroupInfo as C, type DragInteractionProps as D, type ScenarioLightingPreset as E, type SplatEnvironmentMetadataInput as F, type SplatEnvironmentMetadata as G, type SplatSceneInput as H, type IkConfig as I, type DebugProps as J, type GeomInfo as K, type ContactListenerProps as L, type MujocoContextValue as M, type ActuatorInfo as N, type ObservationConfig as O, type PhysicsStepCallback as P, type Sites as Q, type ReadyCallbackInput as R, type SceneConfig as S, type TrajectoryPlayerProps as T, type SitePositionResult as U, type VisualScenarioEffectsProps as V, type Sensors as W, type SensorHandle as X, type SensorInfo as Y, type Joints as Z, type ScalarJointStateResult as _, type MujocoCanvasProps as a, ModelGeoms as a$, type JointStateOptions as a0, type JointStateResult as a1, type Bodies as a2, type BodyStateResult as a3, type Geoms as a4, type Actuators as a5, type CtrlHandle as a6, type ContactInfo as a7, type KeyboardTeleopConfig as a8, type KeyboardIkTargetConfig as a9, type CameraFrameSequenceRecorderAPI as aA, type CameraFrameCaptureResult as aB, type CameraFrameCaptureBlobResult as aC, type PolicyVector as aD, type BodyInfo as aE, type CameraFrameCaptureQuaternion as aF, type CameraFrameCaptureVector3 as aG, type CameraFrameSequenceCameraSummary as aH, type CameraFrameSequenceFrame as aI, type CameraFrameSequenceSampleInput as aJ, type CameraFrameSequenceStepInput as aK, type CameraInfo as aL, type ControlJointInfo as aM, type FrameCaptureTarget as aN, type FrameCaptureTargetRef as aO, type IKSolveFn as aP, type IkGizmoDragInput as aQ, type IkSolveInput as aR, type JointInfo as aS, type JointStateKind as aT, type KeyBinding as aU, type KeyboardIkTargetAction as aV, type KeyboardIkTargetBinding as aW, type Keyframes as aX, ModelActuators as aY, ModelBodies as aZ, ModelCameras as a_, type PolicyConfig as aa, type PolicyAPI as ab, type RemotePolicyConfig 
as ac, type RemotePolicyAPI as ad, type ObservationHandle as ae, type ObservationOutput as af, type TrajectoryInput as ag, type TrajectoryStateChangeInput as ah, type PlaybackState as ai, type TrajectoryFrame as aj, type FrameCaptureOptions as ak, type FrameCaptureResult as al, type FrameCaptureBlobResult as am, type FrameCaptureAPI as an, type CameraFrameCaptureOptions as ao, type CameraFrameCaptureAPI as ap, type Cameras as aq, type CameraFrameSequenceCamera as ar, type CameraFrameCaptureSource as as, type CameraFrameSequenceOptions as at, type CameraFrameSequenceResult as au, type PolicyCameraFrameStream as av, type PolicyCameraFrameCaptureOptions as aw, type PolicyCameraFrameCaptureResult as ax, type FrameCaptureStatus as ay, type PolicyCameraFrameCaptureAPI as az, type MujocoSimAPI as b, ModelJoints as b0, ModelKeyframes as b1, type ModelOptions as b2, type ModelResource as b3, ModelResources as b4, ModelSensors as b5, ModelSites as b6, type Models as b7, type MujocoContact as b8, type MujocoContactArray as b9, type SiteInfo as bA, type SplatAssetConfig as bB, type SplatScenarioConfig as bC, type StateSnapshot as bD, type TrajectoryData as bE, type TrajectoryFrameCallbackInput as bF, type VisualScenarioMaterialFilterInput as bG, type XmlPatch as bH, getContact as bI, registerModelResources as bJ, type MujocoFrameCaptureOptions as ba, type ObservationLayoutItem as bb, type PhysicsConfig as bc, type PhysicsStepInput as bd, type PolicyActionChunk as be, type PolicyActionInput as bf, type PolicyInferenceInput as bg, type PolicyInferenceOutput as bh, type PolicyInferenceResult as bi, type PolicyObservationInput as bj, type RayHit as bk, type Register as bl, type RegisteredModelMap as bm, type RemotePolicyRequestInfo as bn, type RemotePolicyRequestInput as bo, type RemotePolicyResponseInfo as bp, type RemotePolicyStatus as bq, type ResetCallbackInput as br, type ResolvedScenarioCameraConfig as bs, type ResolvedScenarioMaterialConfig as bt, type ResourceSelector as 
bu, type ScenarioCameraConfig as bv, type ScenarioMaterialConfig as bw, type SceneMarker as bx, type SceneObject as by, type SensorResult as bz, type StepCallbackInput as c, type SelectionCallbackInput as d, type MujocoModule as e, type MujocoModel as f, type MujocoData as g, type ControlGroupSelector as h, type ObservationResult as i, type IkContextValue as j, type IkGizmoProps as k, type SceneLightsProps as l, type ScenarioLightingProps as m, type SplatEnvironmentProps as n, type VisualScenarioConfig as o, type SplatRendererKind as p, type PairedSplatEnvironmentConfig as q, type SplatFormat as r, type SplatCollisionProxyConfig as s, type SplatEnvironmentReadiness as t, type SplatCollisionPrimitive as u, SplatEnvironmentReadinessStatus as v, type SplatSceneConfigInput as w, type SplatSceneConfigState as x, type VisualScenarioExecutionContextInput as y, type VisualScenarioExecutionContext as z };
1387
+ export { type ArrayJointStateResult as $, type ActuatedJointInfo as A, type BodyProps as B, type ControlGroupInfo as C, type DragInteractionProps as D, type ScenarioLightingPreset as E, type SplatEnvironmentMetadataInput as F, type SplatEnvironmentMetadata as G, type SplatSceneInput as H, type IkConfig as I, type DebugProps as J, type GeomInfo as K, type ContactListenerProps as L, type MujocoContextValue as M, type ActuatorInfo as N, type ObservationConfig as O, type PhysicsStepCallback as P, type Sites as Q, type ReadyCallbackInput as R, type SceneConfig as S, type TrajectoryPlayerProps as T, type SitePositionResult as U, type VisualScenarioEffectsProps as V, type Sensors as W, type SensorHandle as X, type SensorInfo as Y, type Joints as Z, type ScalarJointStateResult as _, type MujocoCanvasProps as a, ModelActuators as a$, type JointStateOptions as a0, type JointStateResult as a1, type Bodies as a2, type BodyStateResult as a3, type Geoms as a4, type Actuators as a5, type CtrlHandle as a6, type ContactInfo as a7, type KeyboardTeleopConfig as a8, type KeyboardIkTargetConfig as a9, type CameraFrameSequenceRecorderAPI as aA, type CameraFrameCaptureResult as aB, type CameraFrameCaptureBlobResult as aC, type ImagePointCoordinateSpace as aD, type ImagePointProjectionOptions as aE, type ImagePointProjectionResult as aF, type PolicyVector as aG, type BodyInfo as aH, type CameraFrameCaptureQuaternion as aI, type CameraFrameCaptureVector3 as aJ, type CameraFrameSequenceCameraSummary as aK, type CameraFrameSequenceFrame as aL, type CameraFrameSequenceSampleInput as aM, type CameraFrameSequenceStepInput as aN, type CameraInfo as aO, type ControlJointInfo as aP, type FrameCaptureTarget as aQ, type FrameCaptureTargetRef as aR, type IKSolveFn as aS, type IkGizmoDragInput as aT, type IkSolveInput as aU, type JointInfo as aV, type JointStateKind as aW, type KeyBinding as aX, type KeyboardIkTargetAction as aY, type KeyboardIkTargetBinding as aZ, type Keyframes as a_, type 
PolicyConfig as aa, type PolicyAPI as ab, type RemotePolicyConfig as ac, type RemotePolicyAPI as ad, type ObservationHandle as ae, type ObservationOutput as af, type TrajectoryInput as ag, type TrajectoryStateChangeInput as ah, type PlaybackState as ai, type TrajectoryFrame as aj, type FrameCaptureOptions as ak, type FrameCaptureResult as al, type FrameCaptureBlobResult as am, type FrameCaptureAPI as an, type CameraFrameCaptureOptions as ao, type CameraFrameCaptureAPI as ap, type Cameras as aq, type CameraFrameSequenceCamera as ar, type CameraFrameCaptureSource as as, type CameraFrameSequenceOptions as at, type CameraFrameSequenceResult as au, type PolicyCameraFrameStream as av, type PolicyCameraFrameCaptureOptions as aw, type PolicyCameraFrameCaptureResult as ax, type FrameCaptureStatus as ay, type PolicyCameraFrameCaptureAPI as az, type MujocoSimAPI as b, ModelBodies as b0, ModelCameras as b1, ModelGeoms as b2, ModelJoints as b3, ModelKeyframes as b4, type ModelOptions as b5, type ModelResource as b6, ModelResources as b7, ModelSensors as b8, ModelSites as b9, type SceneMarker as bA, type SceneObject as bB, type SensorResult as bC, type SiteInfo as bD, type SplatAssetConfig as bE, type SplatScenarioConfig as bF, type StateSnapshot as bG, type TrajectoryData as bH, type TrajectoryFrameCallbackInput as bI, type VisualScenarioMaterialFilterInput as bJ, type XmlPatch as bK, getContact as bL, registerModelResources as bM, type Models as ba, type MujocoContact as bb, type MujocoContactArray as bc, type MujocoFrameCaptureOptions as bd, type ObservationLayoutItem as be, type PhysicsConfig as bf, type PhysicsStepInput as bg, type PolicyActionChunk as bh, type PolicyActionInput as bi, type PolicyInferenceInput as bj, type PolicyInferenceOutput as bk, type PolicyInferenceResult as bl, type PolicyObservationInput as bm, type RayHit as bn, type Register as bo, type RegisteredModelMap as bp, type RemotePolicyRequestInfo as bq, type RemotePolicyRequestInput as br, type 
RemotePolicyResponseInfo as bs, type RemotePolicyStatus as bt, type ResetCallbackInput as bu, type ResolvedScenarioCameraConfig as bv, type ResolvedScenarioMaterialConfig as bw, type ResourceSelector as bx, type ScenarioCameraConfig as by, type ScenarioMaterialConfig as bz, type StepCallbackInput as c, type SelectionCallbackInput as d, type MujocoModule as e, type MujocoModel as f, type MujocoData as g, type ControlGroupSelector as h, type ObservationResult as i, type IkContextValue as j, type IkGizmoProps as k, type SceneLightsProps as l, type ScenarioLightingProps as m, type SplatEnvironmentProps as n, type VisualScenarioConfig as o, type SplatRendererKind as p, type PairedSplatEnvironmentConfig as q, type SplatFormat as r, type SplatCollisionProxyConfig as s, type SplatEnvironmentReadiness as t, type SplatCollisionPrimitive as u, SplatEnvironmentReadinessStatus as v, type SplatSceneConfigInput as w, type SplatSceneConfigState as x, type VisualScenarioExecutionContextInput as y, type VisualScenarioExecutionContext as z };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mujoco-react",
3
- "version": "10.1.0",
3
+ "version": "10.2.1",
4
4
  "description": "Composable React Three Fiber building blocks for MuJoCo WASM simulations",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -31,6 +31,8 @@ import {
31
31
  ControlGroupSelector,
32
32
  ContactInfo,
33
33
  GeomInfo,
34
+ ImagePointProjectionOptions,
35
+ ImagePointProjectionResult,
34
36
  JointInfo,
35
37
  LoadFromFilesOptions,
36
38
  LocalMujocoFile,
@@ -61,6 +63,7 @@ import {
61
63
  getCameraFrameCaptureSourceTarget,
62
64
  isMountedCameraFrameCaptureSource,
63
65
  } from '../rendering/cameraFrameSource';
66
+ import { projectImagePointTo3D as projectImagePointTo3DFromScene } from '../rendering/imageProjection';
64
67
  import {
65
68
  loadScene,
66
69
  createSceneConfigFromFiles,
@@ -1590,6 +1593,31 @@ export function MujocoSimProvider({
1590
1593
  [camera, gl]
1591
1594
  );
1592
1595
 
1596
+ const projectImagePointTo3D = useCallback(
1597
+ (options: ImagePointProjectionOptions): ImagePointProjectionResult | null => {
1598
+ const {
1599
+ x,
1600
+ y,
1601
+ coordinateSpace,
1602
+ imageWidth,
1603
+ imageHeight,
1604
+ maxDistance,
1605
+ ...captureOptions
1606
+ } = options;
1607
+ const resolvedCaptureOptions = resolveCameraCaptureOptions(captureOptions);
1608
+ return projectImagePointTo3DFromScene(gl, scene, camera, {
1609
+ ...resolvedCaptureOptions,
1610
+ x,
1611
+ y,
1612
+ coordinateSpace,
1613
+ imageWidth,
1614
+ imageHeight,
1615
+ maxDistance,
1616
+ });
1617
+ },
1618
+ [camera, gl, resolveCameraCaptureOptions, scene]
1619
+ );
1620
+
1593
1621
  // --- Domain randomization ---
1594
1622
 
1595
1623
  const setBodyMass = useCallback((name: string, mass: number): void => {
@@ -1676,6 +1704,7 @@ export function MujocoSimProvider({
1676
1704
  captureCameraFrameBlob: captureCameraFrameBlobApi,
1677
1705
  recordCameraSequence: recordCameraSequenceApi,
1678
1706
  project2DTo3D,
1707
+ projectImagePointTo3D,
1679
1708
  setBodyMass,
1680
1709
  setGeomFriction,
1681
1710
  setGeomSize,
@@ -1696,6 +1725,7 @@ export function MujocoSimProvider({
1696
1725
  captureCameraFrameApi, captureCameraFrameBlobApi,
1697
1726
  recordCameraSequenceApi,
1698
1727
  project2DTo3D,
1728
+ projectImagePointTo3D,
1699
1729
  setBodyMass, setGeomFriction, setGeomSize,
1700
1730
  ]
1701
1731
  );
@@ -111,7 +111,10 @@ export function usePolicy(config: PolicyConfig): PolicyAPI {
111
111
 
112
112
  // Build observation
113
113
  const observation = cfg.onObservation({ model, data });
114
- const result = cfg.infer ? cfg.infer({ observation, model, data }) : observation;
114
+ const queuedActions = actionQueueRef.current.length;
115
+ const result = cfg.infer
116
+ ? cfg.infer({ observation, model, data, queuedActions })
117
+ : observation;
115
118
 
116
119
  if (isPromiseLike(result)) {
117
120
  const epoch = epochRef.current;
@@ -163,7 +163,7 @@ export function useRemotePolicy(config: RemotePolicyConfig): RemotePolicyAPI {
163
163
 
164
164
  const policy = usePolicy({
165
165
  ...config,
166
- infer: async ({ observation, model, data }) => {
166
+ infer: async ({ observation, model, data, queuedActions }) => {
167
167
  const cfg = configRef.current;
168
168
  abortController(abortControllerRef.current, createAbortError('Remote policy request was superseded.'));
169
169
  const controller = new AbortController();
@@ -175,6 +175,7 @@ export function useRemotePolicy(config: RemotePolicyConfig): RemotePolicyAPI {
175
175
  observation,
176
176
  model,
177
177
  data,
178
+ queuedActions,
178
179
  reset: requestIndex === 0,
179
180
  requestIndex,
180
181
  signal,
package/src/index.ts CHANGED
@@ -148,6 +148,10 @@ export {
148
148
  createCameraFrameCaptureSession,
149
149
  renderCameraFrameToCanvas,
150
150
  } from './rendering/cameraFrameCapture';
151
+ export {
152
+ imagePointToNdc,
153
+ projectImagePointTo3D,
154
+ } from './rendering/imageProjection';
151
155
  export {
152
156
  capturePolicyCameraFrames,
153
157
  capturePolicyCameraFramesFromMountedStreams,
@@ -269,6 +273,9 @@ export type {
269
273
  ContactInfo,
270
274
  // Raycast
271
275
  RayHit,
276
+ ImagePointCoordinateSpace,
277
+ ImagePointProjectionOptions,
278
+ ImagePointProjectionResult,
272
279
  // Model options
273
280
  ModelOptions,
274
281
  // Trajectory
@@ -60,6 +60,7 @@ export type CameraFrameCaptureRenderResult = {
60
60
  width?: number;
61
61
  height?: number;
62
62
  flipY?: boolean;
63
+ flipX?: boolean;
63
64
  };
64
65
 
65
66
  type CameraFrameCaptureRender = (
@@ -196,7 +197,8 @@ function readRenderTargetToCanvas(
196
197
  imageData: ImageData,
197
198
  width: number,
198
199
  height: number,
199
- outputColorSpace: string
200
+ outputColorSpace: string,
201
+ flipX = false
200
202
  ) {
201
203
  renderer.readRenderTargetPixels(target, 0, 0, width, height, pixels);
202
204
 
@@ -206,17 +208,25 @@ function readRenderTargetToCanvas(
206
208
  const sourceStart = (height - y - 1) * rowBytes;
207
209
  const targetStart = y * rowBytes;
208
210
  const row = pixels.subarray(sourceStart, sourceStart + rowBytes);
209
- if (!encodeSrgb) {
211
+ if (!encodeSrgb && !flipX) {
210
212
  imageData.data.set(row, targetStart);
211
213
  continue;
212
214
  }
213
215
 
214
- for (let x = 0; x < rowBytes; x += 4) {
215
- const pixelOffset = targetStart + x;
216
- imageData.data[pixelOffset] = linearByteToSrgbByte(row[x]);
217
- imageData.data[pixelOffset + 1] = linearByteToSrgbByte(row[x + 1]);
218
- imageData.data[pixelOffset + 2] = linearByteToSrgbByte(row[x + 2]);
219
- imageData.data[pixelOffset + 3] = row[x + 3];
216
+ for (let x = 0; x < width; x += 1) {
217
+ const sourceX = flipX ? width - x - 1 : x;
218
+ const sourceOffset = sourceX * 4;
219
+ const targetOffset = targetStart + x * 4;
220
+ imageData.data[targetOffset] = encodeSrgb
221
+ ? linearByteToSrgbByte(row[sourceOffset])
222
+ : row[sourceOffset];
223
+ imageData.data[targetOffset + 1] = encodeSrgb
224
+ ? linearByteToSrgbByte(row[sourceOffset + 1])
225
+ : row[sourceOffset + 1];
226
+ imageData.data[targetOffset + 2] = encodeSrgb
227
+ ? linearByteToSrgbByte(row[sourceOffset + 2])
228
+ : row[sourceOffset + 2];
229
+ imageData.data[targetOffset + 3] = row[sourceOffset + 3];
220
230
  }
221
231
  }
222
232
  context.putImageData(imageData, 0, 0);
@@ -238,17 +248,30 @@ function readPixelsToCanvas(
238
248
  imageData: ImageData,
239
249
  width: number,
240
250
  height: number,
241
- flipY = true
251
+ flipY = true,
252
+ flipX = false
242
253
  ) {
243
254
  const rowBytes = width * 4;
244
255
  for (let y = 0; y < height; y += 1) {
245
256
  const sourceY = flipY ? height - y - 1 : y;
246
257
  const sourceStart = sourceY * rowBytes;
247
258
  const targetStart = y * rowBytes;
248
- imageData.data.set(
249
- pixels.subarray(sourceStart, sourceStart + rowBytes),
250
- targetStart
251
- );
259
+ if (!flipX) {
260
+ imageData.data.set(
261
+ pixels.subarray(sourceStart, sourceStart + rowBytes),
262
+ targetStart
263
+ );
264
+ continue;
265
+ }
266
+ for (let x = 0; x < width; x += 1) {
267
+ const sourceX = width - x - 1;
268
+ const sourceOffset = sourceStart + sourceX * 4;
269
+ const targetOffset = targetStart + x * 4;
270
+ imageData.data[targetOffset] = pixels[sourceOffset];
271
+ imageData.data[targetOffset + 1] = pixels[sourceOffset + 1];
272
+ imageData.data[targetOffset + 2] = pixels[sourceOffset + 2];
273
+ imageData.data[targetOffset + 3] = pixels[sourceOffset + 3];
274
+ }
252
275
  }
253
276
  context.putImageData(imageData, 0, 0);
254
277
  }
@@ -462,7 +485,8 @@ export function createCameraFrameCaptureSession(
462
485
  imageData,
463
486
  width,
464
487
  height,
465
- renderer.outputColorSpace
488
+ renderer.outputColorSpace,
489
+ captureOptions.flipX ?? false
466
490
  );
467
491
  return {
468
492
  canvas,
@@ -524,7 +548,8 @@ export function createCameraFrameCaptureSession(
524
548
  imageData,
525
549
  width,
526
550
  height,
527
- captureResult.flipY ?? true
551
+ captureResult.flipY ?? true,
552
+ captureResult.flipX ?? captureOptions.flipX ?? false
528
553
  );
529
554
  return {
530
555
  canvas,
@@ -0,0 +1,186 @@
1
+ /**
2
+ * @license
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Project detector/image coordinates from a camera view into the rendered MuJoCo scene.
6
+ */
7
+
8
+ import * as THREE from 'three';
9
+ import type {
10
+ CameraFrameCaptureOptions,
11
+ CameraFrameCaptureSource,
12
+ ImagePointCoordinateSpace,
13
+ ImagePointProjectionOptions,
14
+ ImagePointProjectionResult,
15
+ } from '../types';
16
+ import { CAPTURE_EXCLUDE_KEY } from './cameraFrameCapture';
17
+
18
+ const _raycaster = new THREE.Raycaster();
19
+ const _ndc = new THREE.Vector2();
20
+
21
/**
 * Produce a fresh `THREE.Vector3` from an optional vector-like capture option.
 *
 * @param value - A `THREE.Vector3` or an indexable `[x, y, z]` triple; may be omitted.
 * @param fallback - Vector that is cloned when `value` is falsy.
 * @returns A new vector; neither `value` nor `fallback` is ever returned by reference.
 */
function toVector3(
  value: CameraFrameCaptureOptions['position'] | undefined,
  fallback: THREE.Vector3
) {
  if (value instanceof THREE.Vector3) {
    return value.clone();
  }
  if (value) {
    // Remaining truthy case: an indexable triple.
    return new THREE.Vector3(value[0], value[1], value[2]);
  }
  return fallback.clone();
}
30
+
31
/**
 * Write the pose described by `options` onto `camera`, in place.
 *
 * Position and up vector come from `options`, falling back to `fallbackCamera`.
 * Orientation precedence is: explicit `quaternion`, then `lookAt`, then the
 * fallback camera's quaternion. The camera's world matrix is refreshed last.
 *
 * @param camera - Camera to mutate.
 * @param options - Capture options carrying the optional pose fields.
 * @param fallbackCamera - Source of any pose component that `options` omits.
 */
function applyCameraPose(
  camera: THREE.Camera,
  options: CameraFrameCaptureOptions,
  fallbackCamera: THREE.Camera
) {
  const { position, up, quaternion, lookAt } = options;

  // Position and up must be in place before lookAt(), which reads both.
  camera.position.copy(toVector3(position, fallbackCamera.position));
  camera.up.copy(toVector3(up, fallbackCamera.up));

  if (quaternion instanceof THREE.Quaternion) {
    camera.quaternion.copy(quaternion);
  } else if (quaternion) {
    // Array form, components in x, y, z, w order as passed to Quaternion.set.
    camera.quaternion.set(quaternion[0], quaternion[1], quaternion[2], quaternion[3]);
  } else if (lookAt) {
    camera.lookAt(toVector3(lookAt, new THREE.Vector3()));
  } else {
    camera.quaternion.copy(fallbackCamera.quaternion);
  }

  camera.updateMatrixWorld();
}
58
+
59
/**
 * Build a throwaway camera used only for raycasting an image point.
 *
 * The camera is a clone of `options.camera` when given, otherwise a clone of
 * `fallbackCamera` if that is a perspective camera, otherwise a new
 * 45-degree perspective camera (near 0.01, far 100). Perspective cameras get
 * their aspect set to `width / height` and any `fov` / `near` / `far`
 * overrides from `options`. The pose is then applied via `applyCameraPose`.
 *
 * NOTE(review): when `options.camera` is supplied without position /
 * quaternion / lookAt, `applyCameraPose` takes the pose from `fallbackCamera`,
 * not from the supplied camera — confirm this matches camera-frame capture.
 *
 * @param fallbackCamera - Camera used when no custom camera or pose is given.
 * @param options - Capture options describing the view.
 * @param width - Image width used for the aspect ratio.
 * @param height - Image height used for the aspect ratio.
 * @returns A new camera; the inputs are never mutated.
 */
function createProjectionCamera(
  fallbackCamera: THREE.Camera,
  options: CameraFrameCaptureOptions,
  width: number,
  height: number
) {
  const aspect = width / height;

  let template: THREE.Camera | null = null;
  if (options.camera) {
    template = options.camera;
  } else if (fallbackCamera instanceof THREE.PerspectiveCamera) {
    template = fallbackCamera;
  }

  const projectionCamera: THREE.Camera = template
    ? template.clone()
    : new THREE.PerspectiveCamera(45, aspect, 0.01, 100);

  if (projectionCamera instanceof THREE.PerspectiveCamera) {
    const { fov, near, far } = options;
    projectionCamera.aspect = aspect;
    projectionCamera.fov = fov ?? projectionCamera.fov;
    projectionCamera.near = near ?? projectionCamera.near;
    projectionCamera.far = far ?? projectionCamera.far;
    projectionCamera.updateProjectionMatrix();
  }

  applyCameraPose(projectionCamera, options, fallbackCamera);
  return projectionCamera;
}
82
+
83
/**
 * Describe where the projection camera's pose came from.
 *
 * The first truthy option wins, in this order: `source`, `cameraName`,
 * `siteName`, `bodyName`, `camera`, any of `position` / `lookAt` /
 * `quaternion`, and finally the fallback camera.
 *
 * @param options - Capture options to inspect.
 * @returns The provenance record reported alongside the projection result.
 */
function getProjectionSource(options: CameraFrameCaptureOptions): CameraFrameCaptureSource {
  const { source, cameraName, siteName, bodyName, camera } = options;

  if (source) {
    return source;
  }
  if (cameraName) {
    return { kind: 'mujoco-camera', cameraName };
  }
  if (siteName) {
    return { kind: 'mujoco-site', siteName };
  }
  if (bodyName) {
    return { kind: 'mujoco-body', bodyName };
  }
  if (camera) {
    return { kind: 'custom-camera' };
  }

  const hasExplicitPose = Boolean(options.position || options.lookAt || options.quaternion);
  return hasExplicitPose ? { kind: 'explicit-pose' } : { kind: 'fallback-camera' };
}
92
+
93
/**
 * Pick the first usable (finite) size from an ordered list of candidates.
 *
 * The canvas size is passed as a thunk so the renderer is only touched when
 * neither explicit value is usable.
 */
function resolveImageDimension(
  explicit: number | null | undefined,
  captureSize: number | null | undefined,
  readCanvasSize: () => number
): number {
  const isUsable = (candidate: number | null | undefined): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate);

  let raw: number;
  if (isUsable(explicit)) {
    raw = explicit;
  } else if (isUsable(captureSize)) {
    raw = captureSize;
  } else {
    raw = readCanvasSize();
  }

  // Whole pixels, never below 1, so the aspect ratio downstream stays finite.
  return Number.isFinite(raw) ? Math.max(1, Math.floor(raw)) : 1;
}

/**
 * Resolve the image dimensions used to interpret pixel coordinates and to set
 * the projection camera's aspect ratio.
 *
 * Each dimension is taken from `imageWidth` / `imageHeight`, then `width` /
 * `height`, then the renderer canvas. Values are floored and clamped to a
 * minimum of 1. Non-finite candidates (`NaN`, `±Infinity`) are skipped instead
 * of being propagated, which previously produced a `NaN` size.
 *
 * @param renderer - Renderer whose canvas supplies the last-resort size.
 * @param options - Projection options carrying the optional size overrides.
 * @returns `[width, height]` as positive integers.
 */
function imageSize(
  renderer: THREE.WebGLRenderer,
  options: ImagePointProjectionOptions
): [number, number] {
  return [
    resolveImageDimension(options.imageWidth, options.width, () => renderer.domElement.width),
    resolveImageDimension(options.imageHeight, options.height, () => renderer.domElement.height),
  ];
}
102
+
103
/**
 * Convert an image point to Three.js normalized device coordinates.
 *
 * - `'ndc'`: returned unchanged.
 * - `'normalized-1000'`: 0..1000 range, top-left origin.
 * - `'pixel'`: divided by `width` / `height`, top-left origin.
 * - anything else (including the default `'normalized'`): 0..1 range, top-left origin.
 *
 * For the top-left-origin spaces the Y axis is inverted so that the top of the
 * image maps to NDC `+1`.
 *
 * @param x - Horizontal coordinate in `coordinateSpace`.
 * @param y - Vertical coordinate in `coordinateSpace`.
 * @param coordinateSpace - Convention used by `x` / `y`.
 * @param width - Image width; only read for `'pixel'`.
 * @param height - Image height; only read for `'pixel'`.
 * @returns `[ndcX, ndcY]`.
 */
export function imagePointToNdc(
  x: number,
  y: number,
  coordinateSpace: ImagePointCoordinateSpace = 'normalized',
  width = 1,
  height = 1
): [number, number] {
  if (coordinateSpace === 'ndc') {
    return [x, y];
  }

  // Reduce every other convention to 0..1 fractions of the image first.
  let u = x;
  let v = y;
  switch (coordinateSpace) {
    case 'normalized-1000':
      u = x / 1000;
      v = y / 1000;
      break;
    case 'pixel':
      u = x / width;
      v = y / height;
      break;
    default:
      break;
  }

  return [u * 2 - 1, 1 - v * 2];
}
119
+
120
/**
 * Decide whether an object may be hit when projecting an image point.
 *
 * An object is rejected when:
 * - it, or any ancestor, has `visible === false` (the renderer skips the whole
 *   subtree of an invisible object, but the raycaster does not look at
 *   `visible`, so the hierarchy has to be checked here);
 * - its `userData` carries the capture-exclude flag;
 * - its `userData.geomName` is listed in `options.hiddenGeomNames`;
 * - its `userData.geomGroup` is listed in `options.hiddenGeomGroups`, or is
 *   missing from `options.visibleGeomGroups` when that list is provided.
 *
 * Objects without a string `geomName` / numeric `geomGroup` are not affected
 * by the corresponding filters.
 *
 * NOTE(review): the capture-exclude flag is only read on the object itself;
 * confirm whether a flag on an ancestor should also exclude its descendants.
 *
 * @param object - Scene object under consideration.
 * @param options - Projection options carrying the geom filters.
 * @returns `true` when the object should take part in raycasting.
 */
function isProjectionCandidate(object: THREE.Object3D, options: ImagePointProjectionOptions) {
  // Visibility is inherited: a hidden parent hides everything beneath it.
  for (let node: THREE.Object3D | null = object; node; node = node.parent) {
    if (!node.visible) return false;
  }
  if (object.userData[CAPTURE_EXCLUDE_KEY]) return false;

  const geomGroup = object.userData.geomGroup;
  const geomName = object.userData.geomName;
  const { hiddenGeomNames, hiddenGeomGroups, visibleGeomGroups } = options;

  if (hiddenGeomNames && typeof geomName === 'string' && hiddenGeomNames.includes(geomName)) {
    return false;
  }
  if (typeof geomGroup === 'number') {
    if (hiddenGeomGroups && hiddenGeomGroups.includes(geomGroup)) return false;
    if (visibleGeomGroups && !visibleGeomGroups.includes(geomGroup)) return false;
  }
  return true;
}
137
+
138
/**
 * Find the body id associated with a scene object.
 *
 * Starting at `object`, walk towards the root and stop at the first node whose
 * `userData.bodyID` is not `undefined`.
 *
 * @param object - Object that was hit, typically a geom mesh.
 * @returns That node's `bodyID` when it is a number; `-1` when it is not a
 *   number or when no node in the chain defines one.
 */
function findBodyId(object: THREE.Object3D) {
  for (let node: THREE.Object3D | null = object; node; node = node.parent) {
    const candidate = node.userData.bodyID;
    if (candidate !== undefined) {
      return typeof candidate === 'number' ? candidate : -1;
    }
  }
  return -1;
}
145
+
146
/**
 * Cast a ray through an image point of a camera view and return the first
 * scene mesh it hits.
 *
 * Steps:
 * 1. Resolve the image size and convert `options.x` / `options.y` to NDC.
 * 2. Build a temporary projection camera from the capture options.
 * 3. Refresh scene world matrices and aim the shared raycaster.
 * 4. Raycast against every mesh accepted by `isProjectionCandidate`.
 *
 * The candidate list already contains every eligible mesh in the scene, so the
 * raycast is non-recursive. Recursing would re-test nested meshes and would
 * let descendants that were filtered out be hit through an accepted parent.
 *
 * NOTE(review): `options.flipX` is not applied to the incoming X coordinate;
 * confirm whether callers are expected to un-mirror points detected on
 * horizontally flipped captures.
 *
 * @param renderer - Renderer whose canvas supplies the default image size.
 * @param scene - Scene to raycast; its world matrices are updated.
 * @param fallbackCamera - Camera used when the options describe no pose.
 * @param options - Image point, coordinate space, filters and camera options.
 * @returns The closest hit with its NDC, image size and camera provenance, or
 *   `null` when the point is not finite or nothing is hit within `maxDistance`.
 */
export function projectImagePointTo3D(
  renderer: THREE.WebGLRenderer,
  scene: THREE.Scene,
  fallbackCamera: THREE.Camera,
  options: ImagePointProjectionOptions
): ImagePointProjectionResult | null {
  const [width, height] = imageSize(renderer, options);
  const [ndcX, ndcY] = imagePointToNdc(
    options.x,
    options.y,
    options.coordinateSpace,
    width,
    height
  );
  // A NaN or infinite coordinate cannot define a ray, so there is nothing to hit.
  if (!Number.isFinite(ndcX) || !Number.isFinite(ndcY)) return null;

  const projectionCamera = createProjectionCamera(fallbackCamera, options, width, height);

  scene.updateMatrixWorld(true);
  _ndc.set(ndcX, ndcY);
  _raycaster.setFromCamera(_ndc, projectionCamera);
  // The raycaster is shared module state, so `far` is reset on every call.
  _raycaster.far = options.maxDistance ?? Infinity;

  const candidates: THREE.Object3D[] = [];
  scene.traverse((object) => {
    if ((object as THREE.Mesh).isMesh && isProjectionCandidate(object, options)) {
      candidates.push(object);
    }
  });

  // Non-recursive: `candidates` is already a flat list of every eligible mesh.
  const [closest] = _raycaster.intersectObjects(candidates, false);
  if (!closest) return null;

  const geomId = closest.object.userData.geomID;
  return {
    point: closest.point.clone(),
    bodyId: findBodyId(closest.object),
    geomId: typeof geomId === 'number' ? geomId : -1,
    distance: closest.distance,
    ndc: [ndcX, ndcY],
    imageSize: [width, height],
    source: getProjectionSource(options),
  };
}
package/src/types.ts CHANGED
@@ -747,6 +747,42 @@ export interface RayHit {
747
747
  distance: number;
748
748
  }
749
749
 
750
/**
 * Conventions accepted for image-point coordinates:
 * - `'normalized'`: 0..1 image coordinates, top-left origin
 * - `'normalized-1000'`: 0..1000 detector coordinates, top-left origin
 * - `'pixel'`: pixel coordinates, top-left origin
 * - `'ndc'`: Three.js normalized device coordinates, -1..1
 */
export type ImagePointCoordinateSpace = 'normalized' | 'normalized-1000' | 'pixel' | 'ndc';
755
+
756
/**
 * Input for projecting a 2D image point into the rendered scene.
 *
 * Extends the camera-frame capture options, so the camera view is described
 * the same way as for a capture.
 */
export interface ImagePointProjectionOptions extends CameraFrameCaptureOptions {
  /** Horizontal coordinate of the point, interpreted according to `coordinateSpace`. */
  x: number;
  /** Vertical coordinate of the point, interpreted according to `coordinateSpace`. */
  y: number;
  /**
   * Convention used by `x` / `y`. Defaults to `'normalized'` when omitted.
   * - normalized: 0..1 image coordinates, top-left origin
   * - normalized-1000: 0..1000 detector coordinates, top-left origin
   * - pixel: pixel coordinates, top-left origin
   * - ndc: Three.js normalized device coordinates, -1..1, used as given
   */
  coordinateSpace?: ImagePointCoordinateSpace;
  /**
   * Image width used to interpret pixel coordinates and to set the projection
   * camera aspect ratio. Falls back to `width`, then the renderer canvas width.
   */
  imageWidth?: number;
  /**
   * Image height used to interpret pixel coordinates and to set the projection
   * camera aspect ratio. Falls back to `height`, then the renderer canvas height.
   */
  imageHeight?: number;
  /** Upper bound on the hit distance along the camera ray. Unlimited when omitted. */
  maxDistance?: number;
}
776
+
777
/** A ray hit produced by image-point projection, with the inputs that were resolved to obtain it. */
export interface ImagePointProjectionResult extends RayHit {
  /** The `[x, y]` normalized device coordinates the ray was cast through. */
  ndc: [number, number];
  /** The `[width, height]` image size that was resolved for this projection. */
  imageSize: [number, number];
  /** Where the camera pose came from, using the same shape as camera-frame capture results. */
  source: CameraFrameCaptureSource;
}
785
+
750
786
  // ---- Model Options (spec 5.3) ----
751
787
 
752
788
  export interface ModelOptions {
@@ -797,6 +833,8 @@ export interface PolicyObservationInput {
797
833
 
798
834
  export interface PolicyInferenceInput extends PolicyObservationInput {
799
835
  observation: PolicyVector;
836
+ /** Number of actions still queued locally when inference is requested. */
837
+ queuedActions?: number;
800
838
  }
801
839
 
802
840
  export type PolicyActionChunk = readonly PolicyVector[];
@@ -1404,6 +1442,7 @@ export interface MujocoSimAPI {
1404
1442
  cameraPos: THREE.Vector3,
1405
1443
  lookAt: THREE.Vector3
1406
1444
  ): { point: THREE.Vector3; bodyId: number; geomId: number } | null;
1445
+ projectImagePointTo3D(options: ImagePointProjectionOptions): ImagePointProjectionResult | null;
1407
1446
 
1408
1447
  // Domain randomization (spec 10.3)
1409
1448
  setBodyMass(name: Bodies, mass: number): void;
@@ -1502,6 +1541,8 @@ export interface CameraFrameCaptureOptions {
1502
1541
  background?: THREE.ColorRepresentation;
1503
1542
  /** Optional clear alpha for this capture only. Defaults to the renderer's current clear alpha. */
1504
1543
  backgroundAlpha?: number;
1544
+ /** Mirror the captured image horizontally after rendering. Useful when matching policy datasets with mirrored camera frames. */
1545
+ flipX?: boolean;
1505
1546
  }
1506
1547
 
1507
1548
  export type CameraFrameCaptureSource =