mujoco-react 10.1.0 → 10.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-FEKBKHEN.js → chunk-CYDGWNKQ.js} +33 -17
- package/dist/chunk-CYDGWNKQ.js.map +1 -0
- package/dist/index.d.ts +13 -3
- package/dist/index.js +307 -154
- package/dist/index.js.map +1 -1
- package/dist/spark.d.ts +1 -1
- package/dist/spark.js +1 -1
- package/dist/{types-BHBNJubg.d.ts → types-B-J8fpPP.d.ts} +35 -1
- package/package.json +1 -1
- package/src/core/MujocoSimProvider.tsx +30 -0
- package/src/hooks/usePolicy.ts +4 -1
- package/src/hooks/useRemotePolicy.ts +2 -1
- package/src/index.ts +7 -0
- package/src/rendering/cameraFrameCapture.ts +40 -15
- package/src/rendering/imageProjection.ts +186 -0
- package/src/types.ts +41 -0
- package/dist/chunk-FEKBKHEN.js.map +0 -1
package/dist/spark.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as react_jsx_runtime from 'react/jsx-runtime';
|
|
2
2
|
import * as _sparkjsdev_spark from '@sparkjsdev/spark';
|
|
3
|
-
import { n as SplatEnvironmentProps, q as PairedSplatEnvironmentConfig, S as SceneConfig, t as SplatEnvironmentReadiness, o as VisualScenarioConfig } from './types-BHBNJubg.js';
|
|
3
|
+
import { n as SplatEnvironmentProps, q as PairedSplatEnvironmentConfig, S as SceneConfig, t as SplatEnvironmentReadiness, o as VisualScenarioConfig } from './types-B-J8fpPP.js';
|
|
4
4
|
import 'react';
|
|
5
5
|
import '@react-three/fiber';
|
|
6
6
|
import 'three';
|
package/dist/spark.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { useSplatSceneConfig, useSplatEnvironment, SplatEnvironment, CAMERA_FRAME_CAPTURE_RENDER_USER_DATA_KEY } from './chunk-FEKBKHEN.js';
|
|
1
|
+
import { useSplatSceneConfig, useSplatEnvironment, SplatEnvironment, CAMERA_FRAME_CAPTURE_RENDER_USER_DATA_KEY } from './chunk-CYDGWNKQ.js';
|
|
2
2
|
import { useThree } from '@react-three/fiber';
|
|
3
3
|
import { useMemo, useState, useEffect, useCallback, useRef } from 'react';
|
|
4
4
|
import * as THREE from 'three';
|
|
package/dist/{types-BHBNJubg.d.ts → types-B-J8fpPP.d.ts}
CHANGED
|
@@ -573,6 +573,35 @@ interface RayHit {
|
|
|
573
573
|
geomId: number;
|
|
574
574
|
distance: number;
|
|
575
575
|
}
|
|
576
|
+
type ImagePointCoordinateSpace = 'normalized' | 'normalized-1000' | 'pixel' | 'ndc';
|
|
577
|
+
interface ImagePointProjectionOptions extends CameraFrameCaptureOptions {
|
|
578
|
+
/** X coordinate in the selected coordinate space. Defaults to normalized 0..1. */
|
|
579
|
+
x: number;
|
|
580
|
+
/** Y coordinate in the selected coordinate space. Defaults to normalized 0..1 with origin at top-left. */
|
|
581
|
+
y: number;
|
|
582
|
+
/**
|
|
583
|
+
* Coordinate convention for x/y:
|
|
584
|
+
* - normalized: 0..1 image coordinates, top-left origin
|
|
585
|
+
* - normalized-1000: 0..1000 detector coordinates, top-left origin
|
|
586
|
+
* - pixel: pixel coordinates, top-left origin
|
|
587
|
+
* - ndc: Three.js normalized device coordinates, -1..1
|
|
588
|
+
*/
|
|
589
|
+
coordinateSpace?: ImagePointCoordinateSpace;
|
|
590
|
+
/** Image width for pixel coordinates. Falls back to `width` or renderer canvas width. */
|
|
591
|
+
imageWidth?: number;
|
|
592
|
+
/** Image height for pixel coordinates. Falls back to `height` or renderer canvas height. */
|
|
593
|
+
imageHeight?: number;
|
|
594
|
+
/** Ignore hits farther than this distance from the camera ray origin. */
|
|
595
|
+
maxDistance?: number;
|
|
596
|
+
}
|
|
597
|
+
interface ImagePointProjectionResult extends RayHit {
|
|
598
|
+
/** NDC coordinates used for raycasting. */
|
|
599
|
+
ndc: [number, number];
|
|
600
|
+
/** Image dimensions used when interpreting pixel coordinates. */
|
|
601
|
+
imageSize: [number, number];
|
|
602
|
+
/** Camera pose provenance, matching camera-frame capture results. */
|
|
603
|
+
source: CameraFrameCaptureSource;
|
|
604
|
+
}
|
|
576
605
|
interface ModelOptions {
|
|
577
606
|
timestep: number;
|
|
578
607
|
gravity: [number, number, number];
|
|
@@ -607,6 +636,8 @@ interface PolicyObservationInput {
|
|
|
607
636
|
}
|
|
608
637
|
interface PolicyInferenceInput extends PolicyObservationInput {
|
|
609
638
|
observation: PolicyVector;
|
|
639
|
+
/** Number of actions still queued locally when inference is requested. */
|
|
640
|
+
queuedActions?: number;
|
|
610
641
|
}
|
|
611
642
|
type PolicyActionChunk = readonly PolicyVector[];
|
|
612
643
|
type PolicyInferenceOutput = PolicyVector | PolicyActionChunk;
|
|
@@ -1094,6 +1125,7 @@ interface MujocoSimAPI {
|
|
|
1094
1125
|
bodyId: number;
|
|
1095
1126
|
geomId: number;
|
|
1096
1127
|
} | null;
|
|
1128
|
+
projectImagePointTo3D(options: ImagePointProjectionOptions): ImagePointProjectionResult | null;
|
|
1097
1129
|
setBodyMass(name: Bodies, mass: number): void;
|
|
1098
1130
|
setGeomFriction(name: Geoms, friction: [number, number, number]): void;
|
|
1099
1131
|
setGeomSize(name: Geoms, size: [number, number, number]): void;
|
|
@@ -1166,6 +1198,8 @@ interface CameraFrameCaptureOptions {
|
|
|
1166
1198
|
background?: THREE.ColorRepresentation;
|
|
1167
1199
|
/** Optional clear alpha for this capture only. Defaults to the renderer's current clear alpha. */
|
|
1168
1200
|
backgroundAlpha?: number;
|
|
1201
|
+
/** Mirror the captured image horizontally after rendering. Useful when matching policy datasets with mirrored camera frames. */
|
|
1202
|
+
flipX?: boolean;
|
|
1169
1203
|
}
|
|
1170
1204
|
type CameraFrameCaptureSource = {
|
|
1171
1205
|
kind: 'mujoco-camera';
|
|
@@ -1350,4 +1384,4 @@ interface ArrayJointStateResult {
|
|
|
1350
1384
|
velocity: React__default.RefObject<Float64Array>;
|
|
1351
1385
|
}
|
|
1352
1386
|
|
|
1353
|
-
export { type ArrayJointStateResult as $, type ActuatedJointInfo as A, type BodyProps as B, type ControlGroupInfo as C, type DragInteractionProps as D, type ScenarioLightingPreset as E, type SplatEnvironmentMetadataInput as F, type SplatEnvironmentMetadata as G, type SplatSceneInput as H, type IkConfig as I, type DebugProps as J, type GeomInfo as K, type ContactListenerProps as L, type MujocoContextValue as M, type ActuatorInfo as N, type ObservationConfig as O, type PhysicsStepCallback as P, type Sites as Q, type ReadyCallbackInput as R, type SceneConfig as S, type TrajectoryPlayerProps as T, type SitePositionResult as U, type VisualScenarioEffectsProps as V, type Sensors as W, type SensorHandle as X, type SensorInfo as Y, type Joints as Z, type ScalarJointStateResult as _, type MujocoCanvasProps as a,
|
|
1387
|
+
export { type ArrayJointStateResult as $, type ActuatedJointInfo as A, type BodyProps as B, type ControlGroupInfo as C, type DragInteractionProps as D, type ScenarioLightingPreset as E, type SplatEnvironmentMetadataInput as F, type SplatEnvironmentMetadata as G, type SplatSceneInput as H, type IkConfig as I, type DebugProps as J, type GeomInfo as K, type ContactListenerProps as L, type MujocoContextValue as M, type ActuatorInfo as N, type ObservationConfig as O, type PhysicsStepCallback as P, type Sites as Q, type ReadyCallbackInput as R, type SceneConfig as S, type TrajectoryPlayerProps as T, type SitePositionResult as U, type VisualScenarioEffectsProps as V, type Sensors as W, type SensorHandle as X, type SensorInfo as Y, type Joints as Z, type ScalarJointStateResult as _, type MujocoCanvasProps as a, ModelActuators as a$, type JointStateOptions as a0, type JointStateResult as a1, type Bodies as a2, type BodyStateResult as a3, type Geoms as a4, type Actuators as a5, type CtrlHandle as a6, type ContactInfo as a7, type KeyboardTeleopConfig as a8, type KeyboardIkTargetConfig as a9, type CameraFrameSequenceRecorderAPI as aA, type CameraFrameCaptureResult as aB, type CameraFrameCaptureBlobResult as aC, type ImagePointCoordinateSpace as aD, type ImagePointProjectionOptions as aE, type ImagePointProjectionResult as aF, type PolicyVector as aG, type BodyInfo as aH, type CameraFrameCaptureQuaternion as aI, type CameraFrameCaptureVector3 as aJ, type CameraFrameSequenceCameraSummary as aK, type CameraFrameSequenceFrame as aL, type CameraFrameSequenceSampleInput as aM, type CameraFrameSequenceStepInput as aN, type CameraInfo as aO, type ControlJointInfo as aP, type FrameCaptureTarget as aQ, type FrameCaptureTargetRef as aR, type IKSolveFn as aS, type IkGizmoDragInput as aT, type IkSolveInput as aU, type JointInfo as aV, type JointStateKind as aW, type KeyBinding as aX, type KeyboardIkTargetAction as aY, type KeyboardIkTargetBinding as aZ, type Keyframes as a_, type 
PolicyConfig as aa, type PolicyAPI as ab, type RemotePolicyConfig as ac, type RemotePolicyAPI as ad, type ObservationHandle as ae, type ObservationOutput as af, type TrajectoryInput as ag, type TrajectoryStateChangeInput as ah, type PlaybackState as ai, type TrajectoryFrame as aj, type FrameCaptureOptions as ak, type FrameCaptureResult as al, type FrameCaptureBlobResult as am, type FrameCaptureAPI as an, type CameraFrameCaptureOptions as ao, type CameraFrameCaptureAPI as ap, type Cameras as aq, type CameraFrameSequenceCamera as ar, type CameraFrameCaptureSource as as, type CameraFrameSequenceOptions as at, type CameraFrameSequenceResult as au, type PolicyCameraFrameStream as av, type PolicyCameraFrameCaptureOptions as aw, type PolicyCameraFrameCaptureResult as ax, type FrameCaptureStatus as ay, type PolicyCameraFrameCaptureAPI as az, type MujocoSimAPI as b, ModelBodies as b0, ModelCameras as b1, ModelGeoms as b2, ModelJoints as b3, ModelKeyframes as b4, type ModelOptions as b5, type ModelResource as b6, ModelResources as b7, ModelSensors as b8, ModelSites as b9, type SceneMarker as bA, type SceneObject as bB, type SensorResult as bC, type SiteInfo as bD, type SplatAssetConfig as bE, type SplatScenarioConfig as bF, type StateSnapshot as bG, type TrajectoryData as bH, type TrajectoryFrameCallbackInput as bI, type VisualScenarioMaterialFilterInput as bJ, type XmlPatch as bK, getContact as bL, registerModelResources as bM, type Models as ba, type MujocoContact as bb, type MujocoContactArray as bc, type MujocoFrameCaptureOptions as bd, type ObservationLayoutItem as be, type PhysicsConfig as bf, type PhysicsStepInput as bg, type PolicyActionChunk as bh, type PolicyActionInput as bi, type PolicyInferenceInput as bj, type PolicyInferenceOutput as bk, type PolicyInferenceResult as bl, type PolicyObservationInput as bm, type RayHit as bn, type Register as bo, type RegisteredModelMap as bp, type RemotePolicyRequestInfo as bq, type RemotePolicyRequestInput as br, type 
RemotePolicyResponseInfo as bs, type RemotePolicyStatus as bt, type ResetCallbackInput as bu, type ResolvedScenarioCameraConfig as bv, type ResolvedScenarioMaterialConfig as bw, type ResourceSelector as bx, type ScenarioCameraConfig as by, type ScenarioMaterialConfig as bz, type StepCallbackInput as c, type SelectionCallbackInput as d, type MujocoModule as e, type MujocoModel as f, type MujocoData as g, type ControlGroupSelector as h, type ObservationResult as i, type IkContextValue as j, type IkGizmoProps as k, type SceneLightsProps as l, type ScenarioLightingProps as m, type SplatEnvironmentProps as n, type VisualScenarioConfig as o, type SplatRendererKind as p, type PairedSplatEnvironmentConfig as q, type SplatFormat as r, type SplatCollisionProxyConfig as s, type SplatEnvironmentReadiness as t, type SplatCollisionPrimitive as u, SplatEnvironmentReadinessStatus as v, type SplatSceneConfigInput as w, type SplatSceneConfigState as x, type VisualScenarioExecutionContextInput as y, type VisualScenarioExecutionContext as z };
|
package/package.json
CHANGED
|
package/src/core/MujocoSimProvider.tsx
CHANGED
|
@@ -31,6 +31,8 @@ import {
|
|
|
31
31
|
ControlGroupSelector,
|
|
32
32
|
ContactInfo,
|
|
33
33
|
GeomInfo,
|
|
34
|
+
ImagePointProjectionOptions,
|
|
35
|
+
ImagePointProjectionResult,
|
|
34
36
|
JointInfo,
|
|
35
37
|
LoadFromFilesOptions,
|
|
36
38
|
LocalMujocoFile,
|
|
@@ -61,6 +63,7 @@ import {
|
|
|
61
63
|
getCameraFrameCaptureSourceTarget,
|
|
62
64
|
isMountedCameraFrameCaptureSource,
|
|
63
65
|
} from '../rendering/cameraFrameSource';
|
|
66
|
+
import { projectImagePointTo3D as projectImagePointTo3DFromScene } from '../rendering/imageProjection';
|
|
64
67
|
import {
|
|
65
68
|
loadScene,
|
|
66
69
|
createSceneConfigFromFiles,
|
|
@@ -1590,6 +1593,31 @@ export function MujocoSimProvider({
|
|
|
1590
1593
|
[camera, gl]
|
|
1591
1594
|
);
|
|
1592
1595
|
|
|
1596
|
+
const projectImagePointTo3D = useCallback(
|
|
1597
|
+
(options: ImagePointProjectionOptions): ImagePointProjectionResult | null => {
|
|
1598
|
+
const {
|
|
1599
|
+
x,
|
|
1600
|
+
y,
|
|
1601
|
+
coordinateSpace,
|
|
1602
|
+
imageWidth,
|
|
1603
|
+
imageHeight,
|
|
1604
|
+
maxDistance,
|
|
1605
|
+
...captureOptions
|
|
1606
|
+
} = options;
|
|
1607
|
+
const resolvedCaptureOptions = resolveCameraCaptureOptions(captureOptions);
|
|
1608
|
+
return projectImagePointTo3DFromScene(gl, scene, camera, {
|
|
1609
|
+
...resolvedCaptureOptions,
|
|
1610
|
+
x,
|
|
1611
|
+
y,
|
|
1612
|
+
coordinateSpace,
|
|
1613
|
+
imageWidth,
|
|
1614
|
+
imageHeight,
|
|
1615
|
+
maxDistance,
|
|
1616
|
+
});
|
|
1617
|
+
},
|
|
1618
|
+
[camera, gl, resolveCameraCaptureOptions, scene]
|
|
1619
|
+
);
|
|
1620
|
+
|
|
1593
1621
|
// --- Domain randomization ---
|
|
1594
1622
|
|
|
1595
1623
|
const setBodyMass = useCallback((name: string, mass: number): void => {
|
|
@@ -1676,6 +1704,7 @@ export function MujocoSimProvider({
|
|
|
1676
1704
|
captureCameraFrameBlob: captureCameraFrameBlobApi,
|
|
1677
1705
|
recordCameraSequence: recordCameraSequenceApi,
|
|
1678
1706
|
project2DTo3D,
|
|
1707
|
+
projectImagePointTo3D,
|
|
1679
1708
|
setBodyMass,
|
|
1680
1709
|
setGeomFriction,
|
|
1681
1710
|
setGeomSize,
|
|
@@ -1696,6 +1725,7 @@ export function MujocoSimProvider({
|
|
|
1696
1725
|
captureCameraFrameApi, captureCameraFrameBlobApi,
|
|
1697
1726
|
recordCameraSequenceApi,
|
|
1698
1727
|
project2DTo3D,
|
|
1728
|
+
projectImagePointTo3D,
|
|
1699
1729
|
setBodyMass, setGeomFriction, setGeomSize,
|
|
1700
1730
|
]
|
|
1701
1731
|
);
|
package/src/hooks/usePolicy.ts
CHANGED
|
@@ -111,7 +111,10 @@ export function usePolicy(config: PolicyConfig): PolicyAPI {
|
|
|
111
111
|
|
|
112
112
|
// Build observation
|
|
113
113
|
const observation = cfg.onObservation({ model, data });
|
|
114
|
-
const
|
|
114
|
+
const queuedActions = actionQueueRef.current.length;
|
|
115
|
+
const result = cfg.infer
|
|
116
|
+
? cfg.infer({ observation, model, data, queuedActions })
|
|
117
|
+
: observation;
|
|
115
118
|
|
|
116
119
|
if (isPromiseLike(result)) {
|
|
117
120
|
const epoch = epochRef.current;
|
|
package/src/hooks/useRemotePolicy.ts
CHANGED
|
@@ -163,7 +163,7 @@ export function useRemotePolicy(config: RemotePolicyConfig): RemotePolicyAPI {
|
|
|
163
163
|
|
|
164
164
|
const policy = usePolicy({
|
|
165
165
|
...config,
|
|
166
|
-
infer: async ({ observation, model, data }) => {
|
|
166
|
+
infer: async ({ observation, model, data, queuedActions }) => {
|
|
167
167
|
const cfg = configRef.current;
|
|
168
168
|
abortController(abortControllerRef.current, createAbortError('Remote policy request was superseded.'));
|
|
169
169
|
const controller = new AbortController();
|
|
@@ -175,6 +175,7 @@ export function useRemotePolicy(config: RemotePolicyConfig): RemotePolicyAPI {
|
|
|
175
175
|
observation,
|
|
176
176
|
model,
|
|
177
177
|
data,
|
|
178
|
+
queuedActions,
|
|
178
179
|
reset: requestIndex === 0,
|
|
179
180
|
requestIndex,
|
|
180
181
|
signal,
|
package/src/index.ts
CHANGED
|
@@ -148,6 +148,10 @@ export {
|
|
|
148
148
|
createCameraFrameCaptureSession,
|
|
149
149
|
renderCameraFrameToCanvas,
|
|
150
150
|
} from './rendering/cameraFrameCapture';
|
|
151
|
+
export {
|
|
152
|
+
imagePointToNdc,
|
|
153
|
+
projectImagePointTo3D,
|
|
154
|
+
} from './rendering/imageProjection';
|
|
151
155
|
export {
|
|
152
156
|
capturePolicyCameraFrames,
|
|
153
157
|
capturePolicyCameraFramesFromMountedStreams,
|
|
@@ -269,6 +273,9 @@ export type {
|
|
|
269
273
|
ContactInfo,
|
|
270
274
|
// Raycast
|
|
271
275
|
RayHit,
|
|
276
|
+
ImagePointCoordinateSpace,
|
|
277
|
+
ImagePointProjectionOptions,
|
|
278
|
+
ImagePointProjectionResult,
|
|
272
279
|
// Model options
|
|
273
280
|
ModelOptions,
|
|
274
281
|
// Trajectory
|
|
package/src/rendering/cameraFrameCapture.ts
CHANGED
|
@@ -60,6 +60,7 @@ export type CameraFrameCaptureRenderResult = {
|
|
|
60
60
|
width?: number;
|
|
61
61
|
height?: number;
|
|
62
62
|
flipY?: boolean;
|
|
63
|
+
flipX?: boolean;
|
|
63
64
|
};
|
|
64
65
|
|
|
65
66
|
type CameraFrameCaptureRender = (
|
|
@@ -196,7 +197,8 @@ function readRenderTargetToCanvas(
|
|
|
196
197
|
imageData: ImageData,
|
|
197
198
|
width: number,
|
|
198
199
|
height: number,
|
|
199
|
-
outputColorSpace: string
|
|
200
|
+
outputColorSpace: string,
|
|
201
|
+
flipX = false
|
|
200
202
|
) {
|
|
201
203
|
renderer.readRenderTargetPixels(target, 0, 0, width, height, pixels);
|
|
202
204
|
|
|
@@ -206,17 +208,25 @@ function readRenderTargetToCanvas(
|
|
|
206
208
|
const sourceStart = (height - y - 1) * rowBytes;
|
|
207
209
|
const targetStart = y * rowBytes;
|
|
208
210
|
const row = pixels.subarray(sourceStart, sourceStart + rowBytes);
|
|
209
|
-
if (!encodeSrgb) {
|
|
211
|
+
if (!encodeSrgb && !flipX) {
|
|
210
212
|
imageData.data.set(row, targetStart);
|
|
211
213
|
continue;
|
|
212
214
|
}
|
|
213
215
|
|
|
214
|
-
for (let x = 0; x <
|
|
215
|
-
const
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
imageData.data[
|
|
219
|
-
|
|
216
|
+
for (let x = 0; x < width; x += 1) {
|
|
217
|
+
const sourceX = flipX ? width - x - 1 : x;
|
|
218
|
+
const sourceOffset = sourceX * 4;
|
|
219
|
+
const targetOffset = targetStart + x * 4;
|
|
220
|
+
imageData.data[targetOffset] = encodeSrgb
|
|
221
|
+
? linearByteToSrgbByte(row[sourceOffset])
|
|
222
|
+
: row[sourceOffset];
|
|
223
|
+
imageData.data[targetOffset + 1] = encodeSrgb
|
|
224
|
+
? linearByteToSrgbByte(row[sourceOffset + 1])
|
|
225
|
+
: row[sourceOffset + 1];
|
|
226
|
+
imageData.data[targetOffset + 2] = encodeSrgb
|
|
227
|
+
? linearByteToSrgbByte(row[sourceOffset + 2])
|
|
228
|
+
: row[sourceOffset + 2];
|
|
229
|
+
imageData.data[targetOffset + 3] = row[sourceOffset + 3];
|
|
220
230
|
}
|
|
221
231
|
}
|
|
222
232
|
context.putImageData(imageData, 0, 0);
|
|
@@ -238,17 +248,30 @@ function readPixelsToCanvas(
|
|
|
238
248
|
imageData: ImageData,
|
|
239
249
|
width: number,
|
|
240
250
|
height: number,
|
|
241
|
-
flipY = true
|
|
251
|
+
flipY = true,
|
|
252
|
+
flipX = false
|
|
242
253
|
) {
|
|
243
254
|
const rowBytes = width * 4;
|
|
244
255
|
for (let y = 0; y < height; y += 1) {
|
|
245
256
|
const sourceY = flipY ? height - y - 1 : y;
|
|
246
257
|
const sourceStart = sourceY * rowBytes;
|
|
247
258
|
const targetStart = y * rowBytes;
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
259
|
+
if (!flipX) {
|
|
260
|
+
imageData.data.set(
|
|
261
|
+
pixels.subarray(sourceStart, sourceStart + rowBytes),
|
|
262
|
+
targetStart
|
|
263
|
+
);
|
|
264
|
+
continue;
|
|
265
|
+
}
|
|
266
|
+
for (let x = 0; x < width; x += 1) {
|
|
267
|
+
const sourceX = width - x - 1;
|
|
268
|
+
const sourceOffset = sourceStart + sourceX * 4;
|
|
269
|
+
const targetOffset = targetStart + x * 4;
|
|
270
|
+
imageData.data[targetOffset] = pixels[sourceOffset];
|
|
271
|
+
imageData.data[targetOffset + 1] = pixels[sourceOffset + 1];
|
|
272
|
+
imageData.data[targetOffset + 2] = pixels[sourceOffset + 2];
|
|
273
|
+
imageData.data[targetOffset + 3] = pixels[sourceOffset + 3];
|
|
274
|
+
}
|
|
252
275
|
}
|
|
253
276
|
context.putImageData(imageData, 0, 0);
|
|
254
277
|
}
|
|
@@ -462,7 +485,8 @@ export function createCameraFrameCaptureSession(
|
|
|
462
485
|
imageData,
|
|
463
486
|
width,
|
|
464
487
|
height,
|
|
465
|
-
renderer.outputColorSpace
|
|
488
|
+
renderer.outputColorSpace,
|
|
489
|
+
captureOptions.flipX ?? false
|
|
466
490
|
);
|
|
467
491
|
return {
|
|
468
492
|
canvas,
|
|
@@ -524,7 +548,8 @@ export function createCameraFrameCaptureSession(
|
|
|
524
548
|
imageData,
|
|
525
549
|
width,
|
|
526
550
|
height,
|
|
527
|
-
captureResult.flipY ?? true
|
|
551
|
+
captureResult.flipY ?? true,
|
|
552
|
+
captureResult.flipX ?? captureOptions.flipX ?? false
|
|
528
553
|
);
|
|
529
554
|
return {
|
|
530
555
|
canvas,
|
|
package/src/rendering/imageProjection.ts
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Project detector/image coordinates from a camera view into the rendered MuJoCo scene.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as THREE from 'three';
|
|
9
|
+
import type {
|
|
10
|
+
CameraFrameCaptureOptions,
|
|
11
|
+
CameraFrameCaptureSource,
|
|
12
|
+
ImagePointCoordinateSpace,
|
|
13
|
+
ImagePointProjectionOptions,
|
|
14
|
+
ImagePointProjectionResult,
|
|
15
|
+
} from '../types';
|
|
16
|
+
import { CAPTURE_EXCLUDE_KEY } from './cameraFrameCapture';
|
|
17
|
+
|
|
18
|
+
const _raycaster = new THREE.Raycaster();
|
|
19
|
+
const _ndc = new THREE.Vector2();
|
|
20
|
+
|
|
21
|
+
function toVector3(
|
|
22
|
+
value: CameraFrameCaptureOptions['position'] | undefined,
|
|
23
|
+
fallback: THREE.Vector3
|
|
24
|
+
) {
|
|
25
|
+
if (!value) return fallback.clone();
|
|
26
|
+
return value instanceof THREE.Vector3
|
|
27
|
+
? value.clone()
|
|
28
|
+
: new THREE.Vector3(value[0], value[1], value[2]);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function applyCameraPose(
|
|
32
|
+
camera: THREE.Camera,
|
|
33
|
+
options: CameraFrameCaptureOptions,
|
|
34
|
+
fallbackCamera: THREE.Camera
|
|
35
|
+
) {
|
|
36
|
+
camera.position.copy(toVector3(options.position, fallbackCamera.position));
|
|
37
|
+
camera.up.copy(toVector3(options.up, fallbackCamera.up));
|
|
38
|
+
|
|
39
|
+
if (options.quaternion) {
|
|
40
|
+
if (options.quaternion instanceof THREE.Quaternion) {
|
|
41
|
+
camera.quaternion.copy(options.quaternion);
|
|
42
|
+
} else {
|
|
43
|
+
camera.quaternion.set(
|
|
44
|
+
options.quaternion[0],
|
|
45
|
+
options.quaternion[1],
|
|
46
|
+
options.quaternion[2],
|
|
47
|
+
options.quaternion[3]
|
|
48
|
+
);
|
|
49
|
+
}
|
|
50
|
+
} else if (options.lookAt) {
|
|
51
|
+
camera.lookAt(toVector3(options.lookAt, new THREE.Vector3()));
|
|
52
|
+
} else {
|
|
53
|
+
camera.quaternion.copy(fallbackCamera.quaternion);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
camera.updateMatrixWorld();
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function createProjectionCamera(
|
|
60
|
+
fallbackCamera: THREE.Camera,
|
|
61
|
+
options: CameraFrameCaptureOptions,
|
|
62
|
+
width: number,
|
|
63
|
+
height: number
|
|
64
|
+
) {
|
|
65
|
+
const camera = options.camera
|
|
66
|
+
? options.camera.clone()
|
|
67
|
+
: fallbackCamera instanceof THREE.PerspectiveCamera
|
|
68
|
+
? fallbackCamera.clone()
|
|
69
|
+
: new THREE.PerspectiveCamera(45, width / height, 0.01, 100);
|
|
70
|
+
|
|
71
|
+
if (camera instanceof THREE.PerspectiveCamera) {
|
|
72
|
+
camera.aspect = width / height;
|
|
73
|
+
camera.fov = options.fov ?? camera.fov;
|
|
74
|
+
camera.near = options.near ?? camera.near;
|
|
75
|
+
camera.far = options.far ?? camera.far;
|
|
76
|
+
camera.updateProjectionMatrix();
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
applyCameraPose(camera, options, fallbackCamera);
|
|
80
|
+
return camera;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
function getProjectionSource(options: CameraFrameCaptureOptions): CameraFrameCaptureSource {
|
|
84
|
+
if (options.source) return options.source;
|
|
85
|
+
if (options.cameraName) return { kind: 'mujoco-camera', cameraName: options.cameraName };
|
|
86
|
+
if (options.siteName) return { kind: 'mujoco-site', siteName: options.siteName };
|
|
87
|
+
if (options.bodyName) return { kind: 'mujoco-body', bodyName: options.bodyName };
|
|
88
|
+
if (options.camera) return { kind: 'custom-camera' };
|
|
89
|
+
if (options.position || options.lookAt || options.quaternion) return { kind: 'explicit-pose' };
|
|
90
|
+
return { kind: 'fallback-camera' };
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function imageSize(
|
|
94
|
+
renderer: THREE.WebGLRenderer,
|
|
95
|
+
options: ImagePointProjectionOptions
|
|
96
|
+
): [number, number] {
|
|
97
|
+
return [
|
|
98
|
+
Math.max(1, Math.floor(options.imageWidth ?? options.width ?? renderer.domElement.width)),
|
|
99
|
+
Math.max(1, Math.floor(options.imageHeight ?? options.height ?? renderer.domElement.height)),
|
|
100
|
+
];
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export function imagePointToNdc(
|
|
104
|
+
x: number,
|
|
105
|
+
y: number,
|
|
106
|
+
coordinateSpace: ImagePointCoordinateSpace = 'normalized',
|
|
107
|
+
width = 1,
|
|
108
|
+
height = 1
|
|
109
|
+
): [number, number] {
|
|
110
|
+
if (coordinateSpace === 'ndc') return [x, y];
|
|
111
|
+
if (coordinateSpace === 'normalized-1000') {
|
|
112
|
+
return [(x / 1000) * 2 - 1, 1 - (y / 1000) * 2];
|
|
113
|
+
}
|
|
114
|
+
if (coordinateSpace === 'pixel') {
|
|
115
|
+
return [(x / width) * 2 - 1, 1 - (y / height) * 2];
|
|
116
|
+
}
|
|
117
|
+
return [x * 2 - 1, 1 - y * 2];
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
function isProjectionCandidate(object: THREE.Object3D, options: ImagePointProjectionOptions) {
|
|
121
|
+
if (!object.visible) return false;
|
|
122
|
+
if (object.userData[CAPTURE_EXCLUDE_KEY]) return false;
|
|
123
|
+
|
|
124
|
+
const geomGroup = object.userData.geomGroup;
|
|
125
|
+
const geomName = object.userData.geomName;
|
|
126
|
+
if (options.hiddenGeomNames && typeof geomName === 'string' && options.hiddenGeomNames.includes(geomName)) {
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
if (options.hiddenGeomGroups && typeof geomGroup === 'number' && options.hiddenGeomGroups.includes(geomGroup)) {
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
if (options.visibleGeomGroups && typeof geomGroup === 'number' && !options.visibleGeomGroups.includes(geomGroup)) {
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
135
|
+
return true;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
function findBodyId(object: THREE.Object3D) {
|
|
139
|
+
let current: THREE.Object3D | null = object;
|
|
140
|
+
while (current && current.userData.bodyID === undefined && current.parent) {
|
|
141
|
+
current = current.parent;
|
|
142
|
+
}
|
|
143
|
+
return typeof current?.userData.bodyID === 'number' ? current.userData.bodyID : -1;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
export function projectImagePointTo3D(
|
|
147
|
+
renderer: THREE.WebGLRenderer,
|
|
148
|
+
scene: THREE.Scene,
|
|
149
|
+
fallbackCamera: THREE.Camera,
|
|
150
|
+
options: ImagePointProjectionOptions
|
|
151
|
+
): ImagePointProjectionResult | null {
|
|
152
|
+
const [width, height] = imageSize(renderer, options);
|
|
153
|
+
const [ndcX, ndcY] = imagePointToNdc(
|
|
154
|
+
options.x,
|
|
155
|
+
options.y,
|
|
156
|
+
options.coordinateSpace,
|
|
157
|
+
width,
|
|
158
|
+
height
|
|
159
|
+
);
|
|
160
|
+
const projectionCamera = createProjectionCamera(fallbackCamera, options, width, height);
|
|
161
|
+
|
|
162
|
+
scene.updateMatrixWorld(true);
|
|
163
|
+
_ndc.set(ndcX, ndcY);
|
|
164
|
+
_raycaster.setFromCamera(_ndc, projectionCamera);
|
|
165
|
+
_raycaster.far = options.maxDistance ?? Infinity;
|
|
166
|
+
|
|
167
|
+
const objects: THREE.Object3D[] = [];
|
|
168
|
+
scene.traverse((object) => {
|
|
169
|
+
if ((object as THREE.Mesh).isMesh && isProjectionCandidate(object, options)) {
|
|
170
|
+
objects.push(object);
|
|
171
|
+
}
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
const [hit] = _raycaster.intersectObjects(objects, true);
|
|
175
|
+
if (!hit) return null;
|
|
176
|
+
|
|
177
|
+
return {
|
|
178
|
+
point: hit.point.clone(),
|
|
179
|
+
bodyId: findBodyId(hit.object),
|
|
180
|
+
geomId: typeof hit.object.userData.geomID === 'number' ? hit.object.userData.geomID : -1,
|
|
181
|
+
distance: hit.distance,
|
|
182
|
+
ndc: [ndcX, ndcY],
|
|
183
|
+
imageSize: [width, height],
|
|
184
|
+
source: getProjectionSource(options),
|
|
185
|
+
};
|
|
186
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -747,6 +747,42 @@ export interface RayHit {
|
|
|
747
747
|
distance: number;
|
|
748
748
|
}
|
|
749
749
|
|
|
750
|
+
export type ImagePointCoordinateSpace =
|
|
751
|
+
| 'normalized'
|
|
752
|
+
| 'normalized-1000'
|
|
753
|
+
| 'pixel'
|
|
754
|
+
| 'ndc';
|
|
755
|
+
|
|
756
|
+
export interface ImagePointProjectionOptions extends CameraFrameCaptureOptions {
|
|
757
|
+
/** X coordinate in the selected coordinate space. Defaults to normalized 0..1. */
|
|
758
|
+
x: number;
|
|
759
|
+
/** Y coordinate in the selected coordinate space. Defaults to normalized 0..1 with origin at top-left. */
|
|
760
|
+
y: number;
|
|
761
|
+
/**
|
|
762
|
+
* Coordinate convention for x/y:
|
|
763
|
+
* - normalized: 0..1 image coordinates, top-left origin
|
|
764
|
+
* - normalized-1000: 0..1000 detector coordinates, top-left origin
|
|
765
|
+
* - pixel: pixel coordinates, top-left origin
|
|
766
|
+
* - ndc: Three.js normalized device coordinates, -1..1
|
|
767
|
+
*/
|
|
768
|
+
coordinateSpace?: ImagePointCoordinateSpace;
|
|
769
|
+
/** Image width for pixel coordinates. Falls back to `width` or renderer canvas width. */
|
|
770
|
+
imageWidth?: number;
|
|
771
|
+
/** Image height for pixel coordinates. Falls back to `height` or renderer canvas height. */
|
|
772
|
+
imageHeight?: number;
|
|
773
|
+
/** Ignore hits farther than this distance from the camera ray origin. */
|
|
774
|
+
maxDistance?: number;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
export interface ImagePointProjectionResult extends RayHit {
|
|
778
|
+
/** NDC coordinates used for raycasting. */
|
|
779
|
+
ndc: [number, number];
|
|
780
|
+
/** Image dimensions used when interpreting pixel coordinates. */
|
|
781
|
+
imageSize: [number, number];
|
|
782
|
+
/** Camera pose provenance, matching camera-frame capture results. */
|
|
783
|
+
source: CameraFrameCaptureSource;
|
|
784
|
+
}
|
|
785
|
+
|
|
750
786
|
// ---- Model Options (spec 5.3) ----
|
|
751
787
|
|
|
752
788
|
export interface ModelOptions {
|
|
@@ -797,6 +833,8 @@ export interface PolicyObservationInput {
|
|
|
797
833
|
|
|
798
834
|
export interface PolicyInferenceInput extends PolicyObservationInput {
|
|
799
835
|
observation: PolicyVector;
|
|
836
|
+
/** Number of actions still queued locally when inference is requested. */
|
|
837
|
+
queuedActions?: number;
|
|
800
838
|
}
|
|
801
839
|
|
|
802
840
|
export type PolicyActionChunk = readonly PolicyVector[];
|
|
@@ -1404,6 +1442,7 @@ export interface MujocoSimAPI {
|
|
|
1404
1442
|
cameraPos: THREE.Vector3,
|
|
1405
1443
|
lookAt: THREE.Vector3
|
|
1406
1444
|
): { point: THREE.Vector3; bodyId: number; geomId: number } | null;
|
|
1445
|
+
projectImagePointTo3D(options: ImagePointProjectionOptions): ImagePointProjectionResult | null;
|
|
1407
1446
|
|
|
1408
1447
|
// Domain randomization (spec 10.3)
|
|
1409
1448
|
setBodyMass(name: Bodies, mass: number): void;
|
|
@@ -1502,6 +1541,8 @@ export interface CameraFrameCaptureOptions {
|
|
|
1502
1541
|
background?: THREE.ColorRepresentation;
|
|
1503
1542
|
/** Optional clear alpha for this capture only. Defaults to the renderer's current clear alpha. */
|
|
1504
1543
|
backgroundAlpha?: number;
|
|
1544
|
+
/** Mirror the captured image horizontally after rendering. Useful when matching policy datasets with mirrored camera frames. */
|
|
1545
|
+
flipX?: boolean;
|
|
1505
1546
|
}
|
|
1506
1547
|
|
|
1507
1548
|
export type CameraFrameCaptureSource =
|