@camstack/addon-pipeline 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,6 @@ function npuInfoFrom(hw) {
121
121
  return { type };
122
122
  }
123
123
  function envToHardwareInfo(env) {
124
- if (!env.hardware) return null;
125
124
  return {
126
125
  platform: toKnownPlatform(env.platform),
127
126
  arch: toKnownArch(env.arch),
@@ -129,8 +128,8 @@ function envToHardwareInfo(env) {
129
128
  cpuCores: 0,
130
129
  totalRAM_MB: 0,
131
130
  availableRAM_MB: 0,
132
- gpu: gpuInfoFrom(env.hardware),
133
- npu: npuInfoFrom(env.hardware)
131
+ gpu: env.hardware ? gpuInfoFrom(env.hardware) : null,
132
+ npu: env.hardware ? npuInfoFrom(env.hardware) : null
134
133
  };
135
134
  }
136
135
  function probedToHardwareInfo(hw) {
@@ -4096,6 +4095,17 @@ var ONNX_FLOOR = {
4096
4095
  * instead of duplicated inline.
4097
4096
  */
4098
4097
  function onnxFloorPick() {
4098
+ return ONNX_FLOOR;
4099
+ }
4100
+ /**
4101
+ * The platform-deterministic engine pick computed SYNCHRONOUSLY from this node's
4102
+ * own `process.platform`/`arch` alone (no probe): darwin → coreml, else → onnx
4103
+ * (gpu-dependent openvino/cuda need the probe and converge via the auto-pick).
4104
+ * Used as the ENGINE fallback when a persisted selection is unsupported on this
4105
+ * node — so a stale GLOBAL engine config (e.g. the cluster's OpenVINO choice)
4106
+ * can never force an impossible engine onto a node whose platform rejects it.
4107
+ */
4108
+ function platformDefaultPick() {
4099
4109
  const pick = pickBestRuntime(runtimeEnvFromProcess(null), null);
4100
4110
  return {
4101
4111
  runtime: "python",
@@ -4104,6 +4114,19 @@ function onnxFloorPick() {
4104
4114
  device: pick.device
4105
4115
  };
4106
4116
  }
4117
+ /**
4118
+ * Is `backend` even POSSIBLE on this node's OS/arch (ignoring gpu detail)?
4119
+ * coreml ⇒ darwin only; openvino ⇒ x64 non-darwin only; onnx ⇒ anywhere. Used to
4120
+ * reject a persisted/global engine choice that the node's PLATFORM fundamentally
4121
+ * cannot run (e.g. the cluster's OpenVINO default landing on a Mac) — distinct
4122
+ * from the gpu-dependent support (linux without a probed Intel iGPU still keeps
4123
+ * openvino as a valid platform choice; the device falls back to cpu).
4124
+ */
4125
+ function backendPossibleOnPlatform(backend) {
4126
+ if (backend === "coreml") return process.platform === "darwin";
4127
+ if (backend === "openvino") return process.arch === "x64" && process.platform !== "darwin";
4128
+ return true;
4129
+ }
4107
4130
  var DetectionPipelineProvider = class DetectionPipelineProvider {
4108
4131
  modelsDir;
4109
4132
  eventBus;
@@ -4348,17 +4371,11 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4348
4371
  */
4349
4372
  async autoPickAndPersist() {
4350
4373
  let hardware = null;
4351
- let bestBackendHint = null;
4352
4374
  try {
4353
4375
  const api = this.addonCtx?.api;
4354
- if (api) {
4355
- const caps = await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() });
4356
- hardware = caps?.hardware ?? null;
4357
- const bs = caps?.bestScore;
4358
- if (bs && bs.runtime === "python") bestBackendHint = bs.backend;
4359
- }
4376
+ if (api) hardware = (await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() }))?.hardware ?? null;
4360
4377
  } catch {}
4361
- const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), bestBackendHint);
4378
+ const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), null);
4362
4379
  const engine = {
4363
4380
  runtime: "python",
4364
4381
  backend: pick.runtimeId,
@@ -4366,8 +4383,8 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4366
4383
  device: pick.device
4367
4384
  };
4368
4385
  this.currentEngine = engine;
4369
- if (!(hardware !== null || bestBackendHint !== null)) {
4370
- this.log.warn("Auto-pick: probe returned no hardware/hintusing onnx floor WITHOUT persisting", { meta: {
4386
+ if (!(pick.runtimeId !== "onnx" || hardware !== null)) {
4387
+ this.log.info("Auto-pick: onnx floor pending gpu probe NOT persisting (re-pick on done)", { meta: {
4371
4388
  backend: pick.runtimeId,
4372
4389
  device: pick.device
4373
4390
  } });
@@ -4378,10 +4395,10 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4378
4395
  [nodeEngineKey("engineBackend", apNode)]: pick.runtimeId,
4379
4396
  [nodeEngineKey("engineDevice", apNode)]: pick.device
4380
4397
  });
4381
- this.log.info("Auto-picked engine at first boot", { meta: {
4398
+ this.log.info("Auto-picked engine (platform-deterministic)", { meta: {
4382
4399
  backend: pick.runtimeId,
4383
4400
  device: pick.device,
4384
- hint: bestBackendHint ?? "none"
4401
+ hadProbeHardware: hardware !== null
4385
4402
  } });
4386
4403
  }
4387
4404
  /** Map a backend string to a known RuntimeId, flooring to onnx. */
@@ -4441,7 +4458,9 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4441
4458
  const runtimeId = this.toRuntimeId(engine.backend);
4442
4459
  const device = engine.device ?? "cpu";
4443
4460
  const snapshot = this.provisioner.state;
4444
- if (snapshot.runtimeId === runtimeId && snapshot.device === device && snapshot.state !== "idle") return;
4461
+ const sameSelection = snapshot.runtimeId === runtimeId && snapshot.device === device;
4462
+ if (sameSelection && snapshot.state !== "idle") return;
4463
+ if (!sameSelection) this.currentSteps = null;
4445
4464
  this.provisioner.select(runtimeId, device);
4446
4465
  }
4447
4466
  /**
@@ -4677,13 +4696,52 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4677
4696
  async getDefaultSteps(engine) {
4678
4697
  return buildDefaultStepTree(engine.format);
4679
4698
  }
4699
+ /**
4700
+ * Substitute, AT RUNTIME ONLY (never persisted), any step whose configured
4701
+ * model lacks a build for that step's active engine format. A GLOBAL pipeline
4702
+ * config can pin a model that's valid on the cluster's Intel nodes (e.g. the
4703
+ * OpenVINO-only `yolov9t-int8`) but impossible on a CoreML/ONNX node — without
4704
+ * this the node crash-loops `Model "X" has no <format> format`. The operator's
4705
+ * choice stays in the persisted config; this node just runs the smallest
4706
+ * catalog model that DOES have its format. Mirrors the per-node engine
4707
+ * fallback in `loadEngine`. Catalog models only — a custom model that lacks
4708
+ * the format is left as-is (operator's responsibility).
4709
+ */
4710
+ substituteIncompatibleModels(steps) {
4711
+ const fix = (step) => {
4712
+ const format = step.engine?.format ?? this.currentEngine.format;
4713
+ let modelId = step.modelId;
4714
+ try {
4715
+ const entry = getStepDefinition(step.addonId).models.find((m) => m.id === step.modelId);
4716
+ if (entry && !entry.formats[format]) {
4717
+ const fallback = getDefaultModelForFormat(step.addonId, format);
4718
+ if (fallback !== step.modelId) {
4719
+ this.log.info("Step model lacks engine format — substituting format default (runtime)", { meta: {
4720
+ step: step.addonId,
4721
+ configured: step.modelId,
4722
+ substitute: fallback,
4723
+ format
4724
+ } });
4725
+ modelId = fallback;
4726
+ }
4727
+ }
4728
+ } catch {}
4729
+ const children = step.children?.length ? step.children.map(fix) : step.children;
4730
+ return modelId === step.modelId && children === step.children ? step : {
4731
+ ...step,
4732
+ modelId,
4733
+ ...children ? { children } : {}
4734
+ };
4735
+ };
4736
+ return steps.map(fix);
4737
+ }
4680
4738
  async getGlobalSteps() {
4681
4739
  if (this.currentSteps) return this.currentSteps;
4682
4740
  const raw = (await this.readStore())[KEY_STEPS];
4683
4741
  if (!raw) {
4684
4742
  const defaults = buildDefaultStepTree(this.currentEngine.format);
4685
4743
  if (defaults.length === 0) return null;
4686
- this.currentSteps = defaults;
4744
+ this.currentSteps = this.substituteIncompatibleModels(defaults);
4687
4745
  this.writeStore({ [KEY_STEPS]: JSON.stringify(defaults) });
4688
4746
  this.log.info("Bootstrapped default pipeline — object-detection + face + plate recognition enabled by default", { meta: { rootSteps: defaults.length } });
4689
4747
  return this.currentSteps;
@@ -4721,7 +4779,7 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4721
4779
  this.log.info("Migration: added audio-classifier step to persisted pipeline config");
4722
4780
  }
4723
4781
  }
4724
- this.currentSteps = steps;
4782
+ this.currentSteps = this.substituteIncompatibleModels(steps);
4725
4783
  return this.currentSteps;
4726
4784
  } catch {
4727
4785
  throw new Error(`Failed to parse persisted pipeline steps: corrupt data in key "${KEY_STEPS}"`);
@@ -5872,12 +5930,13 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
5872
5930
  const storedDevice = typeof storedDeviceRaw === "string" ? storedDeviceRaw : "";
5873
5931
  const floor = onnxFloorPick();
5874
5932
  const migratedBackend = typeof storedRuntime === "string" && storedRuntime === "node" && backend === "cpu" ? "onnx" : backend;
5875
- if (!DetectionPipelineProvider.isPythonBackendAvailable(migratedBackend, this.executorOptions.pythonPath ?? "")) {
5876
- this.log.warn("Stored engine backend unavailable on this node — falling back to onnx floor", { meta: {
5933
+ if (!backendPossibleOnPlatform(migratedBackend)) {
5934
+ const platformDefault = platformDefaultPick();
5935
+ this.log.warn("Stored engine backend impossible on this platform — using platform default", { meta: {
5877
5936
  stored: migratedBackend,
5878
- fallback: `${floor.backend}`
5937
+ fallback: platformDefault.backend
5879
5938
  } });
5880
- return floor;
5939
+ return platformDefault;
5881
5940
  }
5882
5941
  const device = storedDevice || floor.device;
5883
5942
  return {
@@ -6049,44 +6108,61 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
6049
6108
  }
6050
6109
  async reprobeEngine() {
6051
6110
  const api = this.addonCtx?.api;
6052
- let best;
6111
+ let hardware = null;
6053
6112
  if (api) try {
6054
- const caps = await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() });
6055
- const bs = caps?.bestScore;
6056
- if (bs && bs.runtime === "python") {
6057
- const probeBackend = bs.backend;
6058
- const probeDevice = (() => {
6059
- const hw = caps.hardware;
6060
- if (probeBackend === "openvino") return defaultDeviceFor("openvino");
6061
- if (probeBackend === "coreml") return defaultDeviceFor("coreml");
6062
- if (probeBackend === "onnx") return hw?.gpu?.type === "nvidia" ? "cuda" : "cpu";
6063
- return "cpu";
6064
- })();
6065
- best = {
6066
- runtime: "python",
6067
- backend: probeBackend,
6068
- format: backendToFormat(probeBackend),
6069
- device: probeDevice
6070
- };
6071
- } else best = onnxFloorPick();
6072
- } catch {
6073
- best = onnxFloorPick();
6074
- }
6075
- else best = onnxFloorPick();
6113
+ hardware = (await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() }))?.hardware ?? null;
6114
+ } catch {}
6115
+ const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), null);
6116
+ const best = {
6117
+ runtime: "python",
6118
+ backend: pick.runtimeId,
6119
+ format: modelFormatFor(pick.runtimeId),
6120
+ device: pick.device
6121
+ };
6076
6122
  const probedLabel = `${best.backend}/${best.device ?? "default"}`;
6077
6123
  const rpNode = this.localProbeNodeId();
6078
- await this.writeStore({
6079
- [nodeEngineKey("probedBestEngine", rpNode)]: probedLabel,
6080
- [nodeEngineKey("engineBackend", rpNode)]: best.backend,
6081
- [nodeEngineKey("engineDevice", rpNode)]: best.device ?? "cpu"
6082
- });
6083
- this.log.info("Re-probed engine — wrote back engineBackend + engineDevice", { meta: {
6124
+ if (pick.runtimeId !== "onnx" || hardware !== null) {
6125
+ await this.writeStore({
6126
+ [nodeEngineKey("probedBestEngine", rpNode)]: probedLabel,
6127
+ [nodeEngineKey("engineBackend", rpNode)]: best.backend,
6128
+ [nodeEngineKey("engineDevice", rpNode)]: best.device ?? "cpu"
6129
+ });
6130
+ this.log.info("Re-probed engine (platform-deterministic) — wrote back", { meta: {
6131
+ backend: best.backend,
6132
+ device: best.device ?? null,
6133
+ probedBestEngine: probedLabel
6134
+ } });
6135
+ } else this.log.info("Re-probe: onnx floor pending gpu probe — NOT persisting (re-pick on done)", { meta: {
6084
6136
  backend: best.backend,
6085
- device: best.device ?? null,
6086
- probedBestEngine: probedLabel
6137
+ device: best.device ?? null
6087
6138
  } });
6088
6139
  return best;
6089
6140
  }
6141
+ /**
6142
+ * Re-pick the engine when the platform-probe finishes its async hardware +
6143
+ * Python detection (the `platform-probe.phase` `done` event). At boot the
6144
+ * probe's accelerator result may not be ready yet, so the engine floored to
6145
+ * onnx; once the probe answers (e.g. a Mac's CoreML/ANE surfaces after the
6146
+ * embedded Python is installed) this re-runs the probe-driven pick and
6147
+ * re-provisions. Idempotent: `startProvisioningForCurrentEngine` skips a
6148
+ * no-op when the selection is unchanged.
6149
+ */
6150
+ async repickEngineOnProbeReady() {
6151
+ const before = `${this.currentEngine.backend}/${this.currentEngine.device ?? "default"}`;
6152
+ await this.reprobeEngine();
6153
+ const stored = await this.loadEngine();
6154
+ if (stored) {
6155
+ this.currentEngine = stored;
6156
+ this.needsAutoPick = false;
6157
+ this.cancelDeferredAutoPick();
6158
+ }
6159
+ const after = `${this.currentEngine.backend}/${this.currentEngine.device ?? "default"}`;
6160
+ if (before !== after) this.log.info("Engine re-picked after platform-probe completed", { meta: {
6161
+ before,
6162
+ after
6163
+ } });
6164
+ this.startProvisioningForCurrentEngine();
6165
+ }
6090
6166
  async getReferenceAudioFiles() {
6091
6167
  const dir = resolveReferenceAudioDir();
6092
6168
  if (!dir) return [];
@@ -6554,6 +6630,7 @@ var DetectionPipelineAddon = class extends require_dist.BaseAddon {
6554
6630
  nodeEngineBackend = DEFAULT_CONFIG.engineBackend;
6555
6631
  nodeProbedBestEngine = "";
6556
6632
  engineMetricsTimer = null;
6633
+ probePhaseUnsub = null;
6557
6634
  /** Snapshot-equality cache for engine-metrics emit. Most ticks
6558
6635
  * the engine inventory is unchanged (no model load/unload), so
6559
6636
  * we skip the bus emit and let the heartbeat re-emit at
@@ -6960,11 +7037,35 @@ var DetectionPipelineAddon = class extends require_dist.BaseAddon {
6960
7037
  numWorkers: num(t["numWorkers"], 1)
6961
7038
  };
6962
7039
  }
6963
- async onInitialize() {
6964
- const modelsDir = await this.ctx.api.storage.resolve.query({
7040
+ /**
7041
+ * Resolve the directory models are downloaded into, resilient across nodes.
7042
+ * The `models` storage location is GLOBAL (hub-seeded) and can resolve to a
7043
+ * path that exists only on the hub — e.g. Docker's `/data/models`, which an
7044
+ * agent on a different filesystem (a Mac) cannot create (`ENOENT mkdir
7045
+ * /data/models`). Verify the resolved dir is creatable; otherwise fall back to
7046
+ * this node's LOCAL addon data-dir so models always land on writable disk.
7047
+ */
7048
+ async resolveModelsDir() {
7049
+ const fallback = node_path.join(this.ctx.dataDir, "models");
7050
+ const candidate = await this.ctx.api.storage.resolve.query({
6965
7051
  location: "models",
6966
7052
  relativePath: ""
6967
- }).catch(() => "camstack-data/models");
7053
+ }).catch(() => null) ?? fallback;
7054
+ try {
7055
+ await node_fs.promises.mkdir(candidate, { recursive: true });
7056
+ return candidate;
7057
+ } catch (err) {
7058
+ this.ctx.logger.warn("models dir not creatable on this node — using local data-dir", { meta: {
7059
+ resolved: candidate,
7060
+ fallback,
7061
+ error: err instanceof Error ? err.message : String(err)
7062
+ } });
7063
+ await node_fs.promises.mkdir(fallback, { recursive: true });
7064
+ return fallback;
7065
+ }
7066
+ }
7067
+ async onInitialize() {
7068
+ const modelsDir = await this.resolveModelsDir();
6968
7069
  if (!this.ctx.settings) throw new Error("DetectionPipelineAddon: ctx.settings not available");
6969
7070
  await this.refreshNodeEngineFromStore();
6970
7071
  this.pythonAddonDir = resolveAddonPythonDir();
@@ -6994,6 +7095,12 @@ var DetectionPipelineAddon = class extends require_dist.BaseAddon {
6994
7095
  await this.provider.ensureBootEngineProvisioned().catch((err) => {
6995
7096
  this.ctx.logger.warn("ensureBootEngineProvisioned failed", { meta: { error: err instanceof Error ? err.message : String(err) } });
6996
7097
  });
7098
+ this.probePhaseUnsub = this.ctx.eventBus?.subscribe({ category: require_dist.EventCategory.PlatformProbePhase }, (event) => {
7099
+ if (event.data?.phase !== "done") return;
7100
+ this.provider.repickEngineOnProbeReady().catch((err) => {
7101
+ this.ctx.logger.warn("repick on platform-probe done failed", { meta: { error: err instanceof Error ? err.message : String(err) } });
7102
+ });
7103
+ }) ?? null;
6997
7104
  await this.provider.warmPool();
6998
7105
  this.engineMetricsTimer = setInterval(() => this.emitEngineMetricsSnapshot(), ENGINE_METRICS_SNAPSHOT_INTERVAL_MS);
6999
7106
  this.lastAppliedPoolConfig = this.snapshotPoolConfig();
@@ -7067,6 +7174,10 @@ var DetectionPipelineAddon = class extends require_dist.BaseAddon {
7067
7174
  }
7068
7175
  }
7069
7176
  async onShutdown() {
7177
+ if (this.probePhaseUnsub) {
7178
+ this.probePhaseUnsub();
7179
+ this.probePhaseUnsub = null;
7180
+ }
7070
7181
  if (this.engineMetricsTimer) {
7071
7182
  clearInterval(this.engineMetricsTimer);
7072
7183
  this.engineMetricsTimer = null;
@@ -7132,10 +7243,7 @@ var DetectionPipelineAddon = class extends require_dist.BaseAddon {
7132
7243
  } catch (err) {
7133
7244
  this.ctx.logger.warn("provider shutdown failed during tuning respawn", { meta: { error: err instanceof Error ? err.message : String(err) } });
7134
7245
  }
7135
- const modelsDir = await this.ctx.api.storage.resolve.query({
7136
- location: "models",
7137
- relativePath: ""
7138
- }).catch(() => "camstack-data/models");
7246
+ const modelsDir = await this.resolveModelsDir();
7139
7247
  if (!this.ctx.settings) throw new Error("DetectionPipelineAddon: ctx.settings not available during respawn");
7140
7248
  const effectiveTuning = this.resolveBackendTuning();
7141
7249
  this.provider = new DetectionPipelineProvider(this.ctx.settings, modelsDir, this.ctx.logger, this.ctx.eventBus ?? null, () => ({ sections: [] }), {
@@ -114,7 +114,6 @@ function npuInfoFrom(hw) {
114
114
  return { type };
115
115
  }
116
116
  function envToHardwareInfo(env) {
117
- if (!env.hardware) return null;
118
117
  return {
119
118
  platform: toKnownPlatform(env.platform),
120
119
  arch: toKnownArch(env.arch),
@@ -122,8 +121,8 @@ function envToHardwareInfo(env) {
122
121
  cpuCores: 0,
123
122
  totalRAM_MB: 0,
124
123
  availableRAM_MB: 0,
125
- gpu: gpuInfoFrom(env.hardware),
126
- npu: npuInfoFrom(env.hardware)
124
+ gpu: env.hardware ? gpuInfoFrom(env.hardware) : null,
125
+ npu: env.hardware ? npuInfoFrom(env.hardware) : null
127
126
  };
128
127
  }
129
128
  function probedToHardwareInfo(hw) {
@@ -4089,6 +4088,17 @@ var ONNX_FLOOR = {
4089
4088
  * instead of duplicated inline.
4090
4089
  */
4091
4090
  function onnxFloorPick() {
4091
+ return ONNX_FLOOR;
4092
+ }
4093
+ /**
4094
+ * The platform-deterministic engine pick computed SYNCHRONOUSLY from this node's
4095
+ * own `process.platform`/`arch` alone (no probe): darwin → coreml, else → onnx
4096
+ * (gpu-dependent openvino/cuda need the probe and converge via the auto-pick).
4097
+ * Used as the ENGINE fallback when a persisted selection is unsupported on this
4098
+ * node — so a stale GLOBAL engine config (e.g. the cluster's OpenVINO choice)
4099
+ * can never force an impossible engine onto a node whose platform rejects it.
4100
+ */
4101
+ function platformDefaultPick() {
4092
4102
  const pick = pickBestRuntime(runtimeEnvFromProcess(null), null);
4093
4103
  return {
4094
4104
  runtime: "python",
@@ -4097,6 +4107,19 @@ function onnxFloorPick() {
4097
4107
  device: pick.device
4098
4108
  };
4099
4109
  }
4110
+ /**
4111
+ * Is `backend` even POSSIBLE on this node's OS/arch (ignoring gpu detail)?
4112
+ * coreml ⇒ darwin only; openvino ⇒ x64 non-darwin only; onnx ⇒ anywhere. Used to
4113
+ * reject a persisted/global engine choice that the node's PLATFORM fundamentally
4114
+ * cannot run (e.g. the cluster's OpenVINO default landing on a Mac) — distinct
4115
+ * from the gpu-dependent support (linux without a probed Intel iGPU still keeps
4116
+ * openvino as a valid platform choice; the device falls back to cpu).
4117
+ */
4118
+ function backendPossibleOnPlatform(backend) {
4119
+ if (backend === "coreml") return process.platform === "darwin";
4120
+ if (backend === "openvino") return process.arch === "x64" && process.platform !== "darwin";
4121
+ return true;
4122
+ }
4100
4123
  var DetectionPipelineProvider = class DetectionPipelineProvider {
4101
4124
  modelsDir;
4102
4125
  eventBus;
@@ -4341,17 +4364,11 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4341
4364
  */
4342
4365
  async autoPickAndPersist() {
4343
4366
  let hardware = null;
4344
- let bestBackendHint = null;
4345
4367
  try {
4346
4368
  const api = this.addonCtx?.api;
4347
- if (api) {
4348
- const caps = await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() });
4349
- hardware = caps?.hardware ?? null;
4350
- const bs = caps?.bestScore;
4351
- if (bs && bs.runtime === "python") bestBackendHint = bs.backend;
4352
- }
4369
+ if (api) hardware = (await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() }))?.hardware ?? null;
4353
4370
  } catch {}
4354
- const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), bestBackendHint);
4371
+ const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), null);
4355
4372
  const engine = {
4356
4373
  runtime: "python",
4357
4374
  backend: pick.runtimeId,
@@ -4359,8 +4376,8 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4359
4376
  device: pick.device
4360
4377
  };
4361
4378
  this.currentEngine = engine;
4362
- if (!(hardware !== null || bestBackendHint !== null)) {
4363
- this.log.warn("Auto-pick: probe returned no hardware/hintusing onnx floor WITHOUT persisting", { meta: {
4379
+ if (!(pick.runtimeId !== "onnx" || hardware !== null)) {
4380
+ this.log.info("Auto-pick: onnx floor pending gpu probe NOT persisting (re-pick on done)", { meta: {
4364
4381
  backend: pick.runtimeId,
4365
4382
  device: pick.device
4366
4383
  } });
@@ -4371,10 +4388,10 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4371
4388
  [nodeEngineKey("engineBackend", apNode)]: pick.runtimeId,
4372
4389
  [nodeEngineKey("engineDevice", apNode)]: pick.device
4373
4390
  });
4374
- this.log.info("Auto-picked engine at first boot", { meta: {
4391
+ this.log.info("Auto-picked engine (platform-deterministic)", { meta: {
4375
4392
  backend: pick.runtimeId,
4376
4393
  device: pick.device,
4377
- hint: bestBackendHint ?? "none"
4394
+ hadProbeHardware: hardware !== null
4378
4395
  } });
4379
4396
  }
4380
4397
  /** Map a backend string to a known RuntimeId, flooring to onnx. */
@@ -4434,7 +4451,9 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4434
4451
  const runtimeId = this.toRuntimeId(engine.backend);
4435
4452
  const device = engine.device ?? "cpu";
4436
4453
  const snapshot = this.provisioner.state;
4437
- if (snapshot.runtimeId === runtimeId && snapshot.device === device && snapshot.state !== "idle") return;
4454
+ const sameSelection = snapshot.runtimeId === runtimeId && snapshot.device === device;
4455
+ if (sameSelection && snapshot.state !== "idle") return;
4456
+ if (!sameSelection) this.currentSteps = null;
4438
4457
  this.provisioner.select(runtimeId, device);
4439
4458
  }
4440
4459
  /**
@@ -4670,13 +4689,52 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4670
4689
  async getDefaultSteps(engine) {
4671
4690
  return buildDefaultStepTree(engine.format);
4672
4691
  }
4692
+ /**
4693
+ * Substitute, AT RUNTIME ONLY (never persisted), any step whose configured
4694
+ * model lacks a build for that step's active engine format. A GLOBAL pipeline
4695
+ * config can pin a model that's valid on the cluster's Intel nodes (e.g. the
4696
+ * OpenVINO-only `yolov9t-int8`) but impossible on a CoreML/ONNX node — without
4697
+ * this the node crash-loops `Model "X" has no <format> format`. The operator's
4698
+ * choice stays in the persisted config; this node just runs the smallest
4699
+ * catalog model that DOES have its format. Mirrors the per-node engine
4700
+ * fallback in `loadEngine`. Catalog models only — a custom model that lacks
4701
+ * the format is left as-is (operator's responsibility).
4702
+ */
4703
+ substituteIncompatibleModels(steps) {
4704
+ const fix = (step) => {
4705
+ const format = step.engine?.format ?? this.currentEngine.format;
4706
+ let modelId = step.modelId;
4707
+ try {
4708
+ const entry = getStepDefinition(step.addonId).models.find((m) => m.id === step.modelId);
4709
+ if (entry && !entry.formats[format]) {
4710
+ const fallback = getDefaultModelForFormat(step.addonId, format);
4711
+ if (fallback !== step.modelId) {
4712
+ this.log.info("Step model lacks engine format — substituting format default (runtime)", { meta: {
4713
+ step: step.addonId,
4714
+ configured: step.modelId,
4715
+ substitute: fallback,
4716
+ format
4717
+ } });
4718
+ modelId = fallback;
4719
+ }
4720
+ }
4721
+ } catch {}
4722
+ const children = step.children?.length ? step.children.map(fix) : step.children;
4723
+ return modelId === step.modelId && children === step.children ? step : {
4724
+ ...step,
4725
+ modelId,
4726
+ ...children ? { children } : {}
4727
+ };
4728
+ };
4729
+ return steps.map(fix);
4730
+ }
4673
4731
  async getGlobalSteps() {
4674
4732
  if (this.currentSteps) return this.currentSteps;
4675
4733
  const raw = (await this.readStore())[KEY_STEPS];
4676
4734
  if (!raw) {
4677
4735
  const defaults = buildDefaultStepTree(this.currentEngine.format);
4678
4736
  if (defaults.length === 0) return null;
4679
- this.currentSteps = defaults;
4737
+ this.currentSteps = this.substituteIncompatibleModels(defaults);
4680
4738
  this.writeStore({ [KEY_STEPS]: JSON.stringify(defaults) });
4681
4739
  this.log.info("Bootstrapped default pipeline — object-detection + face + plate recognition enabled by default", { meta: { rootSteps: defaults.length } });
4682
4740
  return this.currentSteps;
@@ -4714,7 +4772,7 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
4714
4772
  this.log.info("Migration: added audio-classifier step to persisted pipeline config");
4715
4773
  }
4716
4774
  }
4717
- this.currentSteps = steps;
4775
+ this.currentSteps = this.substituteIncompatibleModels(steps);
4718
4776
  return this.currentSteps;
4719
4777
  } catch {
4720
4778
  throw new Error(`Failed to parse persisted pipeline steps: corrupt data in key "${KEY_STEPS}"`);
@@ -5865,12 +5923,13 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
5865
5923
  const storedDevice = typeof storedDeviceRaw === "string" ? storedDeviceRaw : "";
5866
5924
  const floor = onnxFloorPick();
5867
5925
  const migratedBackend = typeof storedRuntime === "string" && storedRuntime === "node" && backend === "cpu" ? "onnx" : backend;
5868
- if (!DetectionPipelineProvider.isPythonBackendAvailable(migratedBackend, this.executorOptions.pythonPath ?? "")) {
5869
- this.log.warn("Stored engine backend unavailable on this node — falling back to onnx floor", { meta: {
5926
+ if (!backendPossibleOnPlatform(migratedBackend)) {
5927
+ const platformDefault = platformDefaultPick();
5928
+ this.log.warn("Stored engine backend impossible on this platform — using platform default", { meta: {
5870
5929
  stored: migratedBackend,
5871
- fallback: `${floor.backend}`
5930
+ fallback: platformDefault.backend
5872
5931
  } });
5873
- return floor;
5932
+ return platformDefault;
5874
5933
  }
5875
5934
  const device = storedDevice || floor.device;
5876
5935
  return {
@@ -6042,44 +6101,61 @@ var DetectionPipelineProvider = class DetectionPipelineProvider {
6042
6101
  }
6043
6102
  async reprobeEngine() {
6044
6103
  const api = this.addonCtx?.api;
6045
- let best;
6104
+ let hardware = null;
6046
6105
  if (api) try {
6047
- const caps = await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() });
6048
- const bs = caps?.bestScore;
6049
- if (bs && bs.runtime === "python") {
6050
- const probeBackend = bs.backend;
6051
- const probeDevice = (() => {
6052
- const hw = caps.hardware;
6053
- if (probeBackend === "openvino") return defaultDeviceFor("openvino");
6054
- if (probeBackend === "coreml") return defaultDeviceFor("coreml");
6055
- if (probeBackend === "onnx") return hw?.gpu?.type === "nvidia" ? "cuda" : "cpu";
6056
- return "cpu";
6057
- })();
6058
- best = {
6059
- runtime: "python",
6060
- backend: probeBackend,
6061
- format: backendToFormat(probeBackend),
6062
- device: probeDevice
6063
- };
6064
- } else best = onnxFloorPick();
6065
- } catch {
6066
- best = onnxFloorPick();
6067
- }
6068
- else best = onnxFloorPick();
6106
+ hardware = (await api.platformProbe.getCapabilities.query({ nodeId: this.localProbeNodeId() }))?.hardware ?? null;
6107
+ } catch {}
6108
+ const pick = pickBestRuntime(runtimeEnvFromProcess(toProbedHardware(hardware)), null);
6109
+ const best = {
6110
+ runtime: "python",
6111
+ backend: pick.runtimeId,
6112
+ format: modelFormatFor(pick.runtimeId),
6113
+ device: pick.device
6114
+ };
6069
6115
  const probedLabel = `${best.backend}/${best.device ?? "default"}`;
6070
6116
  const rpNode = this.localProbeNodeId();
6071
- await this.writeStore({
6072
- [nodeEngineKey("probedBestEngine", rpNode)]: probedLabel,
6073
- [nodeEngineKey("engineBackend", rpNode)]: best.backend,
6074
- [nodeEngineKey("engineDevice", rpNode)]: best.device ?? "cpu"
6075
- });
6076
- this.log.info("Re-probed engine — wrote back engineBackend + engineDevice", { meta: {
6117
+ if (pick.runtimeId !== "onnx" || hardware !== null) {
6118
+ await this.writeStore({
6119
+ [nodeEngineKey("probedBestEngine", rpNode)]: probedLabel,
6120
+ [nodeEngineKey("engineBackend", rpNode)]: best.backend,
6121
+ [nodeEngineKey("engineDevice", rpNode)]: best.device ?? "cpu"
6122
+ });
6123
+ this.log.info("Re-probed engine (platform-deterministic) — wrote back", { meta: {
6124
+ backend: best.backend,
6125
+ device: best.device ?? null,
6126
+ probedBestEngine: probedLabel
6127
+ } });
6128
+ } else this.log.info("Re-probe: onnx floor pending gpu probe — NOT persisting (re-pick on done)", { meta: {
6077
6129
  backend: best.backend,
6078
- device: best.device ?? null,
6079
- probedBestEngine: probedLabel
6130
+ device: best.device ?? null
6080
6131
  } });
6081
6132
  return best;
6082
6133
  }
6134
+ /**
6135
+ * Re-pick the engine when the platform-probe finishes its async hardware +
6136
+ * Python detection (the `platform-probe.phase` `done` event). At boot the
6137
+ * probe's accelerator result may not be ready yet, so the engine floored to
6138
+ * onnx; once the probe answers (e.g. a Mac's CoreML/ANE surfaces after the
6139
+ * embedded Python is installed) this re-runs the probe-driven pick and
6140
+ * re-provisions. Idempotent: `startProvisioningForCurrentEngine` skips a
6141
+ * no-op when the selection is unchanged.
6142
+ */
6143
+ async repickEngineOnProbeReady() {
6144
+ const before = `${this.currentEngine.backend}/${this.currentEngine.device ?? "default"}`;
6145
+ await this.reprobeEngine();
6146
+ const stored = await this.loadEngine();
6147
+ if (stored) {
6148
+ this.currentEngine = stored;
6149
+ this.needsAutoPick = false;
6150
+ this.cancelDeferredAutoPick();
6151
+ }
6152
+ const after = `${this.currentEngine.backend}/${this.currentEngine.device ?? "default"}`;
6153
+ if (before !== after) this.log.info("Engine re-picked after platform-probe completed", { meta: {
6154
+ before,
6155
+ after
6156
+ } });
6157
+ this.startProvisioningForCurrentEngine();
6158
+ }
6083
6159
  async getReferenceAudioFiles() {
6084
6160
  const dir = resolveReferenceAudioDir();
6085
6161
  if (!dir) return [];
@@ -6547,6 +6623,7 @@ var DetectionPipelineAddon = class extends BaseAddon {
6547
6623
  nodeEngineBackend = DEFAULT_CONFIG.engineBackend;
6548
6624
  nodeProbedBestEngine = "";
6549
6625
  engineMetricsTimer = null;
6626
+ probePhaseUnsub = null;
6550
6627
  /** Snapshot-equality cache for engine-metrics emit. Most ticks
6551
6628
  * the engine inventory is unchanged (no model load/unload), so
6552
6629
  * we skip the bus emit and let the heartbeat re-emit at
@@ -6953,11 +7030,35 @@ var DetectionPipelineAddon = class extends BaseAddon {
6953
7030
  numWorkers: num(t["numWorkers"], 1)
6954
7031
  };
6955
7032
  }
6956
- async onInitialize() {
6957
- const modelsDir = await this.ctx.api.storage.resolve.query({
7033
+ /**
7034
+ * Resolve the directory models are downloaded into, resilient across nodes.
7035
+ * The `models` storage location is GLOBAL (hub-seeded) and can resolve to a
7036
+ * path that exists only on the hub — e.g. Docker's `/data/models`, which an
7037
+ * agent on a different filesystem (a Mac) cannot create (`ENOENT mkdir
7038
+ * /data/models`). Verify the resolved dir is creatable; otherwise fall back to
7039
+ * this node's LOCAL addon data-dir so models always land on writable disk.
7040
+ */
7041
+ async resolveModelsDir() {
7042
+ const fallback = path$1.join(this.ctx.dataDir, "models");
7043
+ const candidate = await this.ctx.api.storage.resolve.query({
6958
7044
  location: "models",
6959
7045
  relativePath: ""
6960
- }).catch(() => "camstack-data/models");
7046
+ }).catch(() => null) ?? fallback;
7047
+ try {
7048
+ await fs.promises.mkdir(candidate, { recursive: true });
7049
+ return candidate;
7050
+ } catch (err) {
7051
+ this.ctx.logger.warn("models dir not creatable on this node — using local data-dir", { meta: {
7052
+ resolved: candidate,
7053
+ fallback,
7054
+ error: err instanceof Error ? err.message : String(err)
7055
+ } });
7056
+ await fs.promises.mkdir(fallback, { recursive: true });
7057
+ return fallback;
7058
+ }
7059
+ }
7060
+ async onInitialize() {
7061
+ const modelsDir = await this.resolveModelsDir();
6961
7062
  if (!this.ctx.settings) throw new Error("DetectionPipelineAddon: ctx.settings not available");
6962
7063
  await this.refreshNodeEngineFromStore();
6963
7064
  this.pythonAddonDir = resolveAddonPythonDir();
@@ -6987,6 +7088,12 @@ var DetectionPipelineAddon = class extends BaseAddon {
6987
7088
  await this.provider.ensureBootEngineProvisioned().catch((err) => {
6988
7089
  this.ctx.logger.warn("ensureBootEngineProvisioned failed", { meta: { error: err instanceof Error ? err.message : String(err) } });
6989
7090
  });
7091
+ this.probePhaseUnsub = this.ctx.eventBus?.subscribe({ category: EventCategory.PlatformProbePhase }, (event) => {
7092
+ if (event.data?.phase !== "done") return;
7093
+ this.provider.repickEngineOnProbeReady().catch((err) => {
7094
+ this.ctx.logger.warn("repick on platform-probe done failed", { meta: { error: err instanceof Error ? err.message : String(err) } });
7095
+ });
7096
+ }) ?? null;
6990
7097
  await this.provider.warmPool();
6991
7098
  this.engineMetricsTimer = setInterval(() => this.emitEngineMetricsSnapshot(), ENGINE_METRICS_SNAPSHOT_INTERVAL_MS);
6992
7099
  this.lastAppliedPoolConfig = this.snapshotPoolConfig();
@@ -7060,6 +7167,10 @@ var DetectionPipelineAddon = class extends BaseAddon {
7060
7167
  }
7061
7168
  }
7062
7169
  async onShutdown() {
7170
+ if (this.probePhaseUnsub) {
7171
+ this.probePhaseUnsub();
7172
+ this.probePhaseUnsub = null;
7173
+ }
7063
7174
  if (this.engineMetricsTimer) {
7064
7175
  clearInterval(this.engineMetricsTimer);
7065
7176
  this.engineMetricsTimer = null;
@@ -7125,10 +7236,7 @@ var DetectionPipelineAddon = class extends BaseAddon {
7125
7236
  } catch (err) {
7126
7237
  this.ctx.logger.warn("provider shutdown failed during tuning respawn", { meta: { error: err instanceof Error ? err.message : String(err) } });
7127
7238
  }
7128
- const modelsDir = await this.ctx.api.storage.resolve.query({
7129
- location: "models",
7130
- relativePath: ""
7131
- }).catch(() => "camstack-data/models");
7239
+ const modelsDir = await this.resolveModelsDir();
7132
7240
  if (!this.ctx.settings) throw new Error("DetectionPipelineAddon: ctx.settings not available during respawn");
7133
7241
  const effectiveTuning = this.resolveBackendTuning();
7134
7242
  this.provider = new DetectionPipelineProvider(this.ctx.settings, modelsDir, this.ctx.logger, this.ctx.eventBus ?? null, () => ({ sections: [] }), {
@@ -3,7 +3,7 @@ import "./dist-CYZr2fwk.mjs";
3
3
  var e = {
4
4
  "@camstack/sdk": {
5
5
  name: "@camstack/sdk",
6
- version: "1.1.1",
6
+ version: "1.1.2",
7
7
  scope: ["default"],
8
8
  loaded: !1,
9
9
  from: "addon_stream_broker_widgets",
@@ -18,7 +18,7 @@ var e = {
18
18
  },
19
19
  "@camstack/types": {
20
20
  name: "@camstack/types",
21
- version: "1.1.1",
21
+ version: "1.1.2",
22
22
  scope: ["default"],
23
23
  loaded: !1,
24
24
  from: "addon_stream_broker_widgets",
@@ -33,7 +33,7 @@ var e = {
33
33
  },
34
34
  "@camstack/ui-library": {
35
35
  name: "@camstack/ui-library",
36
- version: "1.1.1",
36
+ version: "1.1.2",
37
37
  scope: ["default"],
38
38
  loaded: !1,
39
39
  from: "addon_stream_broker_widgets",
@@ -36,7 +36,7 @@ async function r() {
36
36
  }
37
37
  },
38
38
  "@camstack/types": {
39
- version: "1.1.1",
39
+ version: "1.1.2",
40
40
  scope: "default",
41
41
  shareConfig: {
42
42
  singleton: !0,
@@ -45,7 +45,7 @@ async function r() {
45
45
  }
46
46
  },
47
47
  "@camstack/sdk": {
48
- version: "1.1.1",
48
+ version: "1.1.2",
49
49
  scope: "default",
50
50
  shareConfig: {
51
51
  singleton: !0,
@@ -81,7 +81,7 @@ async function r() {
81
81
  }
82
82
  },
83
83
  "@camstack/ui-library": {
84
- version: "1.1.1",
84
+ version: "1.1.2",
85
85
  scope: "default",
86
86
  shareConfig: {
87
87
  singleton: !0,
@@ -30,7 +30,7 @@ async function d(e) {
30
30
  }
31
31
  }
32
32
  async function f() {
33
- return l ||= d(() => import("./_virtual_mf-localSharedImportMap___mfe_internal__addon_stream_broker_widgets-Do7lgO8N.mjs")).catch((e) => {
33
+ return l ||= d(() => import("./_virtual_mf-localSharedImportMap___mfe_internal__addon_stream_broker_widgets-DLgk22-S.mjs")).catch((e) => {
34
34
  throw l = void 0, e;
35
35
  }), l;
36
36
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@camstack/addon-pipeline",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "description": "CamStack Pipeline bundle — runner, detection, motion, decoders, audio + stream broker. Multi-entry npm package shipping 7 addons under a single bundle.",
5
5
  "keywords": [
6
6
  "camstack",