@mcoda/mswarm 0.1.76 → 0.1.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runtime.js CHANGED
@@ -1,9 +1,10 @@
1
- import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises";
2
- import { dirname, join } from "node:path";
1
+ import { chmod, lstat, mkdir, readdir, readFile, rm, writeFile } from "node:fs/promises";
2
+ import { dirname, isAbsolute, join, relative, resolve } from "node:path";
3
3
  import { hostname, homedir, platform, userInfo } from "node:os";
4
4
  import { spawn } from "node:child_process";
5
- import { createHash, randomUUID } from "node:crypto";
5
+ import { createHash, createHmac, randomUUID } from "node:crypto";
6
6
  import { MswarmCodaliExecutor } from "./codali-executor.js";
7
+ import { MSWARM_CAPABILITY_SCHEMA_VERSION, assertMswarmSafeRelativePath, validateMswarmArchiveEntry, buildMswarmCapabilityNames, buildMswarmPrivateCapabilityCatalogEntry, buildMswarmLocalArtifactUri, buildMswarmSandboxProfile, defaultMswarmArtifactAccessPolicy, defaultMswarmArtifactRetentionPolicy, projectMswarmPublicCapabilities, validateMswarmGenericJobRequest } from "@mcoda/shared";
7
8
  const DEFAULT_GATEWAY_BASE_URL = "http://127.0.0.1:8080";
8
9
  const DEFAULT_SETUP_GATEWAY_BASE_URL = "https://api.mswarm.org";
9
10
  const DEFAULT_OLLAMA_BASE_URL = "http://127.0.0.1:11434";
@@ -14,11 +15,86 @@ const DEFAULT_SELF_HOSTED_NODE_VERSION = "0.1.70";
14
15
  const DEFAULT_REQUEST_TIMEOUT_MS = 10000;
15
16
  const DEFAULT_JOB_TIMEOUT_MS = 3600000;
16
17
  const DEFAULT_SERVICE_COMMAND_TIMEOUT_MS = 60000;
18
+ const DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS = 2000;
17
19
  const DEFAULT_MCODA_BIN = "mcoda";
18
20
  const DEFAULT_MCODA_LIST_ARGS = ["agent", "list", "--json", "--refresh-health"];
19
21
  const DEFAULT_COMMAND_MAX_BUFFER = 16 * 1024 * 1024;
22
+ const DEFAULT_LOCAL_ARTIFACT_MAX_BYTES = 512 * 1024 * 1024;
20
23
  const DEFAULT_JOB_POLL_WAIT_MS = 25000;
21
24
  const DEFAULT_STREAM_EVENT_BATCH_SIZE = 8;
25
// Generic job types and the ids of the runners that execute them.
const OWNER_LOCAL_TEST_ECHO_JOB_TYPE = "tenant.test-echo";
const TEST_ECHO_RUNNER_ID = "test.echo";
const RENDER_BLENDER_JOB_TYPE = "render.blender";
const BLENDER_RENDER_RUNNER_ID = "blender.render";
const CUDA_RUN_JOB_TYPE = "cuda.run";
const CUDA_PACKAGE_RUNNER_ID = "cuda.package";
// Container images listed in the cuda.run policy as allowed.
const APPROVED_NVIDIA_CUDA_IMAGES = new Set(["nvidia/cuda:12.4.1-devel-ubuntu22.04"]);
// Catalog of generic jobs: per job type, the argument schema, the execution
// policy, the runner id and (where declared) the required capabilities.
const OWNER_LOCAL_GENERIC_JOB_CATALOG = (() => {
    const text = () => ({ type: "string" });
    // Every entry shares the same owner-local, no-network, no-raw-command policy.
    const offlineOwnerLocalPolicy = (extra = {}) => ({
        trust_mode: "owner-local",
        network: "none",
        allow_raw_command: false,
        ...extra
    });
    const testEcho = {
        job_type: OWNER_LOCAL_TEST_ECHO_JOB_TYPE,
        args_schema: {
            type: "object",
            additionalProperties: true,
            properties: {
                message: text(),
                delay_ms: { type: "number", minimum: 0 },
                repeat: { type: "number", minimum: 1 },
                fail: { type: "boolean" }
            }
        },
        policy: offlineOwnerLocalPolicy(),
        runner: TEST_ECHO_RUNNER_ID
    };
    const renderBlender = {
        job_type: RENDER_BLENDER_JOB_TYPE,
        args_schema: {
            type: "object",
            additionalProperties: false,
            properties: {
                frames: { type: ["string", "number"] },
                engine: { enum: ["cycles", "eevee", "workbench"] },
                resolution: { type: "string", pattern: "^[1-9][0-9]{0,4}x[1-9][0-9]{0,4}$" },
                output_format: { enum: ["png", "jpeg", "open_exr"] },
                scene: text(),
                camera: text()
            }
        },
        policy: offlineOwnerLocalPolicy(),
        runner: BLENDER_RENDER_RUNNER_ID,
        required_capabilities: ["software.blender"]
    };
    const cudaRun = {
        job_type: CUDA_RUN_JOB_TYPE,
        args_schema: {
            type: "object",
            additionalProperties: false,
            required: ["manifest_path", "profile", "target"],
            properties: {
                manifest_path: text(),
                profile: text(),
                target: text()
            }
        },
        policy: offlineOwnerLocalPolicy({ allowed_images: Array.from(APPROVED_NVIDIA_CUDA_IMAGES) }),
        runner: CUDA_PACKAGE_RUNNER_ID,
        required_capabilities: ["gpu.nvidia", "software.docker", "docker.nvidia"]
    };
    return [testEcho, renderBlender, cudaRun];
})();
22
98
  const SERVICE_LABEL = "com.mcoda.mswarm.self-hosted-node";
23
99
  const SYSTEMD_SERVICE_NAME = "mswarm-self-hosted-node.service";
24
100
  const WINDOWS_TASK_NAME = "MswarmSelfHostedNode";
@@ -327,6 +403,9 @@ function defaultStatePath() {
327
403
  function defaultRuntimeTokenPath() {
328
404
  return join(homedir(), ".mswarm", "self-hosted-node", "node.key");
329
405
  }
406
/**
 * Default directory of the local artifact store:
 * `<home>/.mswarm/self-hosted-node/artifacts`.
 * @returns {string} Path under the current user's home directory.
 */
function defaultArtifactStorePath() {
    const nodeStateDir = join(homedir(), ".mswarm", "self-hosted-node");
    return join(nodeStateDir, "artifacts");
}
330
409
  export async function readOrCreateSelfHostedMachineId(machineIdPath = defaultMachineIdPath()) {
331
410
  try {
332
411
  const existing = (await readFile(machineIdPath, "utf8")).trim();
@@ -537,6 +616,7 @@ function serviceEnvironment(config, env, homeDir) {
537
616
  MSWARM_GATEWAY_BASE_URL: config.gatewayBaseUrl,
538
617
  MSWARM_SELF_HOSTED_NODE_STATE_PATH: config.statePath,
539
618
  MSWARM_SELF_HOSTED_NODE_KEY_PATH: config.runtimeTokenPath,
619
+ MSWARM_SELF_HOSTED_ARTIFACT_STORE_PATH: config.artifactStorePath || null,
540
620
  MSWARM_SELF_HOSTED_RELAY_MODE: config.relayMode || "outbound",
541
621
  MSWARM_SELF_HOSTED_DIRECT_BASE_URL: config.directBaseUrl || null,
542
622
  MSWARM_SELF_HOSTED_DISCOVERY_MODE: config.discoveryMode,
@@ -550,7 +630,13 @@ function serviceEnvironment(config, env, homeDir) {
550
630
  MSWARM_SELF_HOSTED_MODEL_BLOCKLIST: config.modelBlocklist.join(","),
551
631
  MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS: String(config.heartbeatIntervalSeconds),
552
632
  MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS: String(config.requestTimeoutMs),
553
- MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS: String(config.jobTimeoutMs)
633
+ MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS: String(config.jobTimeoutMs),
634
+ MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED: config.genericJobsEnabled ? "true" : "false",
635
+ MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS: String(config.genericJobTimeoutMs),
636
+ MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY: String(config.genericJobMaxConcurrency),
637
+ MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS: config.capabilityProbeTimeoutMs
638
+ ? String(config.capabilityProbeTimeoutMs)
639
+ : null
554
640
  };
555
641
  return Object.fromEntries(Object.entries(values).filter((entry) => typeof entry[1] === "string" && entry[1] !== ""));
556
642
  }
@@ -970,6 +1056,9 @@ export async function readSelfHostedNodeConfig(env = process.env) {
970
1056
  ollamaBaseUrl: trimTrailingSlash(ollamaBaseUrl),
971
1057
  statePath,
972
1058
  runtimeTokenPath,
1059
+ artifactStorePath: optionalText(env.MSWARM_SELF_HOSTED_ARTIFACT_STORE_PATH) ||
1060
+ state.artifact_store_path ||
1061
+ defaultArtifactStorePath(),
973
1062
  invocationSigningSecret: optionalText(env.MSWARM_SELF_HOSTED_INVOCATION_SIGNING_SECRET) ||
974
1063
  optionalText(env.MSWARM_SELF_HOSTED_RELAY_SIGNING_SECRET),
975
1064
  listenHost: optionalText(env.MSWARM_SELF_HOSTED_LISTEN_HOST) || DEFAULT_LISTEN_HOST,
@@ -981,6 +1070,10 @@ export async function readSelfHostedNodeConfig(env = process.env) {
981
1070
  heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, state.heartbeat_interval_seconds || DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
982
1071
  requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, state.request_timeout_ms || DEFAULT_REQUEST_TIMEOUT_MS),
983
1072
  jobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
1073
+ genericJobsEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED ?? env.MSWARM_SELF_HOSTED_GENERIC_JOBS, state.generic_jobs_enabled === true),
1074
+ genericJobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, state.generic_job_timeout_ms || state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
1075
+ genericJobMaxConcurrency: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, state.generic_job_max_concurrency || 1),
1076
+ capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, state.capability_probe_timeout_ms || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
984
1077
  exposeAllModels: resolveDaemonExposeAllModels(env, state),
985
1078
  modelAllowlist: parseList(env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST || state.model_allowlist),
986
1079
  modelBlocklist: parseList(env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST || state.model_blocklist)
@@ -1022,6 +1115,9 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
1022
1115
  discoveryMode: parseDiscoveryMode(env.MSWARM_SELF_HOSTED_DISCOVERY_MODE),
1023
1116
  statePath,
1024
1117
  runtimeTokenPath,
1118
+ artifactStorePath: optionalText(options["artifact-store-path"]) ||
1119
+ optionalText(env.MSWARM_SELF_HOSTED_ARTIFACT_STORE_PATH) ||
1120
+ defaultArtifactStorePath(),
1025
1121
  machineIdPath: optionalText(env.MSWARM_SELF_HOSTED_MACHINE_ID_PATH) || defaultMachineIdPath(),
1026
1122
  mcodaBin: optionalText(env.MSWARM_SELF_HOSTED_MCODA_BIN) || DEFAULT_MCODA_BIN,
1027
1123
  mcodaListArgs: parseArgs(env.MSWARM_SELF_HOSTED_MCODA_LIST_ARGS, DEFAULT_MCODA_LIST_ARGS),
@@ -1030,6 +1126,10 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
1030
1126
  heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
1031
1127
  requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, DEFAULT_REQUEST_TIMEOUT_MS),
1032
1128
  jobTimeoutMs: parsePositiveInteger(options["job-timeout-ms"] || env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
1129
+ genericJobsEnabled: parseBoolean(options["enable-generic-jobs"] || env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED || env.MSWARM_SELF_HOSTED_GENERIC_JOBS, false),
1130
+ genericJobTimeoutMs: parsePositiveInteger(options["generic-job-timeout-ms"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
1131
+ genericJobMaxConcurrency: parsePositiveInteger(options["generic-job-max-concurrency"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, 1),
1132
+ capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
1033
1133
  exposeAllModels: resolveOwnerSetupExposeAllModels(options, env),
1034
1134
  modelAllowlist: allowlist,
1035
1135
  modelBlocklist: blocklist,
@@ -1148,10 +1248,17 @@ async function defaultCommandRunner(command, args, options) {
1148
1248
  let stdout = "";
1149
1249
  let stderr = "";
1150
1250
  let settled = false;
1251
+ const abort = () => {
1252
+ if (settled)
1253
+ return;
1254
+ child.kill("SIGTERM");
1255
+ finish(new Error("command aborted"));
1256
+ };
1151
1257
  const timer = setTimeout(() => {
1152
1258
  if (settled)
1153
1259
  return;
1154
1260
  settled = true;
1261
+ options.signal?.removeEventListener("abort", abort);
1155
1262
  child.kill("SIGTERM");
1156
1263
  reject(new Error(`command timed out after ${options.timeoutMs}ms: ${command}`));
1157
1264
  }, options.timeoutMs);
@@ -1160,6 +1267,7 @@ async function defaultCommandRunner(command, args, options) {
1160
1267
  return;
1161
1268
  settled = true;
1162
1269
  clearTimeout(timer);
1270
+ options.signal?.removeEventListener("abort", abort);
1163
1271
  if (error) {
1164
1272
  reject(error);
1165
1273
  return;
@@ -1188,6 +1296,11 @@ async function defaultCommandRunner(command, args, options) {
1188
1296
  }
1189
1297
  finish();
1190
1298
  });
1299
+ if (options.signal?.aborted) {
1300
+ abort();
1301
+ return;
1302
+ }
1303
+ options.signal?.addEventListener("abort", abort, { once: true });
1191
1304
  if (options.input) {
1192
1305
  child.stdin.write(options.input);
1193
1306
  }
@@ -1530,6 +1643,1387 @@ function buildCodaliPolicy(job) {
1530
1643
  maxOutputTokens: job.policy?.max_output_tokens ?? job.openai_request.max_tokens,
1531
1644
  };
1532
1645
  }
1646
/**
 * Reads a numeric job argument.
 *
 * Only numbers, bigints and non-blank strings are converted. Everything else
 * (null, undefined, booleans, blank strings, arrays, objects) yields the
 * fallback, because `Number()` would otherwise coerce such values to 0 or 1
 * and silently bypass the fallback.
 *
 * @param {unknown} value - Raw argument value.
 * @param {number} fallback - Value returned when `value` is not a finite number.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function numberArg(value, fallback) {
    const kind = typeof value;
    if (kind !== "number" && kind !== "bigint" && kind !== "string") {
        return fallback;
    }
    if (kind === "string" && value.trim() === "") {
        return fallback;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : fallback;
}
1650
/**
 * Parses a millisecond argument, rounds it down to a whole number and clamps
 * it to the range [0, max].
 * @param {unknown} value - Raw argument value.
 * @param {number} fallback - Used when `value` is not a finite number.
 * @param {number} max - Upper bound in milliseconds.
 * @returns {number} The clamped whole-millisecond value.
 */
function boundedMilliseconds(value, fallback, max) {
    const wholeMs = Math.floor(numberArg(value, fallback));
    const capped = Math.min(max, wholeMs);
    return Math.max(0, capped);
}
1653
/**
 * Maps an abort signal's reason to an error code.
 *
 * A reason of the string "timeout" is a timeout, as is an error-like reason
 * whose `name` is "TimeoutError" (the reason `AbortSignal.timeout()` uses).
 * Anything else, including a missing signal, is treated as a cancellation.
 *
 * @param {{ reason?: unknown } | null | undefined} signal - Abort signal (or signal-like object).
 * @returns {"timeout" | "cancelled"} The error code.
 */
function abortErrorCode(signal) {
    const reason = signal?.reason;
    if (reason === "timeout") {
        return "timeout";
    }
    if (typeof reason === "object" && reason !== null && reason.name === "TimeoutError") {
        return "timeout";
    }
    return "cancelled";
}
1656
/**
 * Human-readable message matching the abort code of `signal`.
 * @param {AbortSignal} signal - The aborted signal.
 * @returns {string} "generic job timed out" for timeouts, otherwise "generic job cancelled".
 */
function abortErrorMessage(signal) {
    if (abortErrorCode(signal) === "timeout") {
        return "generic job timed out";
    }
    return "generic job cancelled";
}
1659
/**
 * Waits `ms` milliseconds unless `signal` aborts first.
 *
 * Returns immediately for non-positive delays. Rejects with the abort message
 * if the signal is already aborted or aborts while waiting; the timer and the
 * abort listener are both released whichever way the wait ends.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} signal - Signal that interrupts the wait.
 * @returns {Promise<void>}
 */
async function sleepWithAbort(ms, signal) {
    if (ms <= 0) {
        return;
    }
    if (signal.aborted) {
        throw new Error(abortErrorMessage(signal));
    }
    await new Promise((wake, fail) => {
        let timerId;
        function release() {
            clearTimeout(timerId);
            signal.removeEventListener("abort", handleAbort);
        }
        function handleAbort() {
            release();
            fail(new Error(abortErrorMessage(signal)));
        }
        timerId = setTimeout(() => {
            release();
            wake();
        }, ms);
        signal.addEventListener("abort", handleAbort, { once: true });
    });
}
1681
/**
 * Turns a job id into a directory-safe name.
 *
 * Characters outside `[a-zA-Z0-9_.-]` become underscores and an empty result
 * becomes "job". Non-string ids are stringified (null/undefined count as
 * empty) instead of throwing a TypeError on `.replace`. The result is then
 * passed through `assertMswarmSafeRelativePath`.
 *
 * @param {unknown} jobId - Raw job identifier.
 * @returns {string} The value returned by `assertMswarmSafeRelativePath`.
 */
function safeLocalArtifactJobId(jobId) {
    const raw = jobId === null || jobId === undefined ? "" : String(jobId);
    const normalized = raw.replace(/[^a-zA-Z0-9_.-]/g, "_") || "job";
    return assertMswarmSafeRelativePath(normalized, "job_id");
}
1685
/**
 * Turns an artifact name into a file-safe name.
 *
 * Characters outside `[a-zA-Z0-9_.-]` become underscores. When the name is
 * missing (null/undefined) or empty, `fallback` is used; previously a missing
 * name threw a TypeError before the fallback could apply. The result is then
 * passed through `assertMswarmSafeRelativePath`.
 *
 * @param {unknown} value - Raw artifact name.
 * @param {string} fallback - Name used when `value` normalizes to an empty string.
 * @returns {string} The value returned by `assertMswarmSafeRelativePath`.
 */
function safeLocalArtifactName(value, fallback) {
    const raw = value === null || value === undefined ? "" : String(value);
    const normalized = raw.replace(/[^a-zA-Z0-9_.-]/g, "_") || fallback;
    return assertMswarmSafeRelativePath(normalized, "artifact_name");
}
1689
/**
 * Resolves `relativePath` against `root` and guarantees the result stays
 * inside `root` (the root itself is allowed).
 *
 * Escape detection looks at the first segment of the root-relative path: only
 * an exact ".." segment (or an absolute remainder) is an escape. A plain
 * `startsWith("..")` test would also reject legitimate children whose name
 * merely begins with two dots, such as "..cache/file".
 *
 * @param {string} root - Directory the result must stay within.
 * @param {string} relativePath - Path to resolve beneath `root`.
 * @returns {string} The absolute resolved path.
 * @throws {Error} "path_escape_not_allowed" when the path leaves `root`.
 */
function resolveWithinRoot(root, relativePath) {
    const rootPath = resolve(root);
    const target = resolve(rootPath, relativePath);
    const delta = relative(rootPath, target);
    if (delta === "") {
        return target;
    }
    // Split on both separators so the check holds for POSIX and Windows paths.
    const [firstSegment] = delta.split(/[\\/]/);
    if (firstSegment !== ".." && !isAbsolute(delta)) {
        return target;
    }
    throw new Error("path_escape_not_allowed");
}
1698
/**
 * SHA-256 digest of `buffer`, hex encoded.
 * @param {string | Buffer | Uint8Array} buffer - Data to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function sha256Hex(buffer) {
    const hasher = createHash("sha256");
    hasher.update(buffer);
    return hasher.digest("hex");
}
1701
/**
 * Picks the smallest finite, positive number among `values`.
 * @param {...unknown} values - Candidate byte limits; non-numbers and non-positive values are ignored.
 * @returns {number} The smallest usable limit, or DEFAULT_LOCAL_ARTIFACT_MAX_BYTES when none is usable.
 */
function positiveByteLimit(...values) {
    let smallest = Infinity;
    let found = false;
    for (const candidate of values) {
        if (typeof candidate !== "number" || !Number.isFinite(candidate) || candidate <= 0) {
            continue;
        }
        found = true;
        if (candidate < smallest) {
            smallest = candidate;
        }
    }
    return found ? smallest : DEFAULT_LOCAL_ARTIFACT_MAX_BYTES;
}
1705
/**
 * Parses an `artifact://local/<jobId>/<path...>` URI.
 *
 * @param {string} uri - Candidate artifact URI.
 * @returns {{ jobId: string, path: string } | null} The job id and artifact
 *   path (each passed through `assertMswarmSafeRelativePath`), or null when
 *   the URI is not a local artifact URI, has fewer than two path segments, or
 *   anything throws while parsing or validating it.
 */
function parseLocalArtifactUri(uri) {
    try {
        const { protocol, hostname, pathname } = new URL(uri);
        const isLocalArtifact = protocol === "artifact:" && hostname === "local";
        if (!isLocalArtifact) {
            return null;
        }
        const segments = decodeURIComponent(pathname).split("/").filter(Boolean);
        if (segments.length < 2) {
            return null;
        }
        const jobId = segments[0];
        const artifactPath = segments.slice(1).join("/");
        return {
            jobId: assertMswarmSafeRelativePath(jobId, "artifact_job_id"),
            path: assertMswarmSafeRelativePath(artifactPath, "artifact_path")
        };
    }
    catch {
        return null;
    }
}
1725
/**
 * Filesystem-backed artifact store for generic jobs.
 *
 * Each job gets `<rootDir>/<safeJobId>/{inputs,outputs}`. Inputs referenced by
 * `artifact://local/...` URIs are copied from an earlier job's outputs, and
 * declared outputs are collected into artifact records after the job ran.
 *
 * Symlinks are rejected on the collected entries AND on every intermediate
 * directory of a declared output path or input source path, so a link planted
 * part-way down a path cannot redirect a read outside the store.
 */
export class MswarmLocalArtifactStore {
    /**
     * @param {{ rootDir?: string, now?: () => Date }} [input] - Store root
     *   (defaults to `defaultArtifactStorePath()`) and clock (defaults to `new Date()`).
     */
    constructor(input = {}) {
        this.rootDir = input.rootDir || defaultArtifactStorePath();
        this.now = input.now || (() => new Date());
    }
    /**
     * Recreates the job's work directory, registers its inputs and validates
     * its declared output paths.
     * @param {string} jobId - Job identifier (sanitized for the directory name).
     * @param {object} job - Generic job request (reads inputs, outputs, policy, limits, resources, job_type).
     * @returns {Promise<object>} store, workDir, inputDir, outputDir, registeredInputs, outputSpecs and sandbox.
     */
    async prepareJobWorkspace(jobId, job) {
        const safeJobId = safeLocalArtifactJobId(jobId);
        const workDir = resolveWithinRoot(this.rootDir, safeJobId);
        const inputDir = resolveWithinRoot(workDir, "inputs");
        const outputDir = resolveWithinRoot(workDir, "outputs");
        // Start from a clean directory: anything left by an earlier run is removed.
        await rm(workDir, { recursive: true, force: true });
        await mkdir(inputDir, { recursive: true });
        await mkdir(outputDir, { recursive: true });
        const store = {
            backend: "local-dev",
            root_uri: `artifact://local/${safeJobId}`
        };
        const registeredInputs = await Promise.all((job.inputs || []).map((input, index) => this.registerInput(jobId, job, input, index, inputDir, store)));
        const outputSpecs = (job.outputs || []).map((output) => ({
            ...output,
            path: assertMswarmSafeRelativePath(output.path, "output_path")
        }));
        const sandbox = buildMswarmSandboxProfile({
            policy: job.policy,
            limits: job.limits,
            containerized: job.policy.trust_mode === "tenant-owned" || job.job_type === CUDA_RUN_JOB_TYPE,
            gpu: job.resources?.gpu ? "nvidia" : "none"
        });
        return {
            store,
            workDir,
            inputDir,
            outputDir,
            registeredInputs,
            outputSpecs,
            sandbox
        };
    }
    /**
     * Collects every declared output of a finished job.
     * @param {object} context - Workspace returned by `prepareJobWorkspace`.
     * @param {string} jobId - Job identifier recorded on each artifact.
     * @returns {Promise<object[]>} Output artifact records.
     * @throws {Error} "output_size_limit_exceeded" when the summed size passes the output limit.
     */
    async collectOutputs(context, jobId) {
        const artifacts = [];
        let totalBytes = 0;
        // Resolved on first use so a job without collected files never reads the limits.
        let totalLimit;
        for (const output of context.outputSpecs) {
            const collected = await this.collectDeclaredOutput(context, jobId, output);
            for (const artifact of collected) {
                totalBytes += artifact.size_bytes || 0;
                totalLimit ??= positiveByteLimit(context.sandbox.limits.max_output_bytes);
                if (totalBytes > totalLimit) {
                    throw new Error("output_size_limit_exceeded");
                }
                artifacts.push(artifact);
            }
        }
        return artifacts;
    }
    /**
     * Rejects a path whose intermediate directories include a symlink.
     * Only the parents of the final segment are inspected; the caller checks
     * the final entry itself. A missing parent surfaces as the ENOENT error
     * from `lstat`.
     * @param {string} baseDir - Directory the relative path starts from (not inspected itself).
     * @param {string} relativePath - Path beneath `baseDir`.
     * @param {string} errorCode - Message of the Error thrown for a symlinked parent.
     * @returns {Promise<void>}
     */
    async assertNoSymlinkedParents(baseDir, relativePath, errorCode) {
        const segments = relativePath.split(/[\\/]/).filter(Boolean);
        let current = baseDir;
        for (const segment of segments.slice(0, -1)) {
            current = join(current, segment);
            const parentStat = await lstat(current);
            if (parentStat.isSymbolicLink()) {
                throw new Error(errorCode);
            }
        }
    }
    /**
     * Registers one job input and, for local artifact URIs, copies the source
     * file into the job's input directory.
     * @param {string} jobId - Job identifier.
     * @param {object} job - Generic job request (reads `policy`).
     * @param {object} input - Input declaration (name, mount_path, required, artifact).
     * @param {number} index - Position of the input, used for the fallback file name.
     * @param {string} inputDir - The job's input directory.
     * @param {object} store - Store descriptor attached to the record.
     * @returns {Promise<object>} Input artifact record; `local_path` is set only when a file was copied.
     * @throws {Error} On size-limit, checksum, file-type or symlink violations, or
     *   when a required input cannot be materialized.
     */
    async registerInput(jobId, job, input, index, inputDir, store) {
        const mountPath = input.mount_path
            ? assertMswarmSafeRelativePath(input.mount_path, "input_mount_path")
            : safeLocalArtifactName(input.name, `input-${index}`);
        const targetPath = resolveWithinRoot(inputDir, mountPath);
        const maxArtifactBytes = positiveByteLimit(job.policy.max_artifact_bytes);
        // Cheap pre-check against the declared size before touching the disk.
        if (Number.isFinite(input.artifact.size_bytes) && input.artifact.size_bytes !== undefined) {
            if (input.artifact.size_bytes > maxArtifactBytes) {
                throw new Error("input_artifact_size_limit_exceeded");
            }
        }
        const source = parseLocalArtifactUri(input.artifact.uri);
        let localPath;
        if (source) {
            const sourceJobDir = resolveWithinRoot(this.rootDir, source.jobId);
            const sourceRelativePath = join("outputs", source.path);
            const sourcePath = resolveWithinRoot(sourceJobDir, sourceRelativePath);
            try {
                await this.assertNoSymlinkedParents(sourceJobDir, sourceRelativePath, "input_artifact_symlink_not_allowed");
                const sourceStat = await lstat(sourcePath);
                if (!sourceStat.isFile()) {
                    throw new Error("input_artifact_must_be_file");
                }
                if (sourceStat.size > maxArtifactBytes) {
                    throw new Error("input_artifact_size_limit_exceeded");
                }
                const bytes = await readFile(sourcePath);
                if (input.artifact.sha256 && input.artifact.sha256 !== sha256Hex(bytes)) {
                    throw new Error("input_artifact_checksum_mismatch");
                }
                await mkdir(dirname(targetPath), { recursive: true });
                await writeFile(targetPath, bytes);
                localPath = targetPath;
            }
            catch (error) {
                // A missing source is tolerated only for optional inputs.
                if (error.code !== "ENOENT" || input.required === true) {
                    throw error;
                }
            }
        }
        else if (input.required === true) {
            throw new Error("input_artifact_unavailable");
        }
        const registeredAt = this.now().toISOString();
        return {
            ...input.artifact,
            id: input.artifact.id || `input_${sha256Hex(Buffer.from(`${jobId}:${input.name}:${input.artifact.uri}`)).slice(0, 16)}`,
            job_id: jobId,
            name: input.name,
            scope: "input",
            registered_at: registeredAt,
            store,
            access: defaultMswarmArtifactAccessPolicy(job.policy.trust_mode === "tenant-owned" ? "tenant-scoped" : "owner-local"),
            retention: defaultMswarmArtifactRetentionPolicy(),
            ...(localPath ? { local_path: localPath } : {})
        };
    }
    /**
     * Collects one declared output, which may be a file or a directory tree.
     * @param {object} context - Workspace returned by `prepareJobWorkspace`.
     * @param {string} jobId - Job identifier.
     * @param {object} output - Output declaration (name, path, required, content_type).
     * @returns {Promise<object[]>} Artifact records; empty when an optional output is missing.
     * @throws {Error} "output_symlink_not_allowed" / "output_entry_type_not_allowed"
     *   for disallowed entries, or the filesystem error for a missing required output.
     */
    async collectDeclaredOutput(context, jobId, output) {
        const normalizedPath = assertMswarmSafeRelativePath(output.path, "output_path");
        const targetPath = resolveWithinRoot(context.outputDir, normalizedPath);
        try {
            // lstat on the target follows links in parent directories, so vet those first.
            await this.assertNoSymlinkedParents(context.outputDir, normalizedPath, "output_symlink_not_allowed");
            const targetStat = await lstat(targetPath);
            if (targetStat.isSymbolicLink()) {
                throw new Error("output_symlink_not_allowed");
            }
            if (targetStat.isDirectory()) {
                // Awaited here so a failure inside the walk is handled by the catch below.
                return await this.collectOutputDirectory(context, jobId, output, normalizedPath);
            }
            if (targetStat.isFile()) {
                return [await this.collectOutputFile(context, jobId, output, normalizedPath, targetPath)];
            }
            throw new Error("output_entry_type_not_allowed");
        }
        catch (error) {
            if (error.code === "ENOENT" && output.required !== true) {
                return [];
            }
            throw error;
        }
    }
    /**
     * Recursively collects every regular file beneath an output directory.
     * @param {object} context - Workspace returned by `prepareJobWorkspace`.
     * @param {string} jobId - Job identifier.
     * @param {object} output - Output declaration the directory belongs to.
     * @param {string} relativeDir - Directory path relative to the output directory.
     * @returns {Promise<object[]>} Artifact records for all files found.
     * @throws {Error} When an entry is a symlink or neither a file nor a directory.
     */
    async collectOutputDirectory(context, jobId, output, relativeDir) {
        const dirPath = resolveWithinRoot(context.outputDir, relativeDir);
        const entries = await readdir(dirPath, { withFileTypes: true });
        const artifacts = [];
        for (const entry of entries) {
            const childRelativePath = assertMswarmSafeRelativePath(`${relativeDir}/${entry.name}`, "output_path");
            const childPath = resolveWithinRoot(context.outputDir, childRelativePath);
            if (entry.isSymbolicLink()) {
                throw new Error("output_symlink_not_allowed");
            }
            if (entry.isDirectory()) {
                artifacts.push(...(await this.collectOutputDirectory(context, jobId, output, childRelativePath)));
            }
            else if (entry.isFile()) {
                artifacts.push(await this.collectOutputFile(context, jobId, output, childRelativePath, childPath));
            }
            else {
                throw new Error("output_entry_type_not_allowed");
            }
        }
        return artifacts;
    }
    /**
     * Builds the artifact record for a single output file.
     * @param {object} context - Workspace returned by `prepareJobWorkspace`.
     * @param {string} jobId - Job identifier.
     * @param {object} output - Output declaration the file belongs to.
     * @param {string} relativePath - File path relative to the output directory.
     * @param {string} filePath - Absolute path of the file.
     * @returns {Promise<object>} Artifact record including size and SHA-256 checksum.
     * @throws {Error} When the entry is not a regular file or exceeds the per-artifact limit.
     */
    async collectOutputFile(context, jobId, output, relativePath, filePath) {
        const stat = await lstat(filePath);
        if (!stat.isFile()) {
            throw new Error("output_entry_type_not_allowed");
        }
        const perArtifactLimit = positiveByteLimit(context.sandbox.limits.max_artifact_bytes, context.sandbox.limits.max_output_bytes);
        if (stat.size > perArtifactLimit) {
            throw new Error("output_artifact_size_limit_exceeded");
        }
        const bytes = await readFile(filePath);
        return {
            id: `output_${sha256Hex(Buffer.from(`${jobId}:${relativePath}`)).slice(0, 16)}`,
            job_id: jobId,
            name: output.path === relativePath ? output.name : `${output.name}/${relativePath}`,
            uri: buildMswarmLocalArtifactUri(jobId, relativePath),
            content_type: output.content_type,
            size_bytes: stat.size,
            sha256: sha256Hex(bytes),
            scope: "output",
            registered_at: this.now().toISOString(),
            store: context.store,
            access: defaultMswarmArtifactAccessPolicy(context.sandbox.trust_mode === "tenant-owned" ? "tenant-scoped" : "owner-local"),
            retention: defaultMswarmArtifactRetentionPolicy()
        };
    }
}
1904
/**
 * Diagnostic runner that echoes a message as stdout events.
 *
 * Reads `message`, `repeat` (clamped to 1..20), `delay_ms` (clamped to
 * 0..30000) and `fail` from the job arguments.
 */
export class MswarmTestEchoRunner {
    constructor() {
        this.id = TEST_ECHO_RUNNER_ID;
    }
    /**
     * Emits `repeat` stdout events (optionally delayed) followed by one
     * progress event.
     * @param {object} context - Run context providing `job`, `signal` and `emitEvent`.
     * @returns {Promise<object>} A succeeded job result with echo metrics.
     * @throws {Error} With the message as text when `args.fail` is true, or
     *   with the abort message when the signal aborts.
     */
    async run(context) {
        // Sampled before any work so started_at and finished_at bracket the run.
        const startedAt = new Date().toISOString();
        const args = context.job.args || {};
        const message = optionalText(args.message) || "ok";
        const repeat = Math.max(1, Math.min(20, Math.floor(numberArg(args.repeat, 1))));
        const delayMs = boundedMilliseconds(args.delay_ms, 0, 30000);
        if (args.fail === true) {
            throw new Error(message);
        }
        for (let index = 0; index < repeat; index += 1) {
            if (context.signal.aborted) {
                throw new Error(abortErrorMessage(context.signal));
            }
            if (delayMs > 0) {
                await sleepWithAbort(delayMs, context.signal);
            }
            await context.emitEvent({
                type: "stdout",
                message,
                data: {
                    runner: this.id,
                    index,
                    repeat
                }
            });
        }
        await context.emitEvent({
            type: "progress",
            message: "echo complete",
            data: {
                completed: repeat,
                total: repeat
            }
        });
        return {
            job_id: context.job.idempotency_key || "local-generic-job",
            status: "succeeded",
            exit_code: 0,
            started_at: startedAt,
            finished_at: new Date().toISOString(),
            metrics: {
                runner: this.id,
                echoed: repeat,
                message
            }
        };
    }
}
1955
// Lookup tables from the lowercase argument values to Blender identifiers.
// They are null-prototype and frozen so that a request value such as
// "constructor" or "toString" cannot resolve to an inherited Object member.
const BLENDER_ENGINE_ARGS = Object.freeze(Object.assign(Object.create(null), {
    cycles: "CYCLES",
    eevee: "BLENDER_EEVEE_NEXT",
    workbench: "BLENDER_WORKBENCH"
}));
const BLENDER_OUTPUT_FORMAT_ARGS = Object.freeze(Object.assign(Object.create(null), {
    png: "PNG",
    jpeg: "JPEG",
    open_exr: "OPEN_EXR"
}));
1965
/**
 * Accepts only values of type number that are safe integers greater than zero.
 * @param {unknown} value - Candidate value.
 * @returns {number | null} The value itself when accepted, otherwise null.
 */
function positiveSafeInteger(value) {
    const accepted = typeof value === "number" && Number.isSafeInteger(value) && value > 0;
    return accepted ? value : null;
}
1971
/**
 * Parses the `frames` argument of a render.blender job.
 *
 * Accepts nothing (frame 1), a positive integer, or a string of the form
 * "N" or "START-END" with up to seven digits per number.
 *
 * @param {unknown} value - Raw `frames` argument.
 * @returns {object} `{ mode: "frame", frame, label, total: 1 }` for a single
 *   frame, or `{ mode: "range", start, end, label, total }` for a range.
 * @throws {Error} When the value is malformed, reversed, or spans more than 10001 frames.
 */
function parseBlenderFrameSelection(value) {
    const singleFrame = (frame) => ({ mode: "frame", frame, label: String(frame), total: 1 });
    if (value === undefined || value === null) {
        return singleFrame(1);
    }
    const numericFrame = positiveSafeInteger(value);
    if (numericFrame !== null) {
        return singleFrame(numericFrame);
    }
    const match = optionalText(value)?.match(/^([1-9]\d{0,6})(?:-([1-9]\d{0,6}))?$/);
    if (!match) {
        throw new Error("render.blender args.frames must be a positive frame number or start-end range");
    }
    const [, startText, endText] = match;
    const start = Number(startText);
    const end = endText ? Number(endText) : start;
    const isValidRange = Number.isSafeInteger(start) && Number.isSafeInteger(end) && start > 0 && end > 0 && end >= start;
    if (!isValidRange) {
        throw new Error("render.blender args.frames must use a valid positive frame range");
    }
    if (end - start > 10000) {
        throw new Error("render.blender args.frames range exceeds the maximum supported 10001 frames");
    }
    if (start === end) {
        return singleFrame(start);
    }
    return { mode: "range", start, end, label: `${start}-${end}`, total: end - start + 1 };
}
1998
/**
 * Normalizes the `engine` argument of a render.blender job.
 *
 * The lookup uses an own-property check, so names that exist only on the
 * object prototype (for example "constructor") are rejected instead of being
 * accepted with a function as the Blender engine value.
 *
 * @param {unknown} value - Raw `engine` argument.
 * @returns {{ label: string, blender: string } | undefined} The lowercase
 *   label and the Blender engine identifier, or undefined when no engine was given.
 * @throws {Error} When the engine is not cycles, eevee, or workbench.
 */
function normalizeBlenderEngine(value) {
    const raw = optionalText(value);
    if (!raw) {
        return undefined;
    }
    const key = raw.toLowerCase();
    if (!Object.hasOwn(BLENDER_ENGINE_ARGS, key)) {
        throw new Error("render.blender args.engine must be cycles, eevee, or workbench");
    }
    return { label: key, blender: BLENDER_ENGINE_ARGS[key] };
}
2009
/**
 * Normalizes the `output_format` argument of a render.blender job (default "png").
 *
 * The lookup uses an own-property check, so names that exist only on the
 * object prototype (for example "constructor") are rejected instead of being
 * accepted with a function as the Blender format value.
 *
 * @param {unknown} value - Raw `output_format` argument.
 * @returns {{ label: string, blender: string, extension: string }} Lowercase
 *   label, Blender format identifier and file extension ("png", "jpg" or "exr").
 * @throws {Error} When the format is not png, jpeg, or open_exr.
 */
function normalizeBlenderOutputFormat(value) {
    const key = (optionalText(value) || "png").toLowerCase();
    if (!Object.hasOwn(BLENDER_OUTPUT_FORMAT_ARGS, key)) {
        throw new Error("render.blender args.output_format must be png, jpeg, or open_exr");
    }
    let extension = "png";
    if (key === "open_exr") {
        extension = "exr";
    }
    else if (key === "jpeg") {
        extension = "jpg";
    }
    return { label: key, blender: BLENDER_OUTPUT_FORMAT_ARGS[key], extension };
}
2017
/**
 * Parses the `resolution` argument of a render.blender job.
 * @param {unknown} value - Raw `resolution` argument in WIDTHxHEIGHT form.
 * @returns {{ width: number, height: number, label: string } | undefined}
 *   The parsed size, or undefined when no resolution was given.
 * @throws {Error} When the value is malformed or either side exceeds 16384.
 */
function parseBlenderResolution(value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    const match = optionalText(value)?.match(/^([1-9]\d{0,4})x([1-9]\d{0,4})$/i);
    if (!match) {
        throw new Error("render.blender args.resolution must use WIDTHxHEIGHT");
    }
    const [, widthText, heightText] = match;
    const width = Number(widthText);
    const height = Number(heightText);
    if (Math.max(width, height) > 16384) {
        throw new Error("render.blender args.resolution exceeds 16384x16384");
    }
    return { width, height, label: `${width}x${height}` };
}
2032
/**
 * Validates an optional Blender object name argument.
 * @param {unknown} value - Raw argument value.
 * @param {string} label - Argument name used in the error message.
 * @returns {string | undefined} The name, or undefined when none was given.
 * @throws {Error} When the name is longer than 128 characters or contains a
 *   NUL, carriage-return or line-feed character.
 */
function safeBlenderSceneName(value, label) {
    const raw = optionalText(value);
    if (!raw) {
        return undefined;
    }
    const tooLong = raw.length > 128;
    const hasControlCharacter = /[\0\r\n]/.test(raw);
    if (tooLong || hasControlCharacter) {
        throw new Error(`render.blender args.${label} is not a safe Blender object name`);
    }
    return raw;
}
2041
/**
 * Finds the local path of the scene file for a render.blender job.
 *
 * Prefers the registered input named "scene" and otherwise uses the first
 * registered input.
 *
 * @param {object} context - Run context (reads `artifacts.registeredInputs`).
 * @returns {string} The scene input's `local_path`.
 * @throws {Error} When no input exists or the chosen input has no `local_path`.
 */
function blenderSceneInputPath(context) {
    const inputs = context.artifacts.registeredInputs;
    const namedScene = inputs.find((input) => input.name === "scene");
    const scene = namedScene || inputs[0];
    if (!scene?.local_path) {
        throw new Error("render.blender requires a materialized scene input artifact");
    }
    return scene.local_path;
}
2048
/**
 * Builds the Blender output file pattern for a render.blender job.
 *
 * Uses the first declared output, which must name a directory: a final path
 * segment ending in a dot plus 1-8 alphanumeric characters is rejected as a
 * file path.
 *
 * @param {object} context - Run context (reads `artifacts.outputSpecs` and `artifacts.outputDir`).
 * @returns {string} `<outputDir>/<declared path>/frame_####`, resolved within the output directory.
 * @throws {Error} When no output is declared or the path looks like a file.
 */
function blenderOutputPattern(context) {
    const [output] = context.artifacts.outputSpecs;
    if (!output) {
        throw new Error("render.blender requires a declared output directory");
    }
    const normalizedPath = assertMswarmSafeRelativePath(output.path, "render_blender_output_path");
    const segments = normalizedPath.split("/").filter(Boolean);
    const leaf = segments.at(-1) || normalizedPath;
    const looksLikeFile = /\.[a-zA-Z0-9]{1,8}$/.test(leaf);
    if (looksLikeFile) {
        throw new Error("render.blender output path must be a directory, not a file path");
    }
    return resolveWithinRoot(context.artifacts.outputDir, `${normalizedPath}/frame_####`);
}
2060
/**
 * Replaces local filesystem paths in a log line with neutral placeholders.
 *
 * Replacements are applied longest path first. Replacing a shorter path
 * first would rewrite the prefix of every longer path that starts with it,
 * so the placeholders for those longer paths could never match.
 *
 * @param {object} context - Run context (reads `artifacts.workDir`, `inputDir`,
 *   `outputDir` and each registered input's `local_path`).
 * @param {string} value - Text to redact.
 * @returns {string} The text with known local paths replaced.
 */
function redactBlenderLocalPaths(context, value) {
    const { workDir, inputDir, outputDir, registeredInputs } = context.artifacts;
    const replacements = [
        [workDir, "[job-workdir]"],
        [inputDir, "[job-inputs]"],
        [outputDir, "[job-outputs]"],
        ...registeredInputs.map((input) => [input.local_path, "[job-input]"])
    ]
        // Inputs that were never materialized have no local_path to redact.
        .filter(([source]) => typeof source === "string" && source !== "")
        .sort(([left], [right]) => right.length - left.length);
    return replacements.reduce((text, [source, replacement]) => text.split(source).join(replacement), value);
}
2075
/**
 * Emits captured Blender output as one event per line.
 *
 * Lines are trimmed, blank lines are dropped, only the first 200 remaining
 * lines are emitted, and local paths are redacted in each message.
 *
 * @param {object} context - Run context providing `emitEvent`.
 * @param {string} type - Event type used for every emitted line.
 * @param {string} value - Raw captured output.
 * @returns {Promise<void>}
 */
async function emitBlenderOutput(context, type, value) {
    const pending = [];
    for (const rawLine of value.split(/\r?\n/)) {
        if (pending.length === 200) {
            break;
        }
        const line = rawLine.trim();
        if (line) {
            pending.push(line);
        }
    }
    for (const line of pending) {
        await context.emitEvent({
            type,
            message: redactBlenderLocalPaths(context, line),
            data: { runner: BLENDER_RENDER_RUNNER_ID }
        });
    }
}
2085
/**
 * Emits a progress event for each distinct frame number found in Blender's
 * output ("Fra:<n>" or "Frame <n>") that lies within the requested frames.
 *
 * @param {object} context - Run context providing `emitEvent`.
 * @param {string} output - Raw captured Blender output.
 * @param {object} frames - Frame selection (`mode`, `frame` or `start`/`end`, `total`).
 * @returns {Promise<void>}
 */
async function emitBlenderProgress(context, output, frames) {
    const isRange = frames.mode === "range";
    const lowerBound = isRange ? frames.start : frames.frame;
    const upperBound = isRange ? frames.end : frames.frame;
    const reported = new Set();
    for (const line of output.split(/\r?\n/)) {
        const match = line.match(/\bFra:(\d+)\b/i) || line.match(/\bFrame\s+(\d+)\b/i);
        if (!match) {
            continue;
        }
        const frame = Number(match[1]);
        const outOfRange = frame < lowerBound || frame > upperBound;
        if (!Number.isSafeInteger(frame) || outOfRange || reported.has(frame)) {
            continue;
        }
        // Each frame is reported once, however many lines mention it.
        reported.add(frame);
        await context.emitEvent({
            type: "progress",
            message: `rendered frame ${frame}`,
            data: {
                runner: BLENDER_RENDER_RUNNER_ID,
                frame,
                completed: reported.size,
                total: frames.total
            }
        });
    }
}
2110
/**
 * Builds a failed, non-retryable job result for the Blender runner.
 * @param {object} job - Job request (reads `idempotency_key`).
 * @param {string} code - Error code.
 * @param {string} message - Error message.
 * @param {string} startedAt - ISO timestamp recorded as `started_at`.
 * @returns {object} Result with status "failed", exit code 1 and `finished_at` set to the current time.
 */
function blenderFailureResult(job, code, message, startedAt) {
    const error = { code, message, retryable: false };
    const finishedAt = new Date().toISOString();
    return {
        job_id: job.idempotency_key || "render.blender",
        status: "failed",
        exit_code: 1,
        started_at: startedAt,
        finished_at: finishedAt,
        error
    };
}
2124
/**
 * Blender compute backend requested for GPU renders.
 * @returns {string} Always "CUDA".
 */
function blenderGpuComputeDeviceType() {
    // The current GPU probe only marks NVIDIA devices as available, which
    // makes CUDA the only concrete Blender compute backend this runner can
    // safely request.
    const computeDeviceType = "CUDA";
    return computeDeviceType;
}
2129
/**
 * Generic-job runner for render.blender. It validates the job arguments, starts a
 * background Blender render through the injected command runner, relays output and
 * per-frame progress as job events, and returns the final job result.
 */
export class MswarmBlenderRenderRunner {
    /**
     * @param {Function} runner Command runner invoked as `runner(command, args, options)`;
     *   this class reads `stdout` and `stderr` from the resolved value.
     */
    constructor(runner = defaultCommandRunner) {
        this.id = BLENDER_RENDER_RUNNER_ID;
        this.runner = runner;
    }
    /**
     * Executes a single render job.
     *
     * @param {object} context Job context providing `job`, `sandbox`, `signal` and `emitEvent`.
     * @returns {Promise<object>} A succeeded result with render metrics, or a failed result
     *   with code `policy_denied`, `validation_failed` or `runner_failed`.
     * @throws {Error} When the signal is already aborted, or aborts while Blender is running.
     */
    async run(context) {
        const startedAt = new Date().toISOString();
        if (context.signal.aborted) {
            throw new Error(abortErrorMessage(context.signal));
        }
        if (context.job.policy.trust_mode !== "owner-local") {
            return blenderFailureResult(context.job, "policy_denied", "render.blender is owner-local only until containerized Blender execution is available", startedAt);
        }
        let scenePath;
        let frames;
        let engine;
        let outputFormat;
        let resolution;
        let sceneName;
        let cameraName;
        let outputPattern;
        const gpuRequested = Boolean(context.job.resources?.gpu);
        try {
            const args = context.job.args || {};
            scenePath = blenderSceneInputPath(context);
            frames = parseBlenderFrameSelection(args.frames);
            engine = normalizeBlenderEngine(args.engine);
            outputFormat = normalizeBlenderOutputFormat(args.output_format);
            resolution = parseBlenderResolution(args.resolution);
            sceneName = safeBlenderSceneName(args.scene, "scene");
            cameraName = safeBlenderSceneName(args.camera, "camera");
            outputPattern = blenderOutputPattern(context);
            await mkdir(dirname(outputPattern), { recursive: true });
        }
        catch (error) {
            // Filesystem errors (e.g. from mkdir) embed host paths, so redact them here
            // exactly as the runner_failed path below does.
            const detail = error instanceof Error ? error.message : String(error || "render.blender validation failed");
            return blenderFailureResult(context.job, "validation_failed", redactBlenderLocalPaths(context, detail), startedAt);
        }
        // Scene overrides are applied through a single --python-expr argument.
        const pythonStatements = [];
        if (resolution) {
            pythonStatements.push(`bpy.context.scene.render.resolution_x=${resolution.width}`);
            pythonStatements.push(`bpy.context.scene.render.resolution_y=${resolution.height}`);
        }
        if (cameraName) {
            pythonStatements.push(`camera=bpy.data.objects.get(${JSON.stringify(cameraName)})`);
            // Keep the scene's own camera when the requested object does not exist.
            pythonStatements.push("bpy.context.scene.camera=camera if camera is not None else bpy.context.scene.camera");
        }
        if (gpuRequested) {
            const computeDeviceType = blenderGpuComputeDeviceType();
            pythonStatements.push("cycles_addon=bpy.context.preferences.addons.get('cycles')");
            pythonStatements.push("cycles_prefs=cycles_addon.preferences if cycles_addon is not None else None");
            pythonStatements.push(`setattr(cycles_prefs,'compute_device_type',${JSON.stringify(computeDeviceType)}) if cycles_prefs is not None and hasattr(cycles_prefs,'compute_device_type') else None`);
            pythonStatements.push("getattr(cycles_prefs,'get_devices',lambda: None)() if cycles_prefs is not None else None");
            pythonStatements.push("setattr(bpy.context.scene.cycles,'device','GPU') if hasattr(bpy.context.scene,'cycles') else None");
            pythonStatements.push("[setattr(device,'use',True) for device in getattr(cycles_prefs,'devices',[]) if hasattr(device,'use')] if cycles_prefs is not None else None");
        }
        const blenderArgs = ["-b", scenePath];
        if (sceneName) {
            blenderArgs.push("--scene", sceneName);
        }
        if (engine) {
            blenderArgs.push("--engine", engine.blender);
        }
        if (pythonStatements.length > 0) {
            blenderArgs.push("--python-expr", `import bpy; ${pythonStatements.join("; ")}`);
        }
        blenderArgs.push("--render-output", outputPattern, "--render-format", outputFormat.blender);
        if (frames.mode === "range") {
            blenderArgs.push("-s", String(frames.start), "-e", String(frames.end), "-a");
        }
        else {
            blenderArgs.push("--render-frame", String(frames.frame));
        }
        // The same summary is reported in the start event and in the success metrics.
        const renderSummary = {
            runner: this.id,
            frames: frames.label,
            engine: engine?.label || "scene-default",
            output_format: outputFormat.label,
            ...(resolution ? { resolution: resolution.label } : {}),
            gpu_requested: gpuRequested,
            render_device: gpuRequested ? "gpu" : "scene-default"
        };
        await context.emitEvent({
            type: "progress",
            message: "blender render starting",
            data: { ...renderSummary }
        });
        // Clamp the sandbox timeout to [1s, DEFAULT_JOB_TIMEOUT_MS].
        const timeoutMs = Math.max(1000, Math.min(DEFAULT_JOB_TIMEOUT_MS, Math.floor((context.sandbox.limits.timeout_sec || DEFAULT_JOB_TIMEOUT_MS / 1000) * 1000)));
        // Buffer at least 1 MiB of output, never more than DEFAULT_COMMAND_MAX_BUFFER.
        const maxBuffer = Math.min(DEFAULT_COMMAND_MAX_BUFFER, Math.max(1024 * 1024, context.job.limits?.max_stdout_bytes || 0, context.job.limits?.max_stderr_bytes || 0));
        try {
            const result = await this.runner("blender", blenderArgs, {
                timeoutMs,
                maxBuffer,
                signal: context.signal
            });
            await emitBlenderOutput(context, "stdout", result.stdout);
            await emitBlenderOutput(context, "stderr", result.stderr);
            await emitBlenderProgress(context, `${result.stdout}\n${result.stderr}`, frames);
            return {
                job_id: context.job.idempotency_key || "render.blender",
                status: "succeeded",
                exit_code: 0,
                started_at: startedAt,
                finished_at: new Date().toISOString(),
                metrics: { ...renderSummary }
            };
        }
        catch (error) {
            // An abort is rethrown to the caller instead of being reported as a failure.
            if (context.signal.aborted) {
                throw error;
            }
            return blenderFailureResult(context.job, "runner_failed", redactBlenderLocalPaths(context, error instanceof Error ? error.message : String(error || "Blender render failed")), startedAt);
        }
    }
}
2250
// Profile/target identifiers: an alphanumeric first character followed by up to
// 127 characters drawn from letters, digits, "_", "." and "-".
const SAFE_CUDA_IDENTIFIER = /^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,127}$/;
// Compiler flags and run arguments: 1-200 characters from a conservative set.
const SAFE_CUDA_TOKEN = /^[a-zA-Z0-9_@%+=:,./-]{1,200}$/;
// Manifest keys rejected at the manifest, profile and target level.
const UNSAFE_CUDA_MANIFEST_KEYS = new Set([
    // command execution
    "command", "cmd", "shell", "entrypoint", "docker_args",
    // host filesystem exposure
    "mount", "mounts", "volumes", "binds",
    // device, privilege and network access
    "device", "devices", "privileged", "network", "host_network"
]);
2268
/**
 * Builds the non-retryable "failed" job result used by the cuda.run runner.
 *
 * @param {object} job Job request; its `idempotency_key` becomes the job id when set.
 * @param {string} code Machine-readable error code.
 * @param {string} message Human-readable error message.
 * @param {string} startedAt ISO timestamp captured when the run began.
 * @returns {object} Failed job result stamped with the current time as `finished_at`.
 */
function cudaFailureResult(job, code, message, startedAt) {
    const jobId = job.idempotency_key || "cuda.run";
    const finishedAt = new Date().toISOString();
    const error = { code, message, retryable: false };
    return {
        job_id: jobId,
        status: "failed",
        exit_code: 1,
        started_at: startedAt,
        finished_at: finishedAt,
        error
    };
}
2282
/**
 * Validates a CUDA profile/target identifier.
 *
 * @param {unknown} value Candidate identifier.
 * @param {string} label Prefix for the error message.
 * @returns {string} The normalized identifier text.
 * @throws {Error} `${label}_invalid` when the value is empty or fails SAFE_CUDA_IDENTIFIER.
 */
function safeCudaIdentifier(value, label) {
    const text = optionalText(value);
    const accepted = Boolean(text) && SAFE_CUDA_IDENTIFIER.test(text);
    if (accepted) {
        return text;
    }
    throw new Error(`${label}_invalid`);
}
2289
/**
 * Normalizes a value with optionalText and delegates relative-path validation
 * to assertMswarmSafeRelativePath.
 *
 * @param {unknown} value Candidate relative path.
 * @param {string} label Label forwarded to the validator.
 * @returns {*} Whatever assertMswarmSafeRelativePath returns for the normalized value.
 */
function safeCudaRelativePath(value, label) {
    const candidate = optionalText(value);
    return assertMswarmSafeRelativePath(candidate, label);
}
2292
/**
 * Validates a single compiler flag or program argument.
 *
 * @param {unknown} value Candidate token.
 * @param {string} label Prefix for the error message.
 * @returns {string} The normalized token text.
 * @throws {Error} `${label}_invalid` when the token is empty, fails SAFE_CUDA_TOKEN,
 *   or contains a shell metacharacter or line break.
 */
function safeCudaToken(value, label) {
    const text = optionalText(value);
    const reject = () => {
        throw new Error(`${label}_invalid`);
    };
    if (!text) {
        reject();
    }
    if (!SAFE_CUDA_TOKEN.test(text)) {
        reject();
    }
    if (/[`$;&|<>\r\n]/.test(text)) {
        reject();
    }
    return text;
}
2299
/**
 * Validates an optional list of tokens.
 *
 * @param {unknown} value Array of tokens, or undefined for "none".
 * @param {string} label Prefix for error messages; each entry is labelled `${label}_${index}`.
 * @returns {string[]} Validated tokens (empty when value is undefined).
 * @throws {Error} `${label}_must_be_array` for any other non-array value.
 */
function safeCudaTokenList(value, label) {
    if (value === undefined) {
        return [];
    }
    if (Array.isArray(value)) {
        const validateEntry = (entry, index) => safeCudaToken(entry, `${label}_${index}`);
        return value.map(validateEntry);
    }
    throw new Error(`${label}_must_be_array`);
}
2307
/**
 * Rejects a manifest record that declares any key listed in UNSAFE_CUDA_MANIFEST_KEYS.
 *
 * @param {object} record Manifest, profile or target record.
 * @param {string} label Prefix for the error message.
 * @throws {Error} `${label}_${key}_not_allowed` for the first offending key in key order.
 */
function assertNoUnsafeCudaManifestKeys(record, label) {
    const offendingKey = Object.keys(record).find((key) => UNSAFE_CUDA_MANIFEST_KEYS.has(key));
    if (offendingKey !== undefined) {
        throw new Error(`${label}_${offendingKey}_not_allowed`);
    }
}
2314
/**
 * Extracts and validates the cuda.run job arguments.
 *
 * @param {object} job Job request whose `args` may carry `manifest_path`, `profile` and `target`.
 * @returns {{manifestPath: *, profile: string, target: string}} Validated arguments.
 * @throws {Error} Propagates the validation error of the first invalid argument.
 */
function parseCudaRunArgs(job) {
    const { manifest_path: rawManifestPath, profile: rawProfile, target: rawTarget } = job.args || {};
    const manifestPath = safeCudaRelativePath(rawManifestPath, "cuda_manifest_path");
    const profile = safeCudaIdentifier(rawProfile, "cuda_profile");
    const target = safeCudaIdentifier(rawTarget, "cuda_target");
    return { manifestPath, profile, target };
}
2322
/**
 * Locates the registered package archive for a cuda.run job.
 *
 * Prefers the input named "package"; otherwise takes the first input with a local
 * path that is not named "manifest".
 *
 * @param {object} context Job context with `artifacts.registeredInputs` and `artifacts.inputDir`.
 * @returns {{hostPath: string, inputPath: *}} Host path of the archive and its path relative to the input dir.
 * @throws {Error} `cuda_package_artifact_required` when no input qualifies, or
 *   `cuda_package_archive_must_be_targz` when the name does not end in .tar.gz/.tgz.
 */
function cudaPackageArchive(context) {
    const { registeredInputs, inputDir } = context.artifacts;
    const isNamedPackage = (input) => input.name === "package" && input.local_path;
    const isOtherLocalInput = (input) => input.local_path && input.name !== "manifest";
    const chosen = registeredInputs.find(isNamedPackage) || registeredInputs.find(isOtherLocalInput);
    if (!chosen?.local_path) {
        throw new Error("cuda_package_artifact_required");
    }
    const relativeToInputs = relative(inputDir, chosen.local_path);
    const inputPath = assertMswarmSafeRelativePath(relativeToInputs, "cuda_package_input_path");
    const looksLikeTarGz = /(\.tar\.gz|\.tgz)$/i.test(inputPath);
    if (!looksLikeTarGz) {
        throw new Error("cuda_package_archive_must_be_targz");
    }
    return { hostPath: chosen.local_path, inputPath };
}
2334
/**
 * Creates the error raised for a rejected package archive.
 *
 * @param {string} [reason] Rejection reason; "invalid" is used when it is empty.
 * @returns {Error} Error whose message is `cuda_package_archive_${reason}`.
 */
function cudaArchiveValidationError(reason) {
    const suffix = reason || "invalid";
    return new Error(`cuda_package_archive_${suffix}`);
}
2337
/**
 * Classifies one line of verbose tar listing by its first non-blank character.
 *
 * @param {string} line Line from a `tar -tv` listing.
 * @returns {string} "directory", "file", "symlink", "hardlink", "device" or "other";
 *   a line with no leading character at all is reported as "file".
 */
function cudaTarVerboseEntryType(line) {
    const marker = line.trimStart()[0];
    switch (marker) {
        case "d":
            return "directory";
        case "-":
            return "file";
        case "l":
            return "symlink";
        case "h":
            return "hardlink";
        case "b":
        case "c":
            return "device";
        default:
            return marker ? "other" : "file";
    }
}
2351
/**
 * Inspects a package archive with two tar listings before it is used.
 *
 * The plain listing (`tar -tzf`) validates every entry path; the verbose listing
 * (`tar -tvzf`) rejects any entry that is not a regular file or directory.
 *
 * @param {object} context Job context; only `signal` is read here.
 * @param {Function} runner Command runner invoked as `runner(command, args, options)`.
 * @param {{hostPath: string}} archive Archive descriptor.
 * @returns {Promise<void>} Resolves when every entry is acceptable.
 * @throws {Error} A cudaArchiveValidationError for an invalid, empty or non-regular entry.
 */
async function validateCudaPackageArchive(context, runner, archive) {
    const tarOptions = {
        timeoutMs: 5000,
        maxBuffer: 512 * 1024,
        signal: context.signal
    };
    const nonBlankLines = (text) => text.split(/\r?\n/).filter((line) => line.trim());
    const nameListing = await runner("tar", ["-tzf", archive.hostPath], tarOptions);
    const entryPaths = nonBlankLines(nameListing.stdout).map((line) => line.trim());
    for (const entryPath of entryPaths) {
        const entryType = entryPath.endsWith("/") ? "directory" : "file";
        const verdict = validateMswarmArchiveEntry({ path: entryPath, type: entryType });
        if (!verdict.ok) {
            throw cudaArchiveValidationError(verdict.reason);
        }
    }
    if (entryPaths.length === 0) {
        throw cudaArchiveValidationError("empty");
    }
    const verboseListing = await runner("tar", ["-tvzf", archive.hostPath], tarOptions);
    for (const verboseLine of nonBlankLines(verboseListing.stdout)) {
        const type = cudaTarVerboseEntryType(verboseLine);
        const isRegular = type === "file" || type === "directory";
        if (!isRegular) {
            // The shared validator supplies the rejection reason for this entry type.
            const verdict = validateMswarmArchiveEntry({ path: "entry", type });
            throw cudaArchiveValidationError(verdict.reason);
        }
    }
}
2386
/**
 * Loads the manifest text for a cuda.run job.
 *
 * A regular file at the manifest path inside the job input directory wins. When
 * that path does not exist (or is not a regular file) the manifest is streamed out
 * of the package archive with `tar -xOf`.
 *
 * @param {object} context Job context with `artifacts.inputDir` and `signal`.
 * @param {Function} runner Command runner invoked as `runner(command, args, options)`.
 * @param {{manifestPath: string}} args Parsed cuda.run arguments.
 * @returns {Promise<string>} Manifest text.
 * @throws {Error} Any filesystem error other than ENOENT, plus archive lookup or tar failures.
 */
async function readCudaManifestText(context, runner, args) {
    const manifestOnDisk = resolveWithinRoot(context.artifacts.inputDir, args.manifestPath);
    try {
        // lstat does not follow symlinks, so a symlinked manifest is not read directly.
        const info = await lstat(manifestOnDisk);
        if (info.isFile()) {
            return await readFile(manifestOnDisk, "utf8");
        }
    }
    catch (error) {
        const missing = error.code === "ENOENT";
        if (!missing) {
            throw error;
        }
    }
    const { hostPath } = cudaPackageArchive(context);
    const tarOptions = {
        timeoutMs: 5000,
        maxBuffer: 256 * 1024,
        signal: context.signal
    };
    const { stdout } = await runner("tar", ["-xOf", hostPath, args.manifestPath], tarOptions);
    return stdout;
}
2407
/**
 * Parses a CUDA package manifest and resolves the requested profile/target pair
 * into the concrete build-and-run selection.
 *
 * @param {string} text Manifest JSON text.
 * @param {{profile: string, target: string}} args Parsed cuda.run arguments.
 * @param {object} policy Job policy; `allowed_package_publishers` and `allowed_images` are consulted.
 * @returns {object} Selection with schemaVersion, packageName, publisher, image, compiler,
 *   source, output, flags (profile flags followed by target flags) and runArgs.
 * @throws {SyntaxError} When the text is not valid JSON.
 * @throws {Error} A `cuda_*` coded error for any rejected manifest field.
 */
function parseCudaPackageManifest(text, args, policy) {
    const fail = (code) => {
        throw new Error(code);
    };
    const manifest = objectRecord(JSON.parse(text));
    if (!manifest) {
        fail("cuda_manifest_must_be_object");
    }
    assertNoUnsafeCudaManifestKeys(manifest, "cuda_manifest");
    const schemaVersion = optionalText(manifest.schema_version);
    if (schemaVersion !== "2026-06-14") {
        fail("cuda_manifest_schema_version_invalid");
    }
    const packageInfo = objectRecord(manifest.package);
    const publisher = optionalText(packageInfo?.publisher);
    // An empty or missing publisher allow-list places no restriction on the publisher.
    if (policy.allowed_package_publishers?.length) {
        const publisherAllowed = Boolean(publisher) && policy.allowed_package_publishers.includes(publisher);
        if (!publisherAllowed) {
            fail("cuda_manifest_publisher_not_allowed");
        }
    }
    const profileTable = objectRecord(manifest.profiles);
    const targetTable = objectRecord(manifest.targets);
    const profile = objectRecord(profileTable?.[args.profile]);
    const target = objectRecord(targetTable?.[args.target]);
    if (!profile) {
        fail("cuda_manifest_profile_not_found");
    }
    if (!target) {
        fail("cuda_manifest_target_not_found");
    }
    assertNoUnsafeCudaManifestKeys(profile, "cuda_manifest_profile");
    assertNoUnsafeCudaManifestKeys(target, "cuda_manifest_target");
    const image = optionalText(profile.image);
    if (!image || !APPROVED_NVIDIA_CUDA_IMAGES.has(image)) {
        fail("cuda_image_not_approved");
    }
    // Unlike publishers, images are denied unless the policy lists them.
    if (!policy.allowed_images?.includes(image)) {
        fail("cuda_image_not_allowed_by_policy");
    }
    const compiler = optionalText(profile.compiler) || "nvcc";
    if (compiler !== "nvcc") {
        fail("cuda_compiler_not_allowed");
    }
    const source = safeCudaRelativePath(target.source, "cuda_target_source");
    if (!source.endsWith(".cu")) {
        fail("cuda_target_source_must_be_cu");
    }
    const requestedOutput = optionalText(target.output) || `bin/${args.target}`;
    const output = safeCudaRelativePath(requestedOutput, "cuda_target_output");
    const packageName = optionalText(packageInfo?.name) ?? undefined;
    const profileFlags = safeCudaTokenList(profile.flags, "cuda_profile_flags");
    const targetFlags = safeCudaTokenList(target.flags, "cuda_target_flags");
    const runArgs = safeCudaTokenList(target.args, "cuda_target_args");
    return {
        schemaVersion,
        packageName,
        publisher: publisher ?? undefined,
        image,
        compiler,
        source,
        output,
        flags: [...profileFlags, ...targetFlags],
        runArgs
    };
}
2465
/**
 * Replaces host-local job paths in a message with neutral placeholders.
 *
 * Longer paths are substituted first so a directory prefix never hides a more
 * specific registered input path.
 *
 * @param {object} context Job context with `artifacts.registeredInputs`, `inputDir`, `outputDir`, `workDir`.
 * @param {string} value Text to sanitize.
 * @returns {string} Text with every known local path replaced.
 */
function redactCudaLocalPaths(context, value) {
    const { registeredInputs, inputDir, outputDir, workDir } = context.artifacts;
    const pathLength = (pair) => pair[0]?.length || 0;
    const substitutions = registeredInputs.map((input) => [input.local_path, "[job-input]"]);
    substitutions.push([inputDir, "[job-inputs]"], [outputDir, "[job-outputs]"], [workDir, "[job-workdir]"]);
    substitutions.sort((left, right) => pathLength(right) - pathLength(left));
    return substitutions.reduce((text, [localPath, placeholder]) => {
        // Entries without a path (e.g. inputs never materialized locally) are skipped.
        if (!localPath) {
            return text;
        }
        return text.split(localPath).join(placeholder);
    }, value);
}
2481
/**
 * Forwards captured process output as job events, one per non-blank line.
 *
 * Lines are trimmed, path-redacted and capped at the first 200 non-blank lines.
 *
 * @param {object} context Job context providing `emitEvent`.
 * @param {string} type Event type to emit (the runner passes "stdout" or "stderr").
 * @param {string} value Raw captured output.
 * @returns {Promise<void>}
 */
async function emitCudaOutput(context, type, value) {
    const maxLines = 200;
    let emitted = 0;
    for (const rawLine of value.split(/\r?\n/)) {
        if (emitted >= maxLines) {
            break;
        }
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        emitted += 1;
        await context.emitEvent({
            type,
            message: redactCudaLocalPaths(context, line),
            data: { runner: CUDA_PACKAGE_RUNNER_ID }
        });
    }
}
2491
/**
 * Renders the bash script executed inside the CUDA container: unpack the package
 * archive, compile the selected source with nvcc, then run the resulting binary.
 *
 * @param {{archiveInputPath: string, selection: object}} input Archive path relative to
 *   the mounted inputs plus the manifest selection (source, output, flags, runArgs).
 * @returns {string} Newline-joined script text.
 */
function buildCudaRunnerScript(input) {
    const { selection, archiveInputPath } = input;
    const workRoot = "/workspace/work";
    const sourceRoot = `${workRoot}/src`;
    const binaryPath = `${workRoot}/${selection.output}`;
    const shellLine = (words) => words.map(quotePosixShellValue).join(" ");
    const compileCommand = shellLine([
        "/usr/local/cuda/bin/nvcc",
        ...selection.flags,
        "-o",
        binaryPath,
        `${sourceRoot}/${selection.source}`
    ]);
    const runCommand = shellLine([binaryPath, ...selection.runArgs]);
    const archiveInContainer = quotePosixShellValue(`/workspace/inputs/${archiveInputPath}`);
    const binaryDir = quotePosixShellValue(dirname(binaryPath));
    const scriptLines = [];
    scriptLines.push("set -euo pipefail");
    scriptLines.push("mkdir -p /workspace/work/src /workspace/outputs");
    scriptLines.push(`tar -xzf ${archiveInContainer} -C /workspace/work/src`);
    scriptLines.push(`mkdir -p ${binaryDir}`);
    scriptLines.push("cd /workspace/work/src");
    scriptLines.push(compileCommand);
    scriptLines.push(runCommand);
    return scriptLines.join("\n");
}
2515
/**
 * Formats a Docker bind-mount specification.
 *
 * @param {string} hostPath Path on the host.
 * @param {string} containerPath Mount point inside the container.
 * @param {string} mode Mount mode, e.g. "ro" or "rw".
 * @returns {string} `host:container:mode`.
 */
function dockerBindMount(hostPath, containerPath, mode) {
    const separator = ":";
    return "".concat(hostPath, separator, containerPath, separator, mode);
}
2518
/**
 * Formats a size given in gigabytes for a Docker limit flag.
 *
 * Values of at least 1 GB keep whole-gigabyte formatting. Smaller values are
 * expressed in megabytes so they are not floored to zero.
 *
 * @param {number} gigabytes Positive, finite size in GB.
 * @param {string} gbUnit Unit suffix for gigabytes.
 * @param {string} mbUnit Unit suffix for megabytes.
 * @param {number} minMegabytes Smallest megabyte value to emit.
 * @returns {string} Size string such as "8g" or "512m".
 */
function formatCudaDockerSize(gigabytes, gbUnit, mbUnit, minMegabytes) {
    if (gigabytes >= 1) {
        return `${Math.floor(gigabytes)}${gbUnit}`;
    }
    return `${Math.max(minMegabytes, Math.floor(gigabytes * 1024))}${mbUnit}`;
}
/**
 * Builds the `docker run` argument list for a CUDA package job: no network, no
 * image pull, read-only root filesystem, all capabilities dropped, NVIDIA runtime,
 * and bind mounts for inputs (ro), outputs (rw), work dir (rw) and the runner script (ro).
 *
 * @param {object} input `context` (job, sandbox, artifacts), `selection` (manifest
 *   selection; only `image` is read here), `workPath` and `scriptPath`.
 * @returns {string[]} Arguments to pass to the `docker` executable.
 */
function buildCudaDockerArgs(input) {
    const { context, selection } = input;
    const resources = context.job.resources;
    // Docker expects a whole GPU count; non-numeric or sub-1 requests fall back to 1.
    const requestedGpus = Number(resources?.gpu?.count);
    const gpuCount = Number.isFinite(requestedGpus) ? Math.max(1, Math.floor(requestedGpus)) : 1;
    const args = [
        "run",
        "--rm",
        "--pull",
        "never",
        "--network",
        "none",
        "--runtime",
        "nvidia",
        "--gpus",
        `count=${gpuCount}`,
        "--user",
        context.sandbox.container.user,
        "--read-only",
        "--cap-drop",
        "ALL",
        "--security-opt",
        "no-new-privileges",
        "--workdir",
        "/workspace",
        "--env",
        "CUDA_CACHE_PATH=/workspace/work/.cuda-cache",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,size=64m"
    ];
    const memoryGb = resources?.memory_gb;
    // Only positive limits are forwarded. Flooring a sub-1 GB request to "0g" would
    // drop the limit, so small values are sent in megabytes (6m is assumed to be
    // Docker's minimum memory limit — TODO confirm against the daemon version).
    if (Number.isFinite(memoryGb) && memoryGb > 0) {
        args.push("--memory", formatCudaDockerSize(memoryGb, "g", "m", 6));
    }
    const diskGb = resources?.disk_gb;
    if (Number.isFinite(diskGb) && diskGb > 0) {
        args.push("--storage-opt", `size=${formatCudaDockerSize(diskGb, "G", "M", 1)}`);
    }
    args.push(
        "-v", dockerBindMount(context.artifacts.inputDir, "/workspace/inputs", "ro"),
        "-v", dockerBindMount(context.artifacts.outputDir, "/workspace/outputs", "rw"),
        "-v", dockerBindMount(input.workPath, "/workspace/work", "rw"),
        "-v", dockerBindMount(input.scriptPath, "/workspace/__mcoda_cuda_run.sh", "ro"),
        selection.image,
        "/bin/bash",
        "/workspace/__mcoda_cuda_run.sh"
    );
    return args;
}
2554
/**
 * Generic-job runner for cuda.run. It validates policy and inputs, resolves the
 * package manifest into a build selection, writes the runner script, and executes
 * it inside a locked-down Docker container through the injected command runner.
 */
export class MswarmCudaPackageRunner {
    /**
     * @param {Function} runner Command runner invoked as `runner(command, args, options)`;
     *   this class reads `stdout` and `stderr` from the resolved value.
     */
    constructor(runner = defaultCommandRunner) {
        this.id = CUDA_PACKAGE_RUNNER_ID;
        this.runner = runner;
    }
    /**
     * Executes a single cuda.run job.
     *
     * @param {object} context Job context providing `job`, `sandbox`, `artifacts`, `signal` and `emitEvent`.
     * @returns {Promise<object>} A succeeded result with metrics, or a failed result with
     *   code `policy_denied`, `validation_failed` or `runner_failed`.
     * @throws {Error} When the signal is already aborted, or aborts while the container runs.
     */
    async run(context) {
        const startedAt = new Date().toISOString();
        if (context.signal.aborted) {
            throw new Error(abortErrorMessage(context.signal));
        }
        const fail = (code, message) => cudaFailureResult(context.job, code, message, startedAt);
        const describe = (error, fallback) => redactCudaLocalPaths(context, error instanceof Error ? error.message : String(error || fallback));
        const requestedGpuCount = () => Math.max(1, context.job.resources.gpu.count || 1);
        // Policy and shape checks, in order; the first failing one decides the result.
        if (context.job.policy.network !== "none") {
            return fail("policy_denied", "cuda.run requires network policy none");
        }
        if (context.job.policy.allow_raw_command !== false) {
            return fail("policy_denied", "cuda.run does not allow raw commands");
        }
        if (!context.job.resources?.gpu) {
            return fail("validation_failed", "cuda.run requires GPU resources");
        }
        if (!context.job.outputs?.length) {
            return fail("validation_failed", "cuda.run requires declared outputs");
        }
        let args;
        let archive;
        let selection;
        let scriptPath;
        let workPath;
        try {
            args = parseCudaRunArgs(context.job);
            archive = cudaPackageArchive(context);
            await validateCudaPackageArchive(context, this.runner, archive);
            const manifestText = await readCudaManifestText(context, this.runner, args);
            selection = parseCudaPackageManifest(manifestText, args, context.job.policy);
            scriptPath = resolveWithinRoot(context.artifacts.workDir, "__mcoda_cuda_run.sh");
            workPath = resolveWithinRoot(context.artifacts.workDir, "cuda-work");
            await mkdir(workPath, { recursive: true });
            // Both writable mounts are opened to all users; presumably so the container
            // user can write to them regardless of host ownership — confirm against sandbox setup.
            await chmod(workPath, 0o777);
            await chmod(context.artifacts.outputDir, 0o777);
            const script = buildCudaRunnerScript({ archiveInputPath: archive.inputPath, selection });
            await writeFile(scriptPath, script, { mode: 0o644 });
        }
        catch (error) {
            return fail("validation_failed", describe(error, "cuda.run validation failed"));
        }
        const dockerArgs = buildCudaDockerArgs({
            context,
            selection,
            archiveInputPath: archive.inputPath,
            scriptPath,
            workPath
        });
        await context.emitEvent({
            type: "progress",
            message: "cuda package container starting",
            data: {
                runner: this.id,
                image: selection.image,
                profile: args.profile,
                target: args.target,
                gpu_count: requestedGpuCount(),
                network: "none",
                container_user: context.sandbox.container.user
            }
        });
        // Clamp the sandbox timeout to [1s, DEFAULT_JOB_TIMEOUT_MS].
        const requestedTimeoutSec = context.sandbox.limits.timeout_sec || DEFAULT_JOB_TIMEOUT_MS / 1000;
        const timeoutMs = Math.max(1000, Math.min(DEFAULT_JOB_TIMEOUT_MS, Math.floor(requestedTimeoutSec * 1000)));
        // Buffer at least 1 MiB of output, never more than DEFAULT_COMMAND_MAX_BUFFER.
        const requestedBuffer = Math.max(1024 * 1024, context.job.limits?.max_stdout_bytes || 0, context.job.limits?.max_stderr_bytes || 0);
        const maxBuffer = Math.min(DEFAULT_COMMAND_MAX_BUFFER, requestedBuffer);
        try {
            const { stdout, stderr } = await this.runner("docker", dockerArgs, {
                timeoutMs,
                maxBuffer,
                signal: context.signal
            });
            await emitCudaOutput(context, "stdout", stdout);
            await emitCudaOutput(context, "stderr", stderr);
            await context.emitEvent({
                type: "progress",
                message: "cuda package container completed",
                data: {
                    runner: this.id,
                    profile: args.profile,
                    target: args.target
                }
            });
            return {
                job_id: context.job.idempotency_key || "cuda.run",
                status: "succeeded",
                exit_code: 0,
                started_at: startedAt,
                finished_at: new Date().toISOString(),
                metrics: {
                    runner: this.id,
                    image: selection.image,
                    profile: args.profile,
                    target: args.target,
                    package: selection.packageName,
                    publisher: selection.publisher,
                    gpu_count: requestedGpuCount(),
                    network: "none",
                    container_user: context.sandbox.container.user
                }
            };
        }
        catch (error) {
            // An abort is rethrown to the caller instead of being reported as a failure.
            if (context.signal.aborted) {
                throw error;
            }
            return fail("runner_failed", describe(error, "cuda.run failed"));
        }
    }
}
2663
/**
 * Creates the built-in generic-job runners: test echo, Blender render and CUDA package.
 *
 * @param {Function} [runner] Command runner shared by the Blender and CUDA runners.
 * @returns {object[]} Runner instances in registration order.
 */
function createDefaultGenericJobRunners(runner = defaultCommandRunner) {
    const echoRunner = new MswarmTestEchoRunner();
    const blenderRunner = new MswarmBlenderRenderRunner(runner);
    const cudaRunner = new MswarmCudaPackageRunner(runner);
    return [echoRunner, blenderRunner, cudaRunner];
}
2666
/**
 * Returns the distinct non-empty strings of a list in default sort order.
 *
 * @param {Array<*>} values Candidate values; non-strings and empty strings are dropped.
 * @returns {string[]} De-duplicated, sorted strings.
 */
function uniqueSortedStrings(values) {
    const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
    const distinct = new Set(values.filter(isNonEmptyString));
    return [...distinct].sort();
}
2669
/**
 * Resolves the timeout used for capability probe commands.
 *
 * @param {object} config Runtime config; `capabilityProbeTimeoutMs` is the override.
 * @returns {*} Result of parsePositiveInteger with DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS as fallback.
 */
function capabilityProbeTimeoutMs(config) {
    const configured = config.capabilityProbeTimeoutMs;
    return parsePositiveInteger(configured, DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS);
}
2672
/**
 * Extracts a printable message from a failed capability probe.
 *
 * @param {unknown} error Thrown value.
 * @returns {string} The Error's message when it has one, otherwise the stringified
 *   value, or "capability probe failed" for a falsy value.
 */
function capabilityCommandFailureMessage(error) {
    const ownMessage = error instanceof Error ? error.message : "";
    return ownMessage || String(error || "capability probe failed");
}
2677
/**
 * Decides whether a probe failure means the command is not installed.
 *
 * @param {unknown} error Thrown value from the command runner.
 * @param {string} [stderr] Captured stderr, if any.
 * @returns {boolean} True when the combined, lower-cased text looks like a
 *   "command not found" style failure.
 */
function isMissingCapabilityCommand(error, stderr = "") {
    const missingCommandPattern = /enoent|not found|command not found|no such file|executable file not found/;
    const combined = "".concat(capabilityCommandFailureMessage(error), "\n", stderr);
    return missingCommandPattern.test(combined.toLowerCase());
}
2681
/**
 * Runs one probe command and converts the outcome into a result object instead
 * of letting the failure propagate.
 *
 * @param {Function} runner Command runner invoked as `runner(command, args, options)`.
 * @param {string} command Executable name.
 * @param {string[]} args Command arguments.
 * @param {number} timeoutMs Probe timeout in milliseconds.
 * @returns {Promise<object>} `{ ok: true, stdout, stderr }` on success, otherwise
 *   `{ ok: false, missing, message }`.
 */
async function runCapabilityCommand(runner, command, args, timeoutMs) {
    // Probe output is capped at 512 KiB (or the global cap when that is smaller).
    const maxBuffer = Math.min(DEFAULT_COMMAND_MAX_BUFFER, 512 * 1024);
    let completed;
    try {
        completed = await runner(command, args, { timeoutMs, maxBuffer });
        return { ok: true, stdout: completed.stdout, stderr: completed.stderr };
    }
    catch (error) {
        const missing = isMissingCapabilityCommand(error);
        const message = capabilityCommandFailureMessage(error);
        return { ok: false, missing, message };
    }
}
2697
/**
 * Converts a memory figure into gigabytes rounded to one decimal, dividing by 1024
 * (the probe requests `memory.total` with `nounits`, i.e. a MiB count).
 *
 * @param {string} [value] Memory field text; every character except digits and "." is ignored.
 * @returns {number|undefined} Gigabytes, or undefined when the field is empty,
 *   non-numeric or not positive.
 */
function parseNvidiaSmiMemoryGb(value) {
    const megabytes = value ? Number(value.replace(/[^\d.]/g, "")) : NaN;
    const usable = Number.isFinite(megabytes) && megabytes > 0;
    return usable ? Math.round((megabytes / 1024) * 10) / 10 : undefined;
}
2705
/**
 * Parses CSV rows from `nvidia-smi --query-gpu=index,name,memory.total,driver_version,compute_cap`
 * (an optional sixth column is read as the CUDA version) into a GPU capability probe.
 *
 * Placeholder fields such as "[N/A]" or "[Not Supported]" are treated as absent,
 * and rows whose first column is non-empty but not an integer index are skipped
 * as diagnostic text (for example "No devices were found").
 *
 * @param {string} stdout Raw nvidia-smi output.
 * @returns {object} Probe with `status`, `count`, `vendors`, `devices` and, when known,
 *   `cuda_versions`, `max_vram_gb`; `message` is set when no device row was found.
 */
function parseNvidiaGpuProbe(stdout) {
    const placeholderPattern = /^\[?(?:n\/a|not supported|unknown error)\]?$/i;
    const cleanField = (part) => {
        const text = part.trim();
        return placeholderPattern.test(text) ? "" : text;
    };
    const devices = [];
    const cudaVersions = new Set();
    for (const line of stdout.split(/\r?\n/)) {
        const trimmed = line.trim();
        if (!trimmed)
            continue;
        const [index, name, memoryMb, driverVersion, computeCapability, cudaVersion] = trimmed
            .split(",")
            .map(cleanField);
        // The first queried column is the numeric GPU index; anything else is not a device row.
        if (index && !/^\d+$/.test(index))
            continue;
        const id = index ? `gpu-${index}` : `gpu-${devices.length}`;
        if (cudaVersion) {
            cudaVersions.add(cudaVersion);
        }
        const vramGb = parseNvidiaSmiMemoryGb(memoryMb);
        devices.push({
            id,
            vendor: "nvidia",
            ...(name ? { name } : {}),
            ...(vramGb ? { vram_gb: vramGb } : {}),
            ...(driverVersion ? { driver_version: driverVersion } : {}),
            ...(cudaVersion ? { cuda_version: cudaVersion } : {}),
            ...(computeCapability ? { compute_capability: computeCapability } : {}),
            capabilities: ["cuda"]
        });
    }
    // Largest VRAM across devices that reported one; undefined when none did.
    const maxVramGb = devices.reduce((max, device) => {
        if (!Number.isFinite(device.vram_gb))
            return max;
        return max === undefined ? device.vram_gb : Math.max(max, device.vram_gb || 0);
    }, undefined);
    return {
        status: devices.length > 0 ? "available" : "missing",
        count: devices.length,
        vendors: devices.length > 0 ? ["nvidia"] : [],
        devices,
        ...(cudaVersions.size > 0 ? { cuda_versions: Array.from(cudaVersions).sort() } : {}),
        ...(maxVramGb !== undefined ? { max_vram_gb: maxVramGb } : {}),
        ...(devices.length === 0 ? { message: "nvidia-smi returned no GPU rows" } : {})
    };
}
2745
/**
 * Pulls the "CUDA Version: X[.Y]" figure out of nvidia-smi output.
 *
 * @param {string} stdout nvidia-smi output text.
 * @returns {string|undefined} Version text, or undefined when no match is present.
 */
function parseNvidiaSmiCudaVersion(stdout) {
    const found = stdout.match(/CUDA\s+Version:\s*([0-9]+(?:\.[0-9]+)?)/i);
    if (found === null) {
        return undefined;
    }
    return found[1];
}
2748
/**
 * Probes NVIDIA GPUs with two nvidia-smi calls: a CSV device query, then a plain
 * invocation whose banner is scanned for the CUDA version.
 *
 * @param {Function} runner Command runner forwarded to runCapabilityCommand.
 * @param {number} timeoutMs Per-command timeout in milliseconds.
 * @returns {Promise<object>} GPU probe; when the banner yields a CUDA version it is
 *   merged into `cuda_versions` and filled in on devices that lack their own.
 */
async function probeNvidiaGpuCapabilities(runner, timeoutMs) {
    const queryArgs = ["--query-gpu=index,name,memory.total,driver_version,compute_cap", "--format=csv,noheader,nounits"];
    const deviceQuery = await runCapabilityCommand(runner, "nvidia-smi", queryArgs, timeoutMs);
    if (!deviceQuery.ok) {
        const status = deviceQuery.missing ? "missing" : "error";
        return { status, count: 0, vendors: [], devices: [], message: deviceQuery.message };
    }
    const probe = parseNvidiaGpuProbe(deviceQuery.stdout);
    const banner = await runCapabilityCommand(runner, "nvidia-smi", [], timeoutMs);
    // The banner is a best-effort enrichment; any failure leaves the device probe untouched.
    const cudaVersion = banner.ok ? parseNvidiaSmiCudaVersion(banner.stdout || banner.stderr) : undefined;
    if (!banner.ok || !cudaVersion) {
        return probe;
    }
    const knownVersions = probe.cuda_versions || [];
    const mergedVersions = Array.from(new Set([...knownVersions, cudaVersion])).sort();
    const withVersion = (device) => ({
        ...device,
        cuda_version: device.cuda_version || cudaVersion
    });
    return {
        ...probe,
        cuda_versions: mergedVersions,
        devices: probe.devices.map(withVersion)
    };
}
2778
/**
 * Builds a software probe result for a tool that is not installed.
 *
 * @param {string} name Probe name.
 * @param {string} [message] Optional detail; omitted from the result when falsy.
 * @returns {object} `{ name, status: "missing" }` plus `message` when provided.
 */
function missingSoftwareProbe(name, message) {
    const probe = { name, status: "missing" };
    if (message) {
        probe.message = message;
    }
    return probe;
}
2785
/**
 * Builds a software probe result for a tool whose probe failed.
 *
 * @param {string} name Probe name.
 * @param {string} message Failure detail (always included, even when undefined).
 * @returns {object} `{ name, status: "error", message }`.
 */
function errorSoftwareProbe(name, message) {
    const status = "error";
    return { name, status, message };
}
2792
/**
 * Extracts a version string from the first non-blank line of a tool's output.
 *
 * @param {string} stdout Output of the tool's version command.
 * @param {string} tool Tool name; "blender" and "ffmpeg" have dedicated patterns.
 * @returns {string|undefined} The matched version; for other tools the whole first
 *   line, or undefined when nothing usable is found.
 */
function extractToolVersion(stdout, tool) {
    const firstNonBlank = stdout.split(/\r?\n/).find((line) => line.trim().length > 0);
    const headline = firstNonBlank?.trim() || "";
    switch (tool) {
        case "blender":
            return headline.match(/Blender\s+([^\s]+)/i)?.[1];
        case "ffmpeg":
            return headline.match(/ffmpeg\s+version\s+([^\s]+)/i)?.[1];
        default:
            return headline || undefined;
    }
}
2802
/**
 * Probes a tool by running its version command.
 *
 * @param {Function} runner Command runner forwarded to runCapabilityCommand.
 * @param {string} name Probe name; also selects the version pattern.
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments for the version command.
 * @param {number} timeoutMs Probe timeout in milliseconds.
 * @returns {Promise<object>} A missing, error or available probe; `version` is
 *   included only when one could be extracted from stdout (or stderr when stdout is empty).
 */
async function probeVersionedSoftware(runner, name, command, args, timeoutMs) {
    const outcome = await runCapabilityCommand(runner, command, args, timeoutMs);
    if (!outcome.ok) {
        const buildProbe = outcome.missing ? missingSoftwareProbe : errorSoftwareProbe;
        return buildProbe(name, outcome.message);
    }
    const version = extractToolVersion(outcome.stdout || outcome.stderr, name);
    const probe = { name, status: "available" };
    if (version) {
        probe.version = version;
    }
    return probe;
}
2815
/**
 * Probes Docker and its NVIDIA runtime by inspecting `docker info` runtimes.
 *
 * @param {Function} runner Command runner forwarded to runCapabilityCommand.
 * @param {number} timeoutMs Probe timeout in milliseconds.
 * @returns {Promise<{docker: object, dockerNvidia: object}>} Probe results for Docker
 *   itself and for the "nvidia" runtime (matched case-insensitively).
 */
async function probeDockerCapabilities(runner, timeoutMs) {
    const info = await runCapabilityCommand(runner, "docker", ["info", "--format", "{{json .Runtimes}}"], timeoutMs);
    if (!info.ok) {
        const buildProbe = info.missing ? missingSoftwareProbe : errorSoftwareProbe;
        const docker = buildProbe("docker", info.message);
        // Without Docker the NVIDIA runtime inherits Docker's status and message.
        const dockerNvidia = { name: "docker-nvidia", status: docker.status, message: info.message };
        return { docker, dockerNvidia };
    }
    try {
        const runtimes = JSON.parse(info.stdout || "{}");
        const nvidiaRegistered = Object.keys(runtimes).some((runtimeName) => runtimeName.toLowerCase() === "nvidia");
        let dockerNvidia;
        if (nvidiaRegistered) {
            dockerNvidia = { name: "docker-nvidia", status: "available", version: "nvidia" };
        }
        else {
            dockerNvidia = missingSoftwareProbe("docker-nvidia", "Docker is available but the nvidia runtime is not registered");
        }
        return {
            docker: { name: "docker", status: "available" },
            dockerNvidia
        };
    }
    catch (error) {
        const detail = `Unable to parse docker runtime inventory: ${capabilityCommandFailureMessage(error)}`;
        return {
            docker: errorSoftwareProbe("docker", detail),
            dockerNvidia: errorSoftwareProbe("docker-nvidia", detail)
        };
    }
}
2845
/**
 * Collects a diagnostic entry for every capability that is not "available".
 *
 * @param {object} snapshot Capability snapshot with `gpu` and a `software` map of probes.
 * @returns {Array<{name: string, status: string, message: *}>|undefined} GPU entry
 *   first, then software entries in map order; undefined when everything is available.
 */
function capabilityDiagnostics(snapshot) {
    const gpuProbe = { name: "gpu", status: snapshot.gpu.status, message: snapshot.gpu.message };
    const diagnostics = [gpuProbe, ...Object.values(snapshot.software)]
        .filter((probe) => probe.status !== "available")
        .map(({ name, status, message }) => ({ name, status, message }));
    return diagnostics.length ? diagnostics : undefined;
}
2865
/**
 * Derives a content-based identifier for a capability snapshot.
 *
 * @param {object} snapshot Snapshot data; it is JSON-serialized before hashing.
 * @returns {string} "caps_" followed by the first 16 hex characters of the SHA-256 digest.
 */
function buildCapabilitySnapshotId(snapshot) {
    const hasher = createHash("sha256");
    hasher.update(JSON.stringify(snapshot));
    const shortDigest = hasher.digest("hex").slice(0, 16);
    return `caps_${shortDigest}`;
}
2869
/**
 * Lists the catalog job types that have a registered runner.
 *
 * @param {object} config Runtime config; an empty list is returned unless `genericJobsEnabled` is truthy.
 * @param {{has: Function}} runners Registered runners keyed by runner id.
 * @returns {object[]} Entries with job_type, runner, trust_modes and required_capabilities.
 */
function buildRunnerCapabilityCatalog(config, runners) {
    if (!config.genericJobsEnabled) {
        return [];
    }
    const catalog = [];
    for (const entry of OWNER_LOCAL_GENERIC_JOB_CATALOG) {
        if (!runners.has(entry.runner)) {
            continue;
        }
        catalog.push({
            job_type: entry.job_type,
            runner: entry.runner,
            trust_modes: uniqueSortedStrings([entry.policy.trust_mode]),
            required_capabilities: entry.required_capabilities || []
        });
    }
    return catalog;
}
2882
/**
 * Checks whether the probed GPU and software satisfy a catalog entry's required capabilities.
 *
 * @param {object} entry Catalog entry; `required_capabilities` may be absent or empty.
 * @param {{genericJobsEnabled: boolean, gpu: object, software: object}} input Probe results.
 * @returns {boolean} False when generic jobs are disabled; true when nothing is required;
 *   otherwise whether every required capability is among those derived by
 *   buildMswarmCapabilityNames from a throwaway snapshot.
 */
function runnerCapabilityRequirementsAvailable(entry, input) {
    const { genericJobsEnabled, gpu, software } = input;
    if (!genericJobsEnabled) {
        return false;
    }
    const required = entry.required_capabilities;
    if (!required?.length) {
        return true;
    }
    // Only gpu/software carry probed data; the other fields are fixed placeholders
    // for a snapshot that exists solely for this check.
    const probeSnapshot = {
        schema_version: MSWARM_CAPABILITY_SCHEMA_VERSION,
        snapshot_id: "caps_requirement_check",
        captured_at: new Date(0).toISOString(),
        generic_jobs_enabled: genericJobsEnabled,
        job_types: [],
        trust_modes: [],
        gpu,
        software,
        runner_catalog: []
    };
    const available = new Set(buildMswarmCapabilityNames(probeSnapshot));
    for (const capability of required) {
        if (!available.has(capability)) {
            return false;
        }
    }
    return true;
}
2901
/**
 * Selects the catalog entries whose job type starts with "tenant." or "package.".
 *
 * @returns {object[]} Matching entries from OWNER_LOCAL_GENERIC_JOB_CATALOG, in catalog order.
 */
function registeredOwnerLocalGenericJobCatalog() {
    const registeredPrefixes = ["tenant.", "package."];
    return OWNER_LOCAL_GENERIC_JOB_CATALOG.filter(({ job_type: jobType }) => registeredPrefixes.some((prefix) => jobType.startsWith(prefix)));
}
2904
/**
 * Encodes a buffer as unpadded, URL-safe base64.
 *
 * @param {Buffer} buffer Bytes to encode.
 * @returns {string} Base64 text with "+" -> "-", "/" -> "_" and "=" padding removed.
 */
function base64UrlEncodeRuntime(buffer) {
    const urlSafe = { "+": "-", "/": "_", "=": "" };
    const standard = buffer.toString("base64");
    return standard.replace(/[+/=]/g, (symbol) => urlSafe[symbol]);
}
2907
/**
 * Wraps a private capability catalog entry in a payload signed with HMAC-SHA256.
 *
 * The signature covers the JSON serialization of the unsigned payload only;
 * `signed_at` is added afterwards and is not part of the signed bytes.
 *
 * @param {{privateCatalogEntry: object, runtimeToken: string}} input Entry to publish
 *   and the runtime token used as the HMAC key.
 * @returns {object} Unsigned payload fields plus a `signature` block.
 */
function signCapabilityPayload(input) {
    const { privateCatalogEntry: entry, runtimeToken } = input;
    const unsignedPayload = {
        schema_version: MSWARM_CAPABILITY_SCHEMA_VERSION,
        snapshot_id: entry.snapshot_id,
        private_catalog_entry: entry,
        scheduler_match: entry.scheduler_match,
        public_projection: entry.public_projection
    };
    const mac = createHmac("sha256", runtimeToken);
    mac.update(JSON.stringify(unsignedPayload));
    const signatureValue = base64UrlEncodeRuntime(mac.digest());
    const signature = {
        alg: "HS256",
        value: signatureValue,
        signed_at: new Date().toISOString(),
        key_id: "self_hosted_runtime_token"
    };
    return { ...unsignedPayload, signature };
}
2926
/**
 * Finds the registered runner responsible for a job's type.
 *
 * @param {{job_type: string}} job Job request.
 * @param {{get: Function}} runners Registered runners keyed by runner id.
 * @returns {object|null} The runner, or null when the job type is not in the catalog
 *   or its runner is not registered.
 */
function runnerForGenericJob(job, runners) {
    const catalogEntry = OWNER_LOCAL_GENERIC_JOB_CATALOG.find(({ job_type: jobType }) => jobType === job.job_type);
    if (!catalogEntry) {
        return null;
    }
    return runners.get(catalogEntry.runner) || null;
}
2930
/**
 * Compare two dotted version strings segment by segment.
 *
 * Each dot-separated segment contributes the first run of digits it contains
 * (e.g. "v12" -> 12, "4-rc1" -> 4); a segment without digits counts as 0, and a
 * missing trailing segment counts as 0 ("12" equals "12.0").
 *
 * Previously every non-digit was stripped before parsing, which glued suffix digits
 * onto the number ("4-rc1" became 41) and made "12.4-rc1" compare greater than "12.10".
 *
 * @param {string} left - Left-hand version.
 * @param {string} right - Right-hand version.
 * @returns {number} Negative when left < right, positive when left > right, 0 when equal
 *   or when either argument is empty/missing.
 */
function compareDottedVersion(left, right) {
    if (!left || !right) {
        return 0;
    }
    const toSegments = (version) => String(version).split(".").map((segment) => {
        const digits = /\d+/.exec(segment);
        return digits ? Number(digits[0]) : 0;
    });
    const leftParts = toSegments(left);
    const rightParts = toSegments(right);
    const length = Math.max(leftParts.length, rightParts.length);
    for (let index = 0; index < length; index += 1) {
        const delta = (leftParts[index] ?? 0) - (rightParts[index] ?? 0);
        if (delta !== 0) {
            return delta;
        }
    }
    return 0;
}
2943
/**
 * Check whether the capability snapshot reports a CUDA version at or above `minVersion`.
 *
 * Versions are gathered from `snapshot.gpu.cuda_versions` and from each device's
 * `cuda_version`. Only non-empty strings are considered: `compareDottedVersion` returns 0
 * ("equal") for an empty operand, so an unfiltered blank entry would satisfy any minimum.
 * A missing `devices` array is treated as empty, matching how `cuda_versions` is handled.
 *
 * @param {object} snapshot - Capability snapshot; reads `snapshot.gpu`.
 * @param {string} [minVersion] - Minimum CUDA version requested by the job.
 * @returns {boolean} `true` when no minimum is requested or some reported version meets it.
 */
function snapshotHasCudaVersion(snapshot, minVersion) {
    if (!minVersion) {
        return true;
    }
    const { gpu } = snapshot;
    const reportedVersions = [
        ...(gpu.cuda_versions || []),
        ...(gpu.devices || []).map((device) => device.cuda_version)
    ].filter((value) => typeof value === "string" && value.length > 0);
    return reportedVersions.some((version) => compareDottedVersion(version, minVersion) >= 0);
}
2952
/**
 * Explain why this node cannot run a generic job, based on a capability snapshot.
 *
 * Checks run in a fixed order and the first failure wins: generic jobs enabled, Blender for
 * render jobs, NVIDIA GPU then Docker + NVIDIA runtime for CUDA jobs, advertised job types,
 * and finally the job's GPU resource request (count, vendor, VRAM, CUDA version).
 *
 * @param {object} job - Generic job request; reads `job_type` and `resources.gpu`.
 * @param {object} snapshot - Capability snapshot for this node.
 * @returns {{code: string, message: string}|null} A `no_capable_node` mismatch, or `null`
 *   when every check passes.
 */
export function genericJobCapabilityMismatch(job, snapshot) {
    const mismatch = (message) => ({ code: "no_capable_node", message });
    const { gpu, software } = snapshot;
    if (!snapshot.generic_jobs_enabled) {
        return mismatch("Generic jobs are disabled on this node.");
    }
    const jobType = job.job_type;
    if (jobType === RENDER_BLENDER_JOB_TYPE && software.blender.status !== "available") {
        return mismatch("Blender is not available on this node.");
    }
    if (jobType === CUDA_RUN_JOB_TYPE) {
        const hasNvidiaGpu = gpu.status === "available" && gpu.vendors.includes("nvidia");
        if (!hasNvidiaGpu) {
            return mismatch("No NVIDIA GPU is available on this node.");
        }
        const hasNvidiaDocker = software.docker.status === "available" && software["docker-nvidia"].status === "available";
        if (!hasNvidiaDocker) {
            return mismatch("Docker with the NVIDIA runtime is not available on this node.");
        }
    }
    if (!snapshot.job_types.includes(jobType)) {
        return mismatch(`No capable owner-local node is available for ${jobType}.`);
    }
    const gpuRequest = job.resources?.gpu;
    if (!gpuRequest) {
        return null;
    }
    // A GPU request without an explicit count is treated as asking for one device.
    const requestedCount = Math.max(1, gpuRequest.count || 1);
    if (gpu.status !== "available" || gpu.count < requestedCount) {
        return mismatch(`Requested ${requestedCount} GPU(s), but this node reports ${gpu.count}.`);
    }
    if (gpuRequest.vendor && !gpu.vendors.includes(gpuRequest.vendor)) {
        return mismatch(`Requested GPU vendor ${gpuRequest.vendor} is not available on this node.`);
    }
    const minVramGb = gpuRequest.min_vram_gb;
    if (Number.isFinite(minVramGb)) {
        const maxVramGb = gpu.max_vram_gb;
        // An unknown (non-finite) node VRAM figure cannot satisfy an explicit minimum.
        if (!Number.isFinite(maxVramGb) || maxVramGb < minVramGb) {
            return mismatch(`Requested GPU VRAM ${minVramGb}GB exceeds this node capability.`);
        }
    }
    const cudaMinVersion = gpuRequest.cuda_min_version;
    if (!snapshotHasCudaVersion(snapshot, cudaMinVersion)) {
        return mismatch(`Requested CUDA ${cudaMinVersion} is not available on this node.`);
    }
    return null;
}
3013
/**
 * Compute the effective timeout for a generic job in milliseconds.
 *
 * @param {object} job - Generic job; reads the optional `limits.timeout_sec`.
 * @param {number} fallbackMs - Node-level timeout, which also acts as the upper bound.
 * @returns {number} `fallbackMs` when the job sets no usable limit; otherwise the job limit
 *   capped at `fallbackMs` and floored at 1 ms.
 */
function genericJobTimeoutMs(job, fallbackMs) {
    const requestedSeconds = positiveInteger(job.limits?.timeout_sec);
    if (requestedSeconds) {
        const cappedMs = Math.min(fallbackMs, requestedSeconds * 1000);
        return Math.max(1, cappedMs);
    }
    return fallbackMs;
}
3020
/**
 * Classify a runner failure as an abort (cancellation or timeout).
 *
 * @param {unknown} error - Value thrown by the runner.
 * @param {AbortSignal} signal - The job's abort signal.
 * @returns {boolean} `true` when the signal has fired, or when `error` is an `Error` whose
 *   message mentions cancellation, abort or timeout; `false` otherwise.
 */
function isGenericAbortError(error, signal) {
    return signal.aborted
        ? true
        : error instanceof Error && /cancelled|canceled|aborted|timed out|timeout/i.test(error.message);
}
1533
3027
  function usageTokens(usage) {
1534
3028
  return {
1535
3029
  promptTokens: positiveInteger(usage?.inputTokens),
@@ -1690,6 +3184,13 @@ export class SelfHostedNodeRuntime {
1690
3184
  fetchImpl: deps?.fetchImpl,
1691
3185
  timeoutMs: config.jobTimeoutMs
1692
3186
  });
3187
+ this.capabilityRunner = deps?.capabilityRunner || defaultCommandRunner;
3188
+ this.genericRunners = new Map((deps?.genericRunners || createDefaultGenericJobRunners(this.capabilityRunner)).map((runner) => [runner.id, runner]));
3189
+ this.artifactStore =
3190
+ deps?.artifactStore ||
3191
+ new MswarmLocalArtifactStore({
3192
+ rootDir: config.artifactStorePath || defaultArtifactStorePath()
3193
+ });
1693
3194
  }
1694
3195
  static async setup(setupConfig, deps) {
1695
3196
  const gateway = deps?.gateway ||
@@ -1711,7 +3212,8 @@ export class SelfHostedNodeRuntime {
1711
3212
  expose_all_models: setupConfig.exposeAllModels,
1712
3213
  model_allowlist: setupConfig.modelAllowlist,
1713
3214
  model_blocklist: setupConfig.modelBlocklist,
1714
- heartbeat_interval_seconds: setupConfig.heartbeatIntervalSeconds
3215
+ heartbeat_interval_seconds: setupConfig.heartbeatIntervalSeconds,
3216
+ generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency
1715
3217
  });
1716
3218
  const nodeId = optionalText(bootstrap.node?.node_id);
1717
3219
  const runtimeToken = optionalText(bootstrap.runtime_token);
@@ -1726,6 +3228,7 @@ export class SelfHostedNodeRuntime {
1726
3228
  machine_fingerprint: machineFingerprint,
1727
3229
  direct_base_url: setupConfig.directBaseUrl || null,
1728
3230
  runtime_token: undefined,
3231
+ artifact_store_path: setupConfig.artifactStorePath || defaultArtifactStorePath(),
1729
3232
  config_version: bootstrap.config_version,
1730
3233
  heartbeat_interval_seconds: heartbeatInterval,
1731
3234
  heartbeat_timeout_seconds: bootstrap.heartbeat_timeout_seconds,
@@ -1739,6 +3242,10 @@ export class SelfHostedNodeRuntime {
1739
3242
  node_version: setupConfig.nodeVersion,
1740
3243
  request_timeout_ms: setupConfig.requestTimeoutMs,
1741
3244
  job_timeout_ms: setupConfig.jobTimeoutMs,
3245
+ generic_jobs_enabled: setupConfig.genericJobsEnabled,
3246
+ generic_job_timeout_ms: setupConfig.genericJobTimeoutMs,
3247
+ generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency,
3248
+ capability_probe_timeout_ms: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
1742
3249
  expose_all_models: setupConfig.exposeAllModels,
1743
3250
  exposure_policy: setupConfig.exposeAllModels ? "all" : "none",
1744
3251
  model_allowlist: setupConfig.modelAllowlist,
@@ -1761,6 +3268,7 @@ export class SelfHostedNodeRuntime {
1761
3268
  ollamaBaseUrl: setupConfig.ollamaBaseUrl,
1762
3269
  statePath: setupConfig.statePath,
1763
3270
  runtimeTokenPath: setupConfig.runtimeTokenPath,
3271
+ artifactStorePath: setupConfig.artifactStorePath || defaultArtifactStorePath(),
1764
3272
  invocationSigningSecret: null,
1765
3273
  listenHost: DEFAULT_LISTEN_HOST,
1766
3274
  listenPort: DEFAULT_LISTEN_PORT,
@@ -1768,6 +3276,10 @@ export class SelfHostedNodeRuntime {
1768
3276
  heartbeatIntervalSeconds: heartbeatInterval,
1769
3277
  requestTimeoutMs: setupConfig.requestTimeoutMs,
1770
3278
  jobTimeoutMs: setupConfig.jobTimeoutMs,
3279
+ genericJobsEnabled: setupConfig.genericJobsEnabled,
3280
+ genericJobTimeoutMs: setupConfig.genericJobTimeoutMs,
3281
+ genericJobMaxConcurrency: setupConfig.genericJobMaxConcurrency,
3282
+ capabilityProbeTimeoutMs: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
1771
3283
  exposeAllModels: setupConfig.exposeAllModels,
1772
3284
  modelAllowlist: setupConfig.modelAllowlist,
1773
3285
  modelBlocklist: setupConfig.modelBlocklist
@@ -1795,6 +3307,53 @@ export class SelfHostedNodeRuntime {
1795
3307
  const models = await this.mcoda.listAgents(this.config);
1796
3308
  return { source: "mcoda", status: "online", models, version: null, failureCount: 0 };
1797
3309
  }
3310
+ async probeCapabilities() {
3311
+ const timeoutMs = capabilityProbeTimeoutMs(this.config);
3312
+ const [gpu, docker, blender, ffmpeg] = await Promise.all([
3313
+ probeNvidiaGpuCapabilities(this.capabilityRunner, timeoutMs),
3314
+ probeDockerCapabilities(this.capabilityRunner, timeoutMs),
3315
+ probeVersionedSoftware(this.capabilityRunner, "blender", "blender", ["--version"], timeoutMs),
3316
+ probeVersionedSoftware(this.capabilityRunner, "ffmpeg", "ffmpeg", ["-version"], timeoutMs)
3317
+ ]);
3318
+ const software = {
3319
+ docker: docker.docker,
3320
+ "docker-nvidia": docker.dockerNvidia,
3321
+ blender,
3322
+ ffmpeg
3323
+ };
3324
+ const runnerCatalog = buildRunnerCapabilityCatalog(this.config, this.genericRunners).filter((entry) => runnerCapabilityRequirementsAvailable(entry, {
3325
+ gpu,
3326
+ software,
3327
+ genericJobsEnabled: this.config.genericJobsEnabled
3328
+ }));
3329
+ const snapshotWithoutId = {
3330
+ schema_version: MSWARM_CAPABILITY_SCHEMA_VERSION,
3331
+ captured_at: new Date().toISOString(),
3332
+ node_id: this.config.nodeId,
3333
+ platform: platform(),
3334
+ arch: process.arch,
3335
+ generic_jobs_enabled: this.config.genericJobsEnabled,
3336
+ job_types: uniqueSortedStrings(runnerCatalog.map((entry) => entry.job_type)),
3337
+ trust_modes: uniqueSortedStrings(runnerCatalog.flatMap((entry) => entry.trust_modes)),
3338
+ gpu,
3339
+ software,
3340
+ runner_catalog: runnerCatalog
3341
+ };
3342
+ const snapshot = {
3343
+ ...snapshotWithoutId,
3344
+ snapshot_id: buildCapabilitySnapshotId(snapshotWithoutId)
3345
+ };
3346
+ const diagnostics = capabilityDiagnostics(snapshot);
3347
+ return diagnostics ? { ...snapshot, diagnostics } : snapshot;
3348
+ }
3349
+ async publicCapabilityProjection() {
3350
+ return projectMswarmPublicCapabilities(await this.probeCapabilities());
3351
+ }
3352
+ async buildCapabilityHeartbeatPayload(runtimeToken) {
3353
+ const snapshot = await this.probeCapabilities();
3354
+ const privateCatalogEntry = buildMswarmPrivateCapabilityCatalogEntry(snapshot);
3355
+ return signCapabilityPayload({ privateCatalogEntry, runtimeToken });
3356
+ }
1798
3357
  async ensureEnrolled() {
1799
3358
  const currentState = await readSelfHostedNodeState(this.config.statePath);
1800
3359
  const persistedRuntimeToken = await readSelfHostedRuntimeToken(this.config.runtimeTokenPath);
@@ -1827,6 +3386,9 @@ export class SelfHostedNodeRuntime {
1827
3386
  node_version: this.config.nodeVersion,
1828
3387
  request_timeout_ms: this.config.requestTimeoutMs,
1829
3388
  job_timeout_ms: this.config.jobTimeoutMs,
3389
+ generic_jobs_enabled: this.config.genericJobsEnabled,
3390
+ generic_job_timeout_ms: this.config.genericJobTimeoutMs,
3391
+ generic_job_max_concurrency: this.config.genericJobMaxConcurrency,
1830
3392
  expose_all_models: this.config.exposeAllModels,
1831
3393
  exposure_policy: this.config.exposeAllModels ? "all" : "none",
1832
3394
  model_allowlist: this.config.modelAllowlist,
@@ -1855,6 +3417,166 @@ export class SelfHostedNodeRuntime {
1855
3417
  }
1856
3418
  return mapMcodaAgentToCodaliAgent(agent, selected);
1857
3419
  }
3420
+ async executeGenericJob(envelope, options = {}) {
3421
+ const startedAt = Date.now();
3422
+ const events = [];
3423
+ let sequence = 0;
3424
+ const emitEvent = async (event) => {
3425
+ const next = {
3426
+ job_id: envelope.job_id,
3427
+ sequence,
3428
+ timestamp: new Date().toISOString(),
3429
+ ...event
3430
+ };
3431
+ sequence += 1;
3432
+ events.push(next);
3433
+ await options.onEvent?.(next);
3434
+ };
3435
+ const failed = async (code, message, validationIssues) => {
3436
+ await emitEvent({
3437
+ type: code === "cancelled" ? "cancelled" : "failed",
3438
+ message,
3439
+ data: { code }
3440
+ });
3441
+ const status = code === "cancelled" ? "cancelled" : "failed";
3442
+ const result = {
3443
+ job_id: envelope.job_id,
3444
+ status,
3445
+ error: {
3446
+ code,
3447
+ message,
3448
+ retryable: code === "timeout"
3449
+ },
3450
+ finished_at: new Date().toISOString()
3451
+ };
3452
+ return {
3453
+ job_id: envelope.job_id,
3454
+ request_id: envelope.request_id,
3455
+ status,
3456
+ result,
3457
+ events,
3458
+ ...(validationIssues?.length ? { validation_issues: validationIssues } : {}),
3459
+ timing: { local_latency_ms: Date.now() - startedAt }
3460
+ };
3461
+ };
3462
+ if (!this.config.genericJobsEnabled) {
3463
+ return failed("feature_disabled", "Generic node jobs are disabled on this node.");
3464
+ }
3465
+ if (envelope.node_id !== this.config.nodeId) {
3466
+ return failed("validation_failed", "generic job node_id does not match this node");
3467
+ }
3468
+ const validation = validateMswarmGenericJobRequest(envelope.job, {
3469
+ registeredJobCatalog: registeredOwnerLocalGenericJobCatalog()
3470
+ });
3471
+ if (!validation.ok || !validation.value) {
3472
+ return failed("validation_failed", "generic job request failed validation", validation.issues);
3473
+ }
3474
+ const job = validation.value;
3475
+ const runner = runnerForGenericJob(job, this.genericRunners);
3476
+ if (!runner) {
3477
+ return failed("runner_unavailable", `No generic job runner is registered for ${job.job_type}.`);
3478
+ }
3479
+ if (job.job_type === RENDER_BLENDER_JOB_TYPE || job.job_type === CUDA_RUN_JOB_TYPE) {
3480
+ const capabilityMismatch = genericJobCapabilityMismatch(job, await this.probeCapabilities());
3481
+ if (capabilityMismatch) {
3482
+ return failed(capabilityMismatch.code, capabilityMismatch.message);
3483
+ }
3484
+ }
3485
+ let artifactContext;
3486
+ try {
3487
+ artifactContext = await this.artifactStore.prepareJobWorkspace(envelope.job_id, job);
3488
+ }
3489
+ catch (error) {
3490
+ return failed("validation_failed", error instanceof Error ? error.message : String(error || "generic job artifact preparation failed"));
3491
+ }
3492
+ const controller = new AbortController();
3493
+ const timeoutMs = genericJobTimeoutMs(job, this.config.genericJobTimeoutMs || this.config.jobTimeoutMs);
3494
+ const onAbort = () => {
3495
+ if (!controller.signal.aborted) {
3496
+ controller.abort(options.signal?.reason || "cancelled");
3497
+ }
3498
+ };
3499
+ if (options.signal?.aborted) {
3500
+ controller.abort(options.signal.reason || "cancelled");
3501
+ }
3502
+ options.signal?.addEventListener("abort", onAbort, { once: true });
3503
+ const timeout = setTimeout(() => {
3504
+ if (!controller.signal.aborted) {
3505
+ controller.abort("timeout");
3506
+ }
3507
+ }, timeoutMs);
3508
+ try {
3509
+ await emitEvent({
3510
+ type: "started",
3511
+ message: `Running ${job.job_type}`,
3512
+ data: {
3513
+ runner: runner.id,
3514
+ sandbox_profile: artifactContext.sandbox.name,
3515
+ timeout_ms: timeoutMs
3516
+ }
3517
+ });
3518
+ const runnerResult = await runner.run({
3519
+ job,
3520
+ signal: controller.signal,
3521
+ emitEvent,
3522
+ artifacts: artifactContext,
3523
+ sandbox: artifactContext.sandbox
3524
+ });
3525
+ const status = runnerResult.status || "succeeded";
3526
+ const outputContext = status === "succeeded"
3527
+ ? artifactContext
3528
+ : {
3529
+ ...artifactContext,
3530
+ outputSpecs: artifactContext.outputSpecs.map((output) => ({ ...output, required: false }))
3531
+ };
3532
+ const outputArtifacts = await this.artifactStore.collectOutputs(outputContext, envelope.job_id);
3533
+ for (const artifact of outputArtifacts) {
3534
+ await emitEvent({
3535
+ type: "artifact",
3536
+ message: "output artifact collected",
3537
+ data: { artifact }
3538
+ });
3539
+ }
3540
+ const result = {
3541
+ ...runnerResult,
3542
+ job_id: envelope.job_id,
3543
+ status,
3544
+ artifacts: [...(runnerResult.artifacts || []), ...outputArtifacts],
3545
+ started_at: runnerResult.started_at || new Date(startedAt).toISOString(),
3546
+ finished_at: runnerResult.finished_at || new Date().toISOString()
3547
+ };
3548
+ await emitEvent({
3549
+ type: status === "succeeded" ? "completed" : "failed",
3550
+ message: status === "succeeded" ? "generic job completed" : runnerResult.error?.message || "generic job failed",
3551
+ data: {
3552
+ status,
3553
+ exit_code: result.exit_code,
3554
+ runner: runner.id
3555
+ }
3556
+ });
3557
+ return {
3558
+ job_id: envelope.job_id,
3559
+ request_id: envelope.request_id,
3560
+ status,
3561
+ result,
3562
+ events,
3563
+ timing: { local_latency_ms: Date.now() - startedAt }
3564
+ };
3565
+ }
3566
+ catch (error) {
3567
+ const code = isGenericAbortError(error, controller.signal) ? abortErrorCode(controller.signal) : "runner_error";
3568
+ const message = code === "timeout" || code === "cancelled"
3569
+ ? abortErrorMessage(controller.signal)
3570
+ : error instanceof Error
3571
+ ? error.message
3572
+ : String(error);
3573
+ return failed(code, message);
3574
+ }
3575
+ finally {
3576
+ clearTimeout(timeout);
3577
+ options.signal?.removeEventListener("abort", onAbort);
3578
+ }
3579
+ }
1858
3580
  async executeJob(job, options = {}) {
1859
3581
  const startedAt = Date.now();
1860
3582
  let selectedAgent;
@@ -2050,6 +3772,7 @@ export class SelfHostedNodeRuntime {
2050
3772
  models = [];
2051
3773
  version = null;
2052
3774
  }
3775
+ const capabilityPayload = await this.buildCapabilityHeartbeatPayload(enrollment.runtimeToken);
2053
3776
  const heartbeatPayload = {
2054
3777
  node_id: this.config.nodeId,
2055
3778
  node_version: this.config.nodeVersion,
@@ -2077,7 +3800,8 @@ export class SelfHostedNodeRuntime {
2077
3800
  recent_failure_count: recentFailureCount,
2078
3801
  last_success_at: status === "online" ? new Date().toISOString() : null
2079
3802
  },
2080
- models
3803
+ models,
3804
+ capabilities: capabilityPayload
2081
3805
  };
2082
3806
  const heartbeatResponse = await this.gateway.heartbeat(enrollment.runtimeToken, heartbeatPayload);
2083
3807
  const exposedModelCount = models.filter((model) => model.exposed !== false).length;