npm - @mcoda/mswarm - Versions diffs - 0.1.78 → 0.1.79 - Mend

@mcoda/mswarm 0.1.78 → 0.1.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/runtime.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { chmod, lstat, mkdir, readdir, readFile, rm, writeFile } from "node:fs/promises";
 import { dirname, isAbsolute, join, relative, resolve } from "node:path";
-import { hostname, homedir, platform, userInfo } from "node:os";
+import { cpus, freemem, hostname, homedir, loadavg, platform, totalmem, userInfo } from "node:os";
 import { spawn } from "node:child_process";
 import { createHash, createHmac, randomUUID } from "node:crypto";
 import { MswarmCodaliExecutor } from "./codali-executor.js";
@@ -16,6 +16,11 @@ const DEFAULT_REQUEST_TIMEOUT_MS = 10000;
 const DEFAULT_JOB_TIMEOUT_MS = 3600000;
 const DEFAULT_SERVICE_COMMAND_TIMEOUT_MS = 60000;
 const DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS = 2000;
+const SELF_HOSTED_RUNTIME_PROTOCOL_VERSION = 1;
+const SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION = 1;
+const SELF_HOSTED_CATALOG_METADATA_VERSION = 1;
+const MAX_TELEMETRY_LATENCY_SAMPLES = 50;
+const MAX_TELEMETRY_FAILURES = 20;
 const DEFAULT_MCODA_BIN = "mcoda";
 const DEFAULT_MCODA_LIST_ARGS = ["agent", "list", "--json", "--refresh-health"];
 const DEFAULT_COMMAND_MAX_BUFFER = 16 * 1024 * 1024;
@@ -460,6 +465,112 @@ function optionalBoolean(...values) {
     }
     return null;
 }
+function roundedTelemetryNumber(value, digits = 3) {
+    if (!Number.isFinite(value)) {
+        return 0;
+    }
+    const factor = 10 ** digits;
+    return Math.round(value * factor) / factor;
+}
+function nonNegativeTelemetryInteger(value) {
+    return typeof value === "number" && Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
+}
+function sha256Json(value) {
+    return createHash("sha256").update(JSON.stringify(value)).digest("hex");
+}
+function buildCatalogFingerprint(models) {
+    const projection = models
+        .map((model) => ({
+        name: optionalText(model.name) || "",
+        provider: optionalText(model.provider) || null,
+        adapter: optionalText(model.adapter) || null,
+        source_agent_slug: optionalText(model.source_agent_slug) || null,
+        model_id: optionalText(model.model_id) || optionalText(model.model) || null,
+        exposed: model.exposed !== false,
+        capabilities: normalizeCapabilities(model.capabilities).sort(),
+        health_status: normalizeHealthStatus(model.health_status)
+    }))
+        .sort((left, right) => `${left.provider || ""}:${left.name}`.localeCompare(`${right.provider || ""}:${right.name}`));
+    return `sha256:${sha256Json(projection)}`;
+}
+function executionClassCapacity(input) {
+    const maxConcurrency = Math.max(1, Math.floor(input.maxConcurrency));
+    const activeJobs = nonNegativeTelemetryInteger(input.activeJobs);
+    const queuedJobs = nonNegativeTelemetryInteger(input.queuedJobs);
+    return {
+        max_concurrency: maxConcurrency,
+        active_jobs: activeJobs,
+        queued_jobs: queuedJobs,
+        free_slots: input.drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs)
+    };
+}
+function totalHostMemoryBucket() {
+    const gib = totalmem() / (1024 ** 3);
+    if (!Number.isFinite(gib) || gib <= 0)
+        return "unknown";
+    if (gib <= 8)
+        return "<=8GiB";
+    if (gib <= 16)
+        return "<=16GiB";
+    if (gib <= 32)
+        return "<=32GiB";
+    if (gib <= 64)
+        return "<=64GiB";
+    if (gib <= 128)
+        return "<=128GiB";
+    return ">128GiB";
+}
+function coarsePublicVramTier(value, gpuCount) {
+    if (value === "none" ||
+        value === "lt8" ||
+        value === "8-15" ||
+        value === "16-31" ||
+        value === "32plus") {
+        return value;
+    }
+    return gpuCount > 0 ? "unknown" : "none";
+}
+function buildCoarseHardwarePressure(capabilityPayload) {
+    const cpuCount = Math.max(1, cpus().length || 1);
+    const totalMemory = totalmem();
+    const freeMemory = freemem();
+    const projection = capabilityPayload?.public_projection;
+    const projectionRecord = projection && typeof projection === "object" && !Array.isArray(projection)
+        ? projection
+        : {};
+    const accelerators = projectionRecord.accelerators && typeof projectionRecord.accelerators === "object"
+        ? projectionRecord.accelerators
+        : {};
+    const gpu = accelerators.gpu && typeof accelerators.gpu === "object" && !Array.isArray(accelerators.gpu)
+        ? accelerators.gpu
+        : null;
+    const rawGpuCount = gpu?.["count"];
+    const gpuCount = typeof rawGpuCount === "number" && Number.isFinite(rawGpuCount)
+        ? Math.max(0, Math.floor(rawGpuCount))
+        : 0;
+    const vramTier = coarsePublicVramTier(gpu?.["vram_tier"], gpuCount);
+    return {
+        schema_version: 1,
+        collected_at: new Date().toISOString(),
+        cpu: {
+            core_count: cpuCount,
+            load_1m_ratio: roundedTelemetryNumber((loadavg()[0] || 0) / cpuCount)
+        },
+        ram: {
+            used_ratio: totalMemory > 0 ? roundedTelemetryNumber((totalMemory - freeMemory) / totalMemory) : null,
+            total_bucket: totalHostMemoryBucket()
+        },
+        gpu: {
+            available: Boolean(gpu?.["available"]),
+            count: gpuCount,
+            cuda: Boolean(gpu?.["cuda"] || gpu?.["has_cuda"]),
+            vram: {
+                total_tier: vramTier,
+                used_ratio: null
+            }
+        }
+    };
+}
 function normalizeCapabilities(value) {
     if (!Array.isArray(value)) {
         return [];
@@ -631,9 +742,14 @@ function serviceEnvironment(config, env, homeDir) {
         MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS: String(config.heartbeatIntervalSeconds),
         MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS: String(config.requestTimeoutMs),
         MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS: String(config.jobTimeoutMs),
+        MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS: String(config.maxConcurrentJobs || 1),
+        MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS: String(config.maxConcurrentLlmJobs || config.maxConcurrentJobs || 1),
         MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED: config.genericJobsEnabled ? "true" : "false",
         MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS: String(config.genericJobTimeoutMs),
         MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY: String(config.genericJobMaxConcurrency),
+        MSWARM_SELF_HOSTED_DRAIN_MODE: config.drainMode ? "true" : "false",
+        MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED: config.loadReportingEnabled === false ? "false" : "true",
+        MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED: config.hardwareTelemetryEnabled ? "true" : "false",
         MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS: config.capabilityProbeTimeoutMs
             ? String(config.capabilityProbeTimeoutMs)
             : null
@@ -1041,6 +1157,8 @@ export async function readSelfHostedNodeConfig(env = process.env) {
         optionalText(env.OLLAMA_HOST) ||
         DEFAULT_OLLAMA_BASE_URL;
     const packageNodeVersion = await readPackageNodeVersion();
+    const maxConcurrentJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, state.max_concurrent_jobs || 1);
+    const maxConcurrentLlmJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, state.max_concurrent_llm_jobs || maxConcurrentJobs);
     return {
         gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
         nodeId,
@@ -1070,10 +1188,15 @@ export async function readSelfHostedNodeConfig(env = process.env) {
         heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, state.heartbeat_interval_seconds || DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
         requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, state.request_timeout_ms || DEFAULT_REQUEST_TIMEOUT_MS),
         jobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
+        maxConcurrentJobs,
+        maxConcurrentLlmJobs,
         genericJobsEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED ?? env.MSWARM_SELF_HOSTED_GENERIC_JOBS, state.generic_jobs_enabled === true),
         genericJobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, state.generic_job_timeout_ms || state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
         genericJobMaxConcurrency: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, state.generic_job_max_concurrency || 1),
         capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, state.capability_probe_timeout_ms || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
+        drainMode: parseBoolean(env.MSWARM_SELF_HOSTED_DRAIN_MODE, state.drain_mode === true),
+        loadReportingEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING, state.load_reporting_enabled !== false),
+        hardwareTelemetryEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED ?? env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, state.hardware_telemetry_enabled === true),
         exposeAllModels: resolveDaemonExposeAllModels(env, state),
         modelAllowlist: parseList(env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST || state.model_allowlist),
         modelBlocklist: parseList(env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST || state.model_blocklist)
@@ -1104,6 +1227,8 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
     const allowlist = parseList(options.allow || env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST);
     const blocklist = parseList(options.block || env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST);
     const packageNodeVersion = await readPackageNodeVersion();
+    const maxConcurrentJobs = parsePositiveInteger(options["max-concurrent-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, 1);
+    const maxConcurrentLlmJobs = parsePositiveInteger(options["max-concurrent-llm-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, maxConcurrentJobs);
     return {
         apiKey,
         gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
@@ -1126,10 +1251,17 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
         heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
         requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, DEFAULT_REQUEST_TIMEOUT_MS),
         jobTimeoutMs: parsePositiveInteger(options["job-timeout-ms"] || env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
+        maxConcurrentJobs,
+        maxConcurrentLlmJobs,
         genericJobsEnabled: parseBoolean(options["enable-generic-jobs"] || env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED || env.MSWARM_SELF_HOSTED_GENERIC_JOBS, false),
         genericJobTimeoutMs: parsePositiveInteger(options["generic-job-timeout-ms"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
         genericJobMaxConcurrency: parsePositiveInteger(options["generic-job-max-concurrency"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, 1),
         capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
+        drainMode: parseBoolean(options.drain || env.MSWARM_SELF_HOSTED_DRAIN_MODE, false),
+        loadReportingEnabled: parseBoolean(options["disable-load-reporting"] === true
+            ? false
+            : (env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING), true),
+        hardwareTelemetryEnabled: parseBoolean(options["enable-hardware-telemetry"] || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, false),
         exposeAllModels: resolveOwnerSetupExposeAllModels(options, env),
         modelAllowlist: allowlist,
         modelBlocklist: blocklist,
@@ -1527,10 +1659,6 @@ function mapMcodaAgentToCodaliAgent(agent, fallbackSlug) {
         maxOutputTokens: optionalNumber(agent.maxOutputTokens, agent.max_output_tokens) ?? undefined,
     };
 }
-function isExposedLocalAgent(agent, config) {
-    const mapped = mapMcodaAgentToSelfHostedModel(agent, config);
-    return Boolean(mapped?.exposed);
-}
 function buildCodaliWorkspace(job) {
     const root = optionalText(job.workspace?.root);
     if (!root) {
@@ -1550,6 +1678,13 @@ const DOCDEX_JOB_ERROR_CODES = new Set([
     "docdex_repo_access_denied",
     "docdex_unavailable",
 ]);
+const PRE_START_JOB_ERROR_CODES = new Set([
+    "selected_agent_unavailable",
+    "selected_agent_unhealthy",
+    "validation_failed",
+    "docdex_context_missing",
+    "docdex_api_key_missing",
+]);
 class SelfHostedDocdexJobError extends Error {
     constructor(code, message) {
         super(message);
@@ -1557,6 +1692,13 @@ class SelfHostedDocdexJobError extends Error {
         this.code = code;
     }
 }
+class SelfHostedPreStartJobError extends Error {
+    constructor(code, message) {
+        super(message);
+        this.name = code;
+        this.code = code;
+    }
+}
 function normalizeDocdexCapabilityMap(value) {
     const record = objectRecord(value);
     if (!record)
@@ -1614,11 +1756,14 @@ function selfHostedErrorCode(error) {
     if (!error || typeof error !== "object")
         return undefined;
     const code = error.code;
-    if (typeof code === "string" && DOCDEX_JOB_ERROR_CODES.has(code)) {
+    if (typeof code === "string" &&
+        (DOCDEX_JOB_ERROR_CODES.has(code) || PRE_START_JOB_ERROR_CODES.has(code))) {
         return code;
     }
     const name = error.name;
-    return typeof name === "string" && DOCDEX_JOB_ERROR_CODES.has(name) ? name : undefined;
+    return typeof name === "string" && (DOCDEX_JOB_ERROR_CODES.has(name) || PRE_START_JOB_ERROR_CODES.has(name))
+        ? name
+        : undefined;
 }
 function redactRuntimeSecretValues(value, secrets) {
     let output = value;
@@ -3135,6 +3280,16 @@ export class MswarmSelfHostedNodeClient {
             body: JSON.stringify(payload)
         }, this.timeoutMs);
     }
+    async postJobStart(runtimeToken, jobId, payload) {
+        return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/start`, {
+            method: "POST",
+            headers: {
+                "content-type": "application/json",
+                authorization: `Bearer ${runtimeToken}`
+            },
+            body: JSON.stringify(payload)
+        }, this.timeoutMs);
+    }
     async postJobEvents(runtimeToken, jobId, payload) {
         return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/events`, {
             method: "POST",
@@ -3148,6 +3303,12 @@ export class MswarmSelfHostedNodeClient {
 }
 export class SelfHostedNodeRuntime {
     constructor(config, deps) {
+        this.activeLlmJobs = 0;
+        this.activeGenericJobs = 0;
+        this.queuedLlmJobs = 0;
+        this.queuedGenericJobs = 0;
+        this.latencySamplesMs = [];
+        this.recentFailures = [];
         this.config = config;
         this.gateway =
             deps?.gateway ||
@@ -3192,6 +3353,100 @@ export class SelfHostedNodeRuntime {
                     rootDir: config.artifactStorePath || defaultArtifactStorePath()
                 });
     }
+    updateLocalQueueTelemetry(input) {
+        if (input.llmQueuedJobs !== undefined) {
+            this.queuedLlmJobs = nonNegativeTelemetryInteger(input.llmQueuedJobs);
+        }
+        if (input.genericQueuedJobs !== undefined) {
+            this.queuedGenericJobs = nonNegativeTelemetryInteger(input.genericQueuedJobs);
+        }
+    }
+    beginExecutionTelemetry(executionClass) {
+        if (executionClass === "generic_job") {
+            this.activeGenericJobs += 1;
+            return;
+        }
+        this.activeLlmJobs += 1;
+    }
+    finishExecutionTelemetry(input) {
+        if (input.executionClass === "generic_job") {
+            this.activeGenericJobs = Math.max(0, this.activeGenericJobs - 1);
+        }
+        else {
+            this.activeLlmJobs = Math.max(0, this.activeLlmJobs - 1);
+        }
+        this.latencySamplesMs.push(Math.max(0, Date.now() - input.startedAt));
+        while (this.latencySamplesMs.length > MAX_TELEMETRY_LATENCY_SAMPLES) {
+            this.latencySamplesMs.shift();
+        }
+        if (!input.ok) {
+            this.recentFailures.unshift({
+                execution_class: input.executionClass === "generic_job" ? "generic_job" : "agentic",
+                code: optionalText(input.code) || "upstream_error",
+                at: new Date().toISOString()
+            });
+            this.recentFailures.splice(MAX_TELEMETRY_FAILURES);
+        }
+    }
+    averageLatencyMs(fallback = null) {
+        if (this.latencySamplesMs.length === 0) {
+            return fallback;
+        }
+        const total = this.latencySamplesMs.reduce((sum, value) => sum + value, 0);
+        return Math.round(total / this.latencySamplesMs.length);
+    }
+    buildLoadTelemetry(input) {
+        const drainMode = this.config.drainMode === true;
+        const llmMaxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentLlmJobs || this.config.maxConcurrentJobs || 1));
+        const genericMaxConcurrency = Math.max(1, Math.floor(this.config.genericJobMaxConcurrency || 1));
+        const maxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentJobs || 1), llmMaxConcurrency, this.config.genericJobsEnabled ? genericMaxConcurrency : 1);
+        const activeLlmJobs = nonNegativeTelemetryInteger(this.activeLlmJobs);
+        const activeGenericJobs = nonNegativeTelemetryInteger(this.activeGenericJobs);
+        const queuedLlmJobs = nonNegativeTelemetryInteger(this.queuedLlmJobs);
+        const queuedGenericJobs = nonNegativeTelemetryInteger(this.queuedGenericJobs);
+        const llmCapacity = executionClassCapacity({
+            maxConcurrency: llmMaxConcurrency,
+            activeJobs: activeLlmJobs,
+            queuedJobs: queuedLlmJobs,
+            drainMode
+        });
+        const genericCapacity = executionClassCapacity({
+            maxConcurrency: genericMaxConcurrency,
+            activeJobs: activeGenericJobs,
+            queuedJobs: queuedGenericJobs,
+            drainMode: drainMode || !this.config.genericJobsEnabled
+        });
+        const activeJobs = activeLlmJobs + activeGenericJobs;
+        const queuedJobs = queuedLlmJobs + queuedGenericJobs;
+        const freeSlots = drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs);
+        const failures = this.recentFailures.slice(0, 10);
+        const discoveryFailureCount = nonNegativeTelemetryInteger(input.discoveryFailureCount);
+        const telemetry = {
+            runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
+            load_balancer_protocol_version: SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION,
+            catalog_metadata_version: SELF_HOSTED_CATALOG_METADATA_VERSION,
+            catalog_fingerprint: buildCatalogFingerprint(input.models),
+            max_concurrency: maxConcurrency,
+            max_concurrent_llm_jobs: llmMaxConcurrency,
+            max_concurrent_generic_jobs: this.config.genericJobsEnabled ? genericMaxConcurrency : 0,
+            active_jobs: activeJobs,
+            queued_jobs: queuedJobs,
+            free_slots: freeSlots,
+            drain_mode: drainMode,
+            execution_class_capacity: {
+                chat: llmCapacity,
+                agentic: llmCapacity,
+                generic_job: genericCapacity
+            },
+            avg_latency_ms: this.averageLatencyMs(input.discoveryLatencyMs ?? null),
+            recent_failure_count: failures.length + discoveryFailureCount,
+            recent_failures: failures
+        };
+        if (this.config.hardwareTelemetryEnabled === true) {
+            telemetry.hardware_pressure = buildCoarseHardwarePressure(input.capabilityPayload || null);
+        }
+        return telemetry;
+    }
     static async setup(setupConfig, deps) {
         const gateway = deps?.gateway ||
             new MswarmSelfHostedNodeClient({
@@ -3213,6 +3468,11 @@ export class SelfHostedNodeRuntime {
             model_allowlist: setupConfig.modelAllowlist,
             model_blocklist: setupConfig.modelBlocklist,
             heartbeat_interval_seconds: setupConfig.heartbeatIntervalSeconds,
+            max_concurrent_jobs: setupConfig.maxConcurrentJobs,
+            max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
+            drain_mode: setupConfig.drainMode,
+            load_reporting_enabled: setupConfig.loadReportingEnabled,
+            hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
             generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency
         });
         const nodeId = optionalText(bootstrap.node?.node_id);
@@ -3242,10 +3502,15 @@ export class SelfHostedNodeRuntime {
             node_version: setupConfig.nodeVersion,
             request_timeout_ms: setupConfig.requestTimeoutMs,
             job_timeout_ms: setupConfig.jobTimeoutMs,
+            max_concurrent_jobs: setupConfig.maxConcurrentJobs,
+            max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
             generic_jobs_enabled: setupConfig.genericJobsEnabled,
             generic_job_timeout_ms: setupConfig.genericJobTimeoutMs,
             generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency,
             capability_probe_timeout_ms: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
+            drain_mode: setupConfig.drainMode,
+            load_reporting_enabled: setupConfig.loadReportingEnabled,
+            hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
             expose_all_models: setupConfig.exposeAllModels,
             exposure_policy: setupConfig.exposeAllModels ? "all" : "none",
             model_allowlist: setupConfig.modelAllowlist,
@@ -3276,10 +3541,15 @@ export class SelfHostedNodeRuntime {
             heartbeatIntervalSeconds: heartbeatInterval,
             requestTimeoutMs: setupConfig.requestTimeoutMs,
             jobTimeoutMs: setupConfig.jobTimeoutMs,
+            maxConcurrentJobs: setupConfig.maxConcurrentJobs,
+            maxConcurrentLlmJobs: setupConfig.maxConcurrentLlmJobs,
             genericJobsEnabled: setupConfig.genericJobsEnabled,
             genericJobTimeoutMs: setupConfig.genericJobTimeoutMs,
             genericJobMaxConcurrency: setupConfig.genericJobMaxConcurrency,
             capabilityProbeTimeoutMs: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
+            drainMode: setupConfig.drainMode,
+            loadReportingEnabled: setupConfig.loadReportingEnabled,
+            hardwareTelemetryEnabled: setupConfig.hardwareTelemetryEnabled,
             exposeAllModels: setupConfig.exposeAllModels,
             modelAllowlist: setupConfig.modelAllowlist,
             modelBlocklist: setupConfig.modelBlocklist
@@ -3386,9 +3656,14 @@ export class SelfHostedNodeRuntime {
             node_version: this.config.nodeVersion,
             request_timeout_ms: this.config.requestTimeoutMs,
             job_timeout_ms: this.config.jobTimeoutMs,
+            max_concurrent_jobs: this.config.maxConcurrentJobs,
+            max_concurrent_llm_jobs: this.config.maxConcurrentLlmJobs,
             generic_jobs_enabled: this.config.genericJobsEnabled,
             generic_job_timeout_ms: this.config.genericJobTimeoutMs,
             generic_job_max_concurrency: this.config.genericJobMaxConcurrency,
+            drain_mode: this.config.drainMode === true,
+            load_reporting_enabled: this.config.loadReportingEnabled !== false,
+            hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
             expose_all_models: this.config.exposeAllModels,
             exposure_policy: this.config.exposeAllModels ? "all" : "none",
             model_allowlist: this.config.modelAllowlist,
@@ -3399,26 +3674,38 @@ export class SelfHostedNodeRuntime {
         return { runtimeToken, state: nextState, enrolled: true };
     }
     async resolveMcodaAgentForJob(job) {
-        const selected = optionalText(job.source_agent_slug) ||
-            optionalText(job.agent_slug) ||
-            optionalText(job.model) ||
-            optionalText(job.openai_request.model);
+        const selectedSourceAgentSlug = optionalText(job.source_agent_slug);
+        const selectedAgentSlug = optionalText(job.agent_slug);
+        const selectedModel = optionalText(job.model) || optionalText(job.openai_request.model);
+        const selected = selectedSourceAgentSlug || selectedAgentSlug || selectedModel;
         if (!selected) {
-            throw new Error("mcoda source agent slug is required");
+            throw new SelfHostedPreStartJobError("selected_agent_unavailable", "mcoda source agent slug is required");
         }
         const rawAgents = await this.mcoda.listRawAgents();
+        const strictSelectedAgent = selectedSourceAgentSlug || selectedAgentSlug;
         const agent = rawAgents.find((entry) => {
             const slug = optionalText(entry.slug);
+            if (strictSelectedAgent) {
+                return slug === strictSelectedAgent;
+            }
             const defaultModel = mcodaAgentDefaultModel(entry);
             return slug === selected || defaultModel === selected;
         });
-        if (!agent || !isExposedLocalAgent(agent, this.config)) {
-            throw new Error("selected local mcoda agent is not exposed by this node");
+        if (!agent) {
+            throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not available on this node`);
+        }
+        const mapped = mapMcodaAgentToSelfHostedModel(agent, this.config);
+        if (!mapped?.exposed) {
+            throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not exposed by this node`);
+        }
+        if (mapped.health_status && mapped.health_status !== "healthy" && mapped.health_status !== "unknown") {
+            throw new SelfHostedPreStartJobError("selected_agent_unhealthy", `selected local mcoda agent ${selected} is ${mapped.health_status}`);
         }
         return mapMcodaAgentToCodaliAgent(agent, selected);
     }
     async executeGenericJob(envelope, options = {}) {
         const startedAt = Date.now();
+        this.beginExecutionTelemetry("generic_job");
         const events = [];
         let sequence = 0;
         const emitEvent = async (event) => {
@@ -3449,6 +3736,12 @@ export class SelfHostedNodeRuntime {
                 },
                 finished_at: new Date().toISOString()
             };
+            this.finishExecutionTelemetry({
+                executionClass: "generic_job",
+                startedAt,
+                ok: false,
+                code
+            });
             return {
                 job_id: envelope.job_id,
                 request_id: envelope.request_id,
@@ -3554,6 +3847,12 @@ export class SelfHostedNodeRuntime {
                     runner: runner.id
                 }
             });
+            this.finishExecutionTelemetry({
+                executionClass: "generic_job",
+                startedAt,
+                ok: status === "succeeded",
+                code: runnerResult.error?.code || status
+            });
             return {
                 job_id: envelope.job_id,
                 request_id: envelope.request_id,
@@ -3579,7 +3878,9 @@ export class SelfHostedNodeRuntime {
     }
     async executeJob(job, options = {}) {
         const startedAt = Date.now();
+        this.beginExecutionTelemetry("llm");
         let selectedAgent;
+        let jobStarted = false;
         const progressEvents = [];
         const streamEvents = [];
         const recordProgress = async (event) => {
@@ -3590,13 +3891,35 @@ export class SelfHostedNodeRuntime {
             streamEvents.push(chunk);
             await options.onOpenAIChunk?.(chunk);
         };
+        const acknowledgeStarted = async (agent) => {
+            if (jobStarted) {
+                return;
+            }
+            await options.onStarted?.({
+                job_id: job.job_id,
+                request_id: job.request_id,
+                node_id: job.node_id,
+                agent_slug: optionalText(job.agent_slug) || agent?.slug || "",
+                source_agent_slug: optionalText(job.source_agent_slug) || agent?.slug || null,
+                model: optionalText(job.model) || optionalText(job.openai_request.model)
+            });
+            jobStarted = true;
+        };
         if (job.node_id !== this.config.nodeId) {
-            return {
+            const result = {
                 job_id: job.job_id,
                 request_id: job.request_id,
                 status: "failed",
+                pre_start_failure: true,
                 error: { code: "validation_failed", message: "job node_id does not match this node" }
             };
+            this.finishExecutionTelemetry({
+                executionClass: "llm",
+                startedAt,
+                ok: false,
+                code: "validation_failed"
+            });
+            return result;
         }
         try {
             if (job.provider === "ollama") {
@@ -3609,7 +3932,8 @@ export class SelfHostedNodeRuntime {
                     options.num_predict = job.openai_request.max_tokens;
                 if (job.openai_request.stop !== undefined)
                     options.stop = job.openai_request.stop;
-                const result = await this.jobOllama.chat({
+                await acknowledgeStarted();
+                const ollamaResult = await this.jobOllama.chat({
                     model: job.model || job.openai_request.model,
                     messages: job.openai_request.messages,
                     options,
@@ -3622,7 +3946,7 @@ export class SelfHostedNodeRuntime {
                         created: Math.floor(Date.now() / 1000),
                         model: job.openai_request.model,
                         choices: [
-                            { index: 0, delta: { content: result.content }, finish_reason: null }
+                            { index: 0, delta: { content: ollamaResult.content }, finish_reason: null }
                         ]
                     });
                     await emitOpenAIChunk({
@@ -3635,22 +3959,28 @@ export class SelfHostedNodeRuntime {
                         ]
                     });
                 }
-                return {
+                const invocationResult = {
                     job_id: job.job_id,
                     request_id: job.request_id,
                     status: "success",
                     openai_response: buildOpenAIChatCompletion({
                         requestId: job.request_id,
                         model: job.openai_request.model,
-                        content: result.content,
-                        promptTokens: result.promptTokens,
-                        completionTokens: result.completionTokens,
-                        metadata: { provider: "ollama", raw: result.raw }
+                        content: ollamaResult.content,
+                        promptTokens: ollamaResult.promptTokens,
+                        completionTokens: ollamaResult.completionTokens,
+                        metadata: { provider: "ollama", raw: ollamaResult.raw }
                     }),
                     ...(streamEvents.length ? { stream_events: streamEvents } : {}),
                     ...(progressEvents.length ? { progress_events: progressEvents } : {}),
                     timing: { local_latency_ms: Date.now() - startedAt }
                 };
+                this.finishExecutionTelemetry({
+                    executionClass: "llm",
+                    startedAt,
+                    ok: true
+                });
+                return invocationResult;
             }
             const taskPreview = messagesToPrompt(job.openai_request.messages);
             if (!taskPreview) {
@@ -3660,6 +3990,7 @@ export class SelfHostedNodeRuntime {
             selectedAgent = agent;
             validateRequiredDocdexContext(job, options.attachedMswarmApiKey);
             const attachedMswarmApiKey = attachedMswarmApiKeyForDocdex(job, options.attachedMswarmApiKey);
+            await acknowledgeStarted(agent);
             await recordProgress({
                 type: "agent_selected",
                 job_id: job.job_id,
@@ -3698,7 +4029,7 @@ export class SelfHostedNodeRuntime {
                 }
             });
             const tokens = usageTokens(response.usage);
-            return {
+            const result = {
                 job_id: job.job_id,
                 request_id: job.request_id,
                 status: "success",
@@ -3724,6 +4055,12 @@ export class SelfHostedNodeRuntime {
                 ...(progressEvents.length ? { progress_events: progressEvents } : {}),
                 timing: { local_latency_ms: Date.now() - startedAt }
             };
+            this.finishExecutionTelemetry({
+                executionClass: "llm",
+                startedAt,
+                ok: true
+            });
+            return result;
         }
         catch (error) {
             const message = redactRuntimeSecretValues(error instanceof Error ? error.message : String(error), [selectedAgent?.apiKey, options.attachedMswarmApiKey]);
@@ -3736,10 +4073,11 @@ export class SelfHostedNodeRuntime {
                         : /permission|policy|denied/i.test(message)
                             ? "policy_denied"
                             : "upstream_error");
-            return {
+            const result = {
                 job_id: job.job_id,
                 request_id: job.request_id,
                 status: "failed",
+                ...(!jobStarted ? { pre_start_failure: true } : {}),
                 error: {
                     code,
                     message
@@ -3748,6 +4086,13 @@ export class SelfHostedNodeRuntime {
                 ...(progressEvents.length ? { progress_events: progressEvents } : {}),
                 timing: { local_latency_ms: Date.now() - startedAt }
             };
+            this.finishExecutionTelemetry({
+                executionClass: "llm",
+                startedAt,
+                ok: false,
+                code
+            });
+            return result;
         }
     }
     /**
      * Runs one heartbeat cycle: combines the discovery results, the capability
      * payload and the load telemetry into a heartbeat payload, sends it through
      * `this.gateway.heartbeat`, and returns a summary of the cycle.
      *
      * NOTE(review): this text is a diff rendering. The hunk headers inside the
      * body mark elided lines, so `enrollment`, `startedAt`, `status`, `models`,
      * `version`, `discoverySource` and `recentFailureCount` are assigned in code
      * that is not visible here - confirm against the full file.
      *
      * @returns {Promise<object>} Summary whose visible fields are `enrolled`,
      *   `status`, `discovery_source`, `mcoda_agent_count`, `ollama_version`,
      *   `capacity` (the complete load telemetry object) and `heartbeat_response`.
      */
     async runOnce() {
@@ -3772,12 +4117,49 @@ export class SelfHostedNodeRuntime {
             models = [];
             version = null;
         }
         // Elapsed time since `startedAt`, captured before the capability payload
         // and telemetry are built so that work is not counted in this figure.
+        const discoveryLatencyMs = Date.now() - startedAt;
         const capabilityPayload = await this.buildCapabilityHeartbeatPayload(enrollment.runtimeToken);
+        const loadTelemetry = this.buildLoadTelemetry({
+            models,
+            discoveryLatencyMs,
+            discoveryFailureCount: recentFailureCount,
+            capabilityPayload
+        });
+        const exposedModelCount = models.filter((model) => model.exposed !== false).length;
         // Load reporting counts as enabled unless the config sets it to exactly
         // `false`. When disabled, only the two job counters are sent as capacity.
+        const loadReportingEnabled = this.config.loadReportingEnabled !== false;
+        const capacityPayload = loadReportingEnabled
+            ? {
+                protocol_version: loadTelemetry.runtime_protocol_version,
+                runtime_protocol_version: loadTelemetry.runtime_protocol_version,
+                load_balancer_protocol_version: loadTelemetry.load_balancer_protocol_version,
+                catalog_metadata_version: loadTelemetry.catalog_metadata_version,
+                catalog_fingerprint: loadTelemetry.catalog_fingerprint,
+                max_concurrency: loadTelemetry.max_concurrency,
+                max_concurrent_llm_jobs: loadTelemetry.max_concurrent_llm_jobs,
+                max_concurrent_generic_jobs: loadTelemetry.max_concurrent_generic_jobs,
+                active_jobs: loadTelemetry.active_jobs,
+                queued_jobs: loadTelemetry.queued_jobs,
+                free_slots: loadTelemetry.free_slots,
+                drain_mode: loadTelemetry.drain_mode,
+                execution_class_capacity: loadTelemetry.execution_class_capacity
+            }
+            : {
+                active_jobs: loadTelemetry.active_jobs,
+                queued_jobs: loadTelemetry.queued_jobs
+            };
         const heartbeatPayload = {
             node_id: this.config.nodeId,
             node_version: this.config.nodeVersion,
+            runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
             config_version: enrollment.state.config_version ?? null,
             status,
+            runtime: {
+                protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
+                relay_mode: this.config.relayMode || "outbound",
+                load_reporting_enabled: loadReportingEnabled,
+                hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
+                drain_mode: this.config.drainMode === true
+            },
             discovery: {
                 source: discoverySource,
                 mcoda_status: discoverySource === "mcoda" && status === "online" ? "ok" : status === "degraded" ? "error" : null
@@ -3791,20 +4173,24 @@ export class SelfHostedNodeRuntime {
                     status: null,
                     version: null
                 },
-            capacity: {
-                active_jobs: 0,
-                queued_jobs: 0
-            },
+            capacity: capacityPayload,
             health: {
-                avg_latency_ms: Date.now() - startedAt,
-                recent_failure_count: recentFailureCount,
                 // Use the telemetry average when present; fall back to this
                 // cycle's discovery latency when it is null or undefined.
+                avg_latency_ms: loadTelemetry.avg_latency_ms ?? discoveryLatencyMs,
+                recent_failure_count: loadTelemetry.recent_failure_count,
+                recent_failures: loadTelemetry.recent_failures,
                 last_success_at: status === "online" ? new Date().toISOString() : null
             },
+            local_agent_catalog: {
+                revision: loadTelemetry.catalog_fingerprint,
+                metadata_version: loadTelemetry.catalog_metadata_version,
+                model_count: models.length,
+                exposed_model_count: exposedModelCount
+            },
             models,
-            capabilities: capabilityPayload
+            capabilities: capabilityPayload,
             // `hardware_pressure` is attached only when the telemetry object
             // carries a truthy value for it.
+            ...(loadTelemetry.hardware_pressure ? { hardware_pressure: loadTelemetry.hardware_pressure } : {})
         };
         const heartbeatResponse = await this.gateway.heartbeat(enrollment.runtimeToken, heartbeatPayload);
-        const exposedModelCount = models.filter((model) => model.exposed !== false).length;
         return {
             enrolled: enrollment.enrolled,
             status,
@@ -3812,6 +4198,7 @@ export class SelfHostedNodeRuntime {
             discovery_source: discoverySource,
             mcoda_agent_count: discoverySource === "mcoda" ? exposedModelCount : undefined,
             ollama_version: version,
+            capacity: loadTelemetry,
             heartbeat_response: heartbeatResponse
         };
     }
@@ -3846,9 +4233,17 @@ export class SelfHostedNodeRuntime {
     }
     async pollAndExecuteJob(waitMs = DEFAULT_JOB_POLL_WAIT_MS) {
         const enrollment = await this.ensureEnrolled();
+        const pollCapacity = this.buildLoadTelemetry({ models: [] });
         const response = await this.gateway.pollJob(enrollment.runtimeToken, {
             node_id: this.config.nodeId,
-            capacity: { active_jobs: 0, max_jobs: 1 },
+            capacity: {
+                active_jobs: pollCapacity.active_jobs,
+                queued_jobs: pollCapacity.queued_jobs,
+                max_jobs: pollCapacity.max_concurrency,
+                max_concurrency: pollCapacity.max_concurrency,
+                free_slots: pollCapacity.free_slots,
+                drain_mode: pollCapacity.drain_mode
+            },
             wait_ms: waitMs
         });
         const job = response.job || null;
@@ -3876,6 +4271,14 @@ export class SelfHostedNodeRuntime {
         };
         const result = await this.executeJob(job, {
             attachedMswarmApiKey: optionalText(response.attached_mswarm_api_key) || undefined,
+            onStarted: async (event) => {
+                await this.gateway.postJobStart(enrollment.runtimeToken, job.job_id, {
+                    node_id: this.config.nodeId,
+                    agent_slug: event.agent_slug || job.agent_slug,
+                    source_agent_slug: event.source_agent_slug || job.source_agent_slug || null,
+                    model: event.model || job.model || job.openai_request.model
+                });
+            },
             onOpenAIChunk: async (chunk) => {
                 if (job.openai_request.stream !== true || streamEventForwardingFailed) {
                     return;