@mcoda/mswarm 0.1.78 → 0.1.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runtime.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { chmod, lstat, mkdir, readdir, readFile, rm, writeFile } from "node:fs/promises";
2
2
  import { dirname, isAbsolute, join, relative, resolve } from "node:path";
3
- import { hostname, homedir, platform, userInfo } from "node:os";
3
+ import { cpus, freemem, hostname, homedir, loadavg, platform, totalmem, userInfo } from "node:os";
4
4
  import { spawn } from "node:child_process";
5
5
  import { createHash, createHmac, randomUUID } from "node:crypto";
6
6
  import { MswarmCodaliExecutor } from "./codali-executor.js";
@@ -16,6 +16,11 @@ const DEFAULT_REQUEST_TIMEOUT_MS = 10000;
16
16
  const DEFAULT_JOB_TIMEOUT_MS = 3600000;
17
17
  const DEFAULT_SERVICE_COMMAND_TIMEOUT_MS = 60000;
18
18
  const DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS = 2000;
19
+ const SELF_HOSTED_RUNTIME_PROTOCOL_VERSION = 1;
20
+ const SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION = 1;
21
+ const SELF_HOSTED_CATALOG_METADATA_VERSION = 1;
22
+ const MAX_TELEMETRY_LATENCY_SAMPLES = 50;
23
+ const MAX_TELEMETRY_FAILURES = 20;
19
24
  const DEFAULT_MCODA_BIN = "mcoda";
20
25
  const DEFAULT_MCODA_LIST_ARGS = ["agent", "list", "--json", "--refresh-health"];
21
26
  const DEFAULT_COMMAND_MAX_BUFFER = 16 * 1024 * 1024;
@@ -460,6 +465,112 @@ function optionalBoolean(...values) {
460
465
  }
461
466
  return null;
462
467
  }
468
/**
 * Rounds a telemetry value to a fixed number of decimal places.
 *
 * @param {number} value - Value to round.
 * @param {number} [digits=3] - Number of decimal places to keep.
 * @returns {number} The rounded value, or 0 when `value` is not a finite number.
 */
function roundedTelemetryNumber(value, digits = 3) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  const factor = 10 ** digits;
  const rounded = Math.round(value * factor) / factor;
  // Scaling can overflow to Infinity (very large value) or produce NaN
  // (invalid digits). Fall back to the finite input rather than leaking a
  // non-finite number into the telemetry payload.
  return Number.isFinite(rounded) ? rounded : value;
}
475
/**
 * Coerces a telemetry counter to a non-negative integer.
 *
 * @param {unknown} value - Candidate counter value.
 * @returns {number} `Math.floor(value)` for finite positive numbers, otherwise 0.
 */
function nonNegativeTelemetryInteger(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  if (value <= 0) {
    return 0;
  }
  return Math.floor(value);
}
478
/**
 * Hashes the JSON serialization of a value with SHA-256.
 *
 * @param {unknown} value - Value to serialize with `JSON.stringify`.
 * @returns {string} Lower-case hex digest of the serialized value.
 */
function sha256Json(value) {
  // JSON.stringify returns undefined for undefined, functions and symbols,
  // and Hash#update rejects undefined with a TypeError. Hash the JSON
  // literal "null" in that case so the helper never throws on such input.
  const serialized = JSON.stringify(value) ?? "null";
  return createHash("sha256").update(serialized).digest("hex");
}
481
/**
 * Builds a fingerprint string for a list of catalog models.
 *
 * Each model is reduced to a fixed projection (name, provider, adapter,
 * source agent slug, model id, exposure flag, sorted capabilities and
 * health status). The projections are ordered by `provider:name` and the
 * resulting array is hashed.
 *
 * NOTE(review): ordering relies on String#localeCompare with the default
 * locale, so the order (and therefore the hash) may vary between runtimes
 * with different locale data. Confirm that is acceptable for this value.
 *
 * @param {Array<object>} models - Catalog model records.
 * @returns {string} Fingerprint in the form `sha256:<hex digest>`.
 */
function buildCatalogFingerprint(models) {
  const orderingKey = (entry) => `${entry.provider || ""}:${entry.name}`;
  const toProjection = (model) => ({
    name: optionalText(model.name) || "",
    provider: optionalText(model.provider) || null,
    adapter: optionalText(model.adapter) || null,
    source_agent_slug: optionalText(model.source_agent_slug) || null,
    model_id: optionalText(model.model_id) || optionalText(model.model) || null,
    exposed: model.exposed !== false,
    capabilities: normalizeCapabilities(model.capabilities).sort(),
    health_status: normalizeHealthStatus(model.health_status)
  });
  const projection = models.map((model) => toProjection(model));
  projection.sort((left, right) => orderingKey(left).localeCompare(orderingKey(right)));
  const digest = sha256Json(projection);
  return `sha256:${digest}`;
}
496
/**
 * Computes the capacity summary for one execution class.
 *
 * @param {object} input
 * @param {number} input.maxConcurrency - Configured concurrency limit.
 * @param {number} input.activeJobs - Jobs currently running.
 * @param {number} input.queuedJobs - Jobs waiting locally.
 * @param {boolean} [input.drainMode] - When truthy, no free slots are reported.
 * @returns {{max_concurrency: number, active_jobs: number, queued_jobs: number, free_slots: number}}
 */
function executionClassCapacity(input) {
  const requestedConcurrency = Math.floor(input.maxConcurrency);
  // Math.max(1, NaN) is NaN, so a missing or non-numeric limit would turn
  // both max_concurrency and free_slots into NaN. Fall back to the minimum
  // of one slot whenever the limit is not a finite number.
  const maxConcurrency = Number.isFinite(requestedConcurrency)
    ? Math.max(1, requestedConcurrency)
    : 1;
  const activeJobs = nonNegativeTelemetryInteger(input.activeJobs);
  const queuedJobs = nonNegativeTelemetryInteger(input.queuedJobs);
  return {
    max_concurrency: maxConcurrency,
    active_jobs: activeJobs,
    queued_jobs: queuedJobs,
    // Draining nodes advertise zero free slots regardless of actual load.
    free_slots: input.drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs)
  };
}
507
/**
 * Reports total host memory as a coarse bucket label instead of an exact size.
 *
 * @returns {string} One of "<=8GiB", "<=16GiB", "<=32GiB", "<=64GiB",
 *   "<=128GiB", ">128GiB", or "unknown" when the total is not a positive
 *   finite number.
 */
function totalHostMemoryBucket() {
  const gib = totalmem() / (1024 ** 3);
  if (!Number.isFinite(gib) || gib <= 0) {
    return "unknown";
  }
  // Ascending upper bounds; the first bound that fits decides the label.
  const buckets = [
    [8, "<=8GiB"],
    [16, "<=16GiB"],
    [32, "<=32GiB"],
    [64, "<=64GiB"],
    [128, "<=128GiB"]
  ];
  const match = buckets.find(([upperBound]) => gib <= upperBound);
  return match ? match[1] : ">128GiB";
}
523
/**
 * Normalizes a reported VRAM tier to one of the known coarse labels.
 *
 * @param {unknown} value - Reported tier label.
 * @param {number} gpuCount - Number of GPUs detected.
 * @returns {string} `value` when it is a known tier; otherwise "unknown"
 *   if at least one GPU is present, or "none" if not.
 */
function coarsePublicVramTier(value, gpuCount) {
  const knownTiers = ["none", "lt8", "8-15", "16-31", "32plus"];
  if (knownTiers.includes(value)) {
    return value;
  }
  if (gpuCount > 0) {
    return "unknown";
  }
  return "none";
}
533
/**
 * Builds a coarse hardware-pressure snapshot from host statistics and the
 * GPU section of a capability payload's public projection.
 *
 * CPU and RAM figures come from `node:os`; GPU figures are read from
 * `capabilityPayload.public_projection.accelerators.gpu` when that path
 * holds a plain object. VRAM usage is always reported as null.
 *
 * @param {object|null|undefined} capabilityPayload - Capability payload, if any.
 * @returns {object} Snapshot with `schema_version`, `collected_at`, `cpu`,
 *   `ram` and `gpu` sections.
 */
function buildCoarseHardwarePressure(capabilityPayload) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";

  const cpuCount = Math.max(1, cpus().length || 1);
  const totalMemory = totalmem();
  const freeMemory = freemem();

  // Walk public_projection -> accelerators -> gpu, tolerating missing or
  // malformed levels by substituting an empty record (or null for the GPU).
  const projection = capabilityPayload?.public_projection;
  const projectionRecord = isObjectLike(projection) && !Array.isArray(projection) ? projection : {};
  const acceleratorsCandidate = projectionRecord.accelerators;
  const accelerators = isObjectLike(acceleratorsCandidate) ? acceleratorsCandidate : {};
  const gpuCandidate = accelerators.gpu;
  const gpu = isObjectLike(gpuCandidate) && !Array.isArray(gpuCandidate) ? gpuCandidate : null;

  const rawGpuCount = gpu?.count;
  let gpuCount = 0;
  if (typeof rawGpuCount === "number" && Number.isFinite(rawGpuCount)) {
    gpuCount = Math.max(0, Math.floor(rawGpuCount));
  }
  const vramTier = coarsePublicVramTier(gpu?.vram_tier, gpuCount);

  const collectedAt = new Date().toISOString();
  const loadRatio = roundedTelemetryNumber((loadavg()[0] || 0) / cpuCount);
  let ramUsedRatio = null;
  if (totalMemory > 0) {
    ramUsedRatio = roundedTelemetryNumber((totalMemory - freeMemory) / totalMemory);
  }

  return {
    schema_version: 1,
    collected_at: collectedAt,
    cpu: {
      core_count: cpuCount,
      load_1m_ratio: loadRatio
    },
    ram: {
      used_ratio: ramUsedRatio,
      total_bucket: totalHostMemoryBucket()
    },
    gpu: {
      available: Boolean(gpu?.available),
      count: gpuCount,
      cuda: Boolean(gpu?.cuda || gpu?.has_cuda),
      vram: {
        total_tier: vramTier,
        used_ratio: null
      }
    }
  };
}
463
574
  function normalizeCapabilities(value) {
464
575
  if (!Array.isArray(value)) {
465
576
  return [];
@@ -631,9 +742,14 @@ function serviceEnvironment(config, env, homeDir) {
631
742
  MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS: String(config.heartbeatIntervalSeconds),
632
743
  MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS: String(config.requestTimeoutMs),
633
744
  MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS: String(config.jobTimeoutMs),
745
+ MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS: String(config.maxConcurrentJobs || 1),
746
+ MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS: String(config.maxConcurrentLlmJobs || config.maxConcurrentJobs || 1),
634
747
  MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED: config.genericJobsEnabled ? "true" : "false",
635
748
  MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS: String(config.genericJobTimeoutMs),
636
749
  MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY: String(config.genericJobMaxConcurrency),
750
+ MSWARM_SELF_HOSTED_DRAIN_MODE: config.drainMode ? "true" : "false",
751
+ MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED: config.loadReportingEnabled === false ? "false" : "true",
752
+ MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED: config.hardwareTelemetryEnabled ? "true" : "false",
637
753
  MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS: config.capabilityProbeTimeoutMs
638
754
  ? String(config.capabilityProbeTimeoutMs)
639
755
  : null
@@ -1041,6 +1157,8 @@ export async function readSelfHostedNodeConfig(env = process.env) {
1041
1157
  optionalText(env.OLLAMA_HOST) ||
1042
1158
  DEFAULT_OLLAMA_BASE_URL;
1043
1159
  const packageNodeVersion = await readPackageNodeVersion();
1160
+ const maxConcurrentJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, state.max_concurrent_jobs || 1);
1161
+ const maxConcurrentLlmJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, state.max_concurrent_llm_jobs || maxConcurrentJobs);
1044
1162
  return {
1045
1163
  gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
1046
1164
  nodeId,
@@ -1070,10 +1188,15 @@ export async function readSelfHostedNodeConfig(env = process.env) {
1070
1188
  heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, state.heartbeat_interval_seconds || DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
1071
1189
  requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, state.request_timeout_ms || DEFAULT_REQUEST_TIMEOUT_MS),
1072
1190
  jobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
1191
+ maxConcurrentJobs,
1192
+ maxConcurrentLlmJobs,
1073
1193
  genericJobsEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED ?? env.MSWARM_SELF_HOSTED_GENERIC_JOBS, state.generic_jobs_enabled === true),
1074
1194
  genericJobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, state.generic_job_timeout_ms || state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
1075
1195
  genericJobMaxConcurrency: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, state.generic_job_max_concurrency || 1),
1076
1196
  capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, state.capability_probe_timeout_ms || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
1197
+ drainMode: parseBoolean(env.MSWARM_SELF_HOSTED_DRAIN_MODE, state.drain_mode === true),
1198
+ loadReportingEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING, state.load_reporting_enabled !== false),
1199
+ hardwareTelemetryEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED ?? env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, state.hardware_telemetry_enabled === true),
1077
1200
  exposeAllModels: resolveDaemonExposeAllModels(env, state),
1078
1201
  modelAllowlist: parseList(env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST || state.model_allowlist),
1079
1202
  modelBlocklist: parseList(env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST || state.model_blocklist)
@@ -1104,6 +1227,8 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
1104
1227
  const allowlist = parseList(options.allow || env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST);
1105
1228
  const blocklist = parseList(options.block || env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST);
1106
1229
  const packageNodeVersion = await readPackageNodeVersion();
1230
+ const maxConcurrentJobs = parsePositiveInteger(options["max-concurrent-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, 1);
1231
+ const maxConcurrentLlmJobs = parsePositiveInteger(options["max-concurrent-llm-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, maxConcurrentJobs);
1107
1232
  return {
1108
1233
  apiKey,
1109
1234
  gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
@@ -1126,10 +1251,17 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
1126
1251
  heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
1127
1252
  requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, DEFAULT_REQUEST_TIMEOUT_MS),
1128
1253
  jobTimeoutMs: parsePositiveInteger(options["job-timeout-ms"] || env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
1254
+ maxConcurrentJobs,
1255
+ maxConcurrentLlmJobs,
1129
1256
  genericJobsEnabled: parseBoolean(options["enable-generic-jobs"] || env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED || env.MSWARM_SELF_HOSTED_GENERIC_JOBS, false),
1130
1257
  genericJobTimeoutMs: parsePositiveInteger(options["generic-job-timeout-ms"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
1131
1258
  genericJobMaxConcurrency: parsePositiveInteger(options["generic-job-max-concurrency"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, 1),
1132
1259
  capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
1260
+ drainMode: parseBoolean(options.drain || env.MSWARM_SELF_HOSTED_DRAIN_MODE, false),
1261
+ loadReportingEnabled: parseBoolean(options["disable-load-reporting"] === true
1262
+ ? false
1263
+ : (env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING), true),
1264
+ hardwareTelemetryEnabled: parseBoolean(options["enable-hardware-telemetry"] || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, false),
1133
1265
  exposeAllModels: resolveOwnerSetupExposeAllModels(options, env),
1134
1266
  modelAllowlist: allowlist,
1135
1267
  modelBlocklist: blocklist,
@@ -1527,10 +1659,6 @@ function mapMcodaAgentToCodaliAgent(agent, fallbackSlug) {
1527
1659
  maxOutputTokens: optionalNumber(agent.maxOutputTokens, agent.max_output_tokens) ?? undefined,
1528
1660
  };
1529
1661
  }
1530
- function isExposedLocalAgent(agent, config) {
1531
- const mapped = mapMcodaAgentToSelfHostedModel(agent, config);
1532
- return Boolean(mapped?.exposed);
1533
- }
1534
1662
  function buildCodaliWorkspace(job) {
1535
1663
  const root = optionalText(job.workspace?.root);
1536
1664
  if (!root) {
@@ -1550,6 +1678,13 @@ const DOCDEX_JOB_ERROR_CODES = new Set([
1550
1678
  "docdex_repo_access_denied",
1551
1679
  "docdex_unavailable",
1552
1680
  ]);
1681
/**
 * Error codes that selfHostedErrorCode recognizes, next to the docdex job
 * codes, when reading an error's `code` or `name`.
 */
const PRE_START_JOB_ERROR_CODES = new Set([
  // Agent selection failures.
  "selected_agent_unavailable",
  "selected_agent_unhealthy",
  // Job envelope validation.
  "validation_failed",
  // Docdex prerequisites.
  "docdex_context_missing",
  "docdex_api_key_missing"
]);
1553
1688
  class SelfHostedDocdexJobError extends Error {
1554
1689
  constructor(code, message) {
1555
1690
  super(message);
@@ -1557,6 +1692,13 @@ class SelfHostedDocdexJobError extends Error {
1557
1692
  this.code = code;
1558
1693
  }
1559
1694
  }
1695
/**
 * Error carrying a machine-readable job error code.
 *
 * The code is stored on both `name` and `code`, so either property can be
 * used to recover it.
 */
class SelfHostedPreStartJobError extends Error {
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Assigns name first, then code, as own properties of the instance.
    Object.assign(this, { name: code, code });
  }
}
1560
1702
  function normalizeDocdexCapabilityMap(value) {
1561
1703
  const record = objectRecord(value);
1562
1704
  if (!record)
@@ -1614,11 +1756,14 @@ function selfHostedErrorCode(error) {
1614
1756
  if (!error || typeof error !== "object")
1615
1757
  return undefined;
1616
1758
  const code = error.code;
1617
- if (typeof code === "string" && DOCDEX_JOB_ERROR_CODES.has(code)) {
1759
+ if (typeof code === "string" &&
1760
+ (DOCDEX_JOB_ERROR_CODES.has(code) || PRE_START_JOB_ERROR_CODES.has(code))) {
1618
1761
  return code;
1619
1762
  }
1620
1763
  const name = error.name;
1621
- return typeof name === "string" && DOCDEX_JOB_ERROR_CODES.has(name) ? name : undefined;
1764
+ return typeof name === "string" && (DOCDEX_JOB_ERROR_CODES.has(name) || PRE_START_JOB_ERROR_CODES.has(name))
1765
+ ? name
1766
+ : undefined;
1622
1767
  }
1623
1768
  function redactRuntimeSecretValues(value, secrets) {
1624
1769
  let output = value;
@@ -3135,6 +3280,16 @@ export class MswarmSelfHostedNodeClient {
3135
3280
  body: JSON.stringify(payload)
3136
3281
  }, this.timeoutMs);
3137
3282
  }
3283
+ async postJobStart(runtimeToken, jobId, payload) {
3284
+ return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/start`, {
3285
+ method: "POST",
3286
+ headers: {
3287
+ "content-type": "application/json",
3288
+ authorization: `Bearer ${runtimeToken}`
3289
+ },
3290
+ body: JSON.stringify(payload)
3291
+ }, this.timeoutMs);
3292
+ }
3138
3293
  async postJobEvents(runtimeToken, jobId, payload) {
3139
3294
  return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/events`, {
3140
3295
  method: "POST",
@@ -3148,6 +3303,12 @@ export class MswarmSelfHostedNodeClient {
3148
3303
  }
3149
3304
  export class SelfHostedNodeRuntime {
3150
3305
  constructor(config, deps) {
3306
+ this.activeLlmJobs = 0;
3307
+ this.activeGenericJobs = 0;
3308
+ this.queuedLlmJobs = 0;
3309
+ this.queuedGenericJobs = 0;
3310
+ this.latencySamplesMs = [];
3311
+ this.recentFailures = [];
3151
3312
  this.config = config;
3152
3313
  this.gateway =
3153
3314
  deps?.gateway ||
@@ -3192,6 +3353,100 @@ export class SelfHostedNodeRuntime {
3192
3353
  rootDir: config.artifactStorePath || defaultArtifactStorePath()
3193
3354
  });
3194
3355
  }
3356
+ updateLocalQueueTelemetry(input) {
3357
+ if (input.llmQueuedJobs !== undefined) {
3358
+ this.queuedLlmJobs = nonNegativeTelemetryInteger(input.llmQueuedJobs);
3359
+ }
3360
+ if (input.genericQueuedJobs !== undefined) {
3361
+ this.queuedGenericJobs = nonNegativeTelemetryInteger(input.genericQueuedJobs);
3362
+ }
3363
+ }
3364
+ beginExecutionTelemetry(executionClass) {
3365
+ if (executionClass === "generic_job") {
3366
+ this.activeGenericJobs += 1;
3367
+ return;
3368
+ }
3369
+ this.activeLlmJobs += 1;
3370
+ }
3371
+ finishExecutionTelemetry(input) {
3372
+ if (input.executionClass === "generic_job") {
3373
+ this.activeGenericJobs = Math.max(0, this.activeGenericJobs - 1);
3374
+ }
3375
+ else {
3376
+ this.activeLlmJobs = Math.max(0, this.activeLlmJobs - 1);
3377
+ }
3378
+ this.latencySamplesMs.push(Math.max(0, Date.now() - input.startedAt));
3379
+ while (this.latencySamplesMs.length > MAX_TELEMETRY_LATENCY_SAMPLES) {
3380
+ this.latencySamplesMs.shift();
3381
+ }
3382
+ if (!input.ok) {
3383
+ this.recentFailures.unshift({
3384
+ execution_class: input.executionClass === "generic_job" ? "generic_job" : "agentic",
3385
+ code: optionalText(input.code) || "upstream_error",
3386
+ at: new Date().toISOString()
3387
+ });
3388
+ this.recentFailures.splice(MAX_TELEMETRY_FAILURES);
3389
+ }
3390
+ }
3391
+ averageLatencyMs(fallback = null) {
3392
+ if (this.latencySamplesMs.length === 0) {
3393
+ return fallback;
3394
+ }
3395
+ const total = this.latencySamplesMs.reduce((sum, value) => sum + value, 0);
3396
+ return Math.round(total / this.latencySamplesMs.length);
3397
+ }
3398
+ buildLoadTelemetry(input) {
3399
+ const drainMode = this.config.drainMode === true;
3400
+ const llmMaxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentLlmJobs || this.config.maxConcurrentJobs || 1));
3401
+ const genericMaxConcurrency = Math.max(1, Math.floor(this.config.genericJobMaxConcurrency || 1));
3402
+ const maxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentJobs || 1), llmMaxConcurrency, this.config.genericJobsEnabled ? genericMaxConcurrency : 1);
3403
+ const activeLlmJobs = nonNegativeTelemetryInteger(this.activeLlmJobs);
3404
+ const activeGenericJobs = nonNegativeTelemetryInteger(this.activeGenericJobs);
3405
+ const queuedLlmJobs = nonNegativeTelemetryInteger(this.queuedLlmJobs);
3406
+ const queuedGenericJobs = nonNegativeTelemetryInteger(this.queuedGenericJobs);
3407
+ const llmCapacity = executionClassCapacity({
3408
+ maxConcurrency: llmMaxConcurrency,
3409
+ activeJobs: activeLlmJobs,
3410
+ queuedJobs: queuedLlmJobs,
3411
+ drainMode
3412
+ });
3413
+ const genericCapacity = executionClassCapacity({
3414
+ maxConcurrency: genericMaxConcurrency,
3415
+ activeJobs: activeGenericJobs,
3416
+ queuedJobs: queuedGenericJobs,
3417
+ drainMode: drainMode || !this.config.genericJobsEnabled
3418
+ });
3419
+ const activeJobs = activeLlmJobs + activeGenericJobs;
3420
+ const queuedJobs = queuedLlmJobs + queuedGenericJobs;
3421
+ const freeSlots = drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs);
3422
+ const failures = this.recentFailures.slice(0, 10);
3423
+ const discoveryFailureCount = nonNegativeTelemetryInteger(input.discoveryFailureCount);
3424
+ const telemetry = {
3425
+ runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
3426
+ load_balancer_protocol_version: SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION,
3427
+ catalog_metadata_version: SELF_HOSTED_CATALOG_METADATA_VERSION,
3428
+ catalog_fingerprint: buildCatalogFingerprint(input.models),
3429
+ max_concurrency: maxConcurrency,
3430
+ max_concurrent_llm_jobs: llmMaxConcurrency,
3431
+ max_concurrent_generic_jobs: this.config.genericJobsEnabled ? genericMaxConcurrency : 0,
3432
+ active_jobs: activeJobs,
3433
+ queued_jobs: queuedJobs,
3434
+ free_slots: freeSlots,
3435
+ drain_mode: drainMode,
3436
+ execution_class_capacity: {
3437
+ chat: llmCapacity,
3438
+ agentic: llmCapacity,
3439
+ generic_job: genericCapacity
3440
+ },
3441
+ avg_latency_ms: this.averageLatencyMs(input.discoveryLatencyMs ?? null),
3442
+ recent_failure_count: failures.length + discoveryFailureCount,
3443
+ recent_failures: failures
3444
+ };
3445
+ if (this.config.hardwareTelemetryEnabled === true) {
3446
+ telemetry.hardware_pressure = buildCoarseHardwarePressure(input.capabilityPayload || null);
3447
+ }
3448
+ return telemetry;
3449
+ }
3195
3450
  static async setup(setupConfig, deps) {
3196
3451
  const gateway = deps?.gateway ||
3197
3452
  new MswarmSelfHostedNodeClient({
@@ -3213,6 +3468,11 @@ export class SelfHostedNodeRuntime {
3213
3468
  model_allowlist: setupConfig.modelAllowlist,
3214
3469
  model_blocklist: setupConfig.modelBlocklist,
3215
3470
  heartbeat_interval_seconds: setupConfig.heartbeatIntervalSeconds,
3471
+ max_concurrent_jobs: setupConfig.maxConcurrentJobs,
3472
+ max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
3473
+ drain_mode: setupConfig.drainMode,
3474
+ load_reporting_enabled: setupConfig.loadReportingEnabled,
3475
+ hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
3216
3476
  generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency
3217
3477
  });
3218
3478
  const nodeId = optionalText(bootstrap.node?.node_id);
@@ -3242,10 +3502,15 @@ export class SelfHostedNodeRuntime {
3242
3502
  node_version: setupConfig.nodeVersion,
3243
3503
  request_timeout_ms: setupConfig.requestTimeoutMs,
3244
3504
  job_timeout_ms: setupConfig.jobTimeoutMs,
3505
+ max_concurrent_jobs: setupConfig.maxConcurrentJobs,
3506
+ max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
3245
3507
  generic_jobs_enabled: setupConfig.genericJobsEnabled,
3246
3508
  generic_job_timeout_ms: setupConfig.genericJobTimeoutMs,
3247
3509
  generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency,
3248
3510
  capability_probe_timeout_ms: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
3511
+ drain_mode: setupConfig.drainMode,
3512
+ load_reporting_enabled: setupConfig.loadReportingEnabled,
3513
+ hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
3249
3514
  expose_all_models: setupConfig.exposeAllModels,
3250
3515
  exposure_policy: setupConfig.exposeAllModels ? "all" : "none",
3251
3516
  model_allowlist: setupConfig.modelAllowlist,
@@ -3276,10 +3541,15 @@ export class SelfHostedNodeRuntime {
3276
3541
  heartbeatIntervalSeconds: heartbeatInterval,
3277
3542
  requestTimeoutMs: setupConfig.requestTimeoutMs,
3278
3543
  jobTimeoutMs: setupConfig.jobTimeoutMs,
3544
+ maxConcurrentJobs: setupConfig.maxConcurrentJobs,
3545
+ maxConcurrentLlmJobs: setupConfig.maxConcurrentLlmJobs,
3279
3546
  genericJobsEnabled: setupConfig.genericJobsEnabled,
3280
3547
  genericJobTimeoutMs: setupConfig.genericJobTimeoutMs,
3281
3548
  genericJobMaxConcurrency: setupConfig.genericJobMaxConcurrency,
3282
3549
  capabilityProbeTimeoutMs: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
3550
+ drainMode: setupConfig.drainMode,
3551
+ loadReportingEnabled: setupConfig.loadReportingEnabled,
3552
+ hardwareTelemetryEnabled: setupConfig.hardwareTelemetryEnabled,
3283
3553
  exposeAllModels: setupConfig.exposeAllModels,
3284
3554
  modelAllowlist: setupConfig.modelAllowlist,
3285
3555
  modelBlocklist: setupConfig.modelBlocklist
@@ -3386,9 +3656,14 @@ export class SelfHostedNodeRuntime {
3386
3656
  node_version: this.config.nodeVersion,
3387
3657
  request_timeout_ms: this.config.requestTimeoutMs,
3388
3658
  job_timeout_ms: this.config.jobTimeoutMs,
3659
+ max_concurrent_jobs: this.config.maxConcurrentJobs,
3660
+ max_concurrent_llm_jobs: this.config.maxConcurrentLlmJobs,
3389
3661
  generic_jobs_enabled: this.config.genericJobsEnabled,
3390
3662
  generic_job_timeout_ms: this.config.genericJobTimeoutMs,
3391
3663
  generic_job_max_concurrency: this.config.genericJobMaxConcurrency,
3664
+ drain_mode: this.config.drainMode === true,
3665
+ load_reporting_enabled: this.config.loadReportingEnabled !== false,
3666
+ hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
3392
3667
  expose_all_models: this.config.exposeAllModels,
3393
3668
  exposure_policy: this.config.exposeAllModels ? "all" : "none",
3394
3669
  model_allowlist: this.config.modelAllowlist,
@@ -3399,26 +3674,38 @@ export class SelfHostedNodeRuntime {
3399
3674
  return { runtimeToken, state: nextState, enrolled: true };
3400
3675
  }
3401
3676
  async resolveMcodaAgentForJob(job) {
3402
- const selected = optionalText(job.source_agent_slug) ||
3403
- optionalText(job.agent_slug) ||
3404
- optionalText(job.model) ||
3405
- optionalText(job.openai_request.model);
3677
+ const selectedSourceAgentSlug = optionalText(job.source_agent_slug);
3678
+ const selectedAgentSlug = optionalText(job.agent_slug);
3679
+ const selectedModel = optionalText(job.model) || optionalText(job.openai_request.model);
3680
+ const selected = selectedSourceAgentSlug || selectedAgentSlug || selectedModel;
3406
3681
  if (!selected) {
3407
- throw new Error("mcoda source agent slug is required");
3682
+ throw new SelfHostedPreStartJobError("selected_agent_unavailable", "mcoda source agent slug is required");
3408
3683
  }
3409
3684
  const rawAgents = await this.mcoda.listRawAgents();
3685
+ const strictSelectedAgent = selectedSourceAgentSlug || selectedAgentSlug;
3410
3686
  const agent = rawAgents.find((entry) => {
3411
3687
  const slug = optionalText(entry.slug);
3688
+ if (strictSelectedAgent) {
3689
+ return slug === strictSelectedAgent;
3690
+ }
3412
3691
  const defaultModel = mcodaAgentDefaultModel(entry);
3413
3692
  return slug === selected || defaultModel === selected;
3414
3693
  });
3415
- if (!agent || !isExposedLocalAgent(agent, this.config)) {
3416
- throw new Error("selected local mcoda agent is not exposed by this node");
3694
+ if (!agent) {
3695
+ throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not available on this node`);
3696
+ }
3697
+ const mapped = mapMcodaAgentToSelfHostedModel(agent, this.config);
3698
+ if (!mapped?.exposed) {
3699
+ throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not exposed by this node`);
3700
+ }
3701
+ if (mapped.health_status && mapped.health_status !== "healthy" && mapped.health_status !== "unknown") {
3702
+ throw new SelfHostedPreStartJobError("selected_agent_unhealthy", `selected local mcoda agent ${selected} is ${mapped.health_status}`);
3417
3703
  }
3418
3704
  return mapMcodaAgentToCodaliAgent(agent, selected);
3419
3705
  }
3420
3706
  async executeGenericJob(envelope, options = {}) {
3421
3707
  const startedAt = Date.now();
3708
+ this.beginExecutionTelemetry("generic_job");
3422
3709
  const events = [];
3423
3710
  let sequence = 0;
3424
3711
  const emitEvent = async (event) => {
@@ -3449,6 +3736,12 @@ export class SelfHostedNodeRuntime {
3449
3736
  },
3450
3737
  finished_at: new Date().toISOString()
3451
3738
  };
3739
+ this.finishExecutionTelemetry({
3740
+ executionClass: "generic_job",
3741
+ startedAt,
3742
+ ok: false,
3743
+ code
3744
+ });
3452
3745
  return {
3453
3746
  job_id: envelope.job_id,
3454
3747
  request_id: envelope.request_id,
@@ -3554,6 +3847,12 @@ export class SelfHostedNodeRuntime {
3554
3847
  runner: runner.id
3555
3848
  }
3556
3849
  });
3850
+ this.finishExecutionTelemetry({
3851
+ executionClass: "generic_job",
3852
+ startedAt,
3853
+ ok: status === "succeeded",
3854
+ code: runnerResult.error?.code || status
3855
+ });
3557
3856
  return {
3558
3857
  job_id: envelope.job_id,
3559
3858
  request_id: envelope.request_id,
@@ -3579,7 +3878,9 @@ export class SelfHostedNodeRuntime {
3579
3878
  }
3580
3879
  async executeJob(job, options = {}) {
3581
3880
  const startedAt = Date.now();
3881
+ this.beginExecutionTelemetry("llm");
3582
3882
  let selectedAgent;
3883
+ let jobStarted = false;
3583
3884
  const progressEvents = [];
3584
3885
  const streamEvents = [];
3585
3886
  const recordProgress = async (event) => {
@@ -3590,13 +3891,35 @@ export class SelfHostedNodeRuntime {
3590
3891
  streamEvents.push(chunk);
3591
3892
  await options.onOpenAIChunk?.(chunk);
3592
3893
  };
3894
+ const acknowledgeStarted = async (agent) => {
3895
+ if (jobStarted) {
3896
+ return;
3897
+ }
3898
+ await options.onStarted?.({
3899
+ job_id: job.job_id,
3900
+ request_id: job.request_id,
3901
+ node_id: job.node_id,
3902
+ agent_slug: optionalText(job.agent_slug) || agent?.slug || "",
3903
+ source_agent_slug: optionalText(job.source_agent_slug) || agent?.slug || null,
3904
+ model: optionalText(job.model) || optionalText(job.openai_request.model)
3905
+ });
3906
+ jobStarted = true;
3907
+ };
3593
3908
  if (job.node_id !== this.config.nodeId) {
3594
- return {
3909
+ const result = {
3595
3910
  job_id: job.job_id,
3596
3911
  request_id: job.request_id,
3597
3912
  status: "failed",
3913
+ pre_start_failure: true,
3598
3914
  error: { code: "validation_failed", message: "job node_id does not match this node" }
3599
3915
  };
3916
+ this.finishExecutionTelemetry({
3917
+ executionClass: "llm",
3918
+ startedAt,
3919
+ ok: false,
3920
+ code: "validation_failed"
3921
+ });
3922
+ return result;
3600
3923
  }
3601
3924
  try {
3602
3925
  if (job.provider === "ollama") {
@@ -3609,7 +3932,8 @@ export class SelfHostedNodeRuntime {
3609
3932
  options.num_predict = job.openai_request.max_tokens;
3610
3933
  if (job.openai_request.stop !== undefined)
3611
3934
  options.stop = job.openai_request.stop;
3612
- const result = await this.jobOllama.chat({
3935
+ await acknowledgeStarted();
3936
+ const ollamaResult = await this.jobOllama.chat({
3613
3937
  model: job.model || job.openai_request.model,
3614
3938
  messages: job.openai_request.messages,
3615
3939
  options,
@@ -3622,7 +3946,7 @@ export class SelfHostedNodeRuntime {
3622
3946
  created: Math.floor(Date.now() / 1000),
3623
3947
  model: job.openai_request.model,
3624
3948
  choices: [
3625
- { index: 0, delta: { content: result.content }, finish_reason: null }
3949
+ { index: 0, delta: { content: ollamaResult.content }, finish_reason: null }
3626
3950
  ]
3627
3951
  });
3628
3952
  await emitOpenAIChunk({
@@ -3635,22 +3959,28 @@ export class SelfHostedNodeRuntime {
3635
3959
  ]
3636
3960
  });
3637
3961
  }
3638
- return {
3962
+ const invocationResult = {
3639
3963
  job_id: job.job_id,
3640
3964
  request_id: job.request_id,
3641
3965
  status: "success",
3642
3966
  openai_response: buildOpenAIChatCompletion({
3643
3967
  requestId: job.request_id,
3644
3968
  model: job.openai_request.model,
3645
- content: result.content,
3646
- promptTokens: result.promptTokens,
3647
- completionTokens: result.completionTokens,
3648
- metadata: { provider: "ollama", raw: result.raw }
3969
+ content: ollamaResult.content,
3970
+ promptTokens: ollamaResult.promptTokens,
3971
+ completionTokens: ollamaResult.completionTokens,
3972
+ metadata: { provider: "ollama", raw: ollamaResult.raw }
3649
3973
  }),
3650
3974
  ...(streamEvents.length ? { stream_events: streamEvents } : {}),
3651
3975
  ...(progressEvents.length ? { progress_events: progressEvents } : {}),
3652
3976
  timing: { local_latency_ms: Date.now() - startedAt }
3653
3977
  };
3978
+ this.finishExecutionTelemetry({
3979
+ executionClass: "llm",
3980
+ startedAt,
3981
+ ok: true
3982
+ });
3983
+ return invocationResult;
3654
3984
  }
3655
3985
  const taskPreview = messagesToPrompt(job.openai_request.messages);
3656
3986
  if (!taskPreview) {
@@ -3660,6 +3990,7 @@ export class SelfHostedNodeRuntime {
3660
3990
  selectedAgent = agent;
3661
3991
  validateRequiredDocdexContext(job, options.attachedMswarmApiKey);
3662
3992
  const attachedMswarmApiKey = attachedMswarmApiKeyForDocdex(job, options.attachedMswarmApiKey);
3993
+ await acknowledgeStarted(agent);
3663
3994
  await recordProgress({
3664
3995
  type: "agent_selected",
3665
3996
  job_id: job.job_id,
@@ -3698,7 +4029,7 @@ export class SelfHostedNodeRuntime {
3698
4029
  }
3699
4030
  });
3700
4031
  const tokens = usageTokens(response.usage);
3701
- return {
4032
+ const result = {
3702
4033
  job_id: job.job_id,
3703
4034
  request_id: job.request_id,
3704
4035
  status: "success",
@@ -3724,6 +4055,12 @@ export class SelfHostedNodeRuntime {
3724
4055
  ...(progressEvents.length ? { progress_events: progressEvents } : {}),
3725
4056
  timing: { local_latency_ms: Date.now() - startedAt }
3726
4057
  };
4058
+ this.finishExecutionTelemetry({
4059
+ executionClass: "llm",
4060
+ startedAt,
4061
+ ok: true
4062
+ });
4063
+ return result;
3727
4064
  }
3728
4065
  catch (error) {
3729
4066
  const message = redactRuntimeSecretValues(error instanceof Error ? error.message : String(error), [selectedAgent?.apiKey, options.attachedMswarmApiKey]);
@@ -3736,10 +4073,11 @@ export class SelfHostedNodeRuntime {
3736
4073
  : /permission|policy|denied/i.test(message)
3737
4074
  ? "policy_denied"
3738
4075
  : "upstream_error");
3739
- return {
4076
+ const result = {
3740
4077
  job_id: job.job_id,
3741
4078
  request_id: job.request_id,
3742
4079
  status: "failed",
4080
+ ...(!jobStarted ? { pre_start_failure: true } : {}),
3743
4081
  error: {
3744
4082
  code,
3745
4083
  message
@@ -3748,6 +4086,13 @@ export class SelfHostedNodeRuntime {
3748
4086
  ...(progressEvents.length ? { progress_events: progressEvents } : {}),
3749
4087
  timing: { local_latency_ms: Date.now() - startedAt }
3750
4088
  };
4089
+ this.finishExecutionTelemetry({
4090
+ executionClass: "llm",
4091
+ startedAt,
4092
+ ok: false,
4093
+ code
4094
+ });
4095
+ return result;
3751
4096
  }
3752
4097
  }
3753
4098
  async runOnce() {
@@ -3772,12 +4117,49 @@ export class SelfHostedNodeRuntime {
3772
4117
  models = [];
3773
4118
  version = null;
3774
4119
  }
4120
+ const discoveryLatencyMs = Date.now() - startedAt;
3775
4121
  const capabilityPayload = await this.buildCapabilityHeartbeatPayload(enrollment.runtimeToken);
4122
+ const loadTelemetry = this.buildLoadTelemetry({
4123
+ models,
4124
+ discoveryLatencyMs,
4125
+ discoveryFailureCount: recentFailureCount,
4126
+ capabilityPayload
4127
+ });
4128
+ const exposedModelCount = models.filter((model) => model.exposed !== false).length;
4129
+ const loadReportingEnabled = this.config.loadReportingEnabled !== false;
4130
+ const capacityPayload = loadReportingEnabled
4131
+ ? {
4132
+ protocol_version: loadTelemetry.runtime_protocol_version,
4133
+ runtime_protocol_version: loadTelemetry.runtime_protocol_version,
4134
+ load_balancer_protocol_version: loadTelemetry.load_balancer_protocol_version,
4135
+ catalog_metadata_version: loadTelemetry.catalog_metadata_version,
4136
+ catalog_fingerprint: loadTelemetry.catalog_fingerprint,
4137
+ max_concurrency: loadTelemetry.max_concurrency,
4138
+ max_concurrent_llm_jobs: loadTelemetry.max_concurrent_llm_jobs,
4139
+ max_concurrent_generic_jobs: loadTelemetry.max_concurrent_generic_jobs,
4140
+ active_jobs: loadTelemetry.active_jobs,
4141
+ queued_jobs: loadTelemetry.queued_jobs,
4142
+ free_slots: loadTelemetry.free_slots,
4143
+ drain_mode: loadTelemetry.drain_mode,
4144
+ execution_class_capacity: loadTelemetry.execution_class_capacity
4145
+ }
4146
+ : {
4147
+ active_jobs: loadTelemetry.active_jobs,
4148
+ queued_jobs: loadTelemetry.queued_jobs
4149
+ };
3776
4150
  const heartbeatPayload = {
3777
4151
  node_id: this.config.nodeId,
3778
4152
  node_version: this.config.nodeVersion,
4153
+ runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
3779
4154
  config_version: enrollment.state.config_version ?? null,
3780
4155
  status,
4156
+ runtime: {
4157
+ protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
4158
+ relay_mode: this.config.relayMode || "outbound",
4159
+ load_reporting_enabled: loadReportingEnabled,
4160
+ hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
4161
+ drain_mode: this.config.drainMode === true
4162
+ },
3781
4163
  discovery: {
3782
4164
  source: discoverySource,
3783
4165
  mcoda_status: discoverySource === "mcoda" && status === "online" ? "ok" : status === "degraded" ? "error" : null
@@ -3791,20 +4173,24 @@ export class SelfHostedNodeRuntime {
3791
4173
  status: null,
3792
4174
  version: null
3793
4175
  },
3794
- capacity: {
3795
- active_jobs: 0,
3796
- queued_jobs: 0
3797
- },
4176
+ capacity: capacityPayload,
3798
4177
  health: {
3799
- avg_latency_ms: Date.now() - startedAt,
3800
- recent_failure_count: recentFailureCount,
4178
+ avg_latency_ms: loadTelemetry.avg_latency_ms ?? discoveryLatencyMs,
4179
+ recent_failure_count: loadTelemetry.recent_failure_count,
4180
+ recent_failures: loadTelemetry.recent_failures,
3801
4181
  last_success_at: status === "online" ? new Date().toISOString() : null
3802
4182
  },
4183
+ local_agent_catalog: {
4184
+ revision: loadTelemetry.catalog_fingerprint,
4185
+ metadata_version: loadTelemetry.catalog_metadata_version,
4186
+ model_count: models.length,
4187
+ exposed_model_count: exposedModelCount
4188
+ },
3803
4189
  models,
3804
- capabilities: capabilityPayload
4190
+ capabilities: capabilityPayload,
4191
+ ...(loadTelemetry.hardware_pressure ? { hardware_pressure: loadTelemetry.hardware_pressure } : {})
3805
4192
  };
3806
4193
  const heartbeatResponse = await this.gateway.heartbeat(enrollment.runtimeToken, heartbeatPayload);
3807
- const exposedModelCount = models.filter((model) => model.exposed !== false).length;
3808
4194
  return {
3809
4195
  enrolled: enrollment.enrolled,
3810
4196
  status,
@@ -3812,6 +4198,7 @@ export class SelfHostedNodeRuntime {
3812
4198
  discovery_source: discoverySource,
3813
4199
  mcoda_agent_count: discoverySource === "mcoda" ? exposedModelCount : undefined,
3814
4200
  ollama_version: version,
4201
+ capacity: loadTelemetry,
3815
4202
  heartbeat_response: heartbeatResponse
3816
4203
  };
3817
4204
  }
@@ -3846,9 +4233,17 @@ export class SelfHostedNodeRuntime {
3846
4233
  }
3847
4234
  async pollAndExecuteJob(waitMs = DEFAULT_JOB_POLL_WAIT_MS) {
3848
4235
  const enrollment = await this.ensureEnrolled();
4236
+ const pollCapacity = this.buildLoadTelemetry({ models: [] });
3849
4237
  const response = await this.gateway.pollJob(enrollment.runtimeToken, {
3850
4238
  node_id: this.config.nodeId,
3851
- capacity: { active_jobs: 0, max_jobs: 1 },
4239
+ capacity: {
4240
+ active_jobs: pollCapacity.active_jobs,
4241
+ queued_jobs: pollCapacity.queued_jobs,
4242
+ max_jobs: pollCapacity.max_concurrency,
4243
+ max_concurrency: pollCapacity.max_concurrency,
4244
+ free_slots: pollCapacity.free_slots,
4245
+ drain_mode: pollCapacity.drain_mode
4246
+ },
3852
4247
  wait_ms: waitMs
3853
4248
  });
3854
4249
  const job = response.job || null;
@@ -3876,6 +4271,14 @@ export class SelfHostedNodeRuntime {
3876
4271
  };
3877
4272
  const result = await this.executeJob(job, {
3878
4273
  attachedMswarmApiKey: optionalText(response.attached_mswarm_api_key) || undefined,
4274
+ onStarted: async (event) => {
4275
+ await this.gateway.postJobStart(enrollment.runtimeToken, job.job_id, {
4276
+ node_id: this.config.nodeId,
4277
+ agent_slug: event.agent_slug || job.agent_slug,
4278
+ source_agent_slug: event.source_agent_slug || job.source_agent_slug || null,
4279
+ model: event.model || job.model || job.openai_request.model
4280
+ });
4281
+ },
3879
4282
  onOpenAIChunk: async (chunk) => {
3880
4283
  if (job.openai_request.stream !== true || streamEventForwardingFailed) {
3881
4284
  return;