@mcoda/mswarm 0.1.78 → 0.1.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -0
- package/dist/runtime.d.ts +74 -0
- package/dist/runtime.d.ts.map +1 -1
- package/dist/runtime.js +436 -33
- package/dist/runtime.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +15 -0
- package/dist/server.js.map +1 -1
- package/package.json +3 -3
package/dist/runtime.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { chmod, lstat, mkdir, readdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
2
2
|
import { dirname, isAbsolute, join, relative, resolve } from "node:path";
|
|
3
|
-
import { hostname, homedir, platform, userInfo } from "node:os";
|
|
3
|
+
import { cpus, freemem, hostname, homedir, loadavg, platform, totalmem, userInfo } from "node:os";
|
|
4
4
|
import { spawn } from "node:child_process";
|
|
5
5
|
import { createHash, createHmac, randomUUID } from "node:crypto";
|
|
6
6
|
import { MswarmCodaliExecutor } from "./codali-executor.js";
|
|
@@ -16,6 +16,11 @@ const DEFAULT_REQUEST_TIMEOUT_MS = 10000;
|
|
|
16
16
|
const DEFAULT_JOB_TIMEOUT_MS = 3600000;
|
|
17
17
|
const DEFAULT_SERVICE_COMMAND_TIMEOUT_MS = 60000;
|
|
18
18
|
const DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS = 2000;
|
|
19
|
+
// Reported as `runtime_protocol_version` in the load telemetry payload.
const SELF_HOSTED_RUNTIME_PROTOCOL_VERSION = 1;
|
|
20
|
+
// Reported as `load_balancer_protocol_version` in the load telemetry payload.
const SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION = 1;
|
|
21
|
+
// Reported as `catalog_metadata_version` in the load telemetry payload.
const SELF_HOSTED_CATALOG_METADATA_VERSION = 1;
|
|
22
|
+
// Upper bound on retained job latency samples; older samples are dropped first.
const MAX_TELEMETRY_LATENCY_SAMPLES = 50;
|
|
23
|
+
// Upper bound on retained failure records; the runtime truncates its list to this length.
const MAX_TELEMETRY_FAILURES = 20;
|
|
19
24
|
const DEFAULT_MCODA_BIN = "mcoda";
|
|
20
25
|
const DEFAULT_MCODA_LIST_ARGS = ["agent", "list", "--json", "--refresh-health"];
|
|
21
26
|
const DEFAULT_COMMAND_MAX_BUFFER = 16 * 1024 * 1024;
|
|
@@ -460,6 +465,112 @@ function optionalBoolean(...values) {
|
|
|
460
465
|
}
|
|
461
466
|
return null;
|
|
462
467
|
}
|
|
468
|
+
/**
 * Round a telemetry reading to a fixed number of decimal places.
 *
 * @param {*} value - Reading to round; anything that is not a finite number yields 0.
 * @param {number} [digits=3] - Number of decimal places to keep.
 * @returns {number} The rounded value, or 0 for non-finite / non-numeric input.
 */
function roundedTelemetryNumber(value, digits = 3) {
    if (Number.isFinite(value)) {
        const scale = 10 ** digits;
        return Math.round(value * scale) / scale;
    }
    return 0;
}
|
|
475
|
+
/**
 * Coerce a counter-like value to a non-negative integer.
 *
 * @param {*} value - Candidate counter value.
 * @returns {number} `Math.floor(value)` for positive finite numbers, otherwise 0.
 */
function nonNegativeTelemetryInteger(value) {
    if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
        return 0;
    }
    return Math.floor(value);
}
|
|
478
|
+
/**
 * Hash the JSON serialisation of a value with SHA-256.
 *
 * @param {*} value - Value to serialise with `JSON.stringify`.
 * @returns {string} Lowercase hex digest of the serialised text.
 */
function sha256Json(value) {
    const serialized = JSON.stringify(value);
    const hasher = createHash("sha256");
    hasher.update(serialized);
    return hasher.digest("hex");
}
|
|
481
|
+
/**
 * Compute a fingerprint for a model catalog.
 *
 * Each model is reduced to a fixed set of fields, the reduced entries are
 * ordered by their `provider:name` key, and the ordered list is hashed.
 *
 * @param {Array<object>} models - Catalog entries to fingerprint.
 * @returns {string} The digest prefixed with `sha256:`.
 */
function buildCatalogFingerprint(models) {
    const orderingKey = (entry) => `${entry.provider || ""}:${entry.name}`;
    const projectModel = (model) => {
        return {
            name: optionalText(model.name) || "",
            provider: optionalText(model.provider) || null,
            adapter: optionalText(model.adapter) || null,
            source_agent_slug: optionalText(model.source_agent_slug) || null,
            // Fall back to the `model` field when no explicit `model_id` is present.
            model_id: optionalText(model.model_id) || optionalText(model.model) || null,
            // Only an explicit `false` marks a model as not exposed.
            exposed: model.exposed !== false,
            capabilities: normalizeCapabilities(model.capabilities).sort(),
            health_status: normalizeHealthStatus(model.health_status)
        };
    };
    const projection = models.map(projectModel);
    projection.sort((left, right) => orderingKey(left).localeCompare(orderingKey(right)));
    const digest = sha256Json(projection);
    return `sha256:${digest}`;
}
|
|
496
|
+
/**
 * Summarise the capacity of one execution class.
 *
 * @param {object} input
 * @param {number} input.maxConcurrency - Configured concurrency limit; floored and
 *   clamped to at least 1. A missing or non-numeric limit is treated as 1.
 * @param {*} input.activeJobs - Currently running jobs (sanitised to a non-negative integer).
 * @param {*} input.queuedJobs - Currently queued jobs (sanitised to a non-negative integer).
 * @param {boolean} [input.drainMode] - When truthy, no free slots are reported.
 * @returns {{max_concurrency: number, active_jobs: number, queued_jobs: number, free_slots: number}}
 */
function executionClassCapacity(input) {
    const requestedConcurrency = Math.floor(input.maxConcurrency);
    // Math.max propagates NaN, so an undefined or non-numeric limit would otherwise
    // turn both max_concurrency and free_slots into NaN (serialised as null).
    const maxConcurrency = Number.isNaN(requestedConcurrency)
        ? 1
        : Math.max(1, requestedConcurrency);
    const activeJobs = nonNegativeTelemetryInteger(input.activeJobs);
    const queuedJobs = nonNegativeTelemetryInteger(input.queuedJobs);
    return {
        max_concurrency: maxConcurrency,
        active_jobs: activeJobs,
        queued_jobs: queuedJobs,
        // Queued work counts against capacity as well as running work.
        free_slots: input.drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs)
    };
}
|
|
507
|
+
/**
 * Classify the host's total memory into a coarse size bucket.
 *
 * @returns {string} One of "<=8GiB", "<=16GiB", "<=32GiB", "<=64GiB",
 *   "<=128GiB", ">128GiB", or "unknown" when the reading is not a positive finite number.
 */
function totalHostMemoryBucket() {
    const totalGib = totalmem() / (1024 ** 3);
    if (!(Number.isFinite(totalGib) && totalGib > 0)) {
        return "unknown";
    }
    // Ascending upper bounds; the first bound that fits determines the label.
    const buckets = [
        [8, "<=8GiB"],
        [16, "<=16GiB"],
        [32, "<=32GiB"],
        [64, "<=64GiB"],
        [128, "<=128GiB"]
    ];
    for (const [upperBoundGib, label] of buckets) {
        if (totalGib <= upperBoundGib) {
            return label;
        }
    }
    return ">128GiB";
}
|
|
523
|
+
/**
 * Pass through a recognised VRAM tier label, or derive a fallback.
 *
 * @param {*} value - Reported tier; kept only when it is one of the known labels.
 * @param {number} gpuCount - Number of GPUs, used to choose the fallback.
 * @returns {string} The known tier, otherwise "unknown" when GPUs are present and "none" when not.
 */
function coarsePublicVramTier(value, gpuCount) {
    const knownTiers = ["none", "lt8", "8-15", "16-31", "32plus"];
    if (knownTiers.includes(value)) {
        return value;
    }
    if (gpuCount > 0) {
        return "unknown";
    }
    return "none";
}
|
|
533
|
+
/**
 * Build a coarse snapshot of host hardware pressure.
 *
 * CPU and RAM figures are read from the `node:os` helpers; GPU facts are taken
 * from `capabilityPayload.public_projection.accelerators.gpu` when that path
 * holds object values, and default to "no GPU" otherwise.
 *
 * @param {object|null|undefined} capabilityPayload - Capability payload that may carry a public projection.
 * @returns {object} Snapshot with `schema_version`, `collected_at`, `cpu`, `ram` and `gpu` sections.
 */
function buildCoarseHardwarePressure(capabilityPayload) {
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
    const coreCount = Math.max(1, cpus().length || 1);
    const totalBytes = totalmem();
    const freeBytes = freemem();
    // Walk the projection defensively: every level falls back to an empty record.
    const publicProjection = capabilityPayload?.public_projection;
    const projectionRecord = isObjectLike(publicProjection) && !Array.isArray(publicProjection)
        ? publicProjection
        : {};
    const acceleratorRecord = isObjectLike(projectionRecord.accelerators)
        ? projectionRecord.accelerators
        : {};
    const gpuRecord = isObjectLike(acceleratorRecord.gpu) && !Array.isArray(acceleratorRecord.gpu)
        ? acceleratorRecord.gpu
        : null;
    let gpuCount = 0;
    const reportedGpuCount = gpuRecord?.["count"];
    if (typeof reportedGpuCount === "number" && Number.isFinite(reportedGpuCount)) {
        gpuCount = Math.max(0, Math.floor(reportedGpuCount));
    }
    const totalVramTier = coarsePublicVramTier(gpuRecord?.["vram_tier"], gpuCount);
    let ramUsedRatio = null;
    if (totalBytes > 0) {
        ramUsedRatio = roundedTelemetryNumber((totalBytes - freeBytes) / totalBytes);
    }
    return {
        schema_version: 1,
        collected_at: new Date().toISOString(),
        cpu: {
            core_count: coreCount,
            // One-minute load average divided by the core count.
            load_1m_ratio: roundedTelemetryNumber((loadavg()[0] || 0) / coreCount)
        },
        ram: {
            used_ratio: ramUsedRatio,
            total_bucket: totalHostMemoryBucket()
        },
        gpu: {
            available: Boolean(gpuRecord?.["available"]),
            count: gpuCount,
            cuda: Boolean(gpuRecord?.["cuda"] || gpuRecord?.["has_cuda"]),
            vram: {
                total_tier: totalVramTier,
                // VRAM usage is not measured here; the field is always null.
                used_ratio: null
            }
        }
    };
}
|
|
463
574
|
function normalizeCapabilities(value) {
|
|
464
575
|
if (!Array.isArray(value)) {
|
|
465
576
|
return [];
|
|
@@ -631,9 +742,14 @@ function serviceEnvironment(config, env, homeDir) {
|
|
|
631
742
|
MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS: String(config.heartbeatIntervalSeconds),
|
|
632
743
|
MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS: String(config.requestTimeoutMs),
|
|
633
744
|
MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS: String(config.jobTimeoutMs),
|
|
745
|
+
MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS: String(config.maxConcurrentJobs || 1),
|
|
746
|
+
MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS: String(config.maxConcurrentLlmJobs || config.maxConcurrentJobs || 1),
|
|
634
747
|
MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED: config.genericJobsEnabled ? "true" : "false",
|
|
635
748
|
MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS: String(config.genericJobTimeoutMs),
|
|
636
749
|
MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY: String(config.genericJobMaxConcurrency),
|
|
750
|
+
MSWARM_SELF_HOSTED_DRAIN_MODE: config.drainMode ? "true" : "false",
|
|
751
|
+
MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED: config.loadReportingEnabled === false ? "false" : "true",
|
|
752
|
+
MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED: config.hardwareTelemetryEnabled ? "true" : "false",
|
|
637
753
|
MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS: config.capabilityProbeTimeoutMs
|
|
638
754
|
? String(config.capabilityProbeTimeoutMs)
|
|
639
755
|
: null
|
|
@@ -1041,6 +1157,8 @@ export async function readSelfHostedNodeConfig(env = process.env) {
|
|
|
1041
1157
|
optionalText(env.OLLAMA_HOST) ||
|
|
1042
1158
|
DEFAULT_OLLAMA_BASE_URL;
|
|
1043
1159
|
const packageNodeVersion = await readPackageNodeVersion();
|
|
1160
|
+
const maxConcurrentJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, state.max_concurrent_jobs || 1);
|
|
1161
|
+
const maxConcurrentLlmJobs = parsePositiveInteger(env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, state.max_concurrent_llm_jobs || maxConcurrentJobs);
|
|
1044
1162
|
return {
|
|
1045
1163
|
gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
|
|
1046
1164
|
nodeId,
|
|
@@ -1070,10 +1188,15 @@ export async function readSelfHostedNodeConfig(env = process.env) {
|
|
|
1070
1188
|
heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, state.heartbeat_interval_seconds || DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
|
|
1071
1189
|
requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, state.request_timeout_ms || DEFAULT_REQUEST_TIMEOUT_MS),
|
|
1072
1190
|
jobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
|
|
1191
|
+
maxConcurrentJobs,
|
|
1192
|
+
maxConcurrentLlmJobs,
|
|
1073
1193
|
genericJobsEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED ?? env.MSWARM_SELF_HOSTED_GENERIC_JOBS, state.generic_jobs_enabled === true),
|
|
1074
1194
|
genericJobTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, state.generic_job_timeout_ms || state.job_timeout_ms || DEFAULT_JOB_TIMEOUT_MS),
|
|
1075
1195
|
genericJobMaxConcurrency: parsePositiveInteger(env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, state.generic_job_max_concurrency || 1),
|
|
1076
1196
|
capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, state.capability_probe_timeout_ms || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
|
|
1197
|
+
drainMode: parseBoolean(env.MSWARM_SELF_HOSTED_DRAIN_MODE, state.drain_mode === true),
|
|
1198
|
+
loadReportingEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING, state.load_reporting_enabled !== false),
|
|
1199
|
+
hardwareTelemetryEnabled: parseBoolean(env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED ?? env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, state.hardware_telemetry_enabled === true),
|
|
1077
1200
|
exposeAllModels: resolveDaemonExposeAllModels(env, state),
|
|
1078
1201
|
modelAllowlist: parseList(env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST || state.model_allowlist),
|
|
1079
1202
|
modelBlocklist: parseList(env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST || state.model_blocklist)
|
|
@@ -1104,6 +1227,8 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
|
|
|
1104
1227
|
const allowlist = parseList(options.allow || env.MSWARM_SELF_HOSTED_MODEL_ALLOWLIST);
|
|
1105
1228
|
const blocklist = parseList(options.block || env.MSWARM_SELF_HOSTED_MODEL_BLOCKLIST);
|
|
1106
1229
|
const packageNodeVersion = await readPackageNodeVersion();
|
|
1230
|
+
const maxConcurrentJobs = parsePositiveInteger(options["max-concurrent-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_JOBS, 1);
|
|
1231
|
+
const maxConcurrentLlmJobs = parsePositiveInteger(options["max-concurrent-llm-jobs"] || env.MSWARM_SELF_HOSTED_MAX_CONCURRENT_LLM_JOBS, maxConcurrentJobs);
|
|
1107
1232
|
return {
|
|
1108
1233
|
apiKey,
|
|
1109
1234
|
gatewayBaseUrl: trimTrailingSlash(gatewayBaseUrl),
|
|
@@ -1126,10 +1251,17 @@ export async function readOwnerSetupConfig(argv = process.argv.slice(3), env = p
|
|
|
1126
1251
|
heartbeatIntervalSeconds: parsePositiveInteger(env.MSWARM_SELF_HOSTED_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL_SECONDS),
|
|
1127
1252
|
requestTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_REQUEST_TIMEOUT_MS, DEFAULT_REQUEST_TIMEOUT_MS),
|
|
1128
1253
|
jobTimeoutMs: parsePositiveInteger(options["job-timeout-ms"] || env.MSWARM_SELF_HOSTED_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
|
|
1254
|
+
maxConcurrentJobs,
|
|
1255
|
+
maxConcurrentLlmJobs,
|
|
1129
1256
|
genericJobsEnabled: parseBoolean(options["enable-generic-jobs"] || env.MSWARM_SELF_HOSTED_GENERIC_JOBS_ENABLED || env.MSWARM_SELF_HOSTED_GENERIC_JOBS, false),
|
|
1130
1257
|
genericJobTimeoutMs: parsePositiveInteger(options["generic-job-timeout-ms"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_TIMEOUT_MS, DEFAULT_JOB_TIMEOUT_MS),
|
|
1131
1258
|
genericJobMaxConcurrency: parsePositiveInteger(options["generic-job-max-concurrency"] || env.MSWARM_SELF_HOSTED_GENERIC_JOB_MAX_CONCURRENCY, 1),
|
|
1132
1259
|
capabilityProbeTimeoutMs: parsePositiveInteger(env.MSWARM_SELF_HOSTED_CAPABILITY_PROBE_TIMEOUT_MS, DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS),
|
|
1260
|
+
drainMode: parseBoolean(options.drain || env.MSWARM_SELF_HOSTED_DRAIN_MODE, false),
|
|
1261
|
+
loadReportingEnabled: parseBoolean(options["disable-load-reporting"] === true
|
|
1262
|
+
? false
|
|
1263
|
+
: (env.MSWARM_SELF_HOSTED_LOAD_REPORTING_ENABLED ?? env.MSWARM_SELF_HOSTED_LOAD_REPORTING), true),
|
|
1264
|
+
hardwareTelemetryEnabled: parseBoolean(options["enable-hardware-telemetry"] || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY_ENABLED || env.MSWARM_SELF_HOSTED_HARDWARE_TELEMETRY, false),
|
|
1133
1265
|
exposeAllModels: resolveOwnerSetupExposeAllModels(options, env),
|
|
1134
1266
|
modelAllowlist: allowlist,
|
|
1135
1267
|
modelBlocklist: blocklist,
|
|
@@ -1527,10 +1659,6 @@ function mapMcodaAgentToCodaliAgent(agent, fallbackSlug) {
|
|
|
1527
1659
|
maxOutputTokens: optionalNumber(agent.maxOutputTokens, agent.max_output_tokens) ?? undefined,
|
|
1528
1660
|
};
|
|
1529
1661
|
}
|
|
1530
|
-
function isExposedLocalAgent(agent, config) {
|
|
1531
|
-
const mapped = mapMcodaAgentToSelfHostedModel(agent, config);
|
|
1532
|
-
return Boolean(mapped?.exposed);
|
|
1533
|
-
}
|
|
1534
1662
|
function buildCodaliWorkspace(job) {
|
|
1535
1663
|
const root = optionalText(job.workspace?.root);
|
|
1536
1664
|
if (!root) {
|
|
@@ -1550,6 +1678,13 @@ const DOCDEX_JOB_ERROR_CODES = new Set([
|
|
|
1550
1678
|
"docdex_repo_access_denied",
|
|
1551
1679
|
"docdex_unavailable",
|
|
1552
1680
|
]);
|
|
1681
|
+
// Error codes that selfHostedErrorCode recognises in addition to the Docdex job error codes.
const PRE_START_JOB_ERROR_CODES = new Set([
    "selected_agent_unavailable",
    "selected_agent_unhealthy",
    "validation_failed",
    "docdex_context_missing",
    "docdex_api_key_missing"
]);
|
|
1553
1688
|
class SelfHostedDocdexJobError extends Error {
|
|
1554
1689
|
constructor(code, message) {
|
|
1555
1690
|
super(message);
|
|
@@ -1557,6 +1692,13 @@ class SelfHostedDocdexJobError extends Error {
|
|
|
1557
1692
|
this.code = code;
|
|
1558
1693
|
}
|
|
1559
1694
|
}
|
|
1695
|
+
/**
 * Error carrying a machine-readable code for a job failure.
 *
 * The code is stored on both `name` and `code`, so it can be recovered from
 * either property.
 */
class SelfHostedPreStartJobError extends Error {
    /**
     * @param {string} code - Machine-readable error code.
     * @param {string} message - Human-readable description.
     */
    constructor(code, message) {
        super(message);
        Object.assign(this, { name: code, code });
    }
}
|
|
1560
1702
|
function normalizeDocdexCapabilityMap(value) {
|
|
1561
1703
|
const record = objectRecord(value);
|
|
1562
1704
|
if (!record)
|
|
@@ -1614,11 +1756,14 @@ function selfHostedErrorCode(error) {
|
|
|
1614
1756
|
if (!error || typeof error !== "object")
|
|
1615
1757
|
return undefined;
|
|
1616
1758
|
const code = error.code;
|
|
1617
|
-
if (typeof code === "string" &&
|
|
1759
|
+
if (typeof code === "string" &&
|
|
1760
|
+
(DOCDEX_JOB_ERROR_CODES.has(code) || PRE_START_JOB_ERROR_CODES.has(code))) {
|
|
1618
1761
|
return code;
|
|
1619
1762
|
}
|
|
1620
1763
|
const name = error.name;
|
|
1621
|
-
return typeof name === "string" && DOCDEX_JOB_ERROR_CODES.has(name)
|
|
1764
|
+
return typeof name === "string" && (DOCDEX_JOB_ERROR_CODES.has(name) || PRE_START_JOB_ERROR_CODES.has(name))
|
|
1765
|
+
? name
|
|
1766
|
+
: undefined;
|
|
1622
1767
|
}
|
|
1623
1768
|
function redactRuntimeSecretValues(value, secrets) {
|
|
1624
1769
|
let output = value;
|
|
@@ -3135,6 +3280,16 @@ export class MswarmSelfHostedNodeClient {
|
|
|
3135
3280
|
body: JSON.stringify(payload)
|
|
3136
3281
|
}, this.timeoutMs);
|
|
3137
3282
|
}
|
|
3283
|
+
async postJobStart(runtimeToken, jobId, payload) {
|
|
3284
|
+
return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/start`, {
|
|
3285
|
+
method: "POST",
|
|
3286
|
+
headers: {
|
|
3287
|
+
"content-type": "application/json",
|
|
3288
|
+
authorization: `Bearer ${runtimeToken}`
|
|
3289
|
+
},
|
|
3290
|
+
body: JSON.stringify(payload)
|
|
3291
|
+
}, this.timeoutMs);
|
|
3292
|
+
}
|
|
3138
3293
|
async postJobEvents(runtimeToken, jobId, payload) {
|
|
3139
3294
|
return fetchJson(this.fetchImpl, `${this.gatewayBaseUrl}/v1/swarm/self-hosted/node/jobs/${encodeURIComponent(jobId)}/events`, {
|
|
3140
3295
|
method: "POST",
|
|
@@ -3148,6 +3303,12 @@ export class MswarmSelfHostedNodeClient {
|
|
|
3148
3303
|
}
|
|
3149
3304
|
export class SelfHostedNodeRuntime {
|
|
3150
3305
|
constructor(config, deps) {
|
|
3306
|
+
this.activeLlmJobs = 0;
|
|
3307
|
+
this.activeGenericJobs = 0;
|
|
3308
|
+
this.queuedLlmJobs = 0;
|
|
3309
|
+
this.queuedGenericJobs = 0;
|
|
3310
|
+
this.latencySamplesMs = [];
|
|
3311
|
+
this.recentFailures = [];
|
|
3151
3312
|
this.config = config;
|
|
3152
3313
|
this.gateway =
|
|
3153
3314
|
deps?.gateway ||
|
|
@@ -3192,6 +3353,100 @@ export class SelfHostedNodeRuntime {
|
|
|
3192
3353
|
rootDir: config.artifactStorePath || defaultArtifactStorePath()
|
|
3193
3354
|
});
|
|
3194
3355
|
}
|
|
3356
|
+
updateLocalQueueTelemetry(input) {
|
|
3357
|
+
if (input.llmQueuedJobs !== undefined) {
|
|
3358
|
+
this.queuedLlmJobs = nonNegativeTelemetryInteger(input.llmQueuedJobs);
|
|
3359
|
+
}
|
|
3360
|
+
if (input.genericQueuedJobs !== undefined) {
|
|
3361
|
+
this.queuedGenericJobs = nonNegativeTelemetryInteger(input.genericQueuedJobs);
|
|
3362
|
+
}
|
|
3363
|
+
}
|
|
3364
|
+
beginExecutionTelemetry(executionClass) {
|
|
3365
|
+
if (executionClass === "generic_job") {
|
|
3366
|
+
this.activeGenericJobs += 1;
|
|
3367
|
+
return;
|
|
3368
|
+
}
|
|
3369
|
+
this.activeLlmJobs += 1;
|
|
3370
|
+
}
|
|
3371
|
+
finishExecutionTelemetry(input) {
|
|
3372
|
+
if (input.executionClass === "generic_job") {
|
|
3373
|
+
this.activeGenericJobs = Math.max(0, this.activeGenericJobs - 1);
|
|
3374
|
+
}
|
|
3375
|
+
else {
|
|
3376
|
+
this.activeLlmJobs = Math.max(0, this.activeLlmJobs - 1);
|
|
3377
|
+
}
|
|
3378
|
+
this.latencySamplesMs.push(Math.max(0, Date.now() - input.startedAt));
|
|
3379
|
+
while (this.latencySamplesMs.length > MAX_TELEMETRY_LATENCY_SAMPLES) {
|
|
3380
|
+
this.latencySamplesMs.shift();
|
|
3381
|
+
}
|
|
3382
|
+
if (!input.ok) {
|
|
3383
|
+
this.recentFailures.unshift({
|
|
3384
|
+
execution_class: input.executionClass === "generic_job" ? "generic_job" : "agentic",
|
|
3385
|
+
code: optionalText(input.code) || "upstream_error",
|
|
3386
|
+
at: new Date().toISOString()
|
|
3387
|
+
});
|
|
3388
|
+
this.recentFailures.splice(MAX_TELEMETRY_FAILURES);
|
|
3389
|
+
}
|
|
3390
|
+
}
|
|
3391
|
+
averageLatencyMs(fallback = null) {
|
|
3392
|
+
if (this.latencySamplesMs.length === 0) {
|
|
3393
|
+
return fallback;
|
|
3394
|
+
}
|
|
3395
|
+
const total = this.latencySamplesMs.reduce((sum, value) => sum + value, 0);
|
|
3396
|
+
return Math.round(total / this.latencySamplesMs.length);
|
|
3397
|
+
}
|
|
3398
|
+
buildLoadTelemetry(input) {
|
|
3399
|
+
const drainMode = this.config.drainMode === true;
|
|
3400
|
+
const llmMaxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentLlmJobs || this.config.maxConcurrentJobs || 1));
|
|
3401
|
+
const genericMaxConcurrency = Math.max(1, Math.floor(this.config.genericJobMaxConcurrency || 1));
|
|
3402
|
+
const maxConcurrency = Math.max(1, Math.floor(this.config.maxConcurrentJobs || 1), llmMaxConcurrency, this.config.genericJobsEnabled ? genericMaxConcurrency : 1);
|
|
3403
|
+
const activeLlmJobs = nonNegativeTelemetryInteger(this.activeLlmJobs);
|
|
3404
|
+
const activeGenericJobs = nonNegativeTelemetryInteger(this.activeGenericJobs);
|
|
3405
|
+
const queuedLlmJobs = nonNegativeTelemetryInteger(this.queuedLlmJobs);
|
|
3406
|
+
const queuedGenericJobs = nonNegativeTelemetryInteger(this.queuedGenericJobs);
|
|
3407
|
+
const llmCapacity = executionClassCapacity({
|
|
3408
|
+
maxConcurrency: llmMaxConcurrency,
|
|
3409
|
+
activeJobs: activeLlmJobs,
|
|
3410
|
+
queuedJobs: queuedLlmJobs,
|
|
3411
|
+
drainMode
|
|
3412
|
+
});
|
|
3413
|
+
const genericCapacity = executionClassCapacity({
|
|
3414
|
+
maxConcurrency: genericMaxConcurrency,
|
|
3415
|
+
activeJobs: activeGenericJobs,
|
|
3416
|
+
queuedJobs: queuedGenericJobs,
|
|
3417
|
+
drainMode: drainMode || !this.config.genericJobsEnabled
|
|
3418
|
+
});
|
|
3419
|
+
const activeJobs = activeLlmJobs + activeGenericJobs;
|
|
3420
|
+
const queuedJobs = queuedLlmJobs + queuedGenericJobs;
|
|
3421
|
+
const freeSlots = drainMode ? 0 : Math.max(0, maxConcurrency - activeJobs - queuedJobs);
|
|
3422
|
+
const failures = this.recentFailures.slice(0, 10);
|
|
3423
|
+
const discoveryFailureCount = nonNegativeTelemetryInteger(input.discoveryFailureCount);
|
|
3424
|
+
const telemetry = {
|
|
3425
|
+
runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
|
|
3426
|
+
load_balancer_protocol_version: SELF_HOSTED_LOAD_BALANCER_PROTOCOL_VERSION,
|
|
3427
|
+
catalog_metadata_version: SELF_HOSTED_CATALOG_METADATA_VERSION,
|
|
3428
|
+
catalog_fingerprint: buildCatalogFingerprint(input.models),
|
|
3429
|
+
max_concurrency: maxConcurrency,
|
|
3430
|
+
max_concurrent_llm_jobs: llmMaxConcurrency,
|
|
3431
|
+
max_concurrent_generic_jobs: this.config.genericJobsEnabled ? genericMaxConcurrency : 0,
|
|
3432
|
+
active_jobs: activeJobs,
|
|
3433
|
+
queued_jobs: queuedJobs,
|
|
3434
|
+
free_slots: freeSlots,
|
|
3435
|
+
drain_mode: drainMode,
|
|
3436
|
+
execution_class_capacity: {
|
|
3437
|
+
chat: llmCapacity,
|
|
3438
|
+
agentic: llmCapacity,
|
|
3439
|
+
generic_job: genericCapacity
|
|
3440
|
+
},
|
|
3441
|
+
avg_latency_ms: this.averageLatencyMs(input.discoveryLatencyMs ?? null),
|
|
3442
|
+
recent_failure_count: failures.length + discoveryFailureCount,
|
|
3443
|
+
recent_failures: failures
|
|
3444
|
+
};
|
|
3445
|
+
if (this.config.hardwareTelemetryEnabled === true) {
|
|
3446
|
+
telemetry.hardware_pressure = buildCoarseHardwarePressure(input.capabilityPayload || null);
|
|
3447
|
+
}
|
|
3448
|
+
return telemetry;
|
|
3449
|
+
}
|
|
3195
3450
|
static async setup(setupConfig, deps) {
|
|
3196
3451
|
const gateway = deps?.gateway ||
|
|
3197
3452
|
new MswarmSelfHostedNodeClient({
|
|
@@ -3213,6 +3468,11 @@ export class SelfHostedNodeRuntime {
|
|
|
3213
3468
|
model_allowlist: setupConfig.modelAllowlist,
|
|
3214
3469
|
model_blocklist: setupConfig.modelBlocklist,
|
|
3215
3470
|
heartbeat_interval_seconds: setupConfig.heartbeatIntervalSeconds,
|
|
3471
|
+
max_concurrent_jobs: setupConfig.maxConcurrentJobs,
|
|
3472
|
+
max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
|
|
3473
|
+
drain_mode: setupConfig.drainMode,
|
|
3474
|
+
load_reporting_enabled: setupConfig.loadReportingEnabled,
|
|
3475
|
+
hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
|
|
3216
3476
|
generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency
|
|
3217
3477
|
});
|
|
3218
3478
|
const nodeId = optionalText(bootstrap.node?.node_id);
|
|
@@ -3242,10 +3502,15 @@ export class SelfHostedNodeRuntime {
|
|
|
3242
3502
|
node_version: setupConfig.nodeVersion,
|
|
3243
3503
|
request_timeout_ms: setupConfig.requestTimeoutMs,
|
|
3244
3504
|
job_timeout_ms: setupConfig.jobTimeoutMs,
|
|
3505
|
+
max_concurrent_jobs: setupConfig.maxConcurrentJobs,
|
|
3506
|
+
max_concurrent_llm_jobs: setupConfig.maxConcurrentLlmJobs,
|
|
3245
3507
|
generic_jobs_enabled: setupConfig.genericJobsEnabled,
|
|
3246
3508
|
generic_job_timeout_ms: setupConfig.genericJobTimeoutMs,
|
|
3247
3509
|
generic_job_max_concurrency: setupConfig.genericJobMaxConcurrency,
|
|
3248
3510
|
capability_probe_timeout_ms: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
|
|
3511
|
+
drain_mode: setupConfig.drainMode,
|
|
3512
|
+
load_reporting_enabled: setupConfig.loadReportingEnabled,
|
|
3513
|
+
hardware_telemetry_enabled: setupConfig.hardwareTelemetryEnabled,
|
|
3249
3514
|
expose_all_models: setupConfig.exposeAllModels,
|
|
3250
3515
|
exposure_policy: setupConfig.exposeAllModels ? "all" : "none",
|
|
3251
3516
|
model_allowlist: setupConfig.modelAllowlist,
|
|
@@ -3276,10 +3541,15 @@ export class SelfHostedNodeRuntime {
|
|
|
3276
3541
|
heartbeatIntervalSeconds: heartbeatInterval,
|
|
3277
3542
|
requestTimeoutMs: setupConfig.requestTimeoutMs,
|
|
3278
3543
|
jobTimeoutMs: setupConfig.jobTimeoutMs,
|
|
3544
|
+
maxConcurrentJobs: setupConfig.maxConcurrentJobs,
|
|
3545
|
+
maxConcurrentLlmJobs: setupConfig.maxConcurrentLlmJobs,
|
|
3279
3546
|
genericJobsEnabled: setupConfig.genericJobsEnabled,
|
|
3280
3547
|
genericJobTimeoutMs: setupConfig.genericJobTimeoutMs,
|
|
3281
3548
|
genericJobMaxConcurrency: setupConfig.genericJobMaxConcurrency,
|
|
3282
3549
|
capabilityProbeTimeoutMs: setupConfig.capabilityProbeTimeoutMs || DEFAULT_CAPABILITY_PROBE_TIMEOUT_MS,
|
|
3550
|
+
drainMode: setupConfig.drainMode,
|
|
3551
|
+
loadReportingEnabled: setupConfig.loadReportingEnabled,
|
|
3552
|
+
hardwareTelemetryEnabled: setupConfig.hardwareTelemetryEnabled,
|
|
3283
3553
|
exposeAllModels: setupConfig.exposeAllModels,
|
|
3284
3554
|
modelAllowlist: setupConfig.modelAllowlist,
|
|
3285
3555
|
modelBlocklist: setupConfig.modelBlocklist
|
|
@@ -3386,9 +3656,14 @@ export class SelfHostedNodeRuntime {
|
|
|
3386
3656
|
node_version: this.config.nodeVersion,
|
|
3387
3657
|
request_timeout_ms: this.config.requestTimeoutMs,
|
|
3388
3658
|
job_timeout_ms: this.config.jobTimeoutMs,
|
|
3659
|
+
max_concurrent_jobs: this.config.maxConcurrentJobs,
|
|
3660
|
+
max_concurrent_llm_jobs: this.config.maxConcurrentLlmJobs,
|
|
3389
3661
|
generic_jobs_enabled: this.config.genericJobsEnabled,
|
|
3390
3662
|
generic_job_timeout_ms: this.config.genericJobTimeoutMs,
|
|
3391
3663
|
generic_job_max_concurrency: this.config.genericJobMaxConcurrency,
|
|
3664
|
+
drain_mode: this.config.drainMode === true,
|
|
3665
|
+
load_reporting_enabled: this.config.loadReportingEnabled !== false,
|
|
3666
|
+
hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
|
|
3392
3667
|
expose_all_models: this.config.exposeAllModels,
|
|
3393
3668
|
exposure_policy: this.config.exposeAllModels ? "all" : "none",
|
|
3394
3669
|
model_allowlist: this.config.modelAllowlist,
|
|
@@ -3399,26 +3674,38 @@ export class SelfHostedNodeRuntime {
|
|
|
3399
3674
|
return { runtimeToken, state: nextState, enrolled: true };
|
|
3400
3675
|
}
|
|
3401
3676
|
async resolveMcodaAgentForJob(job) {
|
|
3402
|
-
const
|
|
3403
|
-
|
|
3404
|
-
|
|
3405
|
-
|
|
3677
|
+
const selectedSourceAgentSlug = optionalText(job.source_agent_slug);
|
|
3678
|
+
const selectedAgentSlug = optionalText(job.agent_slug);
|
|
3679
|
+
const selectedModel = optionalText(job.model) || optionalText(job.openai_request.model);
|
|
3680
|
+
const selected = selectedSourceAgentSlug || selectedAgentSlug || selectedModel;
|
|
3406
3681
|
if (!selected) {
|
|
3407
|
-
throw new
|
|
3682
|
+
throw new SelfHostedPreStartJobError("selected_agent_unavailable", "mcoda source agent slug is required");
|
|
3408
3683
|
}
|
|
3409
3684
|
const rawAgents = await this.mcoda.listRawAgents();
|
|
3685
|
+
const strictSelectedAgent = selectedSourceAgentSlug || selectedAgentSlug;
|
|
3410
3686
|
const agent = rawAgents.find((entry) => {
|
|
3411
3687
|
const slug = optionalText(entry.slug);
|
|
3688
|
+
if (strictSelectedAgent) {
|
|
3689
|
+
return slug === strictSelectedAgent;
|
|
3690
|
+
}
|
|
3412
3691
|
const defaultModel = mcodaAgentDefaultModel(entry);
|
|
3413
3692
|
return slug === selected || defaultModel === selected;
|
|
3414
3693
|
});
|
|
3415
|
-
if (!agent
|
|
3416
|
-
throw new
|
|
3694
|
+
if (!agent) {
|
|
3695
|
+
throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not available on this node`);
|
|
3696
|
+
}
|
|
3697
|
+
const mapped = mapMcodaAgentToSelfHostedModel(agent, this.config);
|
|
3698
|
+
if (!mapped?.exposed) {
|
|
3699
|
+
throw new SelfHostedPreStartJobError("selected_agent_unavailable", `selected local mcoda agent ${selected} is not exposed by this node`);
|
|
3700
|
+
}
|
|
3701
|
+
if (mapped.health_status && mapped.health_status !== "healthy" && mapped.health_status !== "unknown") {
|
|
3702
|
+
throw new SelfHostedPreStartJobError("selected_agent_unhealthy", `selected local mcoda agent ${selected} is ${mapped.health_status}`);
|
|
3417
3703
|
}
|
|
3418
3704
|
return mapMcodaAgentToCodaliAgent(agent, selected);
|
|
3419
3705
|
}
|
|
3420
3706
|
async executeGenericJob(envelope, options = {}) {
|
|
3421
3707
|
const startedAt = Date.now();
|
|
3708
|
+
this.beginExecutionTelemetry("generic_job");
|
|
3422
3709
|
const events = [];
|
|
3423
3710
|
let sequence = 0;
|
|
3424
3711
|
const emitEvent = async (event) => {
|
|
@@ -3449,6 +3736,12 @@ export class SelfHostedNodeRuntime {
|
|
|
3449
3736
|
},
|
|
3450
3737
|
finished_at: new Date().toISOString()
|
|
3451
3738
|
};
|
|
3739
|
+
this.finishExecutionTelemetry({
|
|
3740
|
+
executionClass: "generic_job",
|
|
3741
|
+
startedAt,
|
|
3742
|
+
ok: false,
|
|
3743
|
+
code
|
|
3744
|
+
});
|
|
3452
3745
|
return {
|
|
3453
3746
|
job_id: envelope.job_id,
|
|
3454
3747
|
request_id: envelope.request_id,
|
|
@@ -3554,6 +3847,12 @@ export class SelfHostedNodeRuntime {
|
|
|
3554
3847
|
runner: runner.id
|
|
3555
3848
|
}
|
|
3556
3849
|
});
|
|
3850
|
+
this.finishExecutionTelemetry({
|
|
3851
|
+
executionClass: "generic_job",
|
|
3852
|
+
startedAt,
|
|
3853
|
+
ok: status === "succeeded",
|
|
3854
|
+
code: runnerResult.error?.code || status
|
|
3855
|
+
});
|
|
3557
3856
|
return {
|
|
3558
3857
|
job_id: envelope.job_id,
|
|
3559
3858
|
request_id: envelope.request_id,
|
|
@@ -3579,7 +3878,9 @@ export class SelfHostedNodeRuntime {
|
|
|
3579
3878
|
}
|
|
3580
3879
|
async executeJob(job, options = {}) {
|
|
3581
3880
|
const startedAt = Date.now();
|
|
3881
|
+
this.beginExecutionTelemetry("llm");
|
|
3582
3882
|
let selectedAgent;
|
|
3883
|
+
let jobStarted = false;
|
|
3583
3884
|
const progressEvents = [];
|
|
3584
3885
|
const streamEvents = [];
|
|
3585
3886
|
const recordProgress = async (event) => {
|
|
@@ -3590,13 +3891,35 @@ export class SelfHostedNodeRuntime {
|
|
|
3590
3891
|
streamEvents.push(chunk);
|
|
3591
3892
|
await options.onOpenAIChunk?.(chunk);
|
|
3592
3893
|
};
|
|
3894
|
+
const acknowledgeStarted = async (agent) => {
|
|
3895
|
+
if (jobStarted) {
|
|
3896
|
+
return;
|
|
3897
|
+
}
|
|
3898
|
+
await options.onStarted?.({
|
|
3899
|
+
job_id: job.job_id,
|
|
3900
|
+
request_id: job.request_id,
|
|
3901
|
+
node_id: job.node_id,
|
|
3902
|
+
agent_slug: optionalText(job.agent_slug) || agent?.slug || "",
|
|
3903
|
+
source_agent_slug: optionalText(job.source_agent_slug) || agent?.slug || null,
|
|
3904
|
+
model: optionalText(job.model) || optionalText(job.openai_request.model)
|
|
3905
|
+
});
|
|
3906
|
+
jobStarted = true;
|
|
3907
|
+
};
|
|
3593
3908
|
if (job.node_id !== this.config.nodeId) {
|
|
3594
|
-
|
|
3909
|
+
const result = {
|
|
3595
3910
|
job_id: job.job_id,
|
|
3596
3911
|
request_id: job.request_id,
|
|
3597
3912
|
status: "failed",
|
|
3913
|
+
pre_start_failure: true,
|
|
3598
3914
|
error: { code: "validation_failed", message: "job node_id does not match this node" }
|
|
3599
3915
|
};
|
|
3916
|
+
this.finishExecutionTelemetry({
|
|
3917
|
+
executionClass: "llm",
|
|
3918
|
+
startedAt,
|
|
3919
|
+
ok: false,
|
|
3920
|
+
code: "validation_failed"
|
|
3921
|
+
});
|
|
3922
|
+
return result;
|
|
3600
3923
|
}
|
|
3601
3924
|
try {
|
|
3602
3925
|
if (job.provider === "ollama") {
|
|
@@ -3609,7 +3932,8 @@ export class SelfHostedNodeRuntime {
|
|
|
3609
3932
|
options.num_predict = job.openai_request.max_tokens;
|
|
3610
3933
|
if (job.openai_request.stop !== undefined)
|
|
3611
3934
|
options.stop = job.openai_request.stop;
|
|
3612
|
-
|
|
3935
|
+
await acknowledgeStarted();
|
|
3936
|
+
const ollamaResult = await this.jobOllama.chat({
|
|
3613
3937
|
model: job.model || job.openai_request.model,
|
|
3614
3938
|
messages: job.openai_request.messages,
|
|
3615
3939
|
options,
|
|
@@ -3622,7 +3946,7 @@ export class SelfHostedNodeRuntime {
|
|
|
3622
3946
|
created: Math.floor(Date.now() / 1000),
|
|
3623
3947
|
model: job.openai_request.model,
|
|
3624
3948
|
choices: [
|
|
3625
|
-
{ index: 0, delta: { content:
|
|
3949
|
+
{ index: 0, delta: { content: ollamaResult.content }, finish_reason: null }
|
|
3626
3950
|
]
|
|
3627
3951
|
});
|
|
3628
3952
|
await emitOpenAIChunk({
|
|
@@ -3635,22 +3959,28 @@ export class SelfHostedNodeRuntime {
|
|
|
3635
3959
|
]
|
|
3636
3960
|
});
|
|
3637
3961
|
}
|
|
3638
|
-
|
|
3962
|
+
const invocationResult = {
|
|
3639
3963
|
job_id: job.job_id,
|
|
3640
3964
|
request_id: job.request_id,
|
|
3641
3965
|
status: "success",
|
|
3642
3966
|
openai_response: buildOpenAIChatCompletion({
|
|
3643
3967
|
requestId: job.request_id,
|
|
3644
3968
|
model: job.openai_request.model,
|
|
3645
|
-
content:
|
|
3646
|
-
promptTokens:
|
|
3647
|
-
completionTokens:
|
|
3648
|
-
metadata: { provider: "ollama", raw:
|
|
3969
|
+
content: ollamaResult.content,
|
|
3970
|
+
promptTokens: ollamaResult.promptTokens,
|
|
3971
|
+
completionTokens: ollamaResult.completionTokens,
|
|
3972
|
+
metadata: { provider: "ollama", raw: ollamaResult.raw }
|
|
3649
3973
|
}),
|
|
3650
3974
|
...(streamEvents.length ? { stream_events: streamEvents } : {}),
|
|
3651
3975
|
...(progressEvents.length ? { progress_events: progressEvents } : {}),
|
|
3652
3976
|
timing: { local_latency_ms: Date.now() - startedAt }
|
|
3653
3977
|
};
|
|
3978
|
+
this.finishExecutionTelemetry({
|
|
3979
|
+
executionClass: "llm",
|
|
3980
|
+
startedAt,
|
|
3981
|
+
ok: true
|
|
3982
|
+
});
|
|
3983
|
+
return invocationResult;
|
|
3654
3984
|
}
|
|
3655
3985
|
const taskPreview = messagesToPrompt(job.openai_request.messages);
|
|
3656
3986
|
if (!taskPreview) {
|
|
@@ -3660,6 +3990,7 @@ export class SelfHostedNodeRuntime {
|
|
|
3660
3990
|
selectedAgent = agent;
|
|
3661
3991
|
validateRequiredDocdexContext(job, options.attachedMswarmApiKey);
|
|
3662
3992
|
const attachedMswarmApiKey = attachedMswarmApiKeyForDocdex(job, options.attachedMswarmApiKey);
|
|
3993
|
+
await acknowledgeStarted(agent);
|
|
3663
3994
|
await recordProgress({
|
|
3664
3995
|
type: "agent_selected",
|
|
3665
3996
|
job_id: job.job_id,
|
|
@@ -3698,7 +4029,7 @@ export class SelfHostedNodeRuntime {
|
|
|
3698
4029
|
}
|
|
3699
4030
|
});
|
|
3700
4031
|
const tokens = usageTokens(response.usage);
|
|
3701
|
-
|
|
4032
|
+
const result = {
|
|
3702
4033
|
job_id: job.job_id,
|
|
3703
4034
|
request_id: job.request_id,
|
|
3704
4035
|
status: "success",
|
|
@@ -3724,6 +4055,12 @@ export class SelfHostedNodeRuntime {
|
|
|
3724
4055
|
...(progressEvents.length ? { progress_events: progressEvents } : {}),
|
|
3725
4056
|
timing: { local_latency_ms: Date.now() - startedAt }
|
|
3726
4057
|
};
|
|
4058
|
+
this.finishExecutionTelemetry({
|
|
4059
|
+
executionClass: "llm",
|
|
4060
|
+
startedAt,
|
|
4061
|
+
ok: true
|
|
4062
|
+
});
|
|
4063
|
+
return result;
|
|
3727
4064
|
}
|
|
3728
4065
|
catch (error) {
|
|
3729
4066
|
const message = redactRuntimeSecretValues(error instanceof Error ? error.message : String(error), [selectedAgent?.apiKey, options.attachedMswarmApiKey]);
|
|
@@ -3736,10 +4073,11 @@ export class SelfHostedNodeRuntime {
|
|
|
3736
4073
|
: /permission|policy|denied/i.test(message)
|
|
3737
4074
|
? "policy_denied"
|
|
3738
4075
|
: "upstream_error");
|
|
3739
|
-
|
|
4076
|
+
const result = {
|
|
3740
4077
|
job_id: job.job_id,
|
|
3741
4078
|
request_id: job.request_id,
|
|
3742
4079
|
status: "failed",
|
|
4080
|
+
...(!jobStarted ? { pre_start_failure: true } : {}),
|
|
3743
4081
|
error: {
|
|
3744
4082
|
code,
|
|
3745
4083
|
message
|
|
@@ -3748,6 +4086,13 @@ export class SelfHostedNodeRuntime {
|
|
|
3748
4086
|
...(progressEvents.length ? { progress_events: progressEvents } : {}),
|
|
3749
4087
|
timing: { local_latency_ms: Date.now() - startedAt }
|
|
3750
4088
|
};
|
|
4089
|
+
this.finishExecutionTelemetry({
|
|
4090
|
+
executionClass: "llm",
|
|
4091
|
+
startedAt,
|
|
4092
|
+
ok: false,
|
|
4093
|
+
code
|
|
4094
|
+
});
|
|
4095
|
+
return result;
|
|
3751
4096
|
}
|
|
3752
4097
|
}
|
|
3753
4098
|
async runOnce() {
|
|
@@ -3772,12 +4117,49 @@ export class SelfHostedNodeRuntime {
|
|
|
3772
4117
|
models = [];
|
|
3773
4118
|
version = null;
|
|
3774
4119
|
}
|
|
4120
|
+
const discoveryLatencyMs = Date.now() - startedAt;
|
|
3775
4121
|
const capabilityPayload = await this.buildCapabilityHeartbeatPayload(enrollment.runtimeToken);
|
|
4122
|
+
const loadTelemetry = this.buildLoadTelemetry({
|
|
4123
|
+
models,
|
|
4124
|
+
discoveryLatencyMs,
|
|
4125
|
+
discoveryFailureCount: recentFailureCount,
|
|
4126
|
+
capabilityPayload
|
|
4127
|
+
});
|
|
4128
|
+
const exposedModelCount = models.filter((model) => model.exposed !== false).length;
|
|
4129
|
+
const loadReportingEnabled = this.config.loadReportingEnabled !== false;
|
|
4130
|
+
const capacityPayload = loadReportingEnabled
|
|
4131
|
+
? {
|
|
4132
|
+
protocol_version: loadTelemetry.runtime_protocol_version,
|
|
4133
|
+
runtime_protocol_version: loadTelemetry.runtime_protocol_version,
|
|
4134
|
+
load_balancer_protocol_version: loadTelemetry.load_balancer_protocol_version,
|
|
4135
|
+
catalog_metadata_version: loadTelemetry.catalog_metadata_version,
|
|
4136
|
+
catalog_fingerprint: loadTelemetry.catalog_fingerprint,
|
|
4137
|
+
max_concurrency: loadTelemetry.max_concurrency,
|
|
4138
|
+
max_concurrent_llm_jobs: loadTelemetry.max_concurrent_llm_jobs,
|
|
4139
|
+
max_concurrent_generic_jobs: loadTelemetry.max_concurrent_generic_jobs,
|
|
4140
|
+
active_jobs: loadTelemetry.active_jobs,
|
|
4141
|
+
queued_jobs: loadTelemetry.queued_jobs,
|
|
4142
|
+
free_slots: loadTelemetry.free_slots,
|
|
4143
|
+
drain_mode: loadTelemetry.drain_mode,
|
|
4144
|
+
execution_class_capacity: loadTelemetry.execution_class_capacity
|
|
4145
|
+
}
|
|
4146
|
+
: {
|
|
4147
|
+
active_jobs: loadTelemetry.active_jobs,
|
|
4148
|
+
queued_jobs: loadTelemetry.queued_jobs
|
|
4149
|
+
};
|
|
3776
4150
|
const heartbeatPayload = {
|
|
3777
4151
|
node_id: this.config.nodeId,
|
|
3778
4152
|
node_version: this.config.nodeVersion,
|
|
4153
|
+
runtime_protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
|
|
3779
4154
|
config_version: enrollment.state.config_version ?? null,
|
|
3780
4155
|
status,
|
|
4156
|
+
runtime: {
|
|
4157
|
+
protocol_version: SELF_HOSTED_RUNTIME_PROTOCOL_VERSION,
|
|
4158
|
+
relay_mode: this.config.relayMode || "outbound",
|
|
4159
|
+
load_reporting_enabled: loadReportingEnabled,
|
|
4160
|
+
hardware_telemetry_enabled: this.config.hardwareTelemetryEnabled === true,
|
|
4161
|
+
drain_mode: this.config.drainMode === true
|
|
4162
|
+
},
|
|
3781
4163
|
discovery: {
|
|
3782
4164
|
source: discoverySource,
|
|
3783
4165
|
mcoda_status: discoverySource === "mcoda" && status === "online" ? "ok" : status === "degraded" ? "error" : null
|
|
@@ -3791,20 +4173,24 @@ export class SelfHostedNodeRuntime {
|
|
|
3791
4173
|
status: null,
|
|
3792
4174
|
version: null
|
|
3793
4175
|
},
|
|
3794
|
-
capacity:
|
|
3795
|
-
active_jobs: 0,
|
|
3796
|
-
queued_jobs: 0
|
|
3797
|
-
},
|
|
4176
|
+
capacity: capacityPayload,
|
|
3798
4177
|
health: {
|
|
3799
|
-
avg_latency_ms:
|
|
3800
|
-
recent_failure_count:
|
|
4178
|
+
avg_latency_ms: loadTelemetry.avg_latency_ms ?? discoveryLatencyMs,
|
|
4179
|
+
recent_failure_count: loadTelemetry.recent_failure_count,
|
|
4180
|
+
recent_failures: loadTelemetry.recent_failures,
|
|
3801
4181
|
last_success_at: status === "online" ? new Date().toISOString() : null
|
|
3802
4182
|
},
|
|
4183
|
+
local_agent_catalog: {
|
|
4184
|
+
revision: loadTelemetry.catalog_fingerprint,
|
|
4185
|
+
metadata_version: loadTelemetry.catalog_metadata_version,
|
|
4186
|
+
model_count: models.length,
|
|
4187
|
+
exposed_model_count: exposedModelCount
|
|
4188
|
+
},
|
|
3803
4189
|
models,
|
|
3804
|
-
capabilities: capabilityPayload
|
|
4190
|
+
capabilities: capabilityPayload,
|
|
4191
|
+
...(loadTelemetry.hardware_pressure ? { hardware_pressure: loadTelemetry.hardware_pressure } : {})
|
|
3805
4192
|
};
|
|
3806
4193
|
const heartbeatResponse = await this.gateway.heartbeat(enrollment.runtimeToken, heartbeatPayload);
|
|
3807
|
-
const exposedModelCount = models.filter((model) => model.exposed !== false).length;
|
|
3808
4194
|
return {
|
|
3809
4195
|
enrolled: enrollment.enrolled,
|
|
3810
4196
|
status,
|
|
@@ -3812,6 +4198,7 @@ export class SelfHostedNodeRuntime {
|
|
|
3812
4198
|
discovery_source: discoverySource,
|
|
3813
4199
|
mcoda_agent_count: discoverySource === "mcoda" ? exposedModelCount : undefined,
|
|
3814
4200
|
ollama_version: version,
|
|
4201
|
+
capacity: loadTelemetry,
|
|
3815
4202
|
heartbeat_response: heartbeatResponse
|
|
3816
4203
|
};
|
|
3817
4204
|
}
|
|
@@ -3846,9 +4233,17 @@ export class SelfHostedNodeRuntime {
|
|
|
3846
4233
|
}
|
|
3847
4234
|
async pollAndExecuteJob(waitMs = DEFAULT_JOB_POLL_WAIT_MS) {
|
|
3848
4235
|
const enrollment = await this.ensureEnrolled();
|
|
4236
|
+
const pollCapacity = this.buildLoadTelemetry({ models: [] });
|
|
3849
4237
|
const response = await this.gateway.pollJob(enrollment.runtimeToken, {
|
|
3850
4238
|
node_id: this.config.nodeId,
|
|
3851
|
-
capacity: {
|
|
4239
|
+
capacity: {
|
|
4240
|
+
active_jobs: pollCapacity.active_jobs,
|
|
4241
|
+
queued_jobs: pollCapacity.queued_jobs,
|
|
4242
|
+
max_jobs: pollCapacity.max_concurrency,
|
|
4243
|
+
max_concurrency: pollCapacity.max_concurrency,
|
|
4244
|
+
free_slots: pollCapacity.free_slots,
|
|
4245
|
+
drain_mode: pollCapacity.drain_mode
|
|
4246
|
+
},
|
|
3852
4247
|
wait_ms: waitMs
|
|
3853
4248
|
});
|
|
3854
4249
|
const job = response.job || null;
|
|
@@ -3876,6 +4271,14 @@ export class SelfHostedNodeRuntime {
|
|
|
3876
4271
|
};
|
|
3877
4272
|
const result = await this.executeJob(job, {
|
|
3878
4273
|
attachedMswarmApiKey: optionalText(response.attached_mswarm_api_key) || undefined,
|
|
4274
|
+
onStarted: async (event) => {
|
|
4275
|
+
await this.gateway.postJobStart(enrollment.runtimeToken, job.job_id, {
|
|
4276
|
+
node_id: this.config.nodeId,
|
|
4277
|
+
agent_slug: event.agent_slug || job.agent_slug,
|
|
4278
|
+
source_agent_slug: event.source_agent_slug || job.source_agent_slug || null,
|
|
4279
|
+
model: event.model || job.model || job.openai_request.model
|
|
4280
|
+
});
|
|
4281
|
+
},
|
|
3879
4282
|
onOpenAIChunk: async (chunk) => {
|
|
3880
4283
|
if (job.openai_request.stream !== true || streamEventForwardingFailed) {
|
|
3881
4284
|
return;
|