@cadenza.io/service 2.6.1 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -298,6 +298,7 @@ var isBrowser = typeof window !== "undefined" && typeof window.document !== "und
298
298
  // src/utils/inquiry.ts
299
299
  var META_INTENT_PREFIX = "meta-";
300
300
  var META_RUNTIME_TRANSPORT_DIAGNOSTICS_INTENT = "meta-runtime-transport-diagnostics";
301
+ var META_RUNTIME_STATUS_INTENT = "meta-runtime-status";
301
302
  function isPlainObject(value) {
302
303
  return typeof value === "object" && value !== null && !Array.isArray(value) && Object.getPrototypeOf(value) === Object.prototype;
303
304
  }
@@ -361,8 +362,104 @@ function summarizeResponderStatuses(statuses) {
361
362
  return { responded, failed, timedOut, pending };
362
363
  }
363
364
 
365
+ // src/utils/runtimeStatus.ts
366
/**
 * Derives a runtime status snapshot from raw instance flags and load.
 *
 * The running-graph count is coerced to a non-negative integer (non-numeric
 * values become 0) and the three flags are coerced to booleans.
 *
 * @param {object} input
 * @param {*} input.numberOfRunningGraphs - Current load; coerced as described above.
 * @param {*} input.isActive
 * @param {*} input.isNonResponsive
 * @param {*} input.isBlocked
 * @param {number} input.degradedGraphThreshold - Load at or above which the state is "degraded".
 * @param {number} input.overloadedGraphThreshold - Load at or above which the state is "overloaded".
 * @returns {{state: string, acceptingWork: boolean, numberOfRunningGraphs: number,
 *   isActive: boolean, isNonResponsive: boolean, isBlocked: boolean}}
 */
function resolveRuntimeStatus(input) {
  const rawGraphCount = Number(input.numberOfRunningGraphs) || 0;
  const normalized = {
    numberOfRunningGraphs: Math.max(0, Math.trunc(rawGraphCount)),
    isActive: Boolean(input.isActive),
    isNonResponsive: Boolean(input.isNonResponsive),
    isBlocked: Boolean(input.isBlocked)
  };
  // Every outcome shares the normalized fields; only state/acceptingWork vary.
  const describe = (state, acceptingWork) => ({
    state,
    acceptingWork,
    ...normalized
  });

  const reachable = normalized.isActive && !normalized.isNonResponsive && !normalized.isBlocked;
  if (!reachable) {
    return describe("unavailable", false);
  }

  // The overloaded threshold is checked first, so it wins when both thresholds are met.
  const load = normalized.numberOfRunningGraphs;
  if (load >= input.overloadedGraphThreshold) {
    return describe("overloaded", true);
  }
  if (load >= input.degradedGraphThreshold) {
    return describe("degraded", true);
  }
  return describe("healthy", true);
}
413
/**
 * Maps a runtime state name to a sortable rank (lower is preferable).
 *
 * @param {*} state - Expected to be "healthy", "degraded", "overloaded" or "unavailable".
 * @returns {number} 0-3 for the known states in that order, 4 for anything else.
 */
function runtimeStatusPriority(state) {
  // Position in this list is the rank; indexOf compares with strict equality.
  const rankedStates = ["healthy", "degraded", "overloaded", "unavailable"];
  const rank = rankedStates.indexOf(state);
  return rank === -1 ? rankedStates.length : rank;
}
427
/**
 * Tells whether two runtime status snapshots differ in a way worth reporting.
 *
 * Only state, acceptingWork and the three availability flags are compared;
 * a change in numberOfRunningGraphs alone is not considered significant.
 *
 * @param {object|null|undefined} previous - Last known snapshot, if any.
 * @param {object} next - Freshly resolved snapshot.
 * @returns {boolean} true when there is no previous snapshot or any compared field differs.
 */
function hasSignificantRuntimeStatusChange(previous, next) {
  if (!previous) {
    return true;
  }
  const comparedFields = [
    "state",
    "acceptingWork",
    "isActive",
    "isNonResponsive",
    "isBlocked"
  ];
  return comparedFields.some((field) => previous[field] !== next[field]);
}
433
+
364
434
  // src/registry/ServiceRegistry.ts
365
435
  var META_SERVICE_REGISTRY_FULL_SYNC_INTENT = "meta-service-registry-full-sync";
436
// Signal emitted on the heartbeat interval; the "Broadcast runtime status" task listens for it.
+ var META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL = "meta.service_registry.runtime_status.heartbeat_tick";
437
// Signal emitted on the monitor interval; the "Monitor dependee heartbeat freshness" task listens for it.
+ var META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL = "meta.service_registry.runtime_status.monitor_tick";
438
// Task names checked by the "Track local routine start"/"Track local routine end" tasks:
// a routine signal whose emitting task name is in this set is ignored by the local load counter.
+ var INTERNAL_RUNTIME_STATUS_TASK_NAMES = /* @__PURE__ */ new Set([
439
+ "Track local routine start",
440
+ "Track local routine end",
441
+ "Start runtime status sharing intervals",
442
+ "Broadcast runtime status",
443
+ "Monitor dependee heartbeat freshness",
444
+ "Resolve runtime status fallback inquiry",
445
+ "Respond runtime status inquiry",
446
+ "Get status"
447
+ ]);
448
/**
 * Reads an environment variable as a positive integer.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable cannot be used.
 * @returns {number} The variable's value truncated toward zero, or `fallback`
 *   when `process` is unavailable, the value is not a finite number, or the
 *   truncated value is not greater than zero.
 */
function readPositiveIntegerEnv(name, fallback) {
  if (typeof process === "undefined") {
    return fallback;
  }
  // Math.trunc keeps NaN and +/-Infinity as they are, so one finiteness check
  // after truncation covers every unusable value.
  const candidate = Math.trunc(Number(process.env?.[name]));
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? candidate : fallback;
}
366
463
  var ServiceRegistry = class _ServiceRegistry {
367
464
  /**
368
465
  * Initializes a private constructor for managing service instances, remote signals,
@@ -380,6 +477,34 @@ var ServiceRegistry = class _ServiceRegistry {
380
477
  this.remoteIntents = /* @__PURE__ */ new Map();
381
478
  this.remoteIntentDeputiesByKey = /* @__PURE__ */ new Map();
382
479
  this.remoteIntentDeputiesByTask = /* @__PURE__ */ new Map();
480
+ this.dependeesByService = /* @__PURE__ */ new Map();
481
+ this.dependeeByInstance = /* @__PURE__ */ new Map();
482
+ this.lastHeartbeatAtByInstance = /* @__PURE__ */ new Map();
483
+ this.missedHeartbeatsByInstance = /* @__PURE__ */ new Map();
484
+ this.runtimeStatusFallbackInFlightByInstance = /* @__PURE__ */ new Set();
485
+ this.activeRoutineExecutionIds = /* @__PURE__ */ new Set();
486
+ this.runtimeStatusHeartbeatStarted = false;
487
+ this.lastRuntimeStatusSnapshot = null;
488
+ this.runtimeStatusHeartbeatIntervalMs = readPositiveIntegerEnv(
489
+ "CADENZA_RUNTIME_STATUS_HEARTBEAT_MS",
490
+ 3e4
491
+ );
492
+ this.runtimeStatusMissThreshold = readPositiveIntegerEnv(
493
+ "CADENZA_RUNTIME_STATUS_MISSED_HEARTBEATS",
494
+ 3
495
+ );
496
+ this.runtimeStatusFallbackTimeoutMs = readPositiveIntegerEnv(
497
+ "CADENZA_RUNTIME_STATUS_FALLBACK_TIMEOUT_MS",
498
+ 1500
499
+ );
500
+ this.degradedGraphThreshold = readPositiveIntegerEnv(
501
+ "CADENZA_RUNTIME_STATUS_DEGRADED_GRAPH_THRESHOLD",
502
+ 10
503
+ );
504
+ this.overloadedGraphThreshold = readPositiveIntegerEnv(
505
+ "CADENZA_RUNTIME_STATUS_OVERLOADED_GRAPH_THRESHOLD",
506
+ 20
507
+ );
383
508
  this.serviceName = null;
384
509
  this.serviceInstanceId = null;
385
510
  this.numberOfRunningGraphs = 0;
@@ -418,10 +543,77 @@ var ServiceRegistry = class _ServiceRegistry {
418
543
  }
419
544
  }
420
545
  });
546
+ CadenzaService.defineIntent({
547
+ name: META_RUNTIME_STATUS_INTENT,
548
+ description: "Gather lightweight runtime status reports from services in the distributed runtime.",
549
+ input: {
550
+ type: "object",
551
+ properties: {
552
+ detailLevel: {
553
+ type: "string",
554
+ constraints: {
555
+ oneOf: ["minimal", "full"]
556
+ }
557
+ },
558
+ targetServiceName: {
559
+ type: "string"
560
+ },
561
+ targetServiceInstanceId: {
562
+ type: "string"
563
+ }
564
+ }
565
+ },
566
+ output: {
567
+ type: "object",
568
+ properties: {
569
+ runtimeStatusReports: {
570
+ type: "array"
571
+ }
572
+ }
573
+ }
574
+ });
575
+ CadenzaService.createMetaTask(
576
+ "Respond runtime status inquiry",
577
+ (ctx) => {
578
+ const targetServiceName = ctx.targetServiceName;
579
+ const targetServiceInstanceId = ctx.targetServiceInstanceId;
580
+ const detailLevel = ctx.detailLevel === "full" ? "full" : "minimal";
581
+ const report = this.buildLocalRuntimeStatusReport(detailLevel);
582
+ if (!report) {
583
+ return {};
584
+ }
585
+ if (targetServiceName && targetServiceName !== report.serviceName) {
586
+ return {};
587
+ }
588
+ if (targetServiceInstanceId && targetServiceInstanceId !== report.serviceInstanceId) {
589
+ return {};
590
+ }
591
+ return {
592
+ runtimeStatusReports: [report]
593
+ };
594
+ },
595
+ "Responds to runtime-status inquiries with local service instance status."
596
+ ).respondsTo(META_RUNTIME_STATUS_INTENT);
421
597
  this.handleInstanceUpdateTask = CadenzaService.createMetaTask(
422
598
  "Handle Instance Update",
423
599
  (ctx, emit) => {
424
- const { serviceInstance } = ctx;
600
+ const serviceInstance = ctx.serviceInstance ?? (ctx.__serviceInstanceId || ctx.serviceInstanceId ? {
601
+ uuid: ctx.__serviceInstanceId ?? ctx.serviceInstanceId,
602
+ serviceName: ctx.__serviceName ?? ctx.serviceName,
603
+ address: ctx.serviceAddress ?? "",
604
+ port: ctx.servicePort ?? 0,
605
+ exposed: !!ctx.exposed,
606
+ isFrontend: !!ctx.isFrontend,
607
+ isActive: typeof ctx.isActive === "boolean" ? ctx.isActive : typeof ctx.__active === "boolean" ? ctx.__active : true,
608
+ isNonResponsive: !!ctx.isNonResponsive,
609
+ isBlocked: !!ctx.isBlocked,
610
+ health: ctx.health ?? ctx.__health ?? {},
611
+ numberOfRunningGraphs: ctx.numberOfRunningGraphs ?? ctx.__numberOfRunningGraphs ?? 0,
612
+ isPrimary: false
613
+ } : void 0);
614
+ if (!serviceInstance?.uuid || !serviceInstance?.serviceName) {
615
+ return false;
616
+ }
425
617
  const {
426
618
  uuid: uuid4,
427
619
  serviceName,
@@ -443,6 +635,7 @@ var ServiceRegistry = class _ServiceRegistry {
443
635
  emit(`meta.socket_shutdown_requested:${address}_${port}`, {});
444
636
  emit(`meta.fetch.destroy_requested:${address}_${port}`, {});
445
637
  }
638
+ this.unregisterDependee(uuid4, serviceName);
446
639
  return;
447
640
  }
448
641
  if (!this.instances.has(serviceName))
@@ -454,6 +647,18 @@ var ServiceRegistry = class _ServiceRegistry {
454
647
  } else {
455
648
  instances.push(serviceInstance);
456
649
  }
650
+ const trackedInstance = existing ?? instances.find((instance) => instance.uuid === uuid4);
651
+ if (trackedInstance) {
652
+ const snapshot = this.resolveRuntimeStatusSnapshot(
653
+ trackedInstance.numberOfRunningGraphs ?? 0,
654
+ trackedInstance.isActive,
655
+ trackedInstance.isNonResponsive,
656
+ trackedInstance.isBlocked
657
+ );
658
+ trackedInstance.runtimeState = snapshot.state;
659
+ trackedInstance.acceptingWork = snapshot.acceptingWork;
660
+ trackedInstance.reportedAt = trackedInstance.reportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
661
+ }
457
662
  if (this.serviceName === serviceName) {
458
663
  return false;
459
664
  }
@@ -493,13 +698,23 @@ var ServiceRegistry = class _ServiceRegistry {
493
698
  "global.meta.service_instance.inserted",
494
699
  "global.meta.service_instance.updated",
495
700
  "meta.service_instance.inserted",
496
- "meta.service_instance.updated",
497
- "meta.socket_client.status_received"
701
+ "meta.service_instance.updated"
498
702
  ).attachSignal(
499
703
  "meta.service_registry.dependee_registered",
500
704
  "meta.socket_shutdown_requested",
501
705
  "meta.fetch.destroy_requested"
502
706
  );
707
+ CadenzaService.createMetaTask(
708
+ "Track dependee registration",
709
+ (ctx) => {
710
+ if (!ctx.serviceName || !ctx.serviceInstanceId) {
711
+ return false;
712
+ }
713
+ this.registerDependee(ctx.serviceName, ctx.serviceInstanceId);
714
+ return true;
715
+ },
716
+ "Tracks remote dependency instances for runtime heartbeat monitoring."
717
+ ).doOn("meta.service_registry.dependee_registered");
503
718
  CadenzaService.createMetaTask("Split service instances", function* (ctx) {
504
719
  if (!ctx.serviceInstances) {
505
720
  return;
@@ -599,6 +814,15 @@ var ServiceRegistry = class _ServiceRegistry {
599
814
  for (const instance of instances ?? []) {
600
815
  instance.isActive = false;
601
816
  instance.isNonResponsive = true;
817
+ const snapshot = this.resolveRuntimeStatusSnapshot(
818
+ instance.numberOfRunningGraphs ?? 0,
819
+ instance.isActive,
820
+ instance.isNonResponsive,
821
+ instance.isBlocked
822
+ );
823
+ instance.runtimeState = snapshot.state;
824
+ instance.acceptingWork = snapshot.acceptingWork;
825
+ instance.reportedAt = (/* @__PURE__ */ new Date()).toISOString();
602
826
  emit("global.meta.service_registry.service_not_responding", {
603
827
  data: {
604
828
  isActive: false,
@@ -616,7 +840,8 @@ var ServiceRegistry = class _ServiceRegistry {
616
840
  "meta.fetch.handshake_failed",
617
841
  "meta.fetch.handshake_failed.*",
618
842
  "meta.socket_client.disconnected",
619
- "meta.socket_client.disconnected.*"
843
+ "meta.socket_client.disconnected.*",
844
+ "meta.service_registry.runtime_status_unreachable"
620
845
  ).attachSignal("global.meta.service_registry.service_not_responding");
621
846
  this.handleServiceHandshakeTask = CadenzaService.createMetaTask(
622
847
  "Handle service handshake",
@@ -631,6 +856,15 @@ var ServiceRegistry = class _ServiceRegistry {
631
856
  }
632
857
  instance.isActive = true;
633
858
  instance.isNonResponsive = false;
859
+ const snapshot = this.resolveRuntimeStatusSnapshot(
860
+ instance.numberOfRunningGraphs ?? 0,
861
+ instance.isActive,
862
+ instance.isNonResponsive,
863
+ instance.isBlocked
864
+ );
865
+ instance.runtimeState = snapshot.state;
866
+ instance.acceptingWork = snapshot.acceptingWork;
867
+ instance.reportedAt = (/* @__PURE__ */ new Date()).toISOString();
634
868
  emit("global.meta.service_registry.service_handshake", {
635
869
  data: {
636
870
  isActive: instance.isActive,
@@ -648,6 +882,7 @@ var ServiceRegistry = class _ServiceRegistry {
648
882
  if (indexToDelete >= 0) {
649
883
  this.instances.get(serviceName)?.splice(indexToDelete, 1);
650
884
  }
885
+ this.unregisterDependee(i.uuid, serviceName);
651
886
  emit("global.meta.service_registry.deleted", {
652
887
  data: {
653
888
  isActive: false,
@@ -669,14 +904,46 @@ var ServiceRegistry = class _ServiceRegistry {
669
904
  this.handleSocketStatusUpdateTask = CadenzaService.createMetaTask(
670
905
  "Handle Socket Status Update",
671
906
  (ctx) => {
672
- const instanceId = ctx.__serviceInstanceId;
673
- const serviceName = ctx.__serviceName;
674
- const instances = this.instances.get(serviceName);
675
- const instance = instances?.find((i) => i.uuid === instanceId);
676
- if (instance) {
677
- instance.health = ctx.health;
678
- instance.numberOfRunningGraphs = ctx.numberOfRunningGraphs;
907
+ const report = this.normalizeRuntimeStatusReport(ctx);
908
+ if (!report) {
909
+ return false;
679
910
  }
911
+ if (report.serviceName === this.serviceName && report.serviceInstanceId === this.serviceInstanceId) {
912
+ return false;
913
+ }
914
+ let applied = this.applyRuntimeStatusReport(report);
915
+ if (!applied && report.serviceAddress && typeof report.servicePort === "number") {
916
+ if (!this.instances.has(report.serviceName)) {
917
+ this.instances.set(report.serviceName, []);
918
+ }
919
+ this.instances.get(report.serviceName).push({
920
+ uuid: report.serviceInstanceId,
921
+ serviceName: report.serviceName,
922
+ address: report.serviceAddress,
923
+ port: report.servicePort,
924
+ exposed: !!report.exposed,
925
+ isFrontend: !!report.isFrontend,
926
+ isActive: report.isActive,
927
+ isNonResponsive: report.isNonResponsive,
928
+ isBlocked: report.isBlocked,
929
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
930
+ runtimeState: report.state,
931
+ acceptingWork: report.acceptingWork,
932
+ reportedAt: report.reportedAt,
933
+ health: report.health ?? {},
934
+ isPrimary: false
935
+ });
936
+ applied = true;
937
+ }
938
+ if (!applied) {
939
+ return false;
940
+ }
941
+ this.registerDependee(report.serviceName, report.serviceInstanceId);
942
+ this.lastHeartbeatAtByInstance.set(report.serviceInstanceId, Date.now());
943
+ this.missedHeartbeatsByInstance.set(report.serviceInstanceId, 0);
944
+ this.runtimeStatusFallbackInFlightByInstance.delete(
945
+ report.serviceInstanceId
946
+ );
680
947
  return true;
681
948
  },
682
949
  "Handles status update from socket broadcast"
@@ -806,7 +1073,25 @@ var ServiceRegistry = class _ServiceRegistry {
806
1073
  const { __serviceName, __triedInstances, __retries, __broadcast } = context;
807
1074
  let retries = __retries ?? 0;
808
1075
  let triedInstances = __triedInstances ?? [];
809
- const instances = this.instances.get(__serviceName)?.filter((i) => i.isActive && !i.isNonResponsive && !i.isBlocked).sort((a, b) => a.numberOfRunningGraphs - b.numberOfRunningGraphs);
1076
+ const instances = this.instances.get(__serviceName)?.filter((i) => i.isActive && !i.isNonResponsive && !i.isBlocked).sort((a, b) => {
1077
+ const leftStatus = this.resolveRuntimeStatusSnapshot(
1078
+ a.numberOfRunningGraphs ?? 0,
1079
+ a.isActive,
1080
+ a.isNonResponsive,
1081
+ a.isBlocked
1082
+ );
1083
+ const rightStatus = this.resolveRuntimeStatusSnapshot(
1084
+ b.numberOfRunningGraphs ?? 0,
1085
+ b.isActive,
1086
+ b.isNonResponsive,
1087
+ b.isBlocked
1088
+ );
1089
+ const priorityDelta = runtimeStatusPriority(leftStatus.state) - runtimeStatusPriority(rightStatus.state);
1090
+ if (priorityDelta !== 0) {
1091
+ return priorityDelta;
1092
+ }
1093
+ return (a.numberOfRunningGraphs ?? 0) - (b.numberOfRunningGraphs ?? 0);
1094
+ });
810
1095
  if (!instances || instances.length === 0 || retries > this.retryCount) {
811
1096
  context.errored = true;
812
1097
  context.__error = `No active instances for ${__serviceName}. Retries: ${retries}. ${this.instances.get(
@@ -892,15 +1177,287 @@ var ServiceRegistry = class _ServiceRegistry {
892
1177
  errored: true
893
1178
  };
894
1179
  }
895
- const self = this.instances.get(this.serviceName)?.find((i) => i.uuid === this.serviceInstanceId);
1180
+ const report = this.buildLocalRuntimeStatusReport("full");
1181
+ if (!report) {
1182
+ return {
1183
+ ...ctx,
1184
+ __status: "error",
1185
+ __error: "No local service instance available for status check",
1186
+ errored: true
1187
+ };
1188
+ }
896
1189
  return {
897
1190
  ...ctx,
898
1191
  __status: "ok",
899
- __numberOfRunningGraphs: self?.numberOfRunningGraphs ?? 0,
900
- __health: self?.health ?? {},
901
- __active: self?.isActive ?? false
1192
+ __serviceName: report.serviceName,
1193
+ __serviceInstanceId: report.serviceInstanceId,
1194
+ __numberOfRunningGraphs: report.numberOfRunningGraphs,
1195
+ __health: report.health ?? {},
1196
+ __active: report.isActive,
1197
+ reportedAt: report.reportedAt,
1198
+ serviceName: report.serviceName,
1199
+ serviceInstanceId: report.serviceInstanceId,
1200
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
1201
+ health: report.health ?? {},
1202
+ isActive: report.isActive,
1203
+ isNonResponsive: report.isNonResponsive,
1204
+ isBlocked: report.isBlocked,
1205
+ state: report.state,
1206
+ acceptingWork: report.acceptingWork
902
1207
  };
903
- }).doOn("meta.socket.status_check_requested");
1208
+ }).doOn(
1209
+ "meta.socket.status_check_requested",
1210
+ "meta.rest.status_check_requested"
1211
+ );
1212
+ CadenzaService.createMetaTask(
1213
+ "Track local routine start",
1214
+ (ctx, emit) => {
1215
+ const sourceTaskName = String(ctx.__signalEmission?.taskName ?? "");
1216
+ if (INTERNAL_RUNTIME_STATUS_TASK_NAMES.has(sourceTaskName)) {
1217
+ return false;
1218
+ }
1219
+ const routineId = String(
1220
+ ctx.filter?.uuid ?? ctx.__routineExecId ?? ""
1221
+ );
1222
+ if (!routineId) {
1223
+ return false;
1224
+ }
1225
+ this.activeRoutineExecutionIds.add(routineId);
1226
+ this.numberOfRunningGraphs = this.activeRoutineExecutionIds.size;
1227
+ const localInstance = this.getLocalInstance();
1228
+ if (!localInstance) {
1229
+ return true;
1230
+ }
1231
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1232
+ this.numberOfRunningGraphs,
1233
+ localInstance.isActive,
1234
+ localInstance.isNonResponsive,
1235
+ localInstance.isBlocked
1236
+ );
1237
+ if (hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1238
+ emit("meta.service_registry.runtime_status_broadcast_requested", {
1239
+ reason: "runtime-state-change"
1240
+ });
1241
+ }
1242
+ return true;
1243
+ },
1244
+ "Tracks local routine starts for runtime load status."
1245
+ ).doOn("meta.node.started_routine_execution");
1246
+ CadenzaService.createMetaTask(
1247
+ "Track local routine end",
1248
+ (ctx, emit) => {
1249
+ const sourceTaskName = String(ctx.__signalEmission?.taskName ?? "");
1250
+ if (INTERNAL_RUNTIME_STATUS_TASK_NAMES.has(sourceTaskName)) {
1251
+ return false;
1252
+ }
1253
+ const routineId = String(
1254
+ ctx.filter?.uuid ?? ctx.__routineExecId ?? ""
1255
+ );
1256
+ if (!routineId) {
1257
+ return false;
1258
+ }
1259
+ this.activeRoutineExecutionIds.delete(routineId);
1260
+ this.numberOfRunningGraphs = this.activeRoutineExecutionIds.size;
1261
+ const localInstance = this.getLocalInstance();
1262
+ if (!localInstance) {
1263
+ return true;
1264
+ }
1265
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1266
+ this.numberOfRunningGraphs,
1267
+ localInstance.isActive,
1268
+ localInstance.isNonResponsive,
1269
+ localInstance.isBlocked
1270
+ );
1271
+ if (hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1272
+ emit("meta.service_registry.runtime_status_broadcast_requested", {
1273
+ reason: "runtime-state-change"
1274
+ });
1275
+ }
1276
+ return true;
1277
+ },
1278
+ "Tracks local routine completion for runtime load status."
1279
+ ).doOn("meta.node.ended_routine_execution");
1280
+ CadenzaService.createMetaTask(
1281
+ "Start runtime status sharing intervals",
1282
+ () => {
1283
+ if (this.runtimeStatusHeartbeatStarted) {
1284
+ return false;
1285
+ }
1286
+ this.runtimeStatusHeartbeatStarted = true;
1287
+ CadenzaService.interval(
1288
+ META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL,
1289
+ { reason: "heartbeat" },
1290
+ this.runtimeStatusHeartbeatIntervalMs,
1291
+ true
1292
+ );
1293
+ CadenzaService.interval(
1294
+ META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL,
1295
+ {},
1296
+ this.runtimeStatusHeartbeatIntervalMs
1297
+ );
1298
+ return true;
1299
+ },
1300
+ "Starts runtime status heartbeat and heartbeat-monitor loops once per service instance."
1301
+ ).doOn("meta.service_registry.instance_inserted");
1302
+ CadenzaService.createMetaTask(
1303
+ "Broadcast runtime status",
1304
+ (ctx, emit) => {
1305
+ const report = this.buildLocalRuntimeStatusReport(
1306
+ ctx.detailLevel === "full" ? "full" : "minimal"
1307
+ );
1308
+ if (!report) {
1309
+ return false;
1310
+ }
1311
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1312
+ report.numberOfRunningGraphs,
1313
+ report.isActive,
1314
+ report.isNonResponsive,
1315
+ report.isBlocked
1316
+ );
1317
+ const force = ctx.reason === "heartbeat" || ctx.force === true || this.lastRuntimeStatusSnapshot === null;
1318
+ if (!force && !hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1319
+ return false;
1320
+ }
1321
+ this.lastRuntimeStatusSnapshot = snapshot;
1322
+ emit("meta.service.updated", {
1323
+ __serviceName: report.serviceName,
1324
+ __serviceInstanceId: report.serviceInstanceId,
1325
+ __reportedAt: report.reportedAt,
1326
+ __numberOfRunningGraphs: report.numberOfRunningGraphs,
1327
+ __health: report.health ?? {},
1328
+ __active: report.isActive,
1329
+ serviceName: report.serviceName,
1330
+ serviceInstanceId: report.serviceInstanceId,
1331
+ serviceAddress: report.serviceAddress,
1332
+ servicePort: report.servicePort,
1333
+ exposed: report.exposed,
1334
+ isFrontend: report.isFrontend,
1335
+ reportedAt: report.reportedAt,
1336
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
1337
+ health: report.health ?? {},
1338
+ isActive: report.isActive,
1339
+ isNonResponsive: report.isNonResponsive,
1340
+ isBlocked: report.isBlocked,
1341
+ state: report.state,
1342
+ acceptingWork: report.acceptingWork
1343
+ });
1344
+ return true;
1345
+ },
1346
+ "Broadcasts local runtime status to connected dependees."
1347
+ ).doOn(
1348
+ META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL,
1349
+ "meta.service_registry.runtime_status_broadcast_requested"
1350
+ );
1351
+ CadenzaService.createMetaTask(
1352
+ "Monitor dependee heartbeat freshness",
1353
+ (ctx, emit) => {
1354
+ if (!this.useSocket) {
1355
+ return false;
1356
+ }
1357
+ const now = Date.now();
1358
+ for (const [serviceName, instanceIds] of this.dependeesByService) {
1359
+ for (const serviceInstanceId of instanceIds) {
1360
+ const instance = this.getInstance(serviceName, serviceInstanceId);
1361
+ if (!instance || !instance.isActive || instance.isBlocked) {
1362
+ continue;
1363
+ }
1364
+ const lastHeartbeat = this.lastHeartbeatAtByInstance.get(serviceInstanceId) ?? 0;
1365
+ const misses = this.missedHeartbeatsByInstance.get(serviceInstanceId) ?? 0;
1366
+ const heartbeatBudget = this.runtimeStatusHeartbeatIntervalMs * (misses + 1);
1367
+ if (lastHeartbeat > 0 && now - lastHeartbeat < heartbeatBudget) {
1368
+ continue;
1369
+ }
1370
+ const nextMisses = misses + 1;
1371
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, nextMisses);
1372
+ if (nextMisses < this.runtimeStatusMissThreshold || this.runtimeStatusFallbackInFlightByInstance.has(serviceInstanceId)) {
1373
+ continue;
1374
+ }
1375
+ this.runtimeStatusFallbackInFlightByInstance.add(serviceInstanceId);
1376
+ emit("meta.service_registry.runtime_status_fallback_requested", {
1377
+ ...ctx,
1378
+ serviceName,
1379
+ serviceInstanceId,
1380
+ serviceAddress: instance.address,
1381
+ servicePort: instance.port
1382
+ });
1383
+ }
1384
+ }
1385
+ return true;
1386
+ },
1387
+ "Monitors dependee heartbeat freshness and requests inquiry fallback after repeated misses."
1388
+ ).doOn(META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL);
1389
+ CadenzaService.createMetaTask(
1390
+ "Resolve runtime status fallback inquiry",
1391
+ async (ctx, emit) => {
1392
+ const serviceName = ctx.serviceName;
1393
+ const serviceInstanceId = ctx.serviceInstanceId;
1394
+ if (!serviceName || !serviceInstanceId) {
1395
+ return false;
1396
+ }
1397
+ try {
1398
+ const inquiryResult = await CadenzaService.inquire(
1399
+ META_RUNTIME_STATUS_INTENT,
1400
+ {
1401
+ targetServiceName: serviceName,
1402
+ targetServiceInstanceId: serviceInstanceId,
1403
+ detailLevel: ctx.detailLevel === "full" ? "full" : "minimal"
1404
+ },
1405
+ {
1406
+ overallTimeoutMs: ctx.overallTimeoutMs ?? this.runtimeStatusFallbackTimeoutMs,
1407
+ perResponderTimeoutMs: ctx.perResponderTimeoutMs ?? Math.max(250, Math.floor(this.runtimeStatusFallbackTimeoutMs * 0.75)),
1408
+ requireComplete: ctx.requireComplete ?? false
1409
+ }
1410
+ );
1411
+ const report = this.selectRuntimeStatusReportForTarget(
1412
+ inquiryResult,
1413
+ serviceName,
1414
+ serviceInstanceId
1415
+ );
1416
+ if (!report) {
1417
+ throw new Error(
1418
+ `No runtime status report for ${serviceName}/${serviceInstanceId}`
1419
+ );
1420
+ }
1421
+ this.applyRuntimeStatusReport(report);
1422
+ this.lastHeartbeatAtByInstance.set(serviceInstanceId, Date.now());
1423
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, 0);
1424
+ return {
1425
+ ...ctx,
1426
+ runtimeStatusReport: report,
1427
+ __inquiryMeta: inquiryResult.__inquiryMeta
1428
+ };
1429
+ } catch (error) {
1430
+ const instance = this.getInstance(serviceName, serviceInstanceId);
1431
+ const message = error instanceof Error ? error.message : String(error);
1432
+ CadenzaService.log(
1433
+ "Runtime status fallback inquiry failed.",
1434
+ {
1435
+ serviceName,
1436
+ serviceInstanceId,
1437
+ error: message
1438
+ },
1439
+ "warning"
1440
+ );
1441
+ emit("meta.service_registry.runtime_status_unreachable", {
1442
+ ...ctx,
1443
+ serviceName,
1444
+ serviceInstanceId,
1445
+ serviceAddress: instance?.address ?? ctx.serviceAddress,
1446
+ servicePort: instance?.port ?? ctx.servicePort,
1447
+ __error: message,
1448
+ errored: true
1449
+ });
1450
+ return {
1451
+ ...ctx,
1452
+ __error: message,
1453
+ errored: true
1454
+ };
1455
+ } finally {
1456
+ this.runtimeStatusFallbackInFlightByInstance.delete(serviceInstanceId);
1457
+ }
1458
+ },
1459
+ "Runs runtime-status inquiry fallback for a dependee instance after missed heartbeats."
1460
+ ).doOn("meta.service_registry.runtime_status_fallback_requested").emits("meta.service_registry.runtime_status_fallback_resolved").emitsOnFail("meta.service_registry.runtime_status_fallback_failed");
904
1461
  this.collectTransportDiagnosticsTask = CadenzaService.createMetaTask(
905
1462
  "Collect transport diagnostics",
906
1463
  async (ctx) => {
@@ -1240,6 +1797,183 @@ var ServiceRegistry = class _ServiceRegistry {
1240
1797
  localTaskName: task.name
1241
1798
  };
1242
1799
  }
1800
+ getInstance(serviceName, instanceId) {
1801
+ return this.instances.get(serviceName)?.find((instance) => instance.uuid === instanceId);
1802
+ }
1803
+ getLocalInstance() {
1804
+ if (!this.serviceName || !this.serviceInstanceId) {
1805
+ return void 0;
1806
+ }
1807
+ return this.getInstance(this.serviceName, this.serviceInstanceId);
1808
+ }
1809
+ registerDependee(serviceName, serviceInstanceId) {
1810
+ if (!serviceName || !serviceInstanceId) {
1811
+ return;
1812
+ }
1813
+ if (!this.dependeesByService.has(serviceName)) {
1814
+ this.dependeesByService.set(serviceName, /* @__PURE__ */ new Set());
1815
+ }
1816
+ this.dependeesByService.get(serviceName).add(serviceInstanceId);
1817
+ this.dependeeByInstance.set(serviceInstanceId, serviceName);
1818
+ this.lastHeartbeatAtByInstance.set(serviceInstanceId, Date.now());
1819
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, 0);
1820
+ }
1821
+ unregisterDependee(serviceInstanceId, serviceName) {
1822
+ const dependeeServiceName = serviceName ?? this.dependeeByInstance.get(serviceInstanceId);
1823
+ if (dependeeServiceName) {
1824
+ this.dependeesByService.get(dependeeServiceName)?.delete(serviceInstanceId);
1825
+ if (!this.dependeesByService.get(dependeeServiceName)?.size) {
1826
+ this.dependeesByService.delete(dependeeServiceName);
1827
+ }
1828
+ }
1829
+ this.dependeeByInstance.delete(serviceInstanceId);
1830
+ this.lastHeartbeatAtByInstance.delete(serviceInstanceId);
1831
+ this.missedHeartbeatsByInstance.delete(serviceInstanceId);
1832
+ this.runtimeStatusFallbackInFlightByInstance.delete(serviceInstanceId);
1833
+ }
1834
+ resolveRuntimeStatusSnapshot(numberOfRunningGraphs, isActive, isNonResponsive, isBlocked) {
1835
+ return resolveRuntimeStatus({
1836
+ numberOfRunningGraphs,
1837
+ isActive,
1838
+ isNonResponsive,
1839
+ isBlocked,
1840
+ degradedGraphThreshold: this.degradedGraphThreshold,
1841
+ overloadedGraphThreshold: this.overloadedGraphThreshold
1842
+ });
1843
+ }
1844
+ normalizeRuntimeStatusReport(ctx) {
1845
+ const serviceName = ctx.serviceName ?? ctx.__serviceName ?? ctx.serviceInstance?.serviceName;
1846
+ const serviceInstanceId = ctx.serviceInstanceId ?? ctx.__serviceInstanceId ?? ctx.serviceInstance?.uuid;
1847
+ if (!serviceName || !serviceInstanceId) {
1848
+ return null;
1849
+ }
1850
+ const servicePort = ctx.servicePort ?? ctx.port ?? ctx.serviceInstance?.port;
1851
+ const numberOfRunningGraphs = Math.max(
1852
+ 0,
1853
+ Math.trunc(
1854
+ Number(ctx.numberOfRunningGraphs ?? ctx.__numberOfRunningGraphs ?? 0)
1855
+ )
1856
+ );
1857
+ const isActive = Boolean(ctx.isActive ?? ctx.__active ?? true);
1858
+ const isNonResponsive = Boolean(ctx.isNonResponsive ?? false);
1859
+ const isBlocked = Boolean(ctx.isBlocked ?? false);
1860
+ const resolved = this.resolveRuntimeStatusSnapshot(
1861
+ numberOfRunningGraphs,
1862
+ isActive,
1863
+ isNonResponsive,
1864
+ isBlocked
1865
+ );
1866
+ return {
1867
+ serviceName,
1868
+ serviceInstanceId,
1869
+ serviceAddress: ctx.serviceAddress ?? ctx.address ?? ctx.serviceInstance?.address,
1870
+ servicePort: typeof servicePort === "number" ? servicePort : void 0,
1871
+ exposed: typeof ctx.exposed === "boolean" ? ctx.exposed : typeof ctx.serviceInstance?.exposed === "boolean" ? ctx.serviceInstance.exposed : void 0,
1872
+ isFrontend: typeof ctx.isFrontend === "boolean" ? ctx.isFrontend : typeof ctx.serviceInstance?.isFrontend === "boolean" ? ctx.serviceInstance.isFrontend : void 0,
1873
+ reportedAt: ctx.reportedAt ?? (typeof ctx.__reportedAt === "string" ? ctx.__reportedAt : void 0) ?? (/* @__PURE__ */ new Date()).toISOString(),
1874
+ state: ctx.state === "healthy" || ctx.state === "degraded" || ctx.state === "overloaded" || ctx.state === "unavailable" ? ctx.state : resolved.state,
1875
+ acceptingWork: typeof ctx.acceptingWork === "boolean" ? ctx.acceptingWork : resolved.acceptingWork,
1876
+ numberOfRunningGraphs,
1877
+ isActive,
1878
+ isNonResponsive,
1879
+ isBlocked,
1880
+ health: ctx.health ?? ctx.__health ?? {}
1881
+ };
1882
+ }
1883
+ applyRuntimeStatusReport(report) {
1884
+ const instance = this.getInstance(report.serviceName, report.serviceInstanceId);
1885
+ if (!instance) {
1886
+ return false;
1887
+ }
1888
+ if (report.serviceAddress) {
1889
+ instance.address = report.serviceAddress;
1890
+ }
1891
+ if (typeof report.servicePort === "number") {
1892
+ instance.port = report.servicePort;
1893
+ }
1894
+ if (typeof report.exposed === "boolean") {
1895
+ instance.exposed = report.exposed;
1896
+ }
1897
+ if (typeof report.isFrontend === "boolean") {
1898
+ instance.isFrontend = report.isFrontend;
1899
+ }
1900
+ instance.numberOfRunningGraphs = report.numberOfRunningGraphs;
1901
+ instance.isActive = report.isActive;
1902
+ instance.isNonResponsive = report.isNonResponsive;
1903
+ instance.isBlocked = report.isBlocked;
1904
+ instance.runtimeState = report.state;
1905
+ instance.acceptingWork = report.acceptingWork;
1906
+ instance.reportedAt = report.reportedAt;
1907
+ instance.health = {
1908
+ ...instance.health ?? {},
1909
+ ...report.health ?? {},
1910
+ runtimeStatus: {
1911
+ state: report.state,
1912
+ acceptingWork: report.acceptingWork,
1913
+ reportedAt: report.reportedAt
1914
+ }
1915
+ };
1916
+ return true;
1917
+ }
1918
+ buildLocalRuntimeStatusReport(detailLevel = "minimal") {
1919
+ if (!this.serviceName || !this.serviceInstanceId) {
1920
+ return null;
1921
+ }
1922
+ const localInstance = this.getLocalInstance();
1923
+ if (!localInstance) {
1924
+ return null;
1925
+ }
1926
+ const numberOfRunningGraphs = this.activeRoutineExecutionIds.size || this.numberOfRunningGraphs || 0;
1927
+ this.numberOfRunningGraphs = numberOfRunningGraphs;
1928
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1929
+ numberOfRunningGraphs,
1930
+ localInstance.isActive,
1931
+ localInstance.isNonResponsive,
1932
+ localInstance.isBlocked
1933
+ );
1934
+ const reportedAt = (/* @__PURE__ */ new Date()).toISOString();
1935
+ const report = {
1936
+ serviceName: this.serviceName,
1937
+ serviceInstanceId: this.serviceInstanceId,
1938
+ serviceAddress: localInstance.address,
1939
+ servicePort: localInstance.port,
1940
+ exposed: localInstance.exposed,
1941
+ isFrontend: localInstance.isFrontend,
1942
+ reportedAt,
1943
+ state: snapshot.state,
1944
+ acceptingWork: snapshot.acceptingWork,
1945
+ numberOfRunningGraphs: snapshot.numberOfRunningGraphs,
1946
+ isActive: snapshot.isActive,
1947
+ isNonResponsive: snapshot.isNonResponsive,
1948
+ isBlocked: snapshot.isBlocked,
1949
+ health: {
1950
+ ...localInstance.health ?? {},
1951
+ runtimeStatus: {
1952
+ state: snapshot.state,
1953
+ acceptingWork: snapshot.acceptingWork,
1954
+ reportedAt
1955
+ }
1956
+ }
1957
+ };
1958
+ this.applyRuntimeStatusReport(report);
1959
+ if (detailLevel !== "full") {
1960
+ delete report.health;
1961
+ }
1962
+ return report;
1963
+ }
1964
+ selectRuntimeStatusReportForTarget(inquiryResult, targetServiceName, targetServiceInstanceId) {
1965
+ const reports = Array.isArray(inquiryResult.runtimeStatusReports) ? inquiryResult.runtimeStatusReports : [];
1966
+ for (const candidate of reports) {
1967
+ const report = this.normalizeRuntimeStatusReport(candidate);
1968
+ if (!report) {
1969
+ continue;
1970
+ }
1971
+ if (report.serviceName === targetServiceName && report.serviceInstanceId === targetServiceInstanceId) {
1972
+ return report;
1973
+ }
1974
+ }
1975
+ return null;
1976
+ }
1243
1977
  reset() {
1244
1978
  this.instances.clear();
1245
1979
  this.deputies.clear();
@@ -1247,6 +1981,15 @@ var ServiceRegistry = class _ServiceRegistry {
1247
1981
  this.remoteIntents.clear();
1248
1982
  this.remoteIntentDeputiesByKey.clear();
1249
1983
  this.remoteIntentDeputiesByTask.clear();
1984
+ this.dependeesByService.clear();
1985
+ this.dependeeByInstance.clear();
1986
+ this.lastHeartbeatAtByInstance.clear();
1987
+ this.missedHeartbeatsByInstance.clear();
1988
+ this.runtimeStatusFallbackInFlightByInstance.clear();
1989
+ this.activeRoutineExecutionIds.clear();
1990
+ this.numberOfRunningGraphs = 0;
1991
+ this.runtimeStatusHeartbeatStarted = false;
1992
+ this.lastRuntimeStatusSnapshot = null;
1250
1993
  }
1251
1994
  };
1252
1995