@cadenza.io/service 2.6.1 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -250,6 +250,7 @@ var isBrowser = typeof window !== "undefined" && typeof window.document !== "und
250
250
  // src/utils/inquiry.ts
251
251
  // Intent-name constants for the inquiry utilities.
  // NOTE(review): the consumers of META_INTENT_PREFIX and
  // META_RUNTIME_TRANSPORT_DIAGNOSTICS_INTENT are not visible in this diff.
  var META_INTENT_PREFIX = "meta-";
252
252
  var META_RUNTIME_TRANSPORT_DIAGNOSTICS_INTENT = "meta-runtime-transport-diagnostics";
253
// Name of the runtime-status intent: it is defined via CadenzaService.defineIntent,
// answered by the "Respond runtime status inquiry" task, and used for the
// fallback inquiry issued after missed heartbeats.
+ var META_RUNTIME_STATUS_INTENT = "meta-runtime-status";
253
254
  function isPlainObject(value) {
254
255
  return typeof value === "object" && value !== null && !Array.isArray(value) && Object.getPrototypeOf(value) === Object.prototype;
255
256
  }
@@ -313,8 +314,104 @@ function summarizeResponderStatuses(statuses) {
313
314
  return { responded, failed, timedOut, pending };
314
315
  }
315
316
 
317
+ // src/utils/runtimeStatus.ts
318
/**
 * Derives a runtime status snapshot from an instance's load and availability flags.
 *
 * @param {object} input - Reads `numberOfRunningGraphs`, `isActive`, `isNonResponsive`,
 *   `isBlocked`, `degradedGraphThreshold` and `overloadedGraphThreshold`.
 * @returns {object} `{ state, acceptingWork, numberOfRunningGraphs, isActive,
 *   isNonResponsive, isBlocked }` where `state` is one of "unavailable",
 *   "overloaded", "degraded" or "healthy". Only "unavailable" sets
 *   `acceptingWork` to false.
 */
+ function resolveRuntimeStatus(input) {
// Coerce the graph count to a non-negative integer; a NaN conversion falls back to 0.
319
+ const numberOfRunningGraphs = Math.max(
320
+ 0,
321
+ Math.trunc(Number(input.numberOfRunningGraphs) || 0)
322
+ );
323
+ const isActive = Boolean(input.isActive);
324
+ const isNonResponsive = Boolean(input.isNonResponsive);
325
+ const isBlocked = Boolean(input.isBlocked);
// Availability flags take precedence over load: an inactive, non-responsive or
// blocked instance is "unavailable" regardless of its graph count.
326
+ if (!isActive || isNonResponsive || isBlocked) {
327
+ return {
328
+ state: "unavailable",
329
+ acceptingWork: false,
330
+ numberOfRunningGraphs,
331
+ isActive,
332
+ isNonResponsive,
333
+ isBlocked
334
+ };
335
+ }
// The overloaded threshold is tested before the degraded one, so it wins when both
// are reached. An undefined threshold makes the `>=` comparison false.
336
+ if (numberOfRunningGraphs >= input.overloadedGraphThreshold) {
337
+ return {
338
+ state: "overloaded",
339
+ acceptingWork: true,
340
+ numberOfRunningGraphs,
341
+ isActive,
342
+ isNonResponsive,
343
+ isBlocked
344
+ };
345
+ }
346
+ if (numberOfRunningGraphs >= input.degradedGraphThreshold) {
347
+ return {
348
+ state: "degraded",
349
+ acceptingWork: true,
350
+ numberOfRunningGraphs,
351
+ isActive,
352
+ isNonResponsive,
353
+ isBlocked
354
+ };
355
+ }
356
+ return {
357
+ state: "healthy",
358
+ acceptingWork: true,
359
+ numberOfRunningGraphs,
360
+ isActive,
361
+ isNonResponsive,
362
+ isBlocked
363
+ };
364
+ }
365
/**
 * Maps a runtime state name to a numeric rank.
 *
 * The instance-selection comparator in this file subtracts these ranks, so a lower
 * rank sorts earlier.
 *
 * @param {string} state - Runtime state name.
 * @returns {number} 0 for "healthy", 1 for "degraded", 2 for "overloaded",
 *   3 for "unavailable", and 4 for any other value.
 */
+ function runtimeStatusPriority(state) {
366
+ switch (state) {
367
+ case "healthy":
368
+ return 0;
369
+ case "degraded":
370
+ return 1;
371
+ case "overloaded":
372
+ return 2;
373
+ case "unavailable":
374
+ return 3;
375
+ default:
376
+ return 4;
377
+ }
378
+ }
379
/**
 * Decides whether two runtime status snapshots differ in a way worth reporting.
 *
 * @param {object|null|undefined} previous - Earlier snapshot; a falsy value counts as a change.
 * @param {object} next - Current snapshot.
 * @returns {boolean} true when there is no previous snapshot or when `state`,
 *   `acceptingWork`, `isActive`, `isNonResponsive` or `isBlocked` differ.
 *   `numberOfRunningGraphs` is not compared.
 */
+ function hasSignificantRuntimeStatusChange(previous, next) {
380
+ if (!previous) {
381
+ return true;
382
+ }
383
+ return previous.state !== next.state || previous.acceptingWork !== next.acceptingWork || previous.isActive !== next.isActive || previous.isNonResponsive !== next.isNonResponsive || previous.isBlocked !== next.isBlocked;
384
+ }
385
+
316
386
  // src/registry/ServiceRegistry.ts
317
387
  var META_SERVICE_REGISTRY_FULL_SYNC_INTENT = "meta-service-registry-full-sync";
388
// Signal scheduled with CadenzaService.interval by "Start runtime status sharing
// intervals"; the "Broadcast runtime status" task listens for it.
+ var META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL = "meta.service_registry.runtime_status.heartbeat_tick";
389
// Signal scheduled on the same interval; the "Monitor dependee heartbeat freshness"
// task listens for it.
+ var META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL = "meta.service_registry.runtime_status.monitor_tick";
390
// Task names whose routine executions are not counted as load: the
// "Track local routine start" / "Track local routine end" tasks return false when
// the task name on the triggering signal emission is in this set.
+ var INTERNAL_RUNTIME_STATUS_TASK_NAMES = /* @__PURE__ */ new Set([
391
+ "Track local routine start",
392
+ "Track local routine end",
393
+ "Start runtime status sharing intervals",
394
+ "Broadcast runtime status",
395
+ "Monitor dependee heartbeat freshness",
396
+ "Resolve runtime status fallback inquiry",
397
+ "Respond runtime status inquiry",
398
+ "Get status"
399
+ ]);
400
/**
 * Reads an environment variable as a positive integer.
 *
 * @param {string} name - Environment variable name looked up on `process.env`.
 * @param {number} fallback - Value returned when the variable cannot be used.
 * @returns {number} The variable converted with `Number` and truncated toward zero,
 *   or `fallback` when `process` is undefined, the conversion is not finite
 *   (an unset variable converts to NaN), or the truncated value is <= 0.
 */
+ function readPositiveIntegerEnv(name, fallback) {
// `process` does not exist in every runtime (e.g. browsers), so guard before use.
401
+ if (typeof process === "undefined") {
402
+ return fallback;
403
+ }
404
+ const raw = process.env?.[name];
405
+ const parsed = Number(raw);
406
+ if (!Number.isFinite(parsed)) {
407
+ return fallback;
408
+ }
// Fractions are truncated; values below 1 (including "" which converts to 0)
// end up <= 0 and use the fallback.
409
+ const normalized = Math.trunc(parsed);
410
+ if (normalized <= 0) {
411
+ return fallback;
412
+ }
413
+ return normalized;
414
+ }
318
415
  var ServiceRegistry = class _ServiceRegistry {
319
416
  /**
320
417
  * Initializes a private constructor for managing service instances, remote signals,
@@ -332,6 +429,34 @@ var ServiceRegistry = class _ServiceRegistry {
332
429
  this.remoteIntents = /* @__PURE__ */ new Map();
333
430
  this.remoteIntentDeputiesByKey = /* @__PURE__ */ new Map();
334
431
  this.remoteIntentDeputiesByTask = /* @__PURE__ */ new Map();
432
+ this.dependeesByService = /* @__PURE__ */ new Map();
433
+ this.dependeeByInstance = /* @__PURE__ */ new Map();
434
+ this.lastHeartbeatAtByInstance = /* @__PURE__ */ new Map();
435
+ this.missedHeartbeatsByInstance = /* @__PURE__ */ new Map();
436
+ this.runtimeStatusFallbackInFlightByInstance = /* @__PURE__ */ new Set();
437
+ this.activeRoutineExecutionIds = /* @__PURE__ */ new Set();
438
+ this.runtimeStatusHeartbeatStarted = false;
439
+ this.lastRuntimeStatusSnapshot = null;
440
+ this.runtimeStatusHeartbeatIntervalMs = readPositiveIntegerEnv(
441
+ "CADENZA_RUNTIME_STATUS_HEARTBEAT_MS",
442
+ 3e4
443
+ );
444
+ this.runtimeStatusMissThreshold = readPositiveIntegerEnv(
445
+ "CADENZA_RUNTIME_STATUS_MISSED_HEARTBEATS",
446
+ 3
447
+ );
448
+ this.runtimeStatusFallbackTimeoutMs = readPositiveIntegerEnv(
449
+ "CADENZA_RUNTIME_STATUS_FALLBACK_TIMEOUT_MS",
450
+ 1500
451
+ );
452
+ this.degradedGraphThreshold = readPositiveIntegerEnv(
453
+ "CADENZA_RUNTIME_STATUS_DEGRADED_GRAPH_THRESHOLD",
454
+ 10
455
+ );
456
+ this.overloadedGraphThreshold = readPositiveIntegerEnv(
457
+ "CADENZA_RUNTIME_STATUS_OVERLOADED_GRAPH_THRESHOLD",
458
+ 20
459
+ );
335
460
  this.serviceName = null;
336
461
  this.serviceInstanceId = null;
337
462
  this.numberOfRunningGraphs = 0;
@@ -370,10 +495,77 @@ var ServiceRegistry = class _ServiceRegistry {
370
495
  }
371
496
  }
372
497
  });
498
+ CadenzaService.defineIntent({
499
+ name: META_RUNTIME_STATUS_INTENT,
500
+ description: "Gather lightweight runtime status reports from services in the distributed runtime.",
501
+ input: {
502
+ type: "object",
503
+ properties: {
504
+ detailLevel: {
505
+ type: "string",
506
+ constraints: {
507
+ oneOf: ["minimal", "full"]
508
+ }
509
+ },
510
+ targetServiceName: {
511
+ type: "string"
512
+ },
513
+ targetServiceInstanceId: {
514
+ type: "string"
515
+ }
516
+ }
517
+ },
518
+ output: {
519
+ type: "object",
520
+ properties: {
521
+ runtimeStatusReports: {
522
+ type: "array"
523
+ }
524
+ }
525
+ }
526
+ });
527
+ CadenzaService.createMetaTask(
528
+ "Respond runtime status inquiry",
529
+ (ctx) => {
530
+ const targetServiceName = ctx.targetServiceName;
531
+ const targetServiceInstanceId = ctx.targetServiceInstanceId;
532
+ const detailLevel = ctx.detailLevel === "full" ? "full" : "minimal";
533
+ const report = this.buildLocalRuntimeStatusReport(detailLevel);
534
+ if (!report) {
535
+ return {};
536
+ }
537
+ if (targetServiceName && targetServiceName !== report.serviceName) {
538
+ return {};
539
+ }
540
+ if (targetServiceInstanceId && targetServiceInstanceId !== report.serviceInstanceId) {
541
+ return {};
542
+ }
543
+ return {
544
+ runtimeStatusReports: [report]
545
+ };
546
+ },
547
+ "Responds to runtime-status inquiries with local service instance status."
548
+ ).respondsTo(META_RUNTIME_STATUS_INTENT);
373
549
  this.handleInstanceUpdateTask = CadenzaService.createMetaTask(
374
550
  "Handle Instance Update",
375
551
  (ctx, emit) => {
376
- const { serviceInstance } = ctx;
552
+ const serviceInstance = ctx.serviceInstance ?? (ctx.__serviceInstanceId || ctx.serviceInstanceId ? {
553
+ uuid: ctx.__serviceInstanceId ?? ctx.serviceInstanceId,
554
+ serviceName: ctx.__serviceName ?? ctx.serviceName,
555
+ address: ctx.serviceAddress ?? "",
556
+ port: ctx.servicePort ?? 0,
557
+ exposed: !!ctx.exposed,
558
+ isFrontend: !!ctx.isFrontend,
559
+ isActive: typeof ctx.isActive === "boolean" ? ctx.isActive : typeof ctx.__active === "boolean" ? ctx.__active : true,
560
+ isNonResponsive: !!ctx.isNonResponsive,
561
+ isBlocked: !!ctx.isBlocked,
562
+ health: ctx.health ?? ctx.__health ?? {},
563
+ numberOfRunningGraphs: ctx.numberOfRunningGraphs ?? ctx.__numberOfRunningGraphs ?? 0,
564
+ isPrimary: false
565
+ } : void 0);
566
+ if (!serviceInstance?.uuid || !serviceInstance?.serviceName) {
567
+ return false;
568
+ }
377
569
  const {
378
570
  uuid: uuid4,
379
571
  serviceName,
@@ -395,6 +587,7 @@ var ServiceRegistry = class _ServiceRegistry {
395
587
  emit(`meta.socket_shutdown_requested:${address}_${port}`, {});
396
588
  emit(`meta.fetch.destroy_requested:${address}_${port}`, {});
397
589
  }
590
+ this.unregisterDependee(uuid4, serviceName);
398
591
  return;
399
592
  }
400
593
  if (!this.instances.has(serviceName))
@@ -406,6 +599,18 @@ var ServiceRegistry = class _ServiceRegistry {
406
599
  } else {
407
600
  instances.push(serviceInstance);
408
601
  }
602
+ const trackedInstance = existing ?? instances.find((instance) => instance.uuid === uuid4);
603
+ if (trackedInstance) {
604
+ const snapshot = this.resolveRuntimeStatusSnapshot(
605
+ trackedInstance.numberOfRunningGraphs ?? 0,
606
+ trackedInstance.isActive,
607
+ trackedInstance.isNonResponsive,
608
+ trackedInstance.isBlocked
609
+ );
610
+ trackedInstance.runtimeState = snapshot.state;
611
+ trackedInstance.acceptingWork = snapshot.acceptingWork;
612
+ trackedInstance.reportedAt = trackedInstance.reportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
613
+ }
409
614
  if (this.serviceName === serviceName) {
410
615
  return false;
411
616
  }
@@ -445,13 +650,23 @@ var ServiceRegistry = class _ServiceRegistry {
445
650
  "global.meta.service_instance.inserted",
446
651
  "global.meta.service_instance.updated",
447
652
  "meta.service_instance.inserted",
448
- "meta.service_instance.updated",
449
- "meta.socket_client.status_received"
653
+ "meta.service_instance.updated"
450
654
  ).attachSignal(
451
655
  "meta.service_registry.dependee_registered",
452
656
  "meta.socket_shutdown_requested",
453
657
  "meta.fetch.destroy_requested"
454
658
  );
659
+ CadenzaService.createMetaTask(
660
+ "Track dependee registration",
661
+ (ctx) => {
662
+ if (!ctx.serviceName || !ctx.serviceInstanceId) {
663
+ return false;
664
+ }
665
+ this.registerDependee(ctx.serviceName, ctx.serviceInstanceId);
666
+ return true;
667
+ },
668
+ "Tracks remote dependency instances for runtime heartbeat monitoring."
669
+ ).doOn("meta.service_registry.dependee_registered");
455
670
  CadenzaService.createMetaTask("Split service instances", function* (ctx) {
456
671
  if (!ctx.serviceInstances) {
457
672
  return;
@@ -551,6 +766,15 @@ var ServiceRegistry = class _ServiceRegistry {
551
766
  for (const instance of instances ?? []) {
552
767
  instance.isActive = false;
553
768
  instance.isNonResponsive = true;
769
+ const snapshot = this.resolveRuntimeStatusSnapshot(
770
+ instance.numberOfRunningGraphs ?? 0,
771
+ instance.isActive,
772
+ instance.isNonResponsive,
773
+ instance.isBlocked
774
+ );
775
+ instance.runtimeState = snapshot.state;
776
+ instance.acceptingWork = snapshot.acceptingWork;
777
+ instance.reportedAt = (/* @__PURE__ */ new Date()).toISOString();
554
778
  emit("global.meta.service_registry.service_not_responding", {
555
779
  data: {
556
780
  isActive: false,
@@ -568,7 +792,8 @@ var ServiceRegistry = class _ServiceRegistry {
568
792
  "meta.fetch.handshake_failed",
569
793
  "meta.fetch.handshake_failed.*",
570
794
  "meta.socket_client.disconnected",
571
- "meta.socket_client.disconnected.*"
795
+ "meta.socket_client.disconnected.*",
796
+ "meta.service_registry.runtime_status_unreachable"
572
797
  ).attachSignal("global.meta.service_registry.service_not_responding");
573
798
  this.handleServiceHandshakeTask = CadenzaService.createMetaTask(
574
799
  "Handle service handshake",
@@ -583,6 +808,15 @@ var ServiceRegistry = class _ServiceRegistry {
583
808
  }
584
809
  instance.isActive = true;
585
810
  instance.isNonResponsive = false;
811
+ const snapshot = this.resolveRuntimeStatusSnapshot(
812
+ instance.numberOfRunningGraphs ?? 0,
813
+ instance.isActive,
814
+ instance.isNonResponsive,
815
+ instance.isBlocked
816
+ );
817
+ instance.runtimeState = snapshot.state;
818
+ instance.acceptingWork = snapshot.acceptingWork;
819
+ instance.reportedAt = (/* @__PURE__ */ new Date()).toISOString();
586
820
  emit("global.meta.service_registry.service_handshake", {
587
821
  data: {
588
822
  isActive: instance.isActive,
@@ -600,6 +834,7 @@ var ServiceRegistry = class _ServiceRegistry {
600
834
  if (indexToDelete >= 0) {
601
835
  this.instances.get(serviceName)?.splice(indexToDelete, 1);
602
836
  }
837
+ this.unregisterDependee(i.uuid, serviceName);
603
838
  emit("global.meta.service_registry.deleted", {
604
839
  data: {
605
840
  isActive: false,
@@ -621,14 +856,46 @@ var ServiceRegistry = class _ServiceRegistry {
621
856
  this.handleSocketStatusUpdateTask = CadenzaService.createMetaTask(
622
857
  "Handle Socket Status Update",
623
858
  (ctx) => {
624
- const instanceId = ctx.__serviceInstanceId;
625
- const serviceName = ctx.__serviceName;
626
- const instances = this.instances.get(serviceName);
627
- const instance = instances?.find((i) => i.uuid === instanceId);
628
- if (instance) {
629
- instance.health = ctx.health;
630
- instance.numberOfRunningGraphs = ctx.numberOfRunningGraphs;
859
+ const report = this.normalizeRuntimeStatusReport(ctx);
860
+ if (!report) {
861
+ return false;
631
862
  }
863
+ if (report.serviceName === this.serviceName && report.serviceInstanceId === this.serviceInstanceId) {
864
+ return false;
865
+ }
866
+ let applied = this.applyRuntimeStatusReport(report);
867
+ if (!applied && report.serviceAddress && typeof report.servicePort === "number") {
868
+ if (!this.instances.has(report.serviceName)) {
869
+ this.instances.set(report.serviceName, []);
870
+ }
871
+ this.instances.get(report.serviceName).push({
872
+ uuid: report.serviceInstanceId,
873
+ serviceName: report.serviceName,
874
+ address: report.serviceAddress,
875
+ port: report.servicePort,
876
+ exposed: !!report.exposed,
877
+ isFrontend: !!report.isFrontend,
878
+ isActive: report.isActive,
879
+ isNonResponsive: report.isNonResponsive,
880
+ isBlocked: report.isBlocked,
881
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
882
+ runtimeState: report.state,
883
+ acceptingWork: report.acceptingWork,
884
+ reportedAt: report.reportedAt,
885
+ health: report.health ?? {},
886
+ isPrimary: false
887
+ });
888
+ applied = true;
889
+ }
890
+ if (!applied) {
891
+ return false;
892
+ }
893
+ this.registerDependee(report.serviceName, report.serviceInstanceId);
894
+ this.lastHeartbeatAtByInstance.set(report.serviceInstanceId, Date.now());
895
+ this.missedHeartbeatsByInstance.set(report.serviceInstanceId, 0);
896
+ this.runtimeStatusFallbackInFlightByInstance.delete(
897
+ report.serviceInstanceId
898
+ );
632
899
  return true;
633
900
  },
634
901
  "Handles status update from socket broadcast"
@@ -758,7 +1025,25 @@ var ServiceRegistry = class _ServiceRegistry {
758
1025
  const { __serviceName, __triedInstances, __retries, __broadcast } = context;
759
1026
  let retries = __retries ?? 0;
760
1027
  let triedInstances = __triedInstances ?? [];
761
- const instances = this.instances.get(__serviceName)?.filter((i) => i.isActive && !i.isNonResponsive && !i.isBlocked).sort((a, b) => a.numberOfRunningGraphs - b.numberOfRunningGraphs);
1028
+ const instances = this.instances.get(__serviceName)?.filter((i) => i.isActive && !i.isNonResponsive && !i.isBlocked).sort((a, b) => {
1029
+ const leftStatus = this.resolveRuntimeStatusSnapshot(
1030
+ a.numberOfRunningGraphs ?? 0,
1031
+ a.isActive,
1032
+ a.isNonResponsive,
1033
+ a.isBlocked
1034
+ );
1035
+ const rightStatus = this.resolveRuntimeStatusSnapshot(
1036
+ b.numberOfRunningGraphs ?? 0,
1037
+ b.isActive,
1038
+ b.isNonResponsive,
1039
+ b.isBlocked
1040
+ );
1041
+ const priorityDelta = runtimeStatusPriority(leftStatus.state) - runtimeStatusPriority(rightStatus.state);
1042
+ if (priorityDelta !== 0) {
1043
+ return priorityDelta;
1044
+ }
1045
+ return (a.numberOfRunningGraphs ?? 0) - (b.numberOfRunningGraphs ?? 0);
1046
+ });
762
1047
  if (!instances || instances.length === 0 || retries > this.retryCount) {
763
1048
  context.errored = true;
764
1049
  context.__error = `No active instances for ${__serviceName}. Retries: ${retries}. ${this.instances.get(
@@ -844,15 +1129,287 @@ var ServiceRegistry = class _ServiceRegistry {
844
1129
  errored: true
845
1130
  };
846
1131
  }
847
- const self = this.instances.get(this.serviceName)?.find((i) => i.uuid === this.serviceInstanceId);
1132
+ const report = this.buildLocalRuntimeStatusReport("full");
1133
+ if (!report) {
1134
+ return {
1135
+ ...ctx,
1136
+ __status: "error",
1137
+ __error: "No local service instance available for status check",
1138
+ errored: true
1139
+ };
1140
+ }
848
1141
  return {
849
1142
  ...ctx,
850
1143
  __status: "ok",
851
- __numberOfRunningGraphs: self?.numberOfRunningGraphs ?? 0,
852
- __health: self?.health ?? {},
853
- __active: self?.isActive ?? false
1144
+ __serviceName: report.serviceName,
1145
+ __serviceInstanceId: report.serviceInstanceId,
1146
+ __numberOfRunningGraphs: report.numberOfRunningGraphs,
1147
+ __health: report.health ?? {},
1148
+ __active: report.isActive,
1149
+ reportedAt: report.reportedAt,
1150
+ serviceName: report.serviceName,
1151
+ serviceInstanceId: report.serviceInstanceId,
1152
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
1153
+ health: report.health ?? {},
1154
+ isActive: report.isActive,
1155
+ isNonResponsive: report.isNonResponsive,
1156
+ isBlocked: report.isBlocked,
1157
+ state: report.state,
1158
+ acceptingWork: report.acceptingWork
854
1159
  };
855
- }).doOn("meta.socket.status_check_requested");
1160
+ }).doOn(
1161
+ "meta.socket.status_check_requested",
1162
+ "meta.rest.status_check_requested"
1163
+ );
1164
+ CadenzaService.createMetaTask(
1165
+ "Track local routine start",
1166
+ (ctx, emit) => {
1167
+ const sourceTaskName = String(ctx.__signalEmission?.taskName ?? "");
1168
+ if (INTERNAL_RUNTIME_STATUS_TASK_NAMES.has(sourceTaskName)) {
1169
+ return false;
1170
+ }
1171
+ const routineId = String(
1172
+ ctx.filter?.uuid ?? ctx.__routineExecId ?? ""
1173
+ );
1174
+ if (!routineId) {
1175
+ return false;
1176
+ }
1177
+ this.activeRoutineExecutionIds.add(routineId);
1178
+ this.numberOfRunningGraphs = this.activeRoutineExecutionIds.size;
1179
+ const localInstance = this.getLocalInstance();
1180
+ if (!localInstance) {
1181
+ return true;
1182
+ }
1183
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1184
+ this.numberOfRunningGraphs,
1185
+ localInstance.isActive,
1186
+ localInstance.isNonResponsive,
1187
+ localInstance.isBlocked
1188
+ );
1189
+ if (hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1190
+ emit("meta.service_registry.runtime_status_broadcast_requested", {
1191
+ reason: "runtime-state-change"
1192
+ });
1193
+ }
1194
+ return true;
1195
+ },
1196
+ "Tracks local routine starts for runtime load status."
1197
+ ).doOn("meta.node.started_routine_execution");
1198
+ CadenzaService.createMetaTask(
1199
+ "Track local routine end",
1200
+ (ctx, emit) => {
1201
+ const sourceTaskName = String(ctx.__signalEmission?.taskName ?? "");
1202
+ if (INTERNAL_RUNTIME_STATUS_TASK_NAMES.has(sourceTaskName)) {
1203
+ return false;
1204
+ }
1205
+ const routineId = String(
1206
+ ctx.filter?.uuid ?? ctx.__routineExecId ?? ""
1207
+ );
1208
+ if (!routineId) {
1209
+ return false;
1210
+ }
1211
+ this.activeRoutineExecutionIds.delete(routineId);
1212
+ this.numberOfRunningGraphs = this.activeRoutineExecutionIds.size;
1213
+ const localInstance = this.getLocalInstance();
1214
+ if (!localInstance) {
1215
+ return true;
1216
+ }
1217
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1218
+ this.numberOfRunningGraphs,
1219
+ localInstance.isActive,
1220
+ localInstance.isNonResponsive,
1221
+ localInstance.isBlocked
1222
+ );
1223
+ if (hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1224
+ emit("meta.service_registry.runtime_status_broadcast_requested", {
1225
+ reason: "runtime-state-change"
1226
+ });
1227
+ }
1228
+ return true;
1229
+ },
1230
+ "Tracks local routine completion for runtime load status."
1231
+ ).doOn("meta.node.ended_routine_execution");
1232
+ CadenzaService.createMetaTask(
1233
+ "Start runtime status sharing intervals",
1234
+ () => {
1235
+ if (this.runtimeStatusHeartbeatStarted) {
1236
+ return false;
1237
+ }
1238
+ this.runtimeStatusHeartbeatStarted = true;
1239
+ CadenzaService.interval(
1240
+ META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL,
1241
+ { reason: "heartbeat" },
1242
+ this.runtimeStatusHeartbeatIntervalMs,
1243
+ true
1244
+ );
1245
+ CadenzaService.interval(
1246
+ META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL,
1247
+ {},
1248
+ this.runtimeStatusHeartbeatIntervalMs
1249
+ );
1250
+ return true;
1251
+ },
1252
+ "Starts runtime status heartbeat and heartbeat-monitor loops once per service instance."
1253
+ ).doOn("meta.service_registry.instance_inserted");
1254
+ CadenzaService.createMetaTask(
1255
+ "Broadcast runtime status",
1256
+ (ctx, emit) => {
1257
+ const report = this.buildLocalRuntimeStatusReport(
1258
+ ctx.detailLevel === "full" ? "full" : "minimal"
1259
+ );
1260
+ if (!report) {
1261
+ return false;
1262
+ }
1263
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1264
+ report.numberOfRunningGraphs,
1265
+ report.isActive,
1266
+ report.isNonResponsive,
1267
+ report.isBlocked
1268
+ );
1269
+ const force = ctx.reason === "heartbeat" || ctx.force === true || this.lastRuntimeStatusSnapshot === null;
1270
+ if (!force && !hasSignificantRuntimeStatusChange(this.lastRuntimeStatusSnapshot, snapshot)) {
1271
+ return false;
1272
+ }
1273
+ this.lastRuntimeStatusSnapshot = snapshot;
1274
+ emit("meta.service.updated", {
1275
+ __serviceName: report.serviceName,
1276
+ __serviceInstanceId: report.serviceInstanceId,
1277
+ __reportedAt: report.reportedAt,
1278
+ __numberOfRunningGraphs: report.numberOfRunningGraphs,
1279
+ __health: report.health ?? {},
1280
+ __active: report.isActive,
1281
+ serviceName: report.serviceName,
1282
+ serviceInstanceId: report.serviceInstanceId,
1283
+ serviceAddress: report.serviceAddress,
1284
+ servicePort: report.servicePort,
1285
+ exposed: report.exposed,
1286
+ isFrontend: report.isFrontend,
1287
+ reportedAt: report.reportedAt,
1288
+ numberOfRunningGraphs: report.numberOfRunningGraphs,
1289
+ health: report.health ?? {},
1290
+ isActive: report.isActive,
1291
+ isNonResponsive: report.isNonResponsive,
1292
+ isBlocked: report.isBlocked,
1293
+ state: report.state,
1294
+ acceptingWork: report.acceptingWork
1295
+ });
1296
+ return true;
1297
+ },
1298
+ "Broadcasts local runtime status to connected dependees."
1299
+ ).doOn(
1300
+ META_RUNTIME_STATUS_HEARTBEAT_TICK_SIGNAL,
1301
+ "meta.service_registry.runtime_status_broadcast_requested"
1302
+ );
1303
+ CadenzaService.createMetaTask(
1304
+ "Monitor dependee heartbeat freshness",
1305
+ (ctx, emit) => {
1306
+ if (!this.useSocket) {
1307
+ return false;
1308
+ }
1309
+ const now = Date.now();
1310
+ for (const [serviceName, instanceIds] of this.dependeesByService) {
1311
+ for (const serviceInstanceId of instanceIds) {
1312
+ const instance = this.getInstance(serviceName, serviceInstanceId);
1313
+ if (!instance || !instance.isActive || instance.isBlocked) {
1314
+ continue;
1315
+ }
1316
+ const lastHeartbeat = this.lastHeartbeatAtByInstance.get(serviceInstanceId) ?? 0;
1317
+ const misses = this.missedHeartbeatsByInstance.get(serviceInstanceId) ?? 0;
1318
+ const heartbeatBudget = this.runtimeStatusHeartbeatIntervalMs * (misses + 1);
1319
+ if (lastHeartbeat > 0 && now - lastHeartbeat < heartbeatBudget) {
1320
+ continue;
1321
+ }
1322
+ const nextMisses = misses + 1;
1323
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, nextMisses);
1324
+ if (nextMisses < this.runtimeStatusMissThreshold || this.runtimeStatusFallbackInFlightByInstance.has(serviceInstanceId)) {
1325
+ continue;
1326
+ }
1327
+ this.runtimeStatusFallbackInFlightByInstance.add(serviceInstanceId);
1328
+ emit("meta.service_registry.runtime_status_fallback_requested", {
1329
+ ...ctx,
1330
+ serviceName,
1331
+ serviceInstanceId,
1332
+ serviceAddress: instance.address,
1333
+ servicePort: instance.port
1334
+ });
1335
+ }
1336
+ }
1337
+ return true;
1338
+ },
1339
+ "Monitors dependee heartbeat freshness and requests inquiry fallback after repeated misses."
1340
+ ).doOn(META_RUNTIME_STATUS_MONITOR_TICK_SIGNAL);
1341
+ CadenzaService.createMetaTask(
1342
+ "Resolve runtime status fallback inquiry",
1343
+ async (ctx, emit) => {
1344
+ const serviceName = ctx.serviceName;
1345
+ const serviceInstanceId = ctx.serviceInstanceId;
1346
+ if (!serviceName || !serviceInstanceId) {
1347
+ return false;
1348
+ }
1349
+ try {
1350
+ const inquiryResult = await CadenzaService.inquire(
1351
+ META_RUNTIME_STATUS_INTENT,
1352
+ {
1353
+ targetServiceName: serviceName,
1354
+ targetServiceInstanceId: serviceInstanceId,
1355
+ detailLevel: ctx.detailLevel === "full" ? "full" : "minimal"
1356
+ },
1357
+ {
1358
+ overallTimeoutMs: ctx.overallTimeoutMs ?? this.runtimeStatusFallbackTimeoutMs,
1359
+ perResponderTimeoutMs: ctx.perResponderTimeoutMs ?? Math.max(250, Math.floor(this.runtimeStatusFallbackTimeoutMs * 0.75)),
1360
+ requireComplete: ctx.requireComplete ?? false
1361
+ }
1362
+ );
1363
+ const report = this.selectRuntimeStatusReportForTarget(
1364
+ inquiryResult,
1365
+ serviceName,
1366
+ serviceInstanceId
1367
+ );
1368
+ if (!report) {
1369
+ throw new Error(
1370
+ `No runtime status report for ${serviceName}/${serviceInstanceId}`
1371
+ );
1372
+ }
1373
+ this.applyRuntimeStatusReport(report);
1374
+ this.lastHeartbeatAtByInstance.set(serviceInstanceId, Date.now());
1375
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, 0);
1376
+ return {
1377
+ ...ctx,
1378
+ runtimeStatusReport: report,
1379
+ __inquiryMeta: inquiryResult.__inquiryMeta
1380
+ };
1381
+ } catch (error) {
1382
+ const instance = this.getInstance(serviceName, serviceInstanceId);
1383
+ const message = error instanceof Error ? error.message : String(error);
1384
+ CadenzaService.log(
1385
+ "Runtime status fallback inquiry failed.",
1386
+ {
1387
+ serviceName,
1388
+ serviceInstanceId,
1389
+ error: message
1390
+ },
1391
+ "warning"
1392
+ );
1393
+ emit("meta.service_registry.runtime_status_unreachable", {
1394
+ ...ctx,
1395
+ serviceName,
1396
+ serviceInstanceId,
1397
+ serviceAddress: instance?.address ?? ctx.serviceAddress,
1398
+ servicePort: instance?.port ?? ctx.servicePort,
1399
+ __error: message,
1400
+ errored: true
1401
+ });
1402
+ return {
1403
+ ...ctx,
1404
+ __error: message,
1405
+ errored: true
1406
+ };
1407
+ } finally {
1408
+ this.runtimeStatusFallbackInFlightByInstance.delete(serviceInstanceId);
1409
+ }
1410
+ },
1411
+ "Runs runtime-status inquiry fallback for a dependee instance after missed heartbeats."
1412
+ ).doOn("meta.service_registry.runtime_status_fallback_requested").emits("meta.service_registry.runtime_status_fallback_resolved").emitsOnFail("meta.service_registry.runtime_status_fallback_failed");
856
1413
  this.collectTransportDiagnosticsTask = CadenzaService.createMetaTask(
857
1414
  "Collect transport diagnostics",
858
1415
  async (ctx) => {
@@ -1192,6 +1749,183 @@ var ServiceRegistry = class _ServiceRegistry {
1192
1749
  localTaskName: task.name
1193
1750
  };
1194
1751
  }
1752
/**
 * Looks up a tracked service instance.
 *
 * @param {string} serviceName - Key into `this.instances`.
 * @param {string} instanceId - Compared against each instance's `uuid`.
 * @returns {object|undefined} The matching instance, or undefined when the service
 *   is unknown or no instance has that uuid.
 */
+ getInstance(serviceName, instanceId) {
1753
+ return this.instances.get(serviceName)?.find((instance) => instance.uuid === instanceId);
1754
+ }
1755
/**
 * Returns the tracked instance record for this process's own service instance.
 *
 * @returns {object|undefined} undefined while `serviceName` or `serviceInstanceId`
 *   is unset, or when no matching instance is tracked.
 */
+ getLocalInstance() {
1756
+ if (!this.serviceName || !this.serviceInstanceId) {
1757
+ return void 0;
1758
+ }
1759
+ return this.getInstance(this.serviceName, this.serviceInstanceId);
1760
+ }
1761
/**
 * Starts (or refreshes) heartbeat tracking for a remote instance.
 *
 * Does nothing when either argument is falsy. Otherwise adds the instance id to the
 * per-service set, records the instance -> service mapping, stamps the last
 * heartbeat with the current time and resets the missed-heartbeat count to 0.
 * Calling it again for an already registered instance repeats that reset.
 *
 * @param {string} serviceName - Service the instance belongs to.
 * @param {string} serviceInstanceId - Instance identifier.
 * @returns {void}
 */
+ registerDependee(serviceName, serviceInstanceId) {
1762
+ if (!serviceName || !serviceInstanceId) {
1763
+ return;
1764
+ }
1765
+ if (!this.dependeesByService.has(serviceName)) {
1766
+ this.dependeesByService.set(serviceName, /* @__PURE__ */ new Set());
1767
+ }
1768
+ this.dependeesByService.get(serviceName).add(serviceInstanceId);
1769
+ this.dependeeByInstance.set(serviceInstanceId, serviceName);
1770
+ this.lastHeartbeatAtByInstance.set(serviceInstanceId, Date.now());
1771
+ this.missedHeartbeatsByInstance.set(serviceInstanceId, 0);
1772
+ }
1773
/**
 * Removes all heartbeat-tracking state for an instance.
 *
 * @param {string} serviceInstanceId - Instance identifier to forget.
 * @param {string} [serviceName] - Owning service; when nullish it is looked up in
 *   `dependeeByInstance`.
 * @returns {void}
 */
+ unregisterDependee(serviceInstanceId, serviceName) {
1774
+ const dependeeServiceName = serviceName ?? this.dependeeByInstance.get(serviceInstanceId);
1775
+ if (dependeeServiceName) {
1776
+ this.dependeesByService.get(dependeeServiceName)?.delete(serviceInstanceId);
// Drop the service entry once its set is empty (or was never present).
1777
+ if (!this.dependeesByService.get(dependeeServiceName)?.size) {
1778
+ this.dependeesByService.delete(dependeeServiceName);
1779
+ }
1780
+ }
// The per-instance maps are cleared even when the owning service is unknown.
1781
+ this.dependeeByInstance.delete(serviceInstanceId);
1782
+ this.lastHeartbeatAtByInstance.delete(serviceInstanceId);
1783
+ this.missedHeartbeatsByInstance.delete(serviceInstanceId);
1784
+ this.runtimeStatusFallbackInFlightByInstance.delete(serviceInstanceId);
1785
+ }
1786
/**
 * Resolves a runtime status snapshot using this registry's configured thresholds.
 *
 * Forwards the four arguments to `resolveRuntimeStatus` together with
 * `this.degradedGraphThreshold` and `this.overloadedGraphThreshold`.
 *
 * @param {number} numberOfRunningGraphs - Current load figure.
 * @param {boolean} isActive
 * @param {boolean} isNonResponsive
 * @param {boolean} isBlocked
 * @returns {object} The snapshot returned by `resolveRuntimeStatus`.
 */
+ resolveRuntimeStatusSnapshot(numberOfRunningGraphs, isActive, isNonResponsive, isBlocked) {
1787
+ return resolveRuntimeStatus({
1788
+ numberOfRunningGraphs,
1789
+ isActive,
1790
+ isNonResponsive,
1791
+ isBlocked,
1792
+ degradedGraphThreshold: this.degradedGraphThreshold,
1793
+ overloadedGraphThreshold: this.overloadedGraphThreshold
1794
+ });
1795
+ }
1796
+ normalizeRuntimeStatusReport(ctx) {
1797
+ const serviceName = ctx.serviceName ?? ctx.__serviceName ?? ctx.serviceInstance?.serviceName;
1798
+ const serviceInstanceId = ctx.serviceInstanceId ?? ctx.__serviceInstanceId ?? ctx.serviceInstance?.uuid;
1799
+ if (!serviceName || !serviceInstanceId) {
1800
+ return null;
1801
+ }
1802
+ const servicePort = ctx.servicePort ?? ctx.port ?? ctx.serviceInstance?.port;
1803
+ const numberOfRunningGraphs = Math.max(
1804
+ 0,
1805
+ Math.trunc(
1806
+ Number(ctx.numberOfRunningGraphs ?? ctx.__numberOfRunningGraphs ?? 0)
1807
+ )
1808
+ );
1809
+ const isActive = Boolean(ctx.isActive ?? ctx.__active ?? true);
1810
+ const isNonResponsive = Boolean(ctx.isNonResponsive ?? false);
1811
+ const isBlocked = Boolean(ctx.isBlocked ?? false);
1812
+ const resolved = this.resolveRuntimeStatusSnapshot(
1813
+ numberOfRunningGraphs,
1814
+ isActive,
1815
+ isNonResponsive,
1816
+ isBlocked
1817
+ );
1818
+ return {
1819
+ serviceName,
1820
+ serviceInstanceId,
1821
+ serviceAddress: ctx.serviceAddress ?? ctx.address ?? ctx.serviceInstance?.address,
1822
+ servicePort: typeof servicePort === "number" ? servicePort : void 0,
1823
+ exposed: typeof ctx.exposed === "boolean" ? ctx.exposed : typeof ctx.serviceInstance?.exposed === "boolean" ? ctx.serviceInstance.exposed : void 0,
1824
+ isFrontend: typeof ctx.isFrontend === "boolean" ? ctx.isFrontend : typeof ctx.serviceInstance?.isFrontend === "boolean" ? ctx.serviceInstance.isFrontend : void 0,
1825
+ reportedAt: ctx.reportedAt ?? (typeof ctx.__reportedAt === "string" ? ctx.__reportedAt : void 0) ?? (/* @__PURE__ */ new Date()).toISOString(),
1826
+ state: ctx.state === "healthy" || ctx.state === "degraded" || ctx.state === "overloaded" || ctx.state === "unavailable" ? ctx.state : resolved.state,
1827
+ acceptingWork: typeof ctx.acceptingWork === "boolean" ? ctx.acceptingWork : resolved.acceptingWork,
1828
+ numberOfRunningGraphs,
1829
+ isActive,
1830
+ isNonResponsive,
1831
+ isBlocked,
1832
+ health: ctx.health ?? ctx.__health ?? {}
1833
+ };
1834
+ }
1835
/**
 * Copies a runtime status report onto the matching tracked instance.
 *
 * @param {object} report - Report carrying `serviceName` and `serviceInstanceId`
 *   plus the status fields assigned below.
 * @returns {boolean} false when no tracked instance matches the report; true after
 *   the instance has been updated in place.
 */
+ applyRuntimeStatusReport(report) {
1836
+ const instance = this.getInstance(report.serviceName, report.serviceInstanceId);
1837
+ if (!instance) {
1838
+ return false;
1839
+ }
// Location and exposure fields are only overwritten when the report provides a
// usable value (truthy address, numeric port, boolean flags).
1840
+ if (report.serviceAddress) {
1841
+ instance.address = report.serviceAddress;
1842
+ }
1843
+ if (typeof report.servicePort === "number") {
1844
+ instance.port = report.servicePort;
1845
+ }
1846
+ if (typeof report.exposed === "boolean") {
1847
+ instance.exposed = report.exposed;
1848
+ }
1849
+ if (typeof report.isFrontend === "boolean") {
1850
+ instance.isFrontend = report.isFrontend;
1851
+ }
// Status fields are always overwritten.
1852
+ instance.numberOfRunningGraphs = report.numberOfRunningGraphs;
1853
+ instance.isActive = report.isActive;
1854
+ instance.isNonResponsive = report.isNonResponsive;
1855
+ instance.isBlocked = report.isBlocked;
1856
+ instance.runtimeState = report.state;
1857
+ instance.acceptingWork = report.acceptingWork;
1858
+ instance.reportedAt = report.reportedAt;
// Shallow merge: existing health keys, then the report's, then a `runtimeStatus`
// entry that replaces any key of that name from either source.
1859
+ instance.health = {
1860
+ ...instance.health ?? {},
1861
+ ...report.health ?? {},
1862
+ runtimeStatus: {
1863
+ state: report.state,
1864
+ acceptingWork: report.acceptingWork,
1865
+ reportedAt: report.reportedAt
1866
+ }
1867
+ };
1868
+ return true;
1869
+ }
1870
/**
 * Builds a runtime status report for this process's own service instance.
 *
 * Side effects: writes the resolved count back to `this.numberOfRunningGraphs` and
 * applies the report to the local instance record via `applyRuntimeStatusReport`.
 *
 * @param {string} [detailLevel="minimal"] - Any value other than "full" removes
 *   `health` from the returned report.
 * @returns {object|null} null when the local service identity is unset or the local
 *   instance is not tracked; otherwise the report.
 */
+ buildLocalRuntimeStatusReport(detailLevel = "minimal") {
1871
+ if (!this.serviceName || !this.serviceInstanceId) {
1872
+ return null;
1873
+ }
1874
+ const localInstance = this.getLocalInstance();
1875
+ if (!localInstance) {
1876
+ return null;
1877
+ }
// NOTE(review): when no routine ids are tracked (size 0) this falls back to the
// stored `this.numberOfRunningGraphs`; whether a non-zero stored value is expected
// in that case cannot be told from this diff - confirm.
1878
+ const numberOfRunningGraphs = this.activeRoutineExecutionIds.size || this.numberOfRunningGraphs || 0;
1879
+ this.numberOfRunningGraphs = numberOfRunningGraphs;
1880
+ const snapshot = this.resolveRuntimeStatusSnapshot(
1881
+ numberOfRunningGraphs,
1882
+ localInstance.isActive,
1883
+ localInstance.isNonResponsive,
1884
+ localInstance.isBlocked
1885
+ );
1886
+ const reportedAt = (/* @__PURE__ */ new Date()).toISOString();
1887
+ const report = {
1888
+ serviceName: this.serviceName,
1889
+ serviceInstanceId: this.serviceInstanceId,
1890
+ serviceAddress: localInstance.address,
1891
+ servicePort: localInstance.port,
1892
+ exposed: localInstance.exposed,
1893
+ isFrontend: localInstance.isFrontend,
1894
+ reportedAt,
1895
+ state: snapshot.state,
1896
+ acceptingWork: snapshot.acceptingWork,
1897
+ numberOfRunningGraphs: snapshot.numberOfRunningGraphs,
1898
+ isActive: snapshot.isActive,
1899
+ isNonResponsive: snapshot.isNonResponsive,
1900
+ isBlocked: snapshot.isBlocked,
1901
+ health: {
1902
+ ...localInstance.health ?? {},
1903
+ runtimeStatus: {
1904
+ state: snapshot.state,
1905
+ acceptingWork: snapshot.acceptingWork,
1906
+ reportedAt
1907
+ }
1908
+ }
1909
+ };
// Apply before trimming so the local instance record is updated from the report
// while it still carries `health`.
1910
+ this.applyRuntimeStatusReport(report);
1911
+ if (detailLevel !== "full") {
1912
+ delete report.health;
1913
+ }
1914
+ return report;
1915
+ }
1916
/**
 * Picks the report for one specific instance out of an inquiry result.
 *
 * @param {object} inquiryResult - Its `runtimeStatusReports` property is read
 *   directly, so the argument itself must not be null/undefined; a non-array
 *   property is treated as an empty list.
 * @param {string} targetServiceName - Service name the report must match.
 * @param {string} targetServiceInstanceId - Instance id the report must match.
 * @returns {object|null} The first normalized report matching both identifiers,
 *   or null when none does.
 */
+ selectRuntimeStatusReportForTarget(inquiryResult, targetServiceName, targetServiceInstanceId) {
1917
+ const reports = Array.isArray(inquiryResult.runtimeStatusReports) ? inquiryResult.runtimeStatusReports : [];
1918
+ for (const candidate of reports) {
// Entries that cannot be normalized (no service name or instance id) are skipped.
1919
+ const report = this.normalizeRuntimeStatusReport(candidate);
1920
+ if (!report) {
1921
+ continue;
1922
+ }
1923
+ if (report.serviceName === targetServiceName && report.serviceInstanceId === targetServiceInstanceId) {
1924
+ return report;
1925
+ }
1926
+ }
1927
+ return null;
1928
+ }
1195
1929
  reset() {
1196
1930
  this.instances.clear();
1197
1931
  this.deputies.clear();
@@ -1199,6 +1933,15 @@ var ServiceRegistry = class _ServiceRegistry {
1199
1933
  this.remoteIntents.clear();
1200
1934
  this.remoteIntentDeputiesByKey.clear();
1201
1935
  this.remoteIntentDeputiesByTask.clear();
1936
+ this.dependeesByService.clear();
1937
+ this.dependeeByInstance.clear();
1938
+ this.lastHeartbeatAtByInstance.clear();
1939
+ this.missedHeartbeatsByInstance.clear();
1940
+ this.runtimeStatusFallbackInFlightByInstance.clear();
1941
+ this.activeRoutineExecutionIds.clear();
1942
+ this.numberOfRunningGraphs = 0;
1943
+ this.runtimeStatusHeartbeatStarted = false;
1944
+ this.lastRuntimeStatusSnapshot = null;
1202
1945
  }
1203
1946
  };
1204
1947