@horizon-republic/nestjs-jetstream 2.11.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -144,7 +144,7 @@ var DEFAULT_ORDERED_STREAM_CONFIG = {
144
144
  };
145
145
  var DEFAULT_DLQ_STREAM_CONFIG = {
146
146
  ...baseStreamConfig,
147
- retention: RetentionPolicy.Workqueue,
147
+ retention: RetentionPolicy.Limits,
148
148
  allow_rollup_hdrs: false,
149
149
  max_consumers: 100,
150
150
  max_msg_size: 10 * MB,
@@ -208,6 +208,7 @@ var RESERVED_HEADERS = /* @__PURE__ */ new Set([
208
208
  "x-reply-to" /* ReplyTo */,
209
209
  "x-error" /* Error */
210
210
  ]);
211
+ var NATS_CONTROL_HEADER_PREFIX = "nats-";
211
212
  var internalName = (name) => `${name}__microservice`;
212
213
  var buildSubject = (serviceName, kind, pattern) => `${internalName(serviceName)}.${kind}.${pattern}`;
213
214
  var buildBroadcastSubject = (pattern) => `broadcast.${pattern}`;
@@ -260,6 +261,9 @@ var ATTR_JETSTREAM_RPC_REPLY_ERROR_CODE = "jetstream.rpc.reply.error.code";
260
261
  var ATTR_JETSTREAM_PROVISIONING_ENTITY = "jetstream.provisioning.entity";
261
262
  var ATTR_JETSTREAM_PROVISIONING_ACTION = "jetstream.provisioning.action";
262
263
  var ATTR_JETSTREAM_PROVISIONING_NAME = "jetstream.provisioning.name";
264
+ var ATTR_JETSTREAM_PROVISIONING_MAX_BYTES = "jetstream.provisioning.max_bytes";
265
+ var ATTR_JETSTREAM_PROVISIONING_NUM_REPLICAS = "jetstream.provisioning.num_replicas";
266
+ var ATTR_JETSTREAM_PROVISIONING_RESERVATION = "jetstream.provisioning.reservation_bytes";
263
267
  var ATTR_JETSTREAM_SELF_HEALING_REASON = "jetstream.self_healing.reason";
264
268
  var ATTR_JETSTREAM_MIGRATION_REASON = "jetstream.migration.reason";
265
269
  var ATTR_JETSTREAM_DEAD_LETTER_REASON = "jetstream.dead_letter.reason";
@@ -529,7 +533,7 @@ var extractContext = (ctx, carrier, getter) => propagation.extract(ctx, carrier,
529
533
 
530
534
  // src/otel/tracer.ts
531
535
  import { trace } from "@opentelemetry/api";
532
- var PACKAGE_VERSION = true ? "2.11.1" : "0.0.0";
536
+ var PACKAGE_VERSION = true ? "2.12.0" : "0.0.0";
533
537
  var getTracer = () => trace.getTracer(TRACER_NAME, PACKAGE_VERSION);
534
538
 
535
539
  // src/otel/carrier.ts
@@ -1129,7 +1133,10 @@ var withProvisioningSpan = (config, ctx, op) => wrapInfra(
1129
1133
  {
1130
1134
  [ATTR_JETSTREAM_PROVISIONING_ENTITY]: ctx.entity,
1131
1135
  [ATTR_JETSTREAM_PROVISIONING_ACTION]: ctx.action,
1132
- [ATTR_JETSTREAM_PROVISIONING_NAME]: ctx.name
1136
+ [ATTR_JETSTREAM_PROVISIONING_NAME]: ctx.name,
1137
+ [ATTR_JETSTREAM_PROVISIONING_MAX_BYTES]: ctx.maxBytes,
1138
+ [ATTR_JETSTREAM_PROVISIONING_NUM_REPLICAS]: ctx.numReplicas,
1139
+ [ATTR_JETSTREAM_PROVISIONING_RESERVATION]: ctx.reservation
1133
1140
  },
1134
1141
  op
1135
1142
  );
@@ -1305,7 +1312,13 @@ var JetstreamRecordBuilder = class {
1305
1312
  * lockstep. `RESERVED_HEADERS` is defined as an all-lowercase set.
1306
1313
  */
1307
1314
  validateHeaderKey(key) {
1308
- if (RESERVED_HEADERS.has(key.toLowerCase())) {
1315
+ const normalized = key.toLowerCase();
1316
+ if (normalized.startsWith(NATS_CONTROL_HEADER_PREFIX)) {
1317
+ throw new Error(
1318
+ `Header "${key}" is reserved for the NATS server and cannot be set manually. Use setMessageId() for deduplication, ttl() for per-message expiry, and scheduleAt() for delayed delivery.`
1319
+ );
1320
+ }
1321
+ if (RESERVED_HEADERS.has(normalized)) {
1309
1322
  throw new Error(
1310
1323
  `Header "${key}" is reserved by the JetStream transport and cannot be set manually. Reserved headers: ${[...RESERVED_HEADERS].join(", ")}`
1311
1324
  );
@@ -1451,13 +1464,18 @@ var JetstreamClient = class extends ClientProxy {
1451
1464
  async dispatchEvent(packet) {
1452
1465
  if (!this.readyForPublish) await this.connect();
1453
1466
  const { data, hdrs, messageId, schedule, ttl } = this.extractRecordData(packet.data);
1467
+ const publishKind = detectEventKind(packet.pattern);
1468
+ if (schedule && publishKind === "ordered" /* Ordered */) {
1469
+ throw new Error(
1470
+ `scheduleAt() is not supported for ordered events (pattern: ${packet.pattern}). Scheduled delivery is available for workqueue events and broadcasts.`
1471
+ );
1472
+ }
1454
1473
  const eventSubject = this.buildEventSubject(packet.pattern);
1455
1474
  const publishSubject = schedule ? this.buildScheduleSubject(eventSubject) : eventSubject;
1456
1475
  const msgHeaders = this.buildHeaders(hdrs, { subject: eventSubject });
1457
1476
  const encoded = this.codec.encode(data);
1458
1477
  const effectiveMsgId = messageId ?? nuid.next();
1459
1478
  const record = packet.data instanceof JetstreamRecord ? packet.data : new JetstreamRecord(data, /* @__PURE__ */ new Map());
1460
- const publishKind = detectEventKind(packet.pattern);
1461
1479
  const declaredPattern = declaredEventPattern(packet.pattern);
1462
1480
  const streamKind = eventStreamKind(publishKind);
1463
1481
  const startedAt = performance.now();
@@ -1489,10 +1507,10 @@ var JetstreamClient = class extends ClientProxy {
1489
1507
  const ack2 = await this.connection.getJetStreamClient().publish(publishSubject, encoded, {
1490
1508
  headers: msgHeaders,
1491
1509
  msgID: effectiveMsgId,
1492
- ttl,
1493
1510
  schedule: {
1494
1511
  specification: schedule.at,
1495
- target: eventSubject
1512
+ target: eventSubject,
1513
+ ttl
1496
1514
  }
1497
1515
  });
1498
1516
  warnIfDuplicate("scheduled", ack2);
@@ -1682,13 +1700,18 @@ var JetstreamClient = class extends ClientProxy {
1682
1700
  });
1683
1701
  return;
1684
1702
  }
1685
- await context6.with(
1703
+ const ack = await context6.with(
1686
1704
  spanHandle.activeContext,
1687
1705
  () => this.connection.getJetStreamClient().publish(subject, encoded, {
1688
1706
  headers: hdrs,
1689
1707
  msgID: messageId ?? nuid.next()
1690
1708
  })
1691
1709
  );
1710
+ if (ack.duplicate) {
1711
+ throw new Error(
1712
+ `Duplicate RPC publish for ${subject}: the messageId was already used within the stream dedup window, so the reply belongs to the original request`
1713
+ );
1714
+ }
1692
1715
  this.reportPublished(declaredPattern, "cmd" /* Command */, startedAt, "success");
1693
1716
  } catch (err) {
1694
1717
  const existingTimeout = this.pendingTimeouts.get(correlationId);
@@ -1858,13 +1881,17 @@ var JetstreamClient = class extends ClientProxy {
1858
1881
  * uses a separate `_sch` namespace that is NOT matched by any consumer filter.
1859
1882
  * NATS holds the message and publishes it to the target subject after the delay.
1860
1883
  *
1884
+ * A unique per-message suffix is appended because the server stores schedules
1885
+ * as rollup messages — one active schedule per subject (ADR-51). Without it,
1886
+ * concurrent schedules of the same pattern would silently replace each other.
1887
+ *
1861
1888
  * Examples:
1862
- * - `{svc}__microservice.ev.order.reminder` → `{svc}__microservice._sch.order.reminder`
1863
- * - `broadcast.config.updated` → `broadcast._sch.config.updated`
1889
+ * - `{svc}__microservice.ev.order.reminder` → `{svc}__microservice._sch.order.reminder.<nuid>`
1890
+ * - `broadcast.config.updated` → `broadcast._sch.config.updated.<nuid>`
1864
1891
  */
1865
1892
  buildScheduleSubject(eventSubject) {
1866
1893
  if (eventSubject.startsWith("broadcast.")) {
1867
- return eventSubject.replace("broadcast.", "broadcast._sch.");
1894
+ return `${eventSubject.replace("broadcast.", "broadcast._sch.")}.${nuid.next()}`;
1868
1895
  }
1869
1896
  const targetPrefix = `${internalName(this.targetName)}.`;
1870
1897
  if (!eventSubject.startsWith(targetPrefix)) {
@@ -1876,7 +1903,7 @@ var JetstreamClient = class extends ClientProxy {
1876
1903
  throw new Error(`Event subject missing pattern segment: ${eventSubject}`);
1877
1904
  }
1878
1905
  const pattern = withoutPrefix.slice(dotIndex + 1);
1879
- return `${targetPrefix}_sch.${pattern}`;
1906
+ return `${targetPrefix}_sch.${pattern}.${nuid.next()}`;
1880
1907
  }
1881
1908
  };
1882
1909
 
@@ -1888,6 +1915,7 @@ var JsonCodec = class {
1888
1915
  return encoder.encode(JSON.stringify(data));
1889
1916
  }
1890
1917
  decode(data) {
1918
+ if (data.length === 0) return void 0;
1891
1919
  return JSON.parse(decoder.decode(data));
1892
1920
  }
1893
1921
  };
@@ -1901,6 +1929,7 @@ var MsgpackCodec = class {
1901
1929
  return this.packr.pack(data);
1902
1930
  }
1903
1931
  decode(data) {
1932
+ if (data.length === 0) return void 0;
1904
1933
  return this.packr.unpack(data);
1905
1934
  }
1906
1935
  };
@@ -2981,43 +3010,11 @@ var JetstreamStrategy = class extends Server {
2981
3010
  * Called by NestJS when `connectMicroservice()` is used, or internally by the module.
2982
3011
  */
2983
3012
  async listen(callback) {
2984
- if (this.started) {
2985
- this.logger.warn("listen() called more than once \u2014 ignoring");
2986
- return;
2987
- }
2988
- this.started = true;
2989
- this.patternRegistry.registerHandlers(this.getHandlers());
2990
- const { streams: streamKinds, durableConsumers: durableKinds } = this.resolveRequiredKinds();
2991
- if (streamKinds.length > 0) {
2992
- await this.streamProvider.ensureStreams(streamKinds);
2993
- if (durableKinds.length > 0) {
2994
- const consumers = await this.consumerProvider.ensureConsumers(durableKinds);
2995
- this.populateAckWaitMap(consumers);
2996
- this.eventRouter.updateMaxDeliverMap(this.buildMaxDeliverMap(consumers));
2997
- this.messageProvider.start(consumers);
2998
- }
2999
- if (this.patternRegistry.hasOrderedHandlers()) {
3000
- const orderedStreamName = this.streamProvider.getStreamName("ordered" /* Ordered */);
3001
- await this.messageProvider.startOrdered(
3002
- orderedStreamName,
3003
- this.patternRegistry.getOrderedSubjects(),
3004
- this.options.ordered
3005
- );
3006
- }
3007
- if (this.patternRegistry.hasEventHandlers() || this.patternRegistry.hasBroadcastHandlers() || this.patternRegistry.hasOrderedHandlers()) {
3008
- this.eventRouter.start();
3009
- }
3010
- if (isJetStreamRpcMode(this.options.rpc) && this.patternRegistry.hasRpcHandlers()) {
3011
- await this.rpcRouter.start();
3012
- }
3013
- }
3014
- if (isCoreRpcMode(this.options.rpc) && this.patternRegistry.hasRpcHandlers()) {
3015
- await this.coreRpcServer.start();
3016
- }
3017
- if (this.metadataProvider && this.patternRegistry.hasMetadata()) {
3018
- await this.metadataProvider.publish(this.patternRegistry.getMetadataEntries());
3013
+ try {
3014
+ await this.doListen(callback);
3015
+ } catch (err) {
3016
+ callback(err);
3019
3017
  }
3020
- callback();
3021
3018
  }
3022
3019
  /** Stop all consumers, routers, subscriptions, and metadata heartbeat. Called during shutdown. */
3023
3020
  close() {
@@ -3076,6 +3073,33 @@ var JetstreamStrategy = class extends Server {
3076
3073
  getPatternRegistry() {
3077
3074
  return this.patternRegistry;
3078
3075
  }
3076
+ async doListen(callback) {
3077
+ if (this.started) {
3078
+ this.logger.warn("listen() called more than once \u2014 ignoring");
3079
+ return;
3080
+ }
3081
+ this.started = true;
3082
+ this.patternRegistry.registerHandlers(this.getHandlers());
3083
+ const { streams: streamKinds, durableConsumers: durableKinds } = this.resolveRequiredKinds();
3084
+ if (streamKinds.length > 0) {
3085
+ await this.streamProvider.ensureStreams(streamKinds);
3086
+ let consumers = null;
3087
+ if (durableKinds.length > 0) {
3088
+ consumers = await this.consumerProvider.ensureConsumers(durableKinds);
3089
+ this.populateAckWaitMap(consumers);
3090
+ this.eventRouter.updateMaxDeliverMap(this.buildMaxDeliverMap(consumers));
3091
+ }
3092
+ await this.startRouters();
3093
+ await this.startConsumption(consumers);
3094
+ }
3095
+ if (isCoreRpcMode(this.options.rpc) && this.patternRegistry.hasRpcHandlers()) {
3096
+ await this.coreRpcServer.start();
3097
+ }
3098
+ if (this.metadataProvider && this.patternRegistry.hasMetadata()) {
3099
+ await this.metadataProvider.publish(this.patternRegistry.getMetadataEntries());
3100
+ }
3101
+ callback();
3102
+ }
3079
3103
  /** Determine which streams and durable consumers are needed. */
3080
3104
  resolveRequiredKinds() {
3081
3105
  const streams = [];
@@ -3097,7 +3121,29 @@ var JetstreamStrategy = class extends Server {
3097
3121
  }
3098
3122
  return { streams, durableConsumers };
3099
3123
  }
3100
- /** Populate the shared ack_wait map from actual NATS consumer configs. */
3124
+ /** Subscribe the event and RPC routers to the message subjects. */
3125
+ async startRouters() {
3126
+ if (this.patternRegistry.hasEventHandlers() || this.patternRegistry.hasBroadcastHandlers() || this.patternRegistry.hasOrderedHandlers()) {
3127
+ this.eventRouter.start();
3128
+ }
3129
+ if (isJetStreamRpcMode(this.options.rpc) && this.patternRegistry.hasRpcHandlers()) {
3130
+ await this.rpcRouter.start();
3131
+ }
3132
+ }
3133
+ /** Begin durable and ordered consumption; routers must already be subscribed. */
3134
+ async startConsumption(consumers) {
3135
+ if (consumers !== null) {
3136
+ this.messageProvider.start(consumers);
3137
+ }
3138
+ if (this.patternRegistry.hasOrderedHandlers()) {
3139
+ const orderedStreamName = this.streamProvider.getStreamName("ordered" /* Ordered */);
3140
+ await this.messageProvider.startOrdered(
3141
+ orderedStreamName,
3142
+ this.patternRegistry.getOrderedSubjects(),
3143
+ this.options.ordered
3144
+ );
3145
+ }
3146
+ }
3101
3147
  populateAckWaitMap(consumers) {
3102
3148
  for (const [kind, info] of consumers) {
3103
3149
  if (info.config.ack_wait) {
@@ -3336,6 +3382,15 @@ var serializeError = (err) => {
3336
3382
  return err;
3337
3383
  };
3338
3384
 
3385
+ // src/utils/settle-quietly.ts
3386
+ var settleQuietly = (logger5, label, action) => {
3387
+ try {
3388
+ action();
3389
+ } catch (err) {
3390
+ logger5.error(label, err);
3391
+ }
3392
+ };
3393
+
3339
3394
  // src/utils/unwrap-result.ts
3340
3395
  import { isObservable } from "rxjs";
3341
3396
  var unwrapResult = (result) => {
@@ -3501,16 +3556,168 @@ var CoreRpcServer = class {
3501
3556
 
3502
3557
  // src/server/infrastructure/stream.provider.ts
3503
3558
  import { Logger as Logger13 } from "@nestjs/common";
3504
- import { JetStreamApiError as JetStreamApiError2 } from "@nats-io/jetstream";
3559
+ import {
3560
+ JetStreamApiError as JetStreamApiError2,
3561
+ RetentionPolicy as RetentionPolicy2,
3562
+ StorageType as StorageType4
3563
+ } from "@nats-io/jetstream";
3505
3564
 
3506
3565
  // src/server/infrastructure/nats-error-codes.ts
3507
3566
  var NatsErrorCode = /* @__PURE__ */ ((NatsErrorCode2) => {
3508
3567
  NatsErrorCode2[NatsErrorCode2["ConsumerNotFound"] = 10014] = "ConsumerNotFound";
3509
3568
  NatsErrorCode2[NatsErrorCode2["ConsumerAlreadyExists"] = 10148] = "ConsumerAlreadyExists";
3510
3569
  NatsErrorCode2[NatsErrorCode2["StreamNotFound"] = 10059] = "StreamNotFound";
3570
+ NatsErrorCode2[NatsErrorCode2["StorageResourcesExceeded"] = 10047] = "StorageResourcesExceeded";
3571
+ NatsErrorCode2[NatsErrorCode2["NoSuitablePeers"] = 10005] = "NoSuitablePeers";
3511
3572
  return NatsErrorCode2;
3512
3573
  })(NatsErrorCode || {});
3513
3574
 
3575
+ // src/server/infrastructure/provisioning-budget.ts
3576
+ import { StorageType as StorageType2 } from "@nats-io/jetstream";
3577
+ var GIB = 1024 ** 3;
3578
+ var fmt = (bytes) => `${(bytes / GIB).toFixed(2)} GiB`;
3579
+ var resolveTierBudget = (info, replicas) => {
3580
+ const tier = info.tiers?.[`R${replicas}`];
3581
+ const limits = tier?.limits ?? info.limits;
3582
+ return {
3583
+ maxStorage: limits?.max_storage ?? 0,
3584
+ reserved: tier?.reserved_storage ?? info.reserved_storage ?? 0,
3585
+ tiered: tier !== void 0
3586
+ };
3587
+ };
3588
+ var groupByReplicas = (reservations) => {
3589
+ const groups = /* @__PURE__ */ new Map();
3590
+ for (const r of reservations) {
3591
+ if (r.storage !== StorageType2.File) continue;
3592
+ const prev = groups.get(r.numReplicas) ?? 0;
3593
+ groups.set(r.numReplicas, prev + r.maxBytes * r.numReplicas);
3594
+ }
3595
+ return groups;
3596
+ };
3597
+ var assertStorageBudget = async (jsm, serviceName, reservations, logger5) => {
3598
+ try {
3599
+ const info = await jsm.getAccountInfo();
3600
+ const groups = groupByReplicas(reservations);
3601
+ let limitNotSetWarned = false;
3602
+ let okReserved = 0;
3603
+ let anyWarned = false;
3604
+ for (const [replicas, incremental] of groups) {
3605
+ const { maxStorage, reserved, tiered } = resolveTierBudget(info, replicas);
3606
+ const tierNote = tiered ? ` (tier R${replicas})` : "";
3607
+ if (maxStorage <= 0) {
3608
+ if (!limitNotSetWarned) {
3609
+ limitNotSetWarned = true;
3610
+ logger5.warn(
3611
+ `Storage preflight for "${serviceName}": account file-storage limit not set (max_storage=${maxStorage}); the server max_file_store cannot be verified from the client.`
3612
+ );
3613
+ }
3614
+ continue;
3615
+ }
3616
+ const remaining = maxStorage - reserved;
3617
+ if (incremental > remaining) {
3618
+ anyWarned = true;
3619
+ logger5.warn(
3620
+ `Storage preflight for "${serviceName}"${tierNote}: needs ~${fmt(incremental)} but only ~${fmt(remaining)} remains (reserved ${fmt(reserved)} / limit ${fmt(maxStorage)}). Provisioning will likely fail with insufficient storage. Lower max_bytes/num_replicas, or raise the account/server storage limit.`
3621
+ );
3622
+ continue;
3623
+ }
3624
+ okReserved += incremental;
3625
+ }
3626
+ if (!anyWarned && !limitNotSetWarned && okReserved > 0) {
3627
+ logger5.log(
3628
+ `Storage preflight for "${serviceName}" OK: reserving ~${fmt(okReserved)} across file-backed streams within account limits.`
3629
+ );
3630
+ }
3631
+ } catch (err) {
3632
+ logger5.debug(`Storage preflight skipped \u2014 account info unavailable: ${String(err)}`);
3633
+ }
3634
+ };
3635
+
3636
+ // src/server/infrastructure/provisioning-error.ts
3637
+ var REMEDIATION = {
3638
+ [10047 /* StorageResourcesExceeded */]: "Aggregate stream reservation exceeds the server `max_file_store` (or account `max_storage`). Lower `max_bytes`/`num_replicas` for this service, or raise `max_file_store` on the NATS servers.",
3639
+ [10005 /* NoSuitablePeers */]: "Fewer healthy peers than `num_replicas`, or no peer has enough reserved storage headroom. Reduce replicas or add/repair cluster nodes."
3640
+ };
3641
+ var GENERIC_REMEDIATION = "Inspect the NATS server logs and JetStream account limits for the underlying cause.";
3642
+ var JetstreamProvisioningError = class _JetstreamProvisioningError extends Error {
3643
+ entity;
3644
+ target;
3645
+ kind;
3646
+ errCode;
3647
+ errDescription;
3648
+ remediation;
3649
+ maxBytes;
3650
+ numReplicas;
3651
+ reservation;
3652
+ constructor(fields) {
3653
+ const reservationNote = fields.reservation !== void 0 ? ` reservation=${fields.reservation}B (max_bytes=${fields.maxBytes}B \xD7 replicas=${fields.numReplicas}).` : "";
3654
+ super(
3655
+ `JetStream ${fields.entity} provisioning failed for "${fields.target}" (kind=${fields.kind}): ${fields.errDescription} [err_code=${fields.errCode}].${reservationNote} ${fields.remediation}`,
3656
+ { cause: fields.cause }
3657
+ );
3658
+ this.name = "JetstreamProvisioningError";
3659
+ this.entity = fields.entity;
3660
+ this.target = fields.target;
3661
+ this.kind = fields.kind;
3662
+ this.errCode = fields.errCode;
3663
+ this.errDescription = fields.errDescription;
3664
+ this.remediation = fields.remediation;
3665
+ this.maxBytes = fields.maxBytes;
3666
+ this.numReplicas = fields.numReplicas;
3667
+ this.reservation = fields.reservation;
3668
+ Object.setPrototypeOf(this, _JetstreamProvisioningError.prototype);
3669
+ }
3670
+ };
3671
+ var mapProvisioningError = (err, ctx) => {
3672
+ const api = err.apiError();
3673
+ const remediation = REMEDIATION[api.err_code] ?? GENERIC_REMEDIATION;
3674
+ const reservation = ctx.maxBytes !== void 0 && ctx.numReplicas !== void 0 ? ctx.maxBytes * ctx.numReplicas : void 0;
3675
+ return new JetstreamProvisioningError({
3676
+ entity: ctx.entity,
3677
+ target: ctx.name,
3678
+ kind: ctx.kind,
3679
+ errCode: api.err_code,
3680
+ errDescription: api.description,
3681
+ remediation,
3682
+ maxBytes: ctx.maxBytes,
3683
+ numReplicas: ctx.numReplicas,
3684
+ reservation,
3685
+ cause: err
3686
+ });
3687
+ };
3688
+
3689
+ // src/server/infrastructure/provisioning-summary.ts
3690
+ import { StorageType as StorageType3 } from "@nats-io/jetstream";
3691
+ var GIB2 = 1024 ** 3;
3692
+ var NANOS_PER_SECOND = 1e9;
3693
+ var NANOS_PER_HOUR = 3600 * NANOS_PER_SECOND;
3694
+ var NANOS_PER_DAY = 86400 * NANOS_PER_SECOND;
3695
+ var formatBytes = (bytes) => {
3696
+ if (bytes <= 0) return "0 B";
3697
+ return `${(bytes / GIB2).toFixed(2)} GiB`;
3698
+ };
3699
+ var formatAge = (nanos) => {
3700
+ if (nanos <= 0) return "unlimited";
3701
+ if (nanos >= NANOS_PER_DAY) return `${(nanos / NANOS_PER_DAY).toFixed(1)}d`;
3702
+ if (nanos >= NANOS_PER_HOUR) return `${(nanos / NANOS_PER_HOUR).toFixed(1)}h`;
3703
+ return `${(nanos / NANOS_PER_SECOND).toFixed(0)}s`;
3704
+ };
3705
+ var formatProvisioningSummary = (serviceName, reservations) => {
3706
+ const lines = [`Provisioning ${reservations.length} stream(s) for "${serviceName}":`];
3707
+ let totalFileMaxBytes = 0;
3708
+ for (const r of reservations) {
3709
+ if (r.storage === StorageType3.File) totalFileMaxBytes += r.maxBytes;
3710
+ const clusterReservation = r.maxBytes * r.numReplicas;
3711
+ lines.push(
3712
+ ` \u2022 ${r.name} [${r.kind}] storage=${r.storage} replicas=${r.numReplicas} max_bytes=${formatBytes(r.maxBytes)} max_age=${formatAge(r.maxAge)} retention=${r.retention} \u2192 cluster reservation ${formatBytes(clusterReservation)}`
3713
+ );
3714
+ }
3715
+ lines.push(
3716
+ ` \u03A3 per-node file-backed footprint \u2248 ${formatBytes(totalFileMaxBytes)} (sum of max_bytes; worst case replicas = nodes). Ensure the NATS server max_file_store accommodates the sum across ALL services.`
3717
+ );
3718
+ return lines.join("\n");
3719
+ };
3720
+
3514
3721
  // src/server/infrastructure/stream-config-diff.ts
3515
3722
  var TRANSPORT_CONTROLLED_PROPERTIES = /* @__PURE__ */ new Set([
3516
3723
  "retention"
@@ -3568,85 +3775,201 @@ var isEqual = (a, b) => {
3568
3775
 
3569
3776
  // src/server/infrastructure/stream-migration.ts
3570
3777
  import { Logger as Logger12 } from "@nestjs/common";
3571
- import { JetStreamApiError } from "@nats-io/jetstream";
3778
+ import {
3779
+ JetStreamApiError
3780
+ } from "@nats-io/jetstream";
3572
3781
  var MIGRATION_BACKUP_SUFFIX = "__migration_backup";
3573
3782
  var DEFAULT_SOURCING_TIMEOUT_MS = 3e4;
3574
3783
  var SOURCING_POLL_INTERVAL_MS = 100;
3784
+ var DEFAULT_PEER_WAIT_MS = 6e4;
3785
+ var ACTIVE_MIGRATION_GRACE_MS = 9e4;
3786
+ var MIGRATION_STARTED_AT_KEY = "nestjs-jetstream-migration-started-at";
3575
3787
  var StreamMigration = class {
3576
- constructor(sourcingTimeoutMs = DEFAULT_SOURCING_TIMEOUT_MS) {
3788
+ constructor(sourcingTimeoutMs = DEFAULT_SOURCING_TIMEOUT_MS, peerWaitMs = DEFAULT_PEER_WAIT_MS) {
3577
3789
  this.sourcingTimeoutMs = sourcingTimeoutMs;
3790
+ this.peerWaitMs = peerWaitMs;
3578
3791
  }
3579
3792
  logger = new Logger12("Jetstream:Stream");
3580
3793
  async migrate(jsm, streamName2, newConfig) {
3581
3794
  const backupName = `${streamName2}${MIGRATION_BACKUP_SUFFIX}`;
3582
3795
  const startTime = Date.now();
3796
+ const peerFinished = await this.waitOutPeerMigration(jsm, backupName);
3583
3797
  const currentInfo = await jsm.streams.info(streamName2);
3584
- await this.cleanupOrphanedBackup(jsm, backupName);
3585
- const messageCount = currentInfo.state.messages;
3798
+ if (peerFinished && !compareStreamConfig(currentInfo.config, newConfig).hasImmutableChanges) {
3799
+ this.logger.log(`Stream ${streamName2}: migration completed by another instance`);
3800
+ await jsm.streams.update(streamName2, newConfig);
3801
+ return;
3802
+ }
3586
3803
  this.logger.log(`Stream ${streamName2}: destructive migration started`);
3587
3804
  let originalDeleted = false;
3805
+ let drainedCount = 0;
3588
3806
  try {
3589
- if (messageCount > 0) {
3590
- this.logger.log(` Phase 1/4: Backing up ${messageCount} messages \u2192 ${backupName}`);
3807
+ this.logger.log(` Phase 1/4: Quiescing ${streamName2} (publishes rejected during migration)`);
3808
+ await jsm.streams.update(streamName2, { ...currentInfo.config, subjects: [] });
3809
+ drainedCount = (await jsm.streams.info(streamName2)).state.messages;
3810
+ if (drainedCount > 0) {
3811
+ this.logger.log(` Phase 2/4: Backing up ${drainedCount} messages \u2192 ${backupName}`);
3591
3812
  await jsm.streams.add({
3592
3813
  ...currentInfo.config,
3593
3814
  name: backupName,
3594
3815
  subjects: [],
3595
- sources: [{ name: streamName2 }]
3816
+ sources: [{ name: streamName2 }],
3817
+ metadata: { [MIGRATION_STARTED_AT_KEY]: (/* @__PURE__ */ new Date()).toISOString() }
3596
3818
  });
3597
- await this.waitForSourcing(jsm, backupName, messageCount);
3819
+ await this.waitForSourceDrained(jsm, backupName, streamName2, drainedCount);
3598
3820
  }
3599
- this.logger.log(` Phase 2/4: Deleting old stream`);
3821
+ this.logger.log(` Phase 3/4: Recreating ${streamName2} with the new config`);
3600
3822
  await jsm.streams.delete(streamName2);
3601
3823
  originalDeleted = true;
3602
- this.logger.log(` Phase 3/4: Creating stream with new config`);
3603
3824
  await jsm.streams.add(newConfig);
3604
- if (messageCount > 0) {
3605
- const backupInfo = await jsm.streams.info(backupName);
3606
- await jsm.streams.update(backupName, { ...backupInfo.config, sources: [] });
3607
- this.logger.log(` Phase 4/4: Restoring ${messageCount} messages from backup`);
3608
- await jsm.streams.update(streamName2, {
3609
- ...newConfig,
3610
- sources: [{ name: backupName }]
3611
- });
3612
- await this.waitForSourcing(jsm, streamName2, messageCount);
3613
- await jsm.streams.update(streamName2, { ...newConfig, sources: [] });
3614
- await jsm.streams.delete(backupName);
3825
+ if (drainedCount > 0) {
3826
+ this.logger.log(` Phase 4/4: Restoring ${drainedCount} messages from backup`);
3827
+ await this.restoreFromBackup(jsm, streamName2, newConfig, backupName);
3615
3828
  }
3616
3829
  } catch (err) {
3617
- if (originalDeleted && messageCount > 0) {
3830
+ if (originalDeleted) {
3618
3831
  this.logger.error(
3619
- `Migration failed after deleting original stream. Backup ${backupName} preserved for manual recovery.`
3832
+ `Migration of ${streamName2} failed after the original was deleted. Backup ${backupName} preserved \u2014 restoration resumes on the next startup.`
3620
3833
  );
3621
3834
  } else {
3622
- await this.cleanupOrphanedBackup(jsm, backupName);
3835
+ await this.rollbackBeforeDelete(jsm, streamName2, currentInfo, backupName);
3623
3836
  }
3624
3837
  throw err;
3625
3838
  }
3626
3839
  const durationMs = Date.now() - startTime;
3627
3840
  this.logger.log(
3628
- `Stream ${streamName2}: migration complete (${messageCount} messages preserved, took ${(durationMs / 1e3).toFixed(1)}s)`
3841
+ `Stream ${streamName2}: migration complete (${drainedCount} messages preserved, took ${(durationMs / 1e3).toFixed(1)}s)`
3629
3842
  );
3630
3843
  }
3631
- async waitForSourcing(jsm, streamName2, expectedCount) {
3844
+ /**
3845
+ * Detect and finish a migration that a previous process left unfinished.
3846
+ * Safe against concurrent instances: a backup fresh enough to belong to a
3847
+ * live migration is left alone.
3848
+ *
3849
+ * @returns true when recovery work was performed.
3850
+ */
3851
+ async recoverInterrupted(jsm, streamName2, desiredConfig) {
3852
+ const backupName = `${streamName2}${MIGRATION_BACKUP_SUFFIX}`;
3853
+ const backupInfo = await this.tryInfo(jsm, backupName);
3854
+ if (backupInfo === null) return false;
3855
+ if (this.isPeerMigrationActive(backupInfo)) return false;
3856
+ const streamInfo = await this.tryInfo(jsm, streamName2);
3857
+ if (streamInfo === null) {
3858
+ this.logger.warn(`Stream ${streamName2}: resuming interrupted migration from ${backupName}`);
3859
+ await jsm.streams.add(desiredConfig);
3860
+ if (backupInfo.state.messages > 0) {
3861
+ await this.restoreFromBackup(jsm, streamName2, desiredConfig, backupName);
3862
+ } else {
3863
+ await jsm.streams.delete(backupName);
3864
+ }
3865
+ return true;
3866
+ }
3867
+ const hasBackupSource = (streamInfo.config.sources ?? []).some((s) => s.name === backupName);
3868
+ if (hasBackupSource) {
3869
+ this.logger.warn(`Stream ${streamName2}: finishing interrupted restore from ${backupName}`);
3870
+ await this.waitForSourceDrained(jsm, streamName2, backupName, backupInfo.state.messages);
3871
+ await jsm.streams.delete(backupName);
3872
+ await jsm.streams.update(streamName2, { ...streamInfo.config, sources: [] });
3873
+ return true;
3874
+ }
3875
+ if (backupInfo.state.messages === 0) {
3876
+ this.logger.warn(`Removing empty migration backup ${backupName}`);
3877
+ await jsm.streams.delete(backupName);
3878
+ return true;
3879
+ }
3880
+ this.logger.warn(
3881
+ `Stream ${streamName2}: restoring ${backupInfo.state.messages} messages from stale ${backupName}`
3882
+ );
3883
+ await this.restoreFromBackup(
3884
+ jsm,
3885
+ streamName2,
3886
+ { ...streamInfo.config, name: streamName2, subjects: streamInfo.config.subjects },
3887
+ backupName
3888
+ );
3889
+ return true;
3890
+ }
3891
+ /** Attach the backup as a source, drain it fully, then clean up. */
3892
+ async restoreFromBackup(jsm, streamName2, streamConfig, backupName) {
3893
+ const backupInfo = await jsm.streams.info(backupName);
3894
+ if ((backupInfo.config.sources ?? []).length > 0) {
3895
+ await jsm.streams.update(backupName, { ...backupInfo.config, sources: [] });
3896
+ }
3897
+ await jsm.streams.update(streamName2, { ...streamConfig, sources: [{ name: backupName }] });
3898
+ await this.waitForSourceDrained(jsm, streamName2, backupName, backupInfo.state.messages);
3899
+ await jsm.streams.delete(backupName);
3900
+ await jsm.streams.update(streamName2, { ...streamConfig, sources: [] });
3901
+ }
3902
+ /**
3903
+ * Wait until `sourceName` is fully drained into `streamName`. Lag-based, so
3904
+ * concurrent live publishes to the target cannot fake completion the way a
3905
+ * bare message-count comparison could. A freshly attached source reports
3906
+ * `lag: 0, active: -1` before its first sync — `active >= 0` filters that
3907
+ * false positive out (verified against NATS 2.12.6).
3908
+ */
3909
+ async waitForSourceDrained(jsm, streamName2, sourceName, minimumMessages) {
3632
3910
  const deadline = Date.now() + this.sourcingTimeoutMs;
3633
3911
  while (Date.now() < deadline) {
3634
3912
  const info = await jsm.streams.info(streamName2);
3635
- if (info.state.messages >= expectedCount) return;
3913
+ const source = (info.sources ?? []).find((s) => s.name === sourceName);
3914
+ if (source !== void 0 && source.active >= 0 && source.lag === 0 && info.state.messages >= minimumMessages) {
3915
+ return;
3916
+ }
3636
3917
  await new Promise((r) => setTimeout(r, SOURCING_POLL_INTERVAL_MS));
3637
3918
  }
3638
3919
  throw new Error(
3639
- `Stream sourcing timeout: ${streamName2} has not reached ${expectedCount} messages within ${this.sourcingTimeoutMs / 1e3}s`
3920
+ `Stream sourcing timeout: ${sourceName} has not drained into ${streamName2} within ${this.sourcingTimeoutMs / 1e3}s. The backup is preserved; restoration resumes on the next startup.`
3921
+ );
3922
+ }
3923
+ /**
3924
+ * A backup already present when migrate() begins belongs to another
3925
+ * instance migrating right now (rolling deploy) — wait for it to finish.
3926
+ * Stale leftovers are handled by recoverInterrupted() before migrate() runs,
3927
+ * so a timeout here means something is genuinely stuck.
3928
+ *
3929
+ * @returns true when a peer's backup was observed and cleared.
3930
+ */
3931
+ async waitOutPeerMigration(jsm, backupName) {
3932
+ if (await this.tryInfo(jsm, backupName) === null) return false;
3933
+ this.logger.warn(
3934
+ `Migration backup ${backupName} exists \u2014 another instance appears to be migrating; waiting`
3935
+ );
3936
+ const deadline = Date.now() + this.peerWaitMs;
3937
+ while (Date.now() < deadline) {
3938
+ if (await this.tryInfo(jsm, backupName) === null) return true;
3939
+ await new Promise((r) => setTimeout(r, SOURCING_POLL_INTERVAL_MS * 5));
3940
+ }
3941
+ throw new Error(
3942
+ `Migration backup ${backupName} did not clear within ${this.peerWaitMs / 1e3}s. If no other instance is migrating, recover or remove the backup manually.`
3640
3943
  );
3641
3944
  }
3642
- async cleanupOrphanedBackup(jsm, backupName) {
3945
+ /** Failure before the original was deleted: undo the quiesce, drop our backup. */
3946
+ async rollbackBeforeDelete(jsm, streamName2, originalInfo, backupName) {
3643
3947
  try {
3644
- await jsm.streams.info(backupName);
3645
- this.logger.warn(`Found orphaned migration backup stream: ${backupName}, cleaning up`);
3646
- await jsm.streams.delete(backupName);
3948
+ await jsm.streams.update(streamName2, { ...originalInfo.config });
3949
+ const backupInfo = await this.tryInfo(jsm, backupName);
3950
+ if (backupInfo !== null) {
3951
+ await jsm.streams.delete(backupName);
3952
+ }
3953
+ } catch (rollbackErr) {
3954
+ this.logger.error(
3955
+ `Rollback of ${streamName2} after a failed migration also failed \u2014 the stream may be left quiesced:`,
3956
+ rollbackErr
3957
+ );
3958
+ }
3959
+ }
3960
+ isPeerMigrationActive(backupInfo) {
3961
+ const startedAt = backupInfo.config.metadata?.[MIGRATION_STARTED_AT_KEY];
3962
+ if (!startedAt) return false;
3963
+ const startedMs = Date.parse(startedAt);
3964
+ if (Number.isNaN(startedMs)) return false;
3965
+ return Date.now() - startedMs < ACTIVE_MIGRATION_GRACE_MS;
3966
+ }
3967
+ async tryInfo(jsm, name) {
3968
+ try {
3969
+ return await jsm.streams.info(name);
3647
3970
  } catch (err) {
3648
3971
  if (err instanceof JetStreamApiError && err.apiError().err_code === 10059 /* StreamNotFound */) {
3649
- return;
3972
+ return null;
3650
3973
  }
3651
3974
  throw err;
3652
3975
  }
@@ -3677,6 +4000,15 @@ var StreamProvider = class {
3677
4000
  */
3678
4001
  async ensureStreams(kinds) {
3679
4002
  const jsm = await this.connection.getJetStreamManager();
4003
+ const reservations = kinds.map((kind) => this.buildReservation(kind, this.buildConfig(kind)));
4004
+ if (this.options.dlq) {
4005
+ reservations.push(this.buildReservation("dlq", this.buildDlqConfig()));
4006
+ }
4007
+ this.logger.log(`
4008
+ ${formatProvisioningSummary(this.options.name, reservations)}`);
4009
+ if (this.options.provisioning?.preflightStorageCheck) {
4010
+ await assertStorageBudget(jsm, this.options.name, reservations, this.logger);
4011
+ }
3680
4012
  await Promise.all(kinds.map((kind) => this.ensureStream(jsm, kind)));
3681
4013
  if (this.options.dlq) {
3682
4014
  await this.ensureDlqStream(jsm);
@@ -3713,6 +4045,7 @@ var StreamProvider = class {
3713
4045
  /** Ensure a single stream exists, creating or updating as needed. */
3714
4046
  async ensureStream(jsm, kind) {
3715
4047
  const config = this.buildConfig(kind);
4048
+ const ctx = this.errorContext(kind, config);
3716
4049
  return withProvisioningSpan(
3717
4050
  this.otel,
3718
4051
  {
@@ -3720,17 +4053,21 @@ var StreamProvider = class {
3720
4053
  endpoint: this.otelEndpoint,
3721
4054
  entity: "stream",
3722
4055
  name: config.name,
3723
- action: "ensure"
4056
+ action: "ensure",
4057
+ maxBytes: ctx.maxBytes,
4058
+ numReplicas: ctx.numReplicas,
4059
+ reservation: ctx.maxBytes !== void 0 && ctx.numReplicas !== void 0 ? ctx.maxBytes * ctx.numReplicas : void 0
3724
4060
  },
3725
4061
  async () => {
3726
4062
  this.logger.log(`Ensuring stream: ${config.name}`);
4063
+ await this.migration.recoverInterrupted(jsm, config.name, config);
3727
4064
  try {
3728
4065
  const currentInfo = await jsm.streams.info(config.name);
3729
- return await this.handleExistingStream(jsm, currentInfo, config);
4066
+ return await this.handleExistingStream(jsm, currentInfo, config, ctx);
3730
4067
  } catch (err) {
3731
4068
  if (err instanceof JetStreamApiError2 && err.apiError().err_code === 10059 /* StreamNotFound */) {
3732
4069
  this.logger.log(`Creating stream: ${config.name}`);
3733
- return await jsm.streams.add(config);
4070
+ return await this.runStreamOp(ctx, () => jsm.streams.add(config));
3734
4071
  }
3735
4072
  throw err;
3736
4073
  }
@@ -3740,6 +4077,7 @@ var StreamProvider = class {
3740
4077
  /** Ensure a dead-letter queue stream exists, creating or updating as needed. */
3741
4078
  async ensureDlqStream(jsm) {
3742
4079
  const config = this.buildDlqConfig();
4080
+ const ctx = this.errorContext("dlq", config);
3743
4081
  return withProvisioningSpan(
3744
4082
  this.otel,
3745
4083
  {
@@ -3747,24 +4085,30 @@ var StreamProvider = class {
3747
4085
  endpoint: this.otelEndpoint,
3748
4086
  entity: "stream",
3749
4087
  name: config.name,
3750
- action: "ensure"
4088
+ action: "ensure",
4089
+ maxBytes: ctx.maxBytes,
4090
+ numReplicas: ctx.numReplicas,
4091
+ reservation: ctx.maxBytes !== void 0 && ctx.numReplicas !== void 0 ? ctx.maxBytes * ctx.numReplicas : void 0
3751
4092
  },
3752
4093
  async () => {
3753
4094
  this.logger.log(`Ensuring DLQ stream: ${config.name}`);
3754
4095
  try {
3755
4096
  const currentInfo = await jsm.streams.info(config.name);
3756
- return await this.handleExistingStream(jsm, currentInfo, config);
4097
+ return await this.handleExistingStream(jsm, currentInfo, config, ctx);
3757
4098
  } catch (err) {
3758
4099
  if (err instanceof JetStreamApiError2 && err.apiError().err_code === 10059 /* StreamNotFound */) {
3759
4100
  this.logger.log(`Creating DLQ stream: ${config.name}`);
3760
- return await jsm.streams.add(config);
4101
+ return await this.runStreamOp(ctx, () => jsm.streams.add(config));
3761
4102
  }
3762
4103
  throw err;
3763
4104
  }
3764
4105
  }
3765
4106
  );
3766
4107
  }
3767
- async handleExistingStream(jsm, currentInfo, config) {
4108
+ async handleExistingStream(jsm, currentInfo, config, ctx) {
4109
+ if (this.isSharedStream(config.name)) {
4110
+ config.subjects = [.../* @__PURE__ */ new Set([...config.subjects, ...currentInfo.config.subjects])];
4111
+ }
3768
4112
  const diff = compareStreamConfig(currentInfo.config, config);
3769
4113
  if (!diff.hasChanges) {
3770
4114
  this.logger.debug(`Stream ${config.name}: no config changes`);
@@ -3779,7 +4123,7 @@ var StreamProvider = class {
3779
4123
  }
3780
4124
  if (!diff.hasImmutableChanges) {
3781
4125
  this.logger.debug(`Stream exists, updating: ${config.name}`);
3782
- return await jsm.streams.update(config.name, config);
4126
+ return await this.runStreamOp(ctx, () => jsm.streams.update(config.name, config));
3783
4127
  }
3784
4128
  if (!this.options.allowDestructiveMigration) {
3785
4129
  this.logger.warn(
@@ -3787,10 +4131,15 @@ var StreamProvider = class {
3787
4131
  );
3788
4132
  if (diff.hasMutableChanges) {
3789
4133
  const mutableConfig = this.buildMutableOnlyConfig(config, currentInfo.config, diff);
3790
- return await jsm.streams.update(config.name, mutableConfig);
4134
+ return await this.runStreamOp(ctx, () => jsm.streams.update(config.name, mutableConfig));
3791
4135
  }
3792
4136
  return currentInfo;
3793
4137
  }
4138
+ if (this.isSharedStream(config.name)) {
4139
+ throw new Error(
4140
+ `Stream ${config.name} is shared across services and cannot be destructively migrated: recreating it would delete every other service's durable broadcast consumers and replay retained history to them. Coordinate a manual migration instead.`
4141
+ );
4142
+ }
3794
4143
  await withMigrationSpan(
3795
4144
  this.otel,
3796
4145
  {
@@ -3829,11 +4178,47 @@ var StreamProvider = class {
3829
4178
  }
3830
4179
  }
3831
4180
  }
4181
+ buildReservation(kind, config) {
4182
+ const mb = config.max_bytes;
4183
+ return {
4184
+ kind,
4185
+ name: config.name,
4186
+ storage: config.storage ?? StorageType4.File,
4187
+ numReplicas: config.num_replicas ?? 1,
4188
+ maxBytes: mb !== void 0 && mb >= 0 ? mb : 0,
4189
+ // NATS uses -1 for unlimited
4190
+ maxAge: config.max_age ?? 0,
4191
+ retention: config.retention ?? RetentionPolicy2.Limits
4192
+ };
4193
+ }
4194
+ errorContext(kind, config) {
4195
+ return {
4196
+ entity: "stream",
4197
+ name: config.name,
4198
+ kind,
4199
+ maxBytes: config.max_bytes,
4200
+ numReplicas: config.num_replicas ?? 1
4201
+ };
4202
+ }
4203
+ async runStreamOp(ctx, op) {
4204
+ try {
4205
+ return await op();
4206
+ } catch (err) {
4207
+ if (err instanceof JetStreamApiError2) {
4208
+ throw mapProvisioningError(err, ctx);
4209
+ }
4210
+ throw err;
4211
+ }
4212
+ }
4213
+ /** The broadcast stream is global — every service in the cluster shares it. */
4214
+ isSharedStream(name) {
4215
+ return name === this.getStreamName("broadcast" /* Broadcast */);
4216
+ }
3832
4217
  /** Build the full stream config by merging defaults with user overrides. */
3833
4218
  buildConfig(kind) {
3834
4219
  const name = this.getStreamName(kind);
3835
4220
  const subjects = this.getSubjects(kind);
3836
- const description = `JetStream ${kind} stream for ${this.options.name}`;
4221
+ const description = kind === "broadcast" /* Broadcast */ ? "JetStream broadcast stream (shared across services)" : `JetStream ${kind} stream for ${this.options.name}`;
3837
4222
  const defaults = this.getDefaults(kind);
3838
4223
  const overrides = this.getOverrides(kind);
3839
4224
  return {
@@ -3975,15 +4360,16 @@ var ConsumerProvider = class {
3975
4360
  },
3976
4361
  async () => {
3977
4362
  this.logger.log(`Ensuring consumer: ${name} on stream: ${stream}`);
4363
+ const ctx = { entity: "consumer", name, kind };
3978
4364
  try {
3979
4365
  await jsm.consumers.info(stream, name);
3980
4366
  this.logger.debug(`Consumer exists, updating: ${name}`);
3981
- return await jsm.consumers.update(stream, name, config);
4367
+ return await this.runConsumerOp(ctx, () => jsm.consumers.update(stream, name, config));
3982
4368
  } catch (err) {
3983
4369
  if (!(err instanceof JetStreamApiError3) || err.apiError().err_code !== 10014 /* ConsumerNotFound */) {
3984
4370
  throw err;
3985
4371
  }
3986
- return await this.createConsumer(jsm, stream, name, config);
4372
+ return await this.createConsumer(jsm, stream, name, kind, config);
3987
4373
  }
3988
4374
  }
3989
4375
  );
@@ -4023,7 +4409,7 @@ var ConsumerProvider = class {
4023
4409
  if (!(err instanceof JetStreamApiError3) || err.apiError().err_code !== 10014 /* ConsumerNotFound */) {
4024
4410
  throw err;
4025
4411
  }
4026
- return await this.createConsumer(jsm, stream, name, config);
4412
+ return await this.createConsumer(jsm, stream, name, kind, config);
4027
4413
  }
4028
4414
  }
4029
4415
  );
@@ -4050,8 +4436,9 @@ var ConsumerProvider = class {
4050
4436
  /**
4051
4437
  * Create a consumer, handling the race where another pod creates it first.
4052
4438
  */
4053
- async createConsumer(jsm, stream, name, config) {
4439
+ async createConsumer(jsm, stream, name, kind, config) {
4054
4440
  this.logger.log(`Creating consumer: ${name}`);
4441
+ const ctx = { entity: "consumer", name, kind };
4055
4442
  try {
4056
4443
  return await jsm.consumers.add(stream, config);
4057
4444
  } catch (addErr) {
@@ -4059,9 +4446,22 @@ var ConsumerProvider = class {
4059
4446
  this.logger.debug(`Consumer ${name} created by another pod, using existing`);
4060
4447
  return await jsm.consumers.info(stream, name);
4061
4448
  }
4449
+ if (addErr instanceof JetStreamApiError3) {
4450
+ throw mapProvisioningError(addErr, ctx);
4451
+ }
4062
4452
  throw addErr;
4063
4453
  }
4064
4454
  }
4455
+ async runConsumerOp(ctx, op) {
4456
+ try {
4457
+ return await op();
4458
+ } catch (err) {
4459
+ if (err instanceof JetStreamApiError3) {
4460
+ throw mapProvisioningError(err, ctx);
4461
+ }
4462
+ throw err;
4463
+ }
4464
+ }
4065
4465
  /** Build consumer config by merging defaults with user overrides. */
4066
4466
  // eslint-disable-next-line @typescript-eslint/naming-convention -- NATS API uses snake_case
4067
4467
  buildConfig(kind) {
@@ -4522,6 +4922,7 @@ var MetadataProvider = class {
4522
4922
  // src/server/routing/event.router.ts
4523
4923
  import { Logger as Logger17 } from "@nestjs/common";
4524
4924
  import { headers as natsHeaders3 } from "@nats-io/transport-node";
4925
+ var DLQ_PUBLISH_ATTEMPTS = 3;
4525
4926
  var eventConsumeKindFor = (kind) => {
4526
4927
  if (kind === "broadcast" /* Broadcast */) return "broadcast" /* Broadcast */;
4527
4928
  if (kind === "ordered" /* Ordered */) return "ordered" /* Ordered */;
@@ -4608,33 +5009,80 @@ var EventRouter = class {
4608
5009
  return msg.info.deliveryCount >= maxDeliver;
4609
5010
  };
4610
5011
  const handleDeadLetter = hasDlqCheck ? (msg, data, err) => this.handleDeadLetter(msg, data, err) : null;
4611
- const settleSuccess = (msg, ctx) => {
4612
- if (ctx.shouldTerminate) msg.term(ctx.terminateReason);
4613
- else if (ctx.shouldRetry) msg.nak(ctx.retryDelay);
4614
- else msg.ack();
5012
+ const settleSuccess = (msg, ctx, data) => {
5013
+ if (ctx.shouldTerminate) {
5014
+ settleQuietly(logger5, `Failed to term ${msg.subject}:`, () => {
5015
+ msg.term(ctx.terminateReason);
5016
+ });
5017
+ return void 0;
5018
+ }
5019
+ if (ctx.shouldRetry) {
5020
+ if (handleDeadLetter !== null && isDeadLetter(msg)) {
5021
+ return handleDeadLetter(
5022
+ msg,
5023
+ data,
5024
+ new Error("Retry requested on the final delivery attempt")
5025
+ );
5026
+ }
5027
+ settleQuietly(logger5, `Failed to nak ${msg.subject}:`, () => {
5028
+ msg.nak(ctx.retryDelay);
5029
+ });
5030
+ return void 0;
5031
+ }
5032
+ settleQuietly(logger5, `Failed to ack ${msg.subject}:`, () => {
5033
+ msg.ack();
5034
+ });
5035
+ return void 0;
4615
5036
  };
4616
5037
  const settleFailure = async (msg, data, err) => {
4617
5038
  if (handleDeadLetter !== null && isDeadLetter(msg)) {
4618
5039
  await handleDeadLetter(msg, data, err);
4619
5040
  return;
4620
5041
  }
4621
- msg.nak();
5042
+ settleQuietly(logger5, `Failed to nak ${msg.subject}:`, () => {
5043
+ msg.nak();
5044
+ });
5045
+ };
5046
+ const captureUnroutable = (capture, msg, err) => {
5047
+ let data;
5048
+ try {
5049
+ data = codec.decode(msg.data);
5050
+ } catch {
5051
+ data = void 0;
5052
+ }
5053
+ return capture(msg, data, err).catch((captureErr) => {
5054
+ logger5.error(`Dead-letter capture failed for unroutable ${msg.subject}:`, captureErr);
5055
+ });
4622
5056
  };
4623
5057
  const resolveEvent = (msg) => {
4624
5058
  const subject = msg.subject;
4625
5059
  try {
4626
5060
  const handler = patternRegistry.getHandler(subject);
4627
5061
  if (!handler) {
4628
- msg.term(`No handler for event: ${subject}`);
4629
5062
  logger5.error(`No handler for subject: ${subject}`);
5063
+ if (handleDeadLetter !== null) {
5064
+ return captureUnroutable(
5065
+ handleDeadLetter,
5066
+ msg,
5067
+ new Error(`No handler for event: ${subject}`)
5068
+ );
5069
+ }
5070
+ msg.term(`No handler for event: ${subject}`);
4630
5071
  return null;
4631
5072
  }
4632
5073
  let data;
4633
5074
  try {
4634
5075
  data = codec.decode(msg.data);
4635
5076
  } catch (err) {
4636
- msg.term("Decode error");
4637
5077
  logger5.error(`Decode error for ${subject}:`, err);
5078
+ if (handleDeadLetter !== null) {
5079
+ return captureUnroutable(
5080
+ handleDeadLetter,
5081
+ msg,
5082
+ new Error(`Decode error: ${err instanceof Error ? err.message : String(err)}`)
5083
+ );
5084
+ }
5085
+ msg.term("Decode error");
4638
5086
  return null;
4639
5087
  }
4640
5088
  eventBus.emitMessageRouted(subject, "event" /* Event */);
@@ -4657,6 +5105,7 @@ var EventRouter = class {
4657
5105
  const handleSafe = (msg) => {
4658
5106
  const resolved = resolveEvent(msg);
4659
5107
  if (resolved === null) return void 0;
5108
+ if (isPromiseLike2(resolved)) return resolved;
4660
5109
  const { handler, data } = resolved;
4661
5110
  const ctx = new RpcContext([msg]);
4662
5111
  const stopAckExtension = hasAckExtension ? startAckExtensionTimer(msg, ackExtensionInterval) : null;
@@ -4689,16 +5138,24 @@ var EventRouter = class {
4689
5138
  });
4690
5139
  }
4691
5140
  if (!isPromiseLike2(pending)) {
4692
- settleSuccess(msg, ctx);
5141
+ const settled = settleSuccess(msg, ctx, data);
4693
5142
  reportHandlerCompleted(msg, startedAt, statusForContext(ctx));
4694
- if (stopAckExtension !== null) stopAckExtension();
4695
- return void 0;
5143
+ if (settled === void 0) {
5144
+ if (stopAckExtension !== null) stopAckExtension();
5145
+ return void 0;
5146
+ }
5147
+ return settled.finally(() => {
5148
+ if (stopAckExtension !== null) stopAckExtension();
5149
+ });
4696
5150
  }
4697
5151
  return pending.then(
4698
- () => {
4699
- settleSuccess(msg, ctx);
4700
- reportHandlerCompleted(msg, startedAt, statusForContext(ctx));
4701
- if (stopAckExtension !== null) stopAckExtension();
5152
+ async () => {
5153
+ try {
5154
+ await settleSuccess(msg, ctx, data);
5155
+ reportHandlerCompleted(msg, startedAt, statusForContext(ctx));
5156
+ } finally {
5157
+ if (stopAckExtension !== null) stopAckExtension();
5158
+ }
4702
5159
  },
4703
5160
  async (err) => {
4704
5161
  eventBus.emit(
@@ -4792,14 +5249,28 @@ var EventRouter = class {
4792
5249
  active--;
4793
5250
  drainBacklog();
4794
5251
  };
5252
+ const routeSafely = (msg) => {
5253
+ try {
5254
+ return route(msg);
5255
+ } catch (err) {
5256
+ logger5.error(`Unexpected routing failure for ${msg.subject}:`, err);
5257
+ return void 0;
5258
+ }
5259
+ };
5260
+ const trackAsync = (result, msg) => {
5261
+ void result.catch((err) => {
5262
+ logger5.error(`Unexpected routing failure for ${msg.subject}:`, err);
5263
+ }).finally(onAsyncDone);
5264
+ };
4795
5265
  const drainBacklog = () => {
4796
5266
  while (active < maxActive) {
4797
5267
  const next = backlog.shift();
4798
5268
  if (next === void 0) return;
5269
+ next.stopAckExtension?.();
4799
5270
  active++;
4800
- const result = route(next);
5271
+ const result = routeSafely(next.msg);
4801
5272
  if (result !== void 0) {
4802
- void result.finally(onAsyncDone);
5273
+ trackAsync(result, next.msg);
4803
5274
  } else {
4804
5275
  active--;
4805
5276
  }
@@ -4809,7 +5280,10 @@ var EventRouter = class {
4809
5280
  const subscription = stream$.subscribe({
4810
5281
  next: (msg) => {
4811
5282
  if (active >= maxActive) {
4812
- backlog.push(msg);
5283
+ backlog.push({
5284
+ msg,
5285
+ stopAckExtension: hasAckExtension ? startAckExtensionTimer(msg, ackExtensionInterval) : null
5286
+ });
4813
5287
  if (!backlogWarned && backlog.length >= backlogWarnThreshold) {
4814
5288
  backlogWarned = true;
4815
5289
  logger5.warn(
@@ -4819,9 +5293,9 @@ var EventRouter = class {
4819
5293
  return;
4820
5294
  }
4821
5295
  active++;
4822
- const result = route(msg);
5296
+ const result = routeSafely(msg);
4823
5297
  if (result !== void 0) {
4824
- void result.finally(onAsyncDone);
5298
+ trackAsync(result, msg);
4825
5299
  } else {
4826
5300
  active--;
4827
5301
  if (backlog.length > 0) drainBacklog();
@@ -4831,6 +5305,12 @@ var EventRouter = class {
4831
5305
  logger5.error(`Stream error in ${kind} router`, err);
4832
5306
  }
4833
5307
  });
5308
+ subscription.add(() => {
5309
+ for (const queued of backlog) {
5310
+ queued.stopAckExtension?.();
5311
+ }
5312
+ backlog.length = 0;
5313
+ });
4834
5314
  this.subscriptions.push(subscription);
4835
5315
  }
4836
5316
  getConcurrency(kind) {
@@ -4845,26 +5325,79 @@ var EventRouter = class {
4845
5325
  }
4846
5326
  /**
4847
5327
  * Last-resort path for a dead letter: invoke `onDeadLetter`, then `term` on
4848
- * success or `nak` on hook failure so NATS retries on the next delivery
4849
- * cycle. Used when DLQ stream isn't configured, or when publishing to it
4850
- * failed and we still have to surface the message somewhere observable.
5328
+ * success. On failure the message is nak'd to release it, but the server
5329
+ * never redelivers past `max_deliver` — it stays in the stream for manual
5330
+ * recovery. Used when the DLQ stream isn't configured, or when publishing
5331
+ * to it failed and we still have to surface the message somewhere.
4851
5332
  */
4852
5333
  async fallbackToOnDeadLetterCallback(info, msg) {
4853
- if (!this.deadLetterConfig) {
4854
- msg.term("Dead letter config unavailable");
5334
+ const onDeadLetter = this.deadLetterConfig?.onDeadLetter;
5335
+ if (!onDeadLetter) {
5336
+ this.logger.error(
5337
+ `Dead letter for ${msg.subject} could not be captured (DLQ publish failed, no onDeadLetter callback) \u2014 leaving the message in the stream`
5338
+ );
5339
+ settleQuietly(this.logger, `Failed to nak ${msg.subject}:`, () => {
5340
+ msg.nak();
5341
+ });
4855
5342
  return;
4856
5343
  }
4857
5344
  try {
4858
- await this.deadLetterConfig.onDeadLetter(info);
4859
- msg.term("Dead letter processed via fallback callback");
5345
+ await onDeadLetter(info);
5346
+ settleQuietly(this.logger, `Failed to term ${msg.subject}:`, () => {
5347
+ msg.term("Dead letter processed via fallback callback");
5348
+ });
4860
5349
  } catch (hookErr) {
4861
5350
  this.logger.error(
4862
- `Fallback onDeadLetter callback failed for ${msg.subject}, nak for retry:`,
5351
+ `Fallback onDeadLetter callback failed for ${msg.subject} \u2014 the message stays in the stream and will not be redelivered (max_deliver exhausted); recover it manually:`,
4863
5352
  hookErr
4864
5353
  );
4865
- msg.nak();
5354
+ settleQuietly(this.logger, `Failed to nak ${msg.subject}:`, () => {
5355
+ msg.nak();
5356
+ });
4866
5357
  }
4867
5358
  }
5359
+ /**
5360
+ * Copy the original message headers for the DLQ republish, dropping NATS
5361
+ * server control headers: a copied Nats-TTL expires the DLQ entry (or gets
5362
+ * the publish rejected when the DLQ stream has no allow_msg_ttl), and a copied
5363
+ * Nats-Msg-Id collides with the DLQ dedup window.
5364
+ */
5365
+ buildDlqHeaders(msg) {
5366
+ const hdrs = natsHeaders3();
5367
+ if (!msg.headers) return hdrs;
5368
+ for (const [k, v] of msg.headers) {
5369
+ if (k.toLowerCase().startsWith(NATS_CONTROL_HEADER_PREFIX)) continue;
5370
+ for (const val of v) {
5371
+ hdrs.append(k, val);
5372
+ }
5373
+ }
5374
+ return hdrs;
5375
+ }
5376
+ /**
5377
+ * Attempt the DLQ publish up to {@link DLQ_PUBLISH_ATTEMPTS} times.
5378
+ *
5379
+ * Past `max_deliver` the server never redelivers, so an in-process retry is
5380
+ * the only second chance a dead letter gets. There is no artificial delay
5381
+ * between attempts: when the broker is unreachable each publish already
5382
+ * spends its own request timeout, which spaces the attempts naturally.
5383
+ */
5384
+ async publishToDlqWithRetry(connection, subject, data, headers2) {
5385
+ let lastErr;
5386
+ for (let attempt = 1; attempt <= DLQ_PUBLISH_ATTEMPTS; attempt += 1) {
5387
+ try {
5388
+ await connection.getJetStreamClient().publish(subject, data, { headers: headers2 });
5389
+ return;
5390
+ } catch (err) {
5391
+ lastErr = err;
5392
+ if (attempt < DLQ_PUBLISH_ATTEMPTS) {
5393
+ this.logger.warn(
5394
+ `DLQ publish attempt ${attempt}/${DLQ_PUBLISH_ATTEMPTS} failed for ${subject}, retrying`
5395
+ );
5396
+ }
5397
+ }
5398
+ }
5399
+ throw lastErr;
5400
+ }
4868
5401
  /**
4869
5402
  * Publish a dead letter to the configured Dead-Letter Queue (DLQ) stream.
4870
5403
  *
@@ -4883,14 +5416,7 @@ var EventRouter = class {
4883
5416
  return;
4884
5417
  }
4885
5418
  const destinationSubject = dlqStreamName(serviceName);
4886
- const hdrs = natsHeaders3();
4887
- if (msg.headers) {
4888
- for (const [k, v] of msg.headers) {
4889
- for (const val of v) {
4890
- hdrs.append(k, val);
4891
- }
4892
- }
4893
- }
5419
+ const hdrs = this.buildDlqHeaders(msg);
4894
5420
  let reason = String(error);
4895
5421
  if (error instanceof Error) {
4896
5422
  reason = error.message;
@@ -4903,8 +5429,7 @@ var EventRouter = class {
4903
5429
  hdrs.set("x-failed-at" /* FailedAt */, (/* @__PURE__ */ new Date()).toISOString());
4904
5430
  hdrs.set("x-delivery-count" /* DeliveryCount */, msg.info.deliveryCount.toString());
4905
5431
  try {
4906
- const js = this.connection.getJetStreamClient();
4907
- await js.publish(destinationSubject, msg.data, { headers: hdrs });
5432
+ await this.publishToDlqWithRetry(this.connection, destinationSubject, msg.data, hdrs);
4908
5433
  this.logger.log(`Message sent to DLQ: ${msg.subject}`);
4909
5434
  if (this.deadLetterConfig?.onDeadLetter) {
4910
5435
  try {
@@ -4916,7 +5441,9 @@ var EventRouter = class {
4916
5441
  );
4917
5442
  }
4918
5443
  }
4919
- msg.term("Moved to DLQ stream");
5444
+ settleQuietly(this.logger, `Failed to term ${msg.subject}:`, () => {
5445
+ msg.term("Moved to DLQ stream");
5446
+ });
4920
5447
  } catch (publishErr) {
4921
5448
  this.logger.error(`Failed to publish to DLQ for ${msg.subject}:`, publishErr);
4922
5449
  await this.fallbackToOnDeadLetterCallback(info, msg);
@@ -5110,7 +5637,9 @@ var RpcRouter = class {
5110
5637
  `rpc-handler:${subject}`
5111
5638
  );
5112
5639
  publishErrorReply(replyTo, correlationId, subject, err);
5113
- msg.term(`Handler error: ${subject}`);
5640
+ settleQuietly(logger5, `Failed to term ${subject}:`, () => {
5641
+ msg.term(`Handler error: ${subject}`);
5642
+ });
5114
5643
  };
5115
5644
  const abortController = new AbortController();
5116
5645
  let pending;
@@ -5138,7 +5667,9 @@ var RpcRouter = class {
5138
5667
  }
5139
5668
  if (!isPromiseLike2(pending)) {
5140
5669
  if (stopAckExtension !== null) stopAckExtension();
5141
- msg.ack();
5670
+ settleQuietly(logger5, `Failed to ack ${subject}:`, () => {
5671
+ msg.ack();
5672
+ });
5142
5673
  publishReply(replyTo, correlationId, pending);
5143
5674
  reportHandlerCompleted(msg, startedAt, "success");
5144
5675
  return void 0;
@@ -5150,7 +5681,9 @@ var RpcRouter = class {
5150
5681
  if (stopAckExtension !== null) stopAckExtension();
5151
5682
  abortController.abort();
5152
5683
  emitRpcTimeout(subject, correlationId);
5153
- msg.term("Handler timeout");
5684
+ settleQuietly(logger5, `Failed to term ${subject}:`, () => {
5685
+ msg.term("Handler timeout");
5686
+ });
5154
5687
  reportHandlerCompleted(msg, startedAt, "terminated");
5155
5688
  }, timeout);
5156
5689
  return pending.then(
@@ -5159,7 +5692,9 @@ var RpcRouter = class {
5159
5692
  settled = true;
5160
5693
  clearTimeout(timeoutId);
5161
5694
  if (stopAckExtension !== null) stopAckExtension();
5162
- msg.ack();
5695
+ settleQuietly(logger5, `Failed to ack ${subject}:`, () => {
5696
+ msg.ack();
5697
+ });
5163
5698
  publishReply(replyTo, correlationId, result);
5164
5699
  reportHandlerCompleted(msg, startedAt, "success");
5165
5700
  },
@@ -5181,14 +5716,28 @@ var RpcRouter = class {
5181
5716
  active--;
5182
5717
  drainBacklog();
5183
5718
  };
5719
+ const routeSafely = (msg) => {
5720
+ try {
5721
+ return handleSafe(msg);
5722
+ } catch (err) {
5723
+ logger5.error(`Unexpected routing failure for ${msg.subject}:`, err);
5724
+ return void 0;
5725
+ }
5726
+ };
5727
+ const trackAsync = (result, msg) => {
5728
+ void result.catch((err) => {
5729
+ logger5.error(`Unexpected routing failure for ${msg.subject}:`, err);
5730
+ }).finally(onAsyncDone);
5731
+ };
5184
5732
  const drainBacklog = () => {
5185
5733
  while (active < maxActive) {
5186
5734
  const next = backlog.shift();
5187
5735
  if (next === void 0) return;
5736
+ next.stopAckExtension?.();
5188
5737
  active++;
5189
- const result = handleSafe(next);
5738
+ const result = routeSafely(next.msg);
5190
5739
  if (result !== void 0) {
5191
- void result.finally(onAsyncDone);
5740
+ trackAsync(result, next.msg);
5192
5741
  } else {
5193
5742
  active--;
5194
5743
  }
@@ -5198,7 +5747,10 @@ var RpcRouter = class {
5198
5747
  this.subscription = this.messageProvider.commands$.subscribe({
5199
5748
  next: (msg) => {
5200
5749
  if (active >= maxActive) {
5201
- backlog.push(msg);
5750
+ backlog.push({
5751
+ msg,
5752
+ stopAckExtension: hasAckExtension ? startAckExtensionTimer(msg, ackExtensionInterval) : null
5753
+ });
5202
5754
  if (!backlogWarned && backlog.length >= backlogWarnThreshold) {
5203
5755
  backlogWarned = true;
5204
5756
  logger5.warn(
@@ -5208,9 +5760,9 @@ var RpcRouter = class {
5208
5760
  return;
5209
5761
  }
5210
5762
  active++;
5211
- const result = handleSafe(msg);
5763
+ const result = routeSafely(msg);
5212
5764
  if (result !== void 0) {
5213
- void result.finally(onAsyncDone);
5765
+ trackAsync(result, msg);
5214
5766
  } else {
5215
5767
  active--;
5216
5768
  if (backlog.length > 0) drainBacklog();
@@ -5220,6 +5772,12 @@ var RpcRouter = class {
5220
5772
  logger5.error("Stream error in RPC router", err);
5221
5773
  }
5222
5774
  });
5775
+ this.subscription.add(() => {
5776
+ for (const queued of backlog) {
5777
+ queued.stopAckExtension?.();
5778
+ }
5779
+ backlog.length = 0;
5780
+ });
5223
5781
  }
5224
5782
  /** Stop routing and unsubscribe. */
5225
5783
  destroy() {
@@ -5502,7 +6060,7 @@ var JetstreamModule = class {
5502
6060
  ],
5503
6061
  useFactory: (options, messageProvider, patternRegistry, codec, eventBus, ackWaitMap, connection) => {
5504
6062
  if (options.consumer === false) return null;
5505
- const deadLetterConfig = options.onDeadLetter ? {
6063
+ const deadLetterConfig = options.onDeadLetter || options.dlq ? {
5506
6064
  maxDeliverByStream: /* @__PURE__ */ new Map(),
5507
6065
  onDeadLetter: options.onDeadLetter
5508
6066
  } : void 0;
@@ -5702,6 +6260,7 @@ export {
5702
6260
  JetstreamHeader,
5703
6261
  JetstreamHealthIndicator,
5704
6262
  JetstreamModule,
6263
+ JetstreamProvisioningError,
5705
6264
  JetstreamRecord,
5706
6265
  JetstreamRecordBuilder,
5707
6266
  JetstreamStrategy,