@hotmeshio/hotmesh 0.19.1 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,25 @@
1
1
  import { ActivityDuplex } from '../types/activity';
2
2
  import { CollationFaultType, CollationStage } from '../types/collator';
3
3
  import { DurableChildErrorType, DurableContinueAsNewErrorType, DurableProxyErrorType, DurableSleepErrorType, DurableWaitForAllErrorType, DurableWaitForErrorType } from '../types/error';
4
+ /**
5
+ * Error classification for dispatcher logging.
6
+ *
7
+ * FATAL — lease expired, invariant violation, corrupt state.
8
+ * The activity must stop immediately; message is NOT acked.
9
+ * RETRYABLE — transient infrastructure error (DB timeout, network).
10
+ * Normal retry/backoff applies.
11
+ * TERMINAL — permanent failure (user code threw, max retries exceeded).
12
+ * Message is acked; job is marked failed.
13
+ * COLLATION — duplicate delivery detected via GUID ledger.
14
+ * Silent ack; no work needed.
15
+ */
16
+ export declare enum ErrorCategory {
17
+ FATAL = "fatal",
18
+ RETRYABLE = "retryable",
19
+ TERMINAL = "terminal",
20
+ COLLATION = "collation"
21
+ }
22
+ export declare function classifyError(error: unknown): ErrorCategory;
4
23
  declare class GetStateError extends Error {
5
24
  jobId: string;
6
25
  code: number;
@@ -139,6 +158,13 @@ declare class GenerationalError extends Error {
139
158
  declare class ExecActivityError extends Error {
140
159
  constructor();
141
160
  }
161
+ declare class LeaseExpiredError extends Error {
162
+ code: number;
163
+ type: string;
164
+ deadlineMs: number;
165
+ reservationTimeoutS: number;
166
+ constructor(deadlineMs: number, reservationTimeoutS: number);
167
+ }
142
168
  declare class CollationError extends Error {
143
169
  status: number;
144
170
  leg: ActivityDuplex;
@@ -146,4 +172,4 @@ declare class CollationError extends Error {
146
172
  fault: CollationFaultType;
147
173
  constructor(status: number, leg: ActivityDuplex, stage: CollationStage, fault?: CollationFaultType);
148
174
  }
149
- export { CollationError, DurableChildError, DurableContinueAsNewError, DurableFatalError, DurableMaxedError, DurableProxyError, DurableRetryError, DurableSleepError, DurableTimeoutError, DurableWaitForAllError, DurableWaitForError, DuplicateJobError, ExecActivityError, GenerationalError, GetStateError, InactiveJobError, MapDataError, RegisterTimeoutError, SetStateError, };
175
+ export { CollationError, DurableChildError, DurableContinueAsNewError, DurableFatalError, DurableMaxedError, DurableProxyError, DurableRetryError, DurableSleepError, DurableTimeoutError, DurableWaitForAllError, DurableWaitForError, DuplicateJobError, ExecActivityError, GenerationalError, GetStateError, InactiveJobError, LeaseExpiredError, MapDataError, RegisterTimeoutError, SetStateError, };
@@ -1,7 +1,53 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.SetStateError = exports.RegisterTimeoutError = exports.MapDataError = exports.InactiveJobError = exports.GetStateError = exports.GenerationalError = exports.ExecActivityError = exports.DuplicateJobError = exports.DurableWaitForError = exports.DurableWaitForAllError = exports.DurableTimeoutError = exports.DurableSleepError = exports.DurableRetryError = exports.DurableProxyError = exports.DurableMaxedError = exports.DurableFatalError = exports.DurableContinueAsNewError = exports.DurableChildError = exports.CollationError = void 0;
3
+ exports.SetStateError = exports.RegisterTimeoutError = exports.MapDataError = exports.LeaseExpiredError = exports.InactiveJobError = exports.GetStateError = exports.GenerationalError = exports.ExecActivityError = exports.DuplicateJobError = exports.DurableWaitForError = exports.DurableWaitForAllError = exports.DurableTimeoutError = exports.DurableSleepError = exports.DurableRetryError = exports.DurableProxyError = exports.DurableMaxedError = exports.DurableFatalError = exports.DurableContinueAsNewError = exports.DurableChildError = exports.CollationError = exports.classifyError = exports.ErrorCategory = void 0;
4
4
  const enums_1 = require("./enums");
5
+ /**
6
+ * Error classification for dispatcher logging.
7
+ *
8
+ * FATAL — lease expired, invariant violation, corrupt state.
9
+ * The activity must stop immediately; message is NOT acked.
10
+ * RETRYABLE — transient infrastructure error (DB timeout, network).
11
+ * Normal retry/backoff applies.
12
+ * TERMINAL — permanent failure (user code threw, max retries exceeded).
13
+ * Message is acked; job is marked failed.
14
+ * COLLATION — duplicate delivery detected via GUID ledger.
15
+ * Silent ack; no work needed.
16
+ */
17
+ var ErrorCategory;
18
+ (function (ErrorCategory) {
19
+ ErrorCategory["FATAL"] = "fatal";
20
+ ErrorCategory["RETRYABLE"] = "retryable";
21
+ ErrorCategory["TERMINAL"] = "terminal";
22
+ ErrorCategory["COLLATION"] = "collation";
23
+ })(ErrorCategory = exports.ErrorCategory || (exports.ErrorCategory = {}));
24
+ function classifyError(error) {
25
+ if (error instanceof LeaseExpiredError) {
26
+ return ErrorCategory.FATAL;
27
+ }
28
+ if (error instanceof CollationError || error instanceof DuplicateJobError) {
29
+ return ErrorCategory.COLLATION;
30
+ }
31
+ if (error instanceof DurableRetryError) {
32
+ return ErrorCategory.RETRYABLE;
33
+ }
34
+ if (error instanceof DurableTimeoutError) {
35
+ return ErrorCategory.RETRYABLE;
36
+ }
37
+ if (error instanceof DurableFatalError ||
38
+ error instanceof DurableMaxedError) {
39
+ return ErrorCategory.TERMINAL;
40
+ }
41
+ if (error instanceof InactiveJobError ||
42
+ error instanceof GenerationalError ||
43
+ error instanceof GetStateError) {
44
+ return ErrorCategory.TERMINAL;
45
+ }
46
+ // Unknown errors default to retryable — the retry budget
47
+ // will promote them to terminal if they persist.
48
+ return ErrorCategory.RETRYABLE;
49
+ }
50
+ exports.classifyError = classifyError;
5
51
  class GetStateError extends Error {
6
52
  constructor(jobId) {
7
53
  super(`${jobId} Not Found`);
@@ -207,6 +253,17 @@ class ExecActivityError extends Error {
207
253
  }
208
254
  }
209
255
  exports.ExecActivityError = ExecActivityError;
256
+ class LeaseExpiredError extends Error {
257
+ constructor(deadlineMs, reservationTimeoutS) {
258
+ super(`Activity exceeded lease deadline (${deadlineMs}ms of ${reservationTimeoutS}s reservation). ` +
259
+ `Aborting to prevent unauthorized writes after lease expiry.`);
260
+ this.type = 'LeaseExpiredError';
261
+ this.code = enums_1.HMSH_CODE_DURABLE_FATAL;
262
+ this.deadlineMs = deadlineMs;
263
+ this.reservationTimeoutS = reservationTimeoutS;
264
+ }
265
+ }
266
+ exports.LeaseExpiredError = LeaseExpiredError;
210
267
  class CollationError extends Error {
211
268
  constructor(status, leg, stage, fault) {
212
269
  super('collation-error');
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.19.1",
3
+ "version": "0.19.3",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -71,6 +71,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
71
71
  telemetry.setActivityAttributes({});
72
72
  }
73
73
  catch (error) {
74
+ const category = (0, errors_1.classifyError)(error);
74
75
  if (error instanceof errors_1.CollationError) {
75
76
  //FORBIDDEN: Leg1 not complete — should not occur after the fix
76
77
  //that moved setHookSignal to post-commit. If seen, it indicates
@@ -78,6 +79,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
78
79
  //retry in processWebHookEvent can attempt recovery.
79
80
  if (error.fault === collator_1.CollationFaultType.FORBIDDEN) {
80
81
  instance.logger.warn('process-event-forbidden-retry', {
82
+ category,
81
83
  jid: instance.context.metadata.jid,
82
84
  aid: instance.metadata.aid,
83
85
  message: 'Leg1 not committed yet; rethrowing for stream retry',
@@ -98,6 +100,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
98
100
  collationErrorCount++;
99
101
  if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
100
102
  instance.logger.warn('process-event-collation-rate-exceeded', {
103
+ category,
101
104
  count: collationErrorCount,
102
105
  windowMs: COLLATION_WINDOW_MS,
103
106
  reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
@@ -108,6 +111,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
108
111
  });
109
112
  }
110
113
  instance.logger.warn(`process-event-${error.fault}-error`, {
114
+ category,
111
115
  jid: instance.context.metadata.jid,
112
116
  aid: instance.metadata.aid,
113
117
  error,
@@ -115,20 +119,28 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
115
119
  return;
116
120
  }
117
121
  else if (error instanceof errors_1.InactiveJobError) {
118
- instance.logger.info('process-event-inactive-job-error', { error });
122
+ instance.logger.info('process-event-inactive-job-error', {
123
+ category,
124
+ error,
125
+ });
119
126
  return;
120
127
  }
121
128
  else if (error instanceof errors_1.GenerationalError) {
122
129
  instance.logger.info('process-event-generational-job-error', {
130
+ category,
123
131
  error,
124
132
  });
125
133
  return;
126
134
  }
127
135
  else if (error instanceof errors_1.GetStateError) {
128
- instance.logger.info('process-event-get-job-error', { error });
136
+ instance.logger.info('process-event-get-job-error', {
137
+ category,
138
+ error,
139
+ });
129
140
  return;
130
141
  }
131
142
  instance.logger.error('activity-process-event-error', {
143
+ category,
132
144
  error,
133
145
  message: error.message,
134
146
  stack: error.stack,
@@ -97,6 +97,7 @@ async function dispatchAwait(instance, streamData, context) {
97
97
  // will deliver its RESULT back to the parent via the normal path.
98
98
  // Acknowledge the message so it doesn't loop.
99
99
  instance.logger.info('dispatch-await-child-exists', {
100
+ category: (0, errors_1.classifyError)(error),
100
101
  childJobId: error.jobId,
101
102
  parentJobId: streamData.metadata.jid,
102
103
  parentDad: streamData.metadata.dad,
@@ -32,6 +32,7 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
32
32
  private static readonly DEPTH_CHECK_INTERVAL_MS;
33
33
  private static readonly DEPTH_SCALE_UP_THRESHOLD;
34
34
  private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
35
+ private static readonly LEASE_BUFFER_S;
35
36
  constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
36
37
  /**
37
38
  * Adjusts reservation timeout based on stream depth. Called periodically
@@ -2,6 +2,7 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ConsumptionManager = void 0;
4
4
  const utils_1 = require("../../../modules/utils");
5
+ const errors_1 = require("../../../modules/errors");
5
6
  const telemetry_1 = require("../telemetry");
6
7
  const config_1 = require("../config");
7
8
  const stream_1 = require("../../../types/stream");
@@ -65,7 +66,8 @@ class ConsumptionManager {
65
66
  this.adaptiveBatchSize = Math.min(this.adaptiveBatchSize * 2, config_1.HMSH_BATCH_SIZE);
66
67
  }
67
68
  if (this.adaptiveReservationTimeout !== prevTimeout) {
68
- this.stream.reservationTimeout = this.adaptiveReservationTimeout;
69
+ this.stream.reservationTimeout =
70
+ this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S;
69
71
  this.logger.info('stream-reservation-timeout-adjusted', {
70
72
  stream,
71
73
  depth,
@@ -244,7 +246,7 @@ class ConsumptionManager {
244
246
  enableNotifications: true,
245
247
  notificationCallback,
246
248
  blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
247
- reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
249
+ reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S + ConsumptionManager.LEASE_BUFFER_S,
248
250
  });
249
251
  // Don't block here - let the worker initialization complete
250
252
  // The notification system will handle message processing asynchronously
@@ -297,7 +299,7 @@ class ConsumptionManager {
297
299
  messages = await this.stream.consumeMessages(stream, group, consumer, {
298
300
  blockTimeout: streamDuration,
299
301
  batchSize,
300
- reservationTimeout: this.adaptiveReservationTimeout,
302
+ reservationTimeout: this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S,
301
303
  enableBackoff: true,
302
304
  initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
303
305
  maxBackoff: config_1.MAX_STREAM_BACKOFF,
@@ -312,7 +314,7 @@ class ConsumptionManager {
312
314
  messages = await this.stream.consumeMessages(stream, group, consumer, {
313
315
  blockTimeout: streamDuration,
314
316
  batchSize,
315
- reservationTimeout: this.adaptiveReservationTimeout,
317
+ reservationTimeout: this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S,
316
318
  enableBackoff: false,
317
319
  maxRetries: 1,
318
320
  });
@@ -491,23 +493,63 @@ class ConsumptionManager {
491
493
  }
492
494
  return;
493
495
  }
496
+ // Lease deadline: the full configured reservation timeout (N).
497
+ // The reclaim interval is N+5s, so the deadline always fires
498
+ // before a reclaimant can pick up the message. This preserves
499
+ // the user's contract — if they set 30s, the function gets 30s.
500
+ const deadlineMs = this.adaptiveReservationTimeout * 1000;
494
501
  let output;
495
502
  const telemetry = new telemetry_1.RouterTelemetry(this.appId);
496
503
  try {
497
504
  telemetry.startStreamSpan(input, this.role);
498
- output = await this.execStreamLeg(input, stream, id, callback.bind(this));
505
+ let deadlineTimer;
506
+ const deadlinePromise = new Promise((_, reject) => {
507
+ deadlineTimer = setTimeout(() => reject(new errors_1.LeaseExpiredError(deadlineMs, this.adaptiveReservationTimeout)), deadlineMs);
508
+ });
509
+ try {
510
+ output = await Promise.race([
511
+ this.execStreamLeg(input, stream, id, callback.bind(this)),
512
+ deadlinePromise,
513
+ ]);
514
+ }
515
+ finally {
516
+ clearTimeout(deadlineTimer);
517
+ }
499
518
  telemetry.setStreamErrorFromOutput(output);
500
519
  this.errorCount = 0;
501
520
  }
502
521
  catch (err) {
503
- this.logger.error(`stream-read-one-error`, { group, stream, id, err });
522
+ const category = (0, errors_1.classifyError)(err);
523
+ if (err instanceof errors_1.LeaseExpiredError) {
524
+ // FATAL: lease expired — do NOT ack. The message remains in the
525
+ // stream for a reclaimant to pick up cleanly. Any partial writes
526
+ // from this consumer are idempotent via collation.
527
+ this.logger.error('stream-lease-expired', {
528
+ category,
529
+ group,
530
+ stream,
531
+ id,
532
+ deadlineMs,
533
+ reservationTimeoutS: this.adaptiveReservationTimeout,
534
+ topic: input.metadata?.topic,
535
+ activityId: input.metadata?.aid,
536
+ jobId: input.metadata?.jid,
537
+ });
538
+ telemetry.setStreamErrorFromException(err);
539
+ telemetry.endStreamSpan();
540
+ return; // NO ack — leave for reclaimant
541
+ }
542
+ this.logger.error(`stream-read-one-error`, {
543
+ category,
544
+ group,
545
+ stream,
546
+ id,
547
+ err,
548
+ });
504
549
  telemetry.setStreamErrorFromException(err);
505
550
  output = this.errorHandler.structureUnhandledError(input, err instanceof Error ? err : new Error(String(err)));
506
551
  }
507
552
  try {
508
- // When the ENGINE itself fails to process a message (e.g., schema not
509
- // found, missing subscription), do NOT republish the error back to the
510
- // engine stream — that creates an infinite poison loop. The engine
511
553
  // When the ENGINE encounters an infrastructure error (schema not found,
512
554
  // subscription missing — code 598), the message is permanently unprocessable.
513
555
  // Do NOT republish it — that creates an infinite poison loop. Only suppress
@@ -515,6 +557,7 @@ class ConsumptionManager {
515
557
  // duplicates, workflow failures) must still flow through normally.
516
558
  if (group === 'ENGINE' && output?.code === 598) {
517
559
  this.logger.error(`stream-engine-dispatch-fatal`, {
560
+ category: errors_1.ErrorCategory.FATAL,
518
561
  stream, id, group,
519
562
  aid: input.metadata?.aid,
520
563
  jid: input.metadata?.jid,
@@ -530,6 +573,7 @@ class ConsumptionManager {
530
573
  // If publishResponse fails, still ack the message to prevent
531
574
  // infinite reprocessing. Log the error for debugging.
532
575
  this.logger.error(`stream-publish-response-error`, {
576
+ category: (0, errors_1.classifyError)(publishErr),
533
577
  group, stream, id, error: publishErr,
534
578
  });
535
579
  this.errorCount++;
@@ -547,6 +591,7 @@ class ConsumptionManager {
547
591
  }
548
592
  catch (error) {
549
593
  this.logger.error(`stream-call-function-error`, {
594
+ category: (0, errors_1.classifyError)(error),
550
595
  error,
551
596
  input: input,
552
597
  stack: error.stack,
@@ -620,4 +665,8 @@ class ConsumptionManager {
620
665
  ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
621
666
  ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
622
667
  ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
668
+ // Buffer between the activity deadline (N) and the reclaim interval
669
+ // (N+5). The function gets the full configured timeout; the extra 5s
670
+ // ensures the deadline fires before a reclaimant can pick up the message.
671
+ ConsumptionManager.LEASE_BUFFER_S = 5;
623
672
  exports.ConsumptionManager = ConsumptionManager;
@@ -5,7 +5,9 @@ class StreamService {
5
5
  constructor(streamClient, storeClient, config = {}) {
6
6
  // Adaptive reservation timeout — set by the consumption manager
7
7
  // based on stream depth. Providers read this when reserving messages.
8
- this.reservationTimeout = 30;
8
+ // Includes a +5s buffer over the activity deadline so the deadline
9
+ // always fires before reclaim (see ConsumptionManager.LEASE_BUFFER_S).
10
+ this.reservationTimeout = 35;
9
11
  this.streamClient = streamClient;
10
12
  this.storeClient = storeClient;
11
13
  this.config = config;
@@ -161,17 +161,12 @@ async function ensureIndexes(client, schemaName) {
161
161
  WHERE expired_at IS NULL;
162
162
  `);
163
163
  // v0.18.0: add jid column to engine_streams for job tracing
164
- const { rows: jidCol } = await client.query(`SELECT 1 FROM information_schema.columns
165
- WHERE table_schema = $1 AND table_name = 'engine_streams' AND column_name = 'jid'
166
- LIMIT 1`, [schemaName]);
167
- if (jidCol.length === 0) {
168
- await client.query(`ALTER TABLE ${engineTable} ADD COLUMN jid TEXT NOT NULL DEFAULT ''`);
169
- await client.query(`
170
- CREATE INDEX IF NOT EXISTS idx_engine_streams_jid_created
171
- ON ${engineTable} (jid, created_at)
172
- WHERE jid != '';
173
- `);
174
- }
164
+ await client.query(`ALTER TABLE ${engineTable} ADD COLUMN IF NOT EXISTS jid TEXT NOT NULL DEFAULT ''`);
165
+ await client.query(`
166
+ CREATE INDEX IF NOT EXISTS idx_engine_streams_jid_created
167
+ ON ${engineTable} (jid, created_at)
168
+ WHERE jid != '';
169
+ `);
175
170
  }
176
171
  async function createTables(client, schemaName) {
177
172
  await client.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName};`);
@@ -223,7 +223,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
223
223
  while (retries < maxRetries) {
224
224
  retries++;
225
225
  const batchSize = options?.batchSize || 1;
226
- const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
226
+ const reservationTimeout = options?.reservationTimeout || (enums_1.HMSH_RESERVATION_TIMEOUT_S + 5);
227
227
  const res = await client.query(`UPDATE ${tableName}
228
228
  SET reserved_at = NOW(), reserved_by = $3
229
229
  WHERE id IN (
@@ -21,7 +21,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
21
21
  const maxBackoff = options?.maxBackoff ?? 3000;
22
22
  const maxRetries = options?.maxRetries ?? 3;
23
23
  const batchSize = options?.batchSize || 1;
24
- const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
24
+ const reservationTimeout = options?.reservationTimeout || (enums_1.HMSH_RESERVATION_TIMEOUT_S + 5);
25
25
  let backoff = initialBackoff;
26
26
  let retries = 0;
27
27
  try {
@@ -4,8 +4,10 @@ export interface PostgresClientOptions {
4
4
  user?: string;
5
5
  password?: string;
6
6
  database?: string;
7
+ connectionString?: string;
7
8
  max?: number;
8
9
  idleTimeoutMillis?: number;
10
+ ssl?: boolean | Record<string, unknown>;
9
11
  }
10
12
  export type PostgresJobEnumType = 'status' | 'jdata' | 'adata' | 'udata' | 'jmark' | 'hmark' | 'other';
11
13
  export type PostgresClassType = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.19.1",
3
+ "version": "0.19.3",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",