npm - @hotmeshio/hotmesh - Versions diffs - 0.14.5 → 0.14.7 - Mend

@hotmeshio/hotmesh 0.14.5 → 0.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +48 -111
package/build/modules/enums.d.ts +77 -0
package/build/modules/enums.js +79 -2
package/build/package.json +3 -2
package/build/services/activities/activity/process.js +31 -1
package/build/services/router/config/index.d.ts +2 -2
package/build/services/router/config/index.js +5 -1
package/build/services/router/consumption/index.d.ts +14 -0
package/build/services/router/consumption/index.js +71 -2
package/build/services/stream/index.d.ts +2 -0
package/build/services/stream/index.js +3 -0
package/build/services/stream/providers/postgres/messages.js +2 -1
package/build/services/stream/providers/postgres/postgres.js +2 -2
package/build/services/stream/providers/postgres/secured.js +2 -1
package/package.json +3 -2

package/README.md CHANGED Viewed

@@ -29,34 +29,17 @@ Install the package:
 npm install @hotmeshio/hotmesh
 ```
-The repo includes a `docker-compose.yml` that starts Postgres, NATS, and a development container:
+The repo includes a `docker-compose.yml` that starts Postgres and a development container:
 ```bash
 docker compose up -d
 ```
-Then follow the [Quick Start guide](https://github.com/hotmeshio/sdk-typescript/blob/main/docs/quickstart.md) for a progressive walkthrough — from a single trigger to conditional, parallel, and compositional workflows.
+See the [Durable API reference](https://docs.hotmesh.io/classes/services_durable.Durable.html) for the full API surface — workflows, activities, signals, child workflows, and more.
-## Two ways to write workflows
+## Writing workflows
-Both approaches reuse your activity functions:
-```typescript
-// activities.ts (shared between both approaches)
-export async function checkInventory(itemId: string): Promise<number> {
-  return getInventoryCount(itemId);
-}
-export async function reserveItem(itemId: string, quantity: number): Promise<string> {
-  return createReservation(itemId, quantity);
-}
-export async function notifyBackorder(itemId: string): Promise<void> {
-  await sendBackorderEmail(itemId);
-}
-```
-### Option 1: Code
+**Define the workflow** — plain TypeScript with branching, loops, and error handling. Activities are proxied so their results are checkpointed and replayed on restart.
 ```typescript
 // workflows.ts
@@ -76,124 +59,72 @@ export async function orderWorkflow(itemId: string, qty: number) {
     return 'backordered';
   }
 }
+```
-// main.ts
-import * as activities from './activities';
+**Start a worker** — connects to Postgres and begins processing workflows on the given task queue.
+```typescript
+// worker.ts
+import { Durable } from '@hotmeshio/hotmesh';
+import { Client as Postgres } from 'pg';
+import { orderWorkflow } from './workflows';
 const connection = {
   class: Postgres,
   options: { connectionString: 'postgresql://localhost:5432/mydb' }
 };
-await Durable.Worker.create({
+const worker = await Durable.Worker.create({
   connection,
   taskQueue: 'orders',
   workflow: orderWorkflow,
-  activities,
 });
+await worker.run();
+```
+**Run a workflow** — start an execution and await its result. The client can run in a different process, container, or server.
+```typescript
+// client.ts
+import { Durable } from '@hotmeshio/hotmesh';
+import { Client as Postgres } from 'pg';
+const connection = {
+  class: Postgres,
+  options: { connectionString: 'postgresql://localhost:5432/mydb' }
+};
 const client = new Durable.Client({ connection });
 const handle = await client.workflow.start({
   args: ['item-123', 5],
   taskQueue: 'orders',
   workflowName: 'orderWorkflow',
-  workflowId: 'order-456'
+  workflowId: 'order-456',
 });
 const result = await handle.result();
 ```
-### Option 2: YAML (functional approach)
-```yaml
-# order.yaml
-activities:
-  trigger:
-    type: trigger
-  checkInventory:
-    type: worker
-    topic: inventory.check
-  reserveItem:
-    type: worker
-    topic: inventory.reserve
-  notifyBackorder:
-    type: worker
-    topic: inventory.backorder.notify
-transitions:
-  trigger:
-    - to: checkInventory
-  checkInventory:
-    - to: reserveItem
-      conditions:
-        match:
-          - expected: true
-            actual:
-              '@pipe':
-                - ['{checkInventory.output.data.availableQty}', '{trigger.output.data.requestedQty}']
-                - ['{@conditional.gte}']
-    - to: notifyBackorder
-      conditions:
-        match:
-          - expected: false
-            actual:
-              '@pipe':
-                - ['{checkInventory.output.data.availableQty}', '{trigger.output.data.requestedQty}']
-                - ['{@conditional.gte}']
-```
+### Activities
-Deploy and run as follows:
-```typescript
-// main.ts (reuses same activities.ts)
-import * as activities from './activities';
+Activities are your side-effectful functions — database calls, API requests, anything non-deterministic. HotMesh checkpoints their results so they're never re-executed on replay.
-const hotMesh = await HotMesh.init({
-  appId: 'orders',
-  engine: { connection },
-  workers: [
-    {
-      topic: 'inventory.check',
-      connection,
-      callback: async (data) => {
-        const availableQty = await activities.checkInventory(data.data.itemId);
-        return { metadata: { ...data.metadata }, data: { availableQty } };
-      }
-    },
-    {
-      topic: 'inventory.reserve',
-      connection,
-      callback: async (data) => {
-        const reservationId = await activities.reserveItem(data.data.itemId, data.data.quantity);
-        return { metadata: { ...data.metadata }, data: { reservationId } };
-      }
-    },
-    {
-      topic: 'inventory.backorder.notify',
-      connection,
-      callback: async (data) => {
-        await activities.notifyBackorder(data.data.itemId);
-        return { metadata: { ...data.metadata } };
-      }
-    }
-  ]
-});
+```typescript
+// activities.ts
+export async function checkInventory(itemId: string): Promise<number> {
+  return getInventoryCount(itemId);
+}
-await hotMesh.deploy('./order.yaml');
-await hotMesh.activate('1');
+export async function reserveItem(itemId: string, quantity: number): Promise<string> {
+  return createReservation(itemId, quantity);
+}
-const result = await hotMesh.pubsub('order.requested', {
-  itemId: 'item-123',
-  requestedQty: 5
-});
+export async function notifyBackorder(itemId: string): Promise<void> {
+  await sendBackorderEmail(itemId);
+}
 ```
-Both compile to the same distributed execution model.
 ## Common patterns
 All snippets below run inside a workflow function (like `orderWorkflow` above). Durable methods are available as static imports:
@@ -321,6 +252,12 @@ There is no proprietary dashboard. Workflow state lives in Postgres, so use what
 - **Logging** — set `HMSH_LOGLEVEL` (`debug`, `info`, `warn`, `error`, `silent`) to control log verbosity.
 - **OpenTelemetry** — set `HMSH_TELEMETRY=true` to emit spans and metrics. Plug in any OTel-compatible collector.
+## YAML workflows
+HotMesh also supports a declarative YAML syntax. The same activities run in both modes — the difference is compilation speed. YAML workflows compile ~10x faster because the execution graph is declared upfront rather than discovered through replay. The tradeoff is expressiveness: YAML uses a functional pipe syntax for conditions and transformations instead of native TypeScript control flow.
+See the [Quick Start guide](https://github.com/hotmeshio/sdk-typescript/blob/main/docs/quickstart.md) for YAML examples and the `tests/functional/` directory for working implementations.
 ## Architecture
 For a deep dive into the transactional execution model — how every step is crash-safe, how the monotonic collation ledger guarantees exactly-once delivery, and how cycles and retries remain correct under arbitrary failure — see the [Collation Design Document](https://github.com/hotmeshio/sdk-typescript/blob/main/services/collator/README.md). The symbolic system (how to design workflows) and lifecycle details (how to deploy workflows) are covered in the [Architectural Overview](https://zenodo.org/records/12168558).

package/build/modules/enums.d.ts CHANGED Viewed

@@ -161,6 +161,83 @@ export declare const HMSH_BLOCK_TIME_MS: number;
 export declare const HMSH_XCLAIM_DELAY_MS: number;
 export declare const HMSH_XCLAIM_COUNT: number;
 export declare const HMSH_XPENDING_COUNT: number;
+export declare const HMSH_BATCH_SIZE: number;
+/**
+ * Minimum batch size under adaptive scaling (default: 1).
+ *
+ * When stream depth is high, the adaptive logic reduces batch size
+ * to relieve back-pressure. This value is the floor — the smallest
+ * batch the system will fetch per consume cycle.
+ *
+ *   - 1 (default): fully serial under max stress, safest
+ *   - 2: retains some parallelism while limiting contention
+ *
+ * Both values produce equivalent throughput in practice (~233s for
+ * 1000 concurrent workflows). The reduction from the configured
+ * HMSH_BATCH_SIZE is what matters most — the floor is a safety net.
+ */
+export declare const HMSH_BATCH_SIZE_MIN: number;
+/**
+ * Postgres stream reservation timeout in seconds (default: 30).
+ *
+ * This is the **starting** reservation timeout for the Postgres stream
+ * consumer. When a consumer reserves a message from the stream, it must
+ * acknowledge it within this window. If processing takes longer, the
+ * message becomes available to other consumers — causing duplicate
+ * delivery, collation errors, and wasted CPU.
+ *
+ * **Adaptive behavior:** The router automatically adjusts this timeout
+ * at runtime based on stream depth. When the queue backs up (depth > 100),
+ * the timeout doubles (up to `HMSH_RESERVATION_TIMEOUT_MAX_S`, default 1800s). When the queue drains (depth < 10),
+ * it halves back toward this configured default. This prevents duplicate
+ * delivery under burst load without manual intervention.
+ *
+ * **When to increase this value:** If you see `process-event-*-error`
+ * warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
+ * scaling up frequently, your baseline is too low for your workload.
+ * Setting a higher default reduces how aggressively the system must
+ * adapt during load spikes.
+ *
+ * **Symptoms of a value that is too low:**
+ *   - `collation-error` from `verifySyntheticInteger` (warn level)
+ *   - `process-event-collation-rate-exceeded` warning with guidance
+ *   - `stream-reservation-timeout-adjusted` logs showing rapid scaling
+ *   - Workflow stalls or timeouts under sustained concurrent load
+ *
+ * @example
+ * // Production with sustained high concurrency
+ * HMSH_RESERVATION_TIMEOUT_S=120
+ *
+ * // Low-latency environments with fast processing
+ * HMSH_RESERVATION_TIMEOUT_S=30  (default)
+ */
+export declare const HMSH_RESERVATION_TIMEOUT_S: number;
+/**
+ * Maximum reservation timeout in seconds for adaptive scaling (default: 1800).
+ *
+ * This is the ceiling for the adaptive reservation timeout — how far the
+ * system is allowed to stretch under sustained load. The adaptive logic
+ * only uses what it needs based on stream depth; this value defines the
+ * upper bound, not the steady state.
+ *
+ * The tradeoff is recovery time after a consumer crash: if a consumer
+ * reserves a message and dies, that message is unavailable until the
+ * timeout expires. A higher ceiling means longer recovery from crashes
+ * but prevents duplicate delivery under heavy sustained load.
+ *
+ * In practice, crashes are rare and the delay is bounded. The cost of
+ * a ceiling that is too low — duplicate delivery, collation errors,
+ * wasted CPU, workflow stalls — is far higher than a slightly longer
+ * recovery window after a crash.
+ *
+ * **Tuning guidance:**
+ *   - Dedicated infrastructure with ample CPU: lower ceiling is fine (600s)
+ *   - Shared/multi-tenant or CPU-constrained: use the default (1800s)
+ *   - Long-running batch imports or large workflow graphs: increase (3600s+)
+ *   - Cloud deployments without CPU contention: the adaptive logic will
+ *     naturally stay near the starting timeout and rarely approach the ceiling
+ */
+export declare const HMSH_RESERVATION_TIMEOUT_MAX_S: number;
 export declare const HMSH_EXPIRE_DURATION: number;
 export declare const HMSH_FIDELITY_SECONDS: number;
 export declare const HMSH_SCOUT_INTERVAL_SECONDS: number;

package/build/modules/enums.js CHANGED Viewed

@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
-exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = void 0;
+exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
+exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = void 0;
 /**
  * Determines the log level for the application. The default is 'info'.
  */
@@ -178,6 +178,83 @@ exports.HMSH_BLOCK_TIME_MS = process.env.HMSH_BLOCK_TIME_MS
 exports.HMSH_XCLAIM_DELAY_MS = parseInt(process.env.HMSH_XCLAIM_DELAY_MS, 10) || 1000 * 60;
 exports.HMSH_XCLAIM_COUNT = parseInt(process.env.HMSH_XCLAIM_COUNT, 10) || 3;
 exports.HMSH_XPENDING_COUNT = parseInt(process.env.HMSH_XPENDING_COUNT, 10) || 10;
+exports.HMSH_BATCH_SIZE = parseInt(process.env.HMSH_BATCH_SIZE, 10) || 10;
+/**
+ * Minimum batch size under adaptive scaling (default: 1).
+ *
+ * When stream depth is high, the adaptive logic reduces batch size
+ * to relieve back-pressure. This value is the floor — the smallest
+ * batch the system will fetch per consume cycle.
+ *
+ *   - 1 (default): fully serial under max stress, safest
+ *   - 2: retains some parallelism while limiting contention
+ *
+ * Both values produce equivalent throughput in practice (~233s for
+ * 1000 concurrent workflows). The reduction from the configured
+ * HMSH_BATCH_SIZE is what matters most — the floor is a safety net.
+ */
+exports.HMSH_BATCH_SIZE_MIN = parseInt(process.env.HMSH_BATCH_SIZE_MIN, 10) || 1;
+/**
+ * Postgres stream reservation timeout in seconds (default: 30).
+ *
+ * This is the **starting** reservation timeout for the Postgres stream
+ * consumer. When a consumer reserves a message from the stream, it must
+ * acknowledge it within this window. If processing takes longer, the
+ * message becomes available to other consumers — causing duplicate
+ * delivery, collation errors, and wasted CPU.
+ *
+ * **Adaptive behavior:** The router automatically adjusts this timeout
+ * at runtime based on stream depth. When the queue backs up (depth > 100),
+ * the timeout doubles (up to `HMSH_RESERVATION_TIMEOUT_MAX_S`, default 1800s). When the queue drains (depth < 10),
+ * it halves back toward this configured default. This prevents duplicate
+ * delivery under burst load without manual intervention.
+ *
+ * **When to increase this value:** If you see `process-event-*-error`
+ * warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
+ * scaling up frequently, your baseline is too low for your workload.
+ * Setting a higher default reduces how aggressively the system must
+ * adapt during load spikes.
+ *
+ * **Symptoms of a value that is too low:**
+ *   - `collation-error` from `verifySyntheticInteger` (warn level)
+ *   - `process-event-collation-rate-exceeded` warning with guidance
+ *   - `stream-reservation-timeout-adjusted` logs showing rapid scaling
+ *   - Workflow stalls or timeouts under sustained concurrent load
+ *
+ * @example
+ * // Production with sustained high concurrency
+ * HMSH_RESERVATION_TIMEOUT_S=120
+ *
+ * // Low-latency environments with fast processing
+ * HMSH_RESERVATION_TIMEOUT_S=30  (default)
+ */
+exports.HMSH_RESERVATION_TIMEOUT_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_S, 10) || 30;
+/**
+ * Maximum reservation timeout in seconds for adaptive scaling (default: 1800).
+ *
+ * This is the ceiling for the adaptive reservation timeout — how far the
+ * system is allowed to stretch under sustained load. The adaptive logic
+ * only uses what it needs based on stream depth; this value defines the
+ * upper bound, not the steady state.
+ *
+ * The tradeoff is recovery time after a consumer crash: if a consumer
+ * reserves a message and dies, that message is unavailable until the
+ * timeout expires. A higher ceiling means longer recovery from crashes
+ * but prevents duplicate delivery under heavy sustained load.
+ *
+ * In practice, crashes are rare and the delay is bounded. The cost of
+ * a ceiling that is too low — duplicate delivery, collation errors,
+ * wasted CPU, workflow stalls — is far higher than a slightly longer
+ * recovery window after a crash.
+ *
+ * **Tuning guidance:**
+ *   - Dedicated infrastructure with ample CPU: lower ceiling is fine (600s)
+ *   - Shared/multi-tenant or CPU-constrained: use the default (1800s)
+ *   - Long-running batch imports or large workflow graphs: increase (3600s+)
+ *   - Cloud deployments without CPU contention: the adaptive logic will
+ *     naturally stay near the starting timeout and rarely approach the ceiling
+ */
+exports.HMSH_RESERVATION_TIMEOUT_MAX_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_MAX_S, 10) || 1800;
 // TASK WORKER
 exports.HMSH_EXPIRE_DURATION = parseInt(process.env.HMSH_EXPIRE_DURATION, 10) || 1;
 const BASE_FIDELITY_SECONDS = 5;

package/build/package.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
     "name": "@hotmeshio/hotmesh",
-    "version": "0.14.5",
+    "version": "0.14.7",
     "description": "Durable Workflow",
     "main": "./build/index.js",
     "types": "./build/index.d.ts",
-    "homepage": "https://github.com/hotmeshio/sdk-typescript/",
+    "homepage": "https://docs.hotmesh.io/",
     "publishConfig": {
         "access": "public"
     },
@@ -30,6 +30,7 @@
         "test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
         "test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
         "test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
+        "test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
         "test:durable:fatal": "vitest run tests/durable/fatal",
         "test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
         "test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",

package/build/services/activities/activity/process.js CHANGED Viewed

@@ -6,6 +6,11 @@ const errors_1 = require("../../../modules/errors");
 const collator_1 = require("../../collator");
 const telemetry_1 = require("../../telemetry");
 const stream_1 = require("../../../types/stream");
+// Module-level collation error tracking (shared by every activity instance in this process) for reservation timeout detection
+let collationErrorCount = 0;
+let collationWindowStart = Date.now();
+const COLLATION_WARN_THRESHOLD = 10;
+const COLLATION_WINDOW_MS = 60000;
 async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, code = 200, type = 'output') {
     instance.setLeg(2);
     const jid = instance.context.metadata.jid;
@@ -66,7 +71,32 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
     }
     catch (error) {
         if (error instanceof errors_1.CollationError) {
-            instance.logger.info(`process-event-${error.fault}-error`, { error });
+            // INACTIVE is legitimate duplicate detection — the Postgres atomic
+            // CTE (collateLeg2Entry) serializes via row locks, so the GUID
+            // ledger value is correct. Silent ack is the right behavior:
+            // the work was already done by a prior delivery of this message.
+            const now = Date.now();
+            if (now - collationWindowStart > COLLATION_WINDOW_MS) {
+                collationErrorCount = 0;
+                collationWindowStart = now;
+            }
+            collationErrorCount++;
+            if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
+                instance.logger.warn('process-event-collation-rate-exceeded', {
+                    count: collationErrorCount,
+                    windowMs: COLLATION_WINDOW_MS,
+                    reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
+                    message: `${COLLATION_WARN_THRESHOLD} collation errors in ${COLLATION_WINDOW_MS / 1000}s. ` +
+                        `This typically means HMSH_RESERVATION_TIMEOUT_S (currently ${enums_1.HMSH_RESERVATION_TIMEOUT_S}s) ` +
+                        `is too short for your workload — messages are being re-reserved before processing completes, ` +
+                        `causing duplicate delivery. Increase HMSH_RESERVATION_TIMEOUT_S.`,
+                });
+            }
+            instance.logger.warn(`process-event-${error.fault}-error`, {
+                jid: instance.context.metadata.jid,
+                aid: instance.metadata.aid,
+                error,
+            });
             return;
         }
         else if (error instanceof errors_1.InactiveJobError) {

package/build/services/router/config/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
+import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
 import { RouterConfig } from '../../../types/stream';
 export declare class RouterConfigManager {
     static validateThrottle(delayInMillis: number): void;
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
         readonly: boolean;
     };
 }
-export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
+export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };

package/build/services/router/config/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
+exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
 const enums_1 = require("../../../modules/enums");
 Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
 Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
@@ -12,6 +12,10 @@ Object.defineProperty(exports, "HMSH_STATUS_UNKNOWN", { enumerable: true, get: f
 Object.defineProperty(exports, "HMSH_XCLAIM_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_COUNT; } });
 Object.defineProperty(exports, "HMSH_XCLAIM_DELAY_MS", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_DELAY_MS; } });
 Object.defineProperty(exports, "HMSH_XPENDING_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XPENDING_COUNT; } });
+Object.defineProperty(exports, "HMSH_BATCH_SIZE", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE; } });
+Object.defineProperty(exports, "HMSH_BATCH_SIZE_MIN", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE_MIN; } });
+Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_S; } });
+Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_MAX_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_MAX_S; } });
 Object.defineProperty(exports, "MAX_DELAY", { enumerable: true, get: function () { return enums_1.MAX_DELAY; } });
 Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.MAX_STREAM_BACKOFF; } });
 Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });

package/build/services/router/consumption/index.d.ts CHANGED Viewed

@@ -26,7 +26,21 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
     private set hasReachedMaxBackoff(value);
     private router;
     private retry;
+    private adaptiveReservationTimeout;
+    private adaptiveBatchSize;
+    private lastDepthCheckAt;
+    private static readonly DEPTH_CHECK_INTERVAL_MS;
+    private static readonly DEPTH_SCALE_UP_THRESHOLD;
+    private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
     constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
+    /**
+     * Adjusts reservation timeout and batch size based on stream depth.
+     * Called periodically from the consume loop. When depth is high:
+     *   - reservation timeout grows (prevents duplicate re-reservation)
+     *   - batch size shrinks (reduces in-memory blocking, shares the stream)
+     * When depth drops, both restore toward configured defaults.
+     */
+    private adjustConsumptionPressure;
     createGroup(stream: string, group: string): Promise<void>;
     publishMessage(topic: string, streamData: StreamData | StreamDataResponse, transaction?: ProviderTransaction): Promise<string | ProviderTransaction>;
     consumeMessages(stream: string, group: string, consumer: string, callback: (streamData: StreamData) => Promise<StreamDataResponse | void>): Promise<void>;

package/build/services/router/consumption/index.js CHANGED Viewed

@@ -17,6 +17,14 @@ class ConsumptionManager {
     get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
     set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
     constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
+        // Adaptive consumption pressure — scales reservation timeout AND batch
+        // size based on stream depth. Under load: timeout grows (prevents
+        // duplicate re-reservation) and batch size shrinks (reduces in-memory
+        // blocking, lets other consumers share the stream). When idle, both
+        // restore toward configured defaults.
+        this.adaptiveReservationTimeout = config_1.HMSH_RESERVATION_TIMEOUT_S;
+        this.adaptiveBatchSize = config_1.HMSH_BATCH_SIZE;
+        this.lastDepthCheckAt = 0;
         this.stream = stream;
         this.logger = logger;
         this.throttleManager = throttleManager;
@@ -29,6 +37,57 @@ class ConsumptionManager {
         this.router = router;
         this.retry = retry;
     }
+    /**
+     * Adjusts the reservation timeout and batch size based on stream depth.
+     * Called from the consume loop; the depth query runs at most once per
+     * `DEPTH_CHECK_INTERVAL_MS`, and calls inside that window return at once.
+     *
+     * - depth above `DEPTH_SCALE_UP_THRESHOLD`: the timeout doubles (capped at
+     *   `HMSH_RESERVATION_TIMEOUT_MAX_S`; prevents duplicate re-reservation)
+     *   and the batch size halves (never below `HMSH_BATCH_SIZE_MIN`; reduces
+     *   in-memory blocking, shares the stream).
+     * - depth below `DEPTH_SCALE_DOWN_THRESHOLD`: the timeout halves (never
+     *   below `HMSH_RESERVATION_TIMEOUT_S`) and the batch size doubles (never
+     *   above `HMSH_BATCH_SIZE`).
+     * - depth between the two thresholds: both values are left unchanged.
+     *
+     * A changed timeout is also written to `this.stream.reservationTimeout`,
+     * and each change is logged at info level. Any error thrown while
+     * checking or adjusting is swallowed so the consume loop keeps running.
+     *
+     * @param stream - name of the stream whose depth is measured
+     * @returns a promise that resolves once any adjustment has been applied
+     */
+    async adjustConsumptionPressure(stream) {
+        const now = Date.now();
+        if (now - this.lastDepthCheckAt < ConsumptionManager.DEPTH_CHECK_INTERVAL_MS) {
+            return;
+        }
+        this.lastDepthCheckAt = now; // stamped before the await, so a failing check is throttled too
+        try {
+            const depth = await this.stream.getStreamDepth(stream);
+            const prevTimeout = this.adaptiveReservationTimeout;
+            const prevBatch = this.adaptiveBatchSize;
+            if (depth > ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD) {
+                // Under load: double the timeout (capped), halve the batch size (floored)
+                this.adaptiveReservationTimeout = Math.min(this.adaptiveReservationTimeout * 2, config_1.HMSH_RESERVATION_TIMEOUT_MAX_S);
+                this.adaptiveBatchSize = Math.max(Math.floor(this.adaptiveBatchSize / 2), config_1.HMSH_BATCH_SIZE_MIN);
+            }
+            else if (depth < ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD) {
+                // Load has eased: halve the timeout and double the batch size, each bounded by its configured default
+                this.adaptiveReservationTimeout = Math.max(Math.floor(this.adaptiveReservationTimeout / 2), config_1.HMSH_RESERVATION_TIMEOUT_S);
+                this.adaptiveBatchSize = Math.min(this.adaptiveBatchSize * 2, config_1.HMSH_BATCH_SIZE);
+            }
+            if (this.adaptiveReservationTimeout !== prevTimeout) {
+                this.stream.reservationTimeout = this.adaptiveReservationTimeout;
+                this.logger.info('stream-reservation-timeout-adjusted', {
+                    stream,
+                    depth,
+                    previousTimeoutS: prevTimeout,
+                    newTimeoutS: this.adaptiveReservationTimeout,
+                    configuredDefaultS: config_1.HMSH_RESERVATION_TIMEOUT_S,
+                });
+            }
+            if (this.adaptiveBatchSize !== prevBatch) {
+                this.logger.info('stream-batch-size-adjusted', {
+                    stream,
+                    depth,
+                    previousBatchSize: prevBatch,
+                    newBatchSize: this.adaptiveBatchSize,
+                    configuredDefaultBatchSize: config_1.HMSH_BATCH_SIZE,
+                });
+            }
+        }
+        catch {
+            // Stream depth check is best-effort; errors are deliberately ignored so the consume loop does not fail
+        }
+    }
     async createGroup(stream, group) {
         try {
             await this.stream.createConsumerGroup(stream, group);
@@ -107,6 +166,8 @@ class ConsumptionManager {
             if (this.lifecycleManager.isStopped(group, consumer, stream)) {
                 return;
             }
+            // Adapt reservation timeout and batch size based on stream depth
+            await this.adjustConsumptionPressure(stream);
             await this.throttleManager.customSleep(); // respect throttle
             if (this.lifecycleManager.isStopped(group, consumer, stream) ||
                 this.throttleManager.isPaused()) {
@@ -183,6 +244,7 @@ class ConsumptionManager {
                 enableNotifications: true,
                 notificationCallback,
                 blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
+                reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
             });
             // Don't block here - let the worker initialization complete
             // The notification system will handle message processing asynchronously
@@ -225,14 +287,17 @@ class ConsumptionManager {
             const streamDuration = config_1.HMSH_BLOCK_TIME_MS + Math.round(config_1.HMSH_BLOCK_TIME_MS * Math.random());
             try {
                 let messages = [];
+                // Adapt reservation timeout and batch size based on stream depth
+                await this.adjustConsumptionPressure(stream);
                 if (!this.hasReachedMaxBackoff) {
                     // Normal mode: try with backoff and finite retries
                     const features = this.stream.getProviderSpecificFeatures();
                     const isPostgres = features.supportsParallelProcessing;
-                    const batchSize = isPostgres ? 10 : 1; // Use batch size of 10 for PostgreSQL, 1 for others
+                    const batchSize = isPostgres ? this.adaptiveBatchSize : 1;
                     messages = await this.stream.consumeMessages(stream, group, consumer, {
                         blockTimeout: streamDuration,
                         batchSize,
+                        reservationTimeout: this.adaptiveReservationTimeout,
                         enableBackoff: true,
                         initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
                         maxBackoff: config_1.MAX_STREAM_BACKOFF,
@@ -243,10 +308,11 @@ class ConsumptionManager {
                     // Fallback mode: just try once, no backoff
                     const features = this.stream.getProviderSpecificFeatures();
                     const isPostgres = features.supportsParallelProcessing;
-                    const batchSize = isPostgres ? 10 : 1; // Use batch size of 10 for PostgreSQL, 1 for others
+                    const batchSize = isPostgres ? this.adaptiveBatchSize : 1;
                     messages = await this.stream.consumeMessages(stream, group, consumer, {
                         blockTimeout: streamDuration,
                         batchSize,
+                        reservationTimeout: this.adaptiveReservationTimeout,
                         enableBackoff: false,
                         maxRetries: 1,
                     });
@@ -542,4 +608,7 @@ class ConsumptionManager {
         return Array.isArray(result) && Array.isArray(result[0]);
     }
 }
+ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
+ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
+ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
 exports.ConsumptionManager = ConsumptionManager;

package/build/services/stream/index.d.ts CHANGED Viewed

@@ -22,6 +22,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
         batchSize?: number;
         blockTimeout?: number;
         autoAck?: boolean;
+        reservationTimeout?: number;
         enableBackoff?: boolean;
         initialBackoff?: number;
         maxBackoff?: number;
@@ -41,6 +42,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
         maxRetries?: number;
         limit?: number;
     }): Promise<StreamMessage[]>;
+    reservationTimeout: number;
     abstract getStreamStats(streamName: string): Promise<StreamStats>;
     abstract getStreamDepth(streamName: string): Promise<number>;
     abstract getStreamDepths(streamName: {

package/build/services/stream/index.js CHANGED Viewed

@@ -3,6 +3,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.StreamService = void 0;
 class StreamService {
     constructor(streamClient, storeClient, config = {}) {
+        // Adaptive reservation timeout — set by the consumption manager
+        // based on stream depth. Providers read this when reserving messages.
+        this.reservationTimeout = 30;
         this.streamClient = streamClient;
         this.storeClient = storeClient;
         this.config = config;

package/build/services/stream/providers/postgres/messages.js CHANGED Viewed

@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.retryMessages = exports.deadLetterMessages = exports.ackAndDelete = exports.deleteMessages = exports.acknowledgeMessages = exports.fetchMessages = exports.buildPublishSQL = exports.publishMessages = void 0;
+const enums_1 = require("../../../../modules/enums");
 const utils_1 = require("../../../../modules/utils");
 /**
  * Publish messages to a stream. Can be used within a transaction.
@@ -205,7 +206,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
         while (retries < maxRetries) {
             retries++;
             const batchSize = options?.batchSize || 1;
-            const reservationTimeout = options?.reservationTimeout || 30;
+            const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
             const res = await client.query(`UPDATE ${tableName}
          SET reserved_at = NOW(), reserved_by = $3
          WHERE id IN (

package/build/services/stream/providers/postgres/postgres.js CHANGED Viewed

@@ -79,12 +79,12 @@ class PostgresStreamService extends index_1.StreamService {
     }
     async checkForMissedMessages() {
         await this.notificationManager.checkForMissedMessages(async (instance, consumer) => {
-            return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
+            return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: instance.reservationTimeout, enableBackoff: false, maxRetries: 1 });
         });
     }
     async fetchAndDeliverMessages(consumer) {
         try {
-            const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
+            const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
             if (messages.length > 0) {
                 consumer.callback(messages);
             }

package/build/services/stream/providers/postgres/secured.js CHANGED Viewed

@@ -8,6 +8,7 @@
  */
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.publishMessagesSecured = exports.deadLetterMessagesSecured = exports.ackAndDeleteSecured = exports.fetchMessagesSecured = void 0;
+const enums_1 = require("../../../../modules/enums");
 const utils_1 = require("../../../../modules/utils");
 const utils_2 = require("../../../../modules/utils");
 /**
@@ -19,7 +20,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
     const maxBackoff = options?.maxBackoff ?? 3000;
     const maxRetries = options?.maxRetries ?? 3;
     const batchSize = options?.batchSize || 1;
-    const reservationTimeout = options?.reservationTimeout || 30;
+    const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
     let backoff = initialBackoff;
     let retries = 0;
     try {

package/package.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "name": "@hotmeshio/hotmesh",
-  "version": "0.14.5",
+  "version": "0.14.7",
   "description": "Durable Workflow",
   "main": "./build/index.js",
   "types": "./build/index.d.ts",
-  "homepage": "https://github.com/hotmeshio/sdk-typescript/",
+  "homepage": "https://docs.hotmesh.io/",
   "publishConfig": {
     "access": "public"
   },
@@ -30,6 +30,7 @@
     "test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
     "test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
     "test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
+    "test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
     "test:durable:fatal": "vitest run tests/durable/fatal",
     "test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
     "test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",