@hotmeshio/hotmesh 0.14.5 → 0.14.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -111
- package/build/modules/enums.d.ts +77 -0
- package/build/modules/enums.js +79 -2
- package/build/package.json +3 -2
- package/build/services/activities/activity/process.js +31 -1
- package/build/services/router/config/index.d.ts +2 -2
- package/build/services/router/config/index.js +5 -1
- package/build/services/router/consumption/index.d.ts +14 -0
- package/build/services/router/consumption/index.js +71 -2
- package/build/services/stream/index.d.ts +2 -0
- package/build/services/stream/index.js +3 -0
- package/build/services/stream/providers/postgres/messages.js +2 -1
- package/build/services/stream/providers/postgres/postgres.js +2 -2
- package/build/services/stream/providers/postgres/secured.js +2 -1
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -29,34 +29,17 @@ Install the package:
|
|
|
29
29
|
npm install @hotmeshio/hotmesh
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
-
The repo includes a `docker-compose.yml` that starts Postgres
|
|
32
|
+
The repo includes a `docker-compose.yml` that starts Postgres and a development container:
|
|
33
33
|
|
|
34
34
|
```bash
|
|
35
35
|
docker compose up -d
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
See the [Durable API reference](https://docs.hotmesh.io/classes/services_durable.Durable.html) for the full API surface — workflows, activities, signals, child workflows, and more.
|
|
39
39
|
|
|
40
|
-
##
|
|
40
|
+
## Writing workflows
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
```typescript
|
|
45
|
-
// activities.ts (shared between both approaches)
|
|
46
|
-
export async function checkInventory(itemId: string): Promise<number> {
|
|
47
|
-
return getInventoryCount(itemId);
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
export async function reserveItem(itemId: string, quantity: number): Promise<string> {
|
|
51
|
-
return createReservation(itemId, quantity);
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
export async function notifyBackorder(itemId: string): Promise<void> {
|
|
55
|
-
await sendBackorderEmail(itemId);
|
|
56
|
-
}
|
|
57
|
-
```
|
|
58
|
-
|
|
59
|
-
### Option 1: Code
|
|
42
|
+
**Define the workflow** — plain TypeScript with branching, loops, and error handling. Activities are proxied so their results are checkpointed and replayed on restart.
|
|
60
43
|
|
|
61
44
|
```typescript
|
|
62
45
|
// workflows.ts
|
|
@@ -76,124 +59,72 @@ export async function orderWorkflow(itemId: string, qty: number) {
|
|
|
76
59
|
return 'backordered';
|
|
77
60
|
}
|
|
78
61
|
}
|
|
62
|
+
```
|
|
79
63
|
|
|
80
|
-
|
|
81
|
-
|
|
64
|
+
**Start a worker** — connects to Postgres and begins processing workflows on the given task queue.
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
// worker.ts
|
|
68
|
+
import { Durable } from '@hotmeshio/hotmesh';
|
|
69
|
+
import { Client as Postgres } from 'pg';
|
|
70
|
+
import { orderWorkflow } from './workflows';
|
|
82
71
|
|
|
83
72
|
const connection = {
|
|
84
73
|
class: Postgres,
|
|
85
74
|
options: { connectionString: 'postgresql://localhost:5432/mydb' }
|
|
86
75
|
};
|
|
87
76
|
|
|
88
|
-
await Durable.Worker.create({
|
|
77
|
+
const worker = await Durable.Worker.create({
|
|
89
78
|
connection,
|
|
90
79
|
taskQueue: 'orders',
|
|
91
80
|
workflow: orderWorkflow,
|
|
92
|
-
activities,
|
|
93
81
|
});
|
|
94
82
|
|
|
83
|
+
await worker.run();
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Run a workflow** — start an execution and await its result. The client can run in a different process, container, or server.
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
// client.ts
|
|
90
|
+
import { Durable } from '@hotmeshio/hotmesh';
|
|
91
|
+
import { Client as Postgres } from 'pg';
|
|
92
|
+
|
|
93
|
+
const connection = {
|
|
94
|
+
class: Postgres,
|
|
95
|
+
options: { connectionString: 'postgresql://localhost:5432/mydb' }
|
|
96
|
+
};
|
|
97
|
+
|
|
95
98
|
const client = new Durable.Client({ connection });
|
|
96
99
|
const handle = await client.workflow.start({
|
|
97
100
|
args: ['item-123', 5],
|
|
98
101
|
taskQueue: 'orders',
|
|
99
102
|
workflowName: 'orderWorkflow',
|
|
100
|
-
workflowId: 'order-456'
|
|
103
|
+
workflowId: 'order-456',
|
|
101
104
|
});
|
|
102
105
|
|
|
103
106
|
const result = await handle.result();
|
|
104
107
|
```
|
|
105
108
|
|
|
106
|
-
###
|
|
107
|
-
|
|
108
|
-
```yaml
|
|
109
|
-
# order.yaml
|
|
110
|
-
activities:
|
|
111
|
-
trigger:
|
|
112
|
-
type: trigger
|
|
113
|
-
|
|
114
|
-
checkInventory:
|
|
115
|
-
type: worker
|
|
116
|
-
topic: inventory.check
|
|
117
|
-
|
|
118
|
-
reserveItem:
|
|
119
|
-
type: worker
|
|
120
|
-
topic: inventory.reserve
|
|
121
|
-
|
|
122
|
-
notifyBackorder:
|
|
123
|
-
type: worker
|
|
124
|
-
topic: inventory.backorder.notify
|
|
125
|
-
|
|
126
|
-
transitions:
|
|
127
|
-
trigger:
|
|
128
|
-
- to: checkInventory
|
|
129
|
-
|
|
130
|
-
checkInventory:
|
|
131
|
-
- to: reserveItem
|
|
132
|
-
conditions:
|
|
133
|
-
match:
|
|
134
|
-
- expected: true
|
|
135
|
-
actual:
|
|
136
|
-
'@pipe':
|
|
137
|
-
- ['{checkInventory.output.data.availableQty}', '{trigger.output.data.requestedQty}']
|
|
138
|
-
- ['{@conditional.gte}']
|
|
139
|
-
|
|
140
|
-
- to: notifyBackorder
|
|
141
|
-
conditions:
|
|
142
|
-
match:
|
|
143
|
-
- expected: false
|
|
144
|
-
actual:
|
|
145
|
-
'@pipe':
|
|
146
|
-
- ['{checkInventory.output.data.availableQty}', '{trigger.output.data.requestedQty}']
|
|
147
|
-
- ['{@conditional.gte}']
|
|
148
|
-
```
|
|
109
|
+
### Activities
|
|
149
110
|
|
|
150
|
-
|
|
151
|
-
```typescript
|
|
152
|
-
// main.ts (reuses same activities.ts)
|
|
153
|
-
import * as activities from './activities';
|
|
111
|
+
Activities are your side-effectful functions — database calls, API requests, anything non-deterministic. HotMesh checkpoints their results so they're never re-executed on replay.
|
|
154
112
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
topic: 'inventory.check',
|
|
161
|
-
connection,
|
|
162
|
-
callback: async (data) => {
|
|
163
|
-
const availableQty = await activities.checkInventory(data.data.itemId);
|
|
164
|
-
return { metadata: { ...data.metadata }, data: { availableQty } };
|
|
165
|
-
}
|
|
166
|
-
},
|
|
167
|
-
{
|
|
168
|
-
topic: 'inventory.reserve',
|
|
169
|
-
connection,
|
|
170
|
-
callback: async (data) => {
|
|
171
|
-
const reservationId = await activities.reserveItem(data.data.itemId, data.data.quantity);
|
|
172
|
-
return { metadata: { ...data.metadata }, data: { reservationId } };
|
|
173
|
-
}
|
|
174
|
-
},
|
|
175
|
-
{
|
|
176
|
-
topic: 'inventory.backorder.notify',
|
|
177
|
-
connection,
|
|
178
|
-
callback: async (data) => {
|
|
179
|
-
await activities.notifyBackorder(data.data.itemId);
|
|
180
|
-
return { metadata: { ...data.metadata } };
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
]
|
|
184
|
-
});
|
|
113
|
+
```typescript
|
|
114
|
+
// activities.ts
|
|
115
|
+
export async function checkInventory(itemId: string): Promise<number> {
|
|
116
|
+
return getInventoryCount(itemId);
|
|
117
|
+
}
|
|
185
118
|
|
|
186
|
-
|
|
187
|
-
|
|
119
|
+
export async function reserveItem(itemId: string, quantity: number): Promise<string> {
|
|
120
|
+
return createReservation(itemId, quantity);
|
|
121
|
+
}
|
|
188
122
|
|
|
189
|
-
|
|
190
|
-
itemId
|
|
191
|
-
|
|
192
|
-
});
|
|
123
|
+
export async function notifyBackorder(itemId: string): Promise<void> {
|
|
124
|
+
await sendBackorderEmail(itemId);
|
|
125
|
+
}
|
|
193
126
|
```
|
|
194
127
|
|
|
195
|
-
Both compile to the same distributed execution model.
|
|
196
|
-
|
|
197
128
|
## Common patterns
|
|
198
129
|
|
|
199
130
|
All snippets below run inside a workflow function (like `orderWorkflow` above). Durable methods are available as static imports:
|
|
@@ -321,6 +252,12 @@ There is no proprietary dashboard. Workflow state lives in Postgres, so use what
|
|
|
321
252
|
- **Logging** — set `HMSH_LOGLEVEL` (`debug`, `info`, `warn`, `error`, `silent`) to control log verbosity.
|
|
322
253
|
- **OpenTelemetry** — set `HMSH_TELEMETRY=true` to emit spans and metrics. Plug in any OTel-compatible collector.
|
|
323
254
|
|
|
255
|
+
## YAML workflows
|
|
256
|
+
|
|
257
|
+
HotMesh also supports a declarative YAML syntax. The same activities run in both modes — the difference is compilation speed. YAML workflows compile ~10x faster because the execution graph is declared upfront rather than discovered through replay. The tradeoff is expressiveness: YAML uses a functional pipe syntax for conditions and transformations instead of native TypeScript control flow.
|
|
258
|
+
|
|
259
|
+
See the [Quick Start guide](https://github.com/hotmeshio/sdk-typescript/blob/main/docs/quickstart.md) for YAML examples and the `tests/functional/` directory for working implementations.
|
|
260
|
+
|
|
324
261
|
## Architecture
|
|
325
262
|
|
|
326
263
|
For a deep dive into the transactional execution model — how every step is crash-safe, how the monotonic collation ledger guarantees exactly-once delivery, and how cycles and retries remain correct under arbitrary failure — see the [Collation Design Document](https://github.com/hotmeshio/sdk-typescript/blob/main/services/collator/README.md). The symbolic system (how to design workflows) and lifecycle details (how to deploy workflows) are covered in the [Architectural Overview](https://zenodo.org/records/12168558).
|
package/build/modules/enums.d.ts
CHANGED
|
@@ -161,6 +161,83 @@ export declare const HMSH_BLOCK_TIME_MS: number;
|
|
|
161
161
|
export declare const HMSH_XCLAIM_DELAY_MS: number;
|
|
162
162
|
export declare const HMSH_XCLAIM_COUNT: number;
|
|
163
163
|
export declare const HMSH_XPENDING_COUNT: number;
|
|
164
|
+
export declare const HMSH_BATCH_SIZE: number;
|
|
165
|
+
/**
|
|
166
|
+
* Minimum batch size under adaptive scaling (default: 1).
|
|
167
|
+
*
|
|
168
|
+
* When stream depth is high, the adaptive logic reduces batch size
|
|
169
|
+
* to relieve back-pressure. This value is the floor — the smallest
|
|
170
|
+
* batch the system will fetch per consume cycle.
|
|
171
|
+
*
|
|
172
|
+
* - 1 (default): fully serial under max stress, safest
|
|
173
|
+
* - 2: retains some parallelism while limiting contention
|
|
174
|
+
*
|
|
175
|
+
* Both values produce equivalent throughput in practice (~233s for
|
|
176
|
+
* 1000 concurrent workflows). The reduction from the configured
|
|
177
|
+
* HMSH_BATCH_SIZE is what matters most — the floor is a safety net.
|
|
178
|
+
*/
|
|
179
|
+
export declare const HMSH_BATCH_SIZE_MIN: number;
|
|
180
|
+
/**
|
|
181
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
182
|
+
*
|
|
183
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
184
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
185
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
186
|
+
* message becomes available to other consumers — causing duplicate
|
|
187
|
+
* delivery, collation errors, and wasted CPU.
|
|
188
|
+
*
|
|
189
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
190
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
191
|
+
* the timeout doubles (up to HMSH_RESERVATION_TIMEOUT_MAX_S, default 1800s). When the queue drains (depth < 10),
|
|
192
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
193
|
+
* delivery under burst load without manual intervention.
|
|
194
|
+
*
|
|
195
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
196
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
197
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
198
|
+
* Setting a higher default reduces how aggressively the system must
|
|
199
|
+
* adapt during load spikes.
|
|
200
|
+
*
|
|
201
|
+
* **Symptoms of a value that is too low:**
|
|
202
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
203
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
204
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
205
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
206
|
+
*
|
|
207
|
+
* @example
|
|
208
|
+
* // Production with sustained high concurrency
|
|
209
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
210
|
+
*
|
|
211
|
+
* // Low-latency environments with fast processing
|
|
212
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
213
|
+
*/
|
|
214
|
+
export declare const HMSH_RESERVATION_TIMEOUT_S: number;
|
|
215
|
+
/**
|
|
216
|
+
* Maximum reservation timeout in seconds for adaptive scaling (default: 1800).
|
|
217
|
+
*
|
|
218
|
+
* This is the ceiling for the adaptive reservation timeout — how far the
|
|
219
|
+
* system is allowed to stretch under sustained load. The adaptive logic
|
|
220
|
+
* only uses what it needs based on stream depth; this value defines the
|
|
221
|
+
* upper bound, not the steady state.
|
|
222
|
+
*
|
|
223
|
+
* The tradeoff is recovery time after a consumer crash: if a consumer
|
|
224
|
+
* reserves a message and dies, that message is unavailable until the
|
|
225
|
+
* timeout expires. A higher ceiling means longer recovery from crashes
|
|
226
|
+
* but prevents duplicate delivery under heavy sustained load.
|
|
227
|
+
*
|
|
228
|
+
* In practice, crashes are rare and the delay is bounded. The cost of
|
|
229
|
+
* a ceiling that is too low — duplicate delivery, collation errors,
|
|
230
|
+
* wasted CPU, workflow stalls — is far higher than a slightly longer
|
|
231
|
+
* recovery window after a crash.
|
|
232
|
+
*
|
|
233
|
+
* **Tuning guidance:**
|
|
234
|
+
* - Dedicated infrastructure with ample CPU: lower ceiling is fine (600s)
|
|
235
|
+
* - Shared/multi-tenant or CPU-constrained: use the default (1800s)
|
|
236
|
+
* - Long-running batch imports or large workflow graphs: increase (3600s+)
|
|
237
|
+
* - Cloud deployments without CPU contention: the adaptive logic will
|
|
238
|
+
* naturally stay near the starting timeout and rarely approach the ceiling
|
|
239
|
+
*/
|
|
240
|
+
export declare const HMSH_RESERVATION_TIMEOUT_MAX_S: number;
|
|
164
241
|
export declare const HMSH_EXPIRE_DURATION: number;
|
|
165
242
|
export declare const HMSH_FIDELITY_SECONDS: number;
|
|
166
243
|
export declare const HMSH_SCOUT_INTERVAL_SECONDS: number;
|
package/build/modules/enums.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = void 0;
|
|
3
|
+
exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
|
|
4
|
+
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = void 0;
|
|
5
5
|
/**
|
|
6
6
|
* Determines the log level for the application. The default is 'info'.
|
|
7
7
|
*/
|
|
@@ -178,6 +178,83 @@ exports.HMSH_BLOCK_TIME_MS = process.env.HMSH_BLOCK_TIME_MS
|
|
|
178
178
|
exports.HMSH_XCLAIM_DELAY_MS = parseInt(process.env.HMSH_XCLAIM_DELAY_MS, 10) || 1000 * 60;
|
|
179
179
|
exports.HMSH_XCLAIM_COUNT = parseInt(process.env.HMSH_XCLAIM_COUNT, 10) || 3;
|
|
180
180
|
exports.HMSH_XPENDING_COUNT = parseInt(process.env.HMSH_XPENDING_COUNT, 10) || 10;
|
|
181
|
+
exports.HMSH_BATCH_SIZE = parseInt(process.env.HMSH_BATCH_SIZE, 10) || 10;
|
|
182
|
+
/**
|
|
183
|
+
* Minimum batch size under adaptive scaling (default: 1).
|
|
184
|
+
*
|
|
185
|
+
* When stream depth is high, the adaptive logic reduces batch size
|
|
186
|
+
* to relieve back-pressure. This value is the floor — the smallest
|
|
187
|
+
* batch the system will fetch per consume cycle.
|
|
188
|
+
*
|
|
189
|
+
* - 1 (default): fully serial under max stress, safest
|
|
190
|
+
* - 2: retains some parallelism while limiting contention
|
|
191
|
+
*
|
|
192
|
+
* Both values produce equivalent throughput in practice (~233s for
|
|
193
|
+
* 1000 concurrent workflows). The reduction from the configured
|
|
194
|
+
* HMSH_BATCH_SIZE is what matters most — the floor is a safety net.
|
|
195
|
+
*/
|
|
196
|
+
exports.HMSH_BATCH_SIZE_MIN = parseInt(process.env.HMSH_BATCH_SIZE_MIN, 10) || 1;
|
|
197
|
+
/**
|
|
198
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
199
|
+
*
|
|
200
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
201
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
202
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
203
|
+
* message becomes available to other consumers — causing duplicate
|
|
204
|
+
* delivery, collation errors, and wasted CPU.
|
|
205
|
+
*
|
|
206
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
207
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
208
|
+
* the timeout doubles (up to HMSH_RESERVATION_TIMEOUT_MAX_S, default 1800s). When the queue drains (depth < 10),
|
|
209
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
210
|
+
* delivery under burst load without manual intervention.
|
|
211
|
+
*
|
|
212
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
213
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
214
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
215
|
+
* Setting a higher default reduces how aggressively the system must
|
|
216
|
+
* adapt during load spikes.
|
|
217
|
+
*
|
|
218
|
+
* **Symptoms of a value that is too low:**
|
|
219
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
220
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
221
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
222
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* // Production with sustained high concurrency
|
|
226
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
227
|
+
*
|
|
228
|
+
* // Low-latency environments with fast processing
|
|
229
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
230
|
+
*/
|
|
231
|
+
exports.HMSH_RESERVATION_TIMEOUT_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_S, 10) || 30;
|
|
232
|
+
/**
|
|
233
|
+
* Maximum reservation timeout in seconds for adaptive scaling (default: 1800).
|
|
234
|
+
*
|
|
235
|
+
* This is the ceiling for the adaptive reservation timeout — how far the
|
|
236
|
+
* system is allowed to stretch under sustained load. The adaptive logic
|
|
237
|
+
* only uses what it needs based on stream depth; this value defines the
|
|
238
|
+
* upper bound, not the steady state.
|
|
239
|
+
*
|
|
240
|
+
* The tradeoff is recovery time after a consumer crash: if a consumer
|
|
241
|
+
* reserves a message and dies, that message is unavailable until the
|
|
242
|
+
* timeout expires. A higher ceiling means longer recovery from crashes
|
|
243
|
+
* but prevents duplicate delivery under heavy sustained load.
|
|
244
|
+
*
|
|
245
|
+
* In practice, crashes are rare and the delay is bounded. The cost of
|
|
246
|
+
* a ceiling that is too low — duplicate delivery, collation errors,
|
|
247
|
+
* wasted CPU, workflow stalls — is far higher than a slightly longer
|
|
248
|
+
* recovery window after a crash.
|
|
249
|
+
*
|
|
250
|
+
* **Tuning guidance:**
|
|
251
|
+
* - Dedicated infrastructure with ample CPU: lower ceiling is fine (600s)
|
|
252
|
+
* - Shared/multi-tenant or CPU-constrained: use the default (1800s)
|
|
253
|
+
* - Long-running batch imports or large workflow graphs: increase (3600s+)
|
|
254
|
+
* - Cloud deployments without CPU contention: the adaptive logic will
|
|
255
|
+
* naturally stay near the starting timeout and rarely approach the ceiling
|
|
256
|
+
*/
|
|
257
|
+
exports.HMSH_RESERVATION_TIMEOUT_MAX_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_MAX_S, 10) || 1800;
|
|
181
258
|
// TASK WORKER
|
|
182
259
|
exports.HMSH_EXPIRE_DURATION = parseInt(process.env.HMSH_EXPIRE_DURATION, 10) || 1;
|
|
183
260
|
const BASE_FIDELITY_SECONDS = 5;
|
package/build/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.5",
|
|
3
|
+
"version": "0.14.7",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|
|
@@ -6,6 +6,11 @@ const errors_1 = require("../../../modules/errors");
|
|
|
6
6
|
const collator_1 = require("../../collator");
|
|
7
7
|
const telemetry_1 = require("../../telemetry");
|
|
8
8
|
const stream_1 = require("../../../types/stream");
|
|
9
|
+
// Module-level collation error tracking (shared across all processEvent calls) for reservation timeout detection
|
|
10
|
+
let collationErrorCount = 0;
|
|
11
|
+
let collationWindowStart = Date.now();
|
|
12
|
+
const COLLATION_WARN_THRESHOLD = 10;
|
|
13
|
+
const COLLATION_WINDOW_MS = 60000;
|
|
9
14
|
async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, code = 200, type = 'output') {
|
|
10
15
|
instance.setLeg(2);
|
|
11
16
|
const jid = instance.context.metadata.jid;
|
|
@@ -66,7 +71,32 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
66
71
|
}
|
|
67
72
|
catch (error) {
|
|
68
73
|
if (error instanceof errors_1.CollationError) {
|
|
69
|
-
|
|
74
|
+
// INACTIVE is legitimate duplicate detection — the Postgres atomic
|
|
75
|
+
// CTE (collateLeg2Entry) serializes via row locks, so the GUID
|
|
76
|
+
// ledger value is correct. Silent ack is the right behavior:
|
|
77
|
+
// the work was already done by a prior delivery of this message.
|
|
78
|
+
const now = Date.now();
|
|
79
|
+
if (now - collationWindowStart > COLLATION_WINDOW_MS) {
|
|
80
|
+
collationErrorCount = 0;
|
|
81
|
+
collationWindowStart = now;
|
|
82
|
+
}
|
|
83
|
+
collationErrorCount++;
|
|
84
|
+
if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
|
|
85
|
+
instance.logger.warn('process-event-collation-rate-exceeded', {
|
|
86
|
+
count: collationErrorCount,
|
|
87
|
+
windowMs: COLLATION_WINDOW_MS,
|
|
88
|
+
reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
89
|
+
message: `${COLLATION_WARN_THRESHOLD} collation errors in ${COLLATION_WINDOW_MS / 1000}s. ` +
|
|
90
|
+
`This typically means HMSH_RESERVATION_TIMEOUT_S (currently ${enums_1.HMSH_RESERVATION_TIMEOUT_S}s) ` +
|
|
91
|
+
`is too short for your workload — messages are being re-reserved before processing completes, ` +
|
|
92
|
+
`causing duplicate delivery. Increase HMSH_RESERVATION_TIMEOUT_S.`,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
instance.logger.warn(`process-event-${error.fault}-error`, {
|
|
96
|
+
jid: instance.context.metadata.jid,
|
|
97
|
+
aid: instance.metadata.aid,
|
|
98
|
+
error,
|
|
99
|
+
});
|
|
70
100
|
return;
|
|
71
101
|
}
|
|
72
102
|
else if (error instanceof errors_1.InactiveJobError) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
1
|
+
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
2
2
|
import { RouterConfig } from '../../../types/stream';
|
|
3
3
|
export declare class RouterConfigManager {
|
|
4
4
|
static validateThrottle(delayInMillis: number): void;
|
|
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
|
|
|
8
8
|
readonly: boolean;
|
|
9
9
|
};
|
|
10
10
|
}
|
|
11
|
-
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
11
|
+
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
3
|
+
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
4
4
|
const enums_1 = require("../../../modules/enums");
|
|
5
5
|
Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
|
|
6
6
|
Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
|
|
@@ -12,6 +12,10 @@ Object.defineProperty(exports, "HMSH_STATUS_UNKNOWN", { enumerable: true, get: f
|
|
|
12
12
|
Object.defineProperty(exports, "HMSH_XCLAIM_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_COUNT; } });
|
|
13
13
|
Object.defineProperty(exports, "HMSH_XCLAIM_DELAY_MS", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_DELAY_MS; } });
|
|
14
14
|
Object.defineProperty(exports, "HMSH_XPENDING_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XPENDING_COUNT; } });
|
|
15
|
+
Object.defineProperty(exports, "HMSH_BATCH_SIZE", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE; } });
|
|
16
|
+
Object.defineProperty(exports, "HMSH_BATCH_SIZE_MIN", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE_MIN; } });
|
|
17
|
+
Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_S; } });
|
|
18
|
+
Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_MAX_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_MAX_S; } });
|
|
15
19
|
Object.defineProperty(exports, "MAX_DELAY", { enumerable: true, get: function () { return enums_1.MAX_DELAY; } });
|
|
16
20
|
Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.MAX_STREAM_BACKOFF; } });
|
|
17
21
|
Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
|
|
@@ -26,7 +26,21 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
26
26
|
private set hasReachedMaxBackoff(value);
|
|
27
27
|
private router;
|
|
28
28
|
private retry;
|
|
29
|
+
private adaptiveReservationTimeout;
|
|
30
|
+
private adaptiveBatchSize;
|
|
31
|
+
private lastDepthCheckAt;
|
|
32
|
+
private static readonly DEPTH_CHECK_INTERVAL_MS;
|
|
33
|
+
private static readonly DEPTH_SCALE_UP_THRESHOLD;
|
|
34
|
+
private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
|
|
29
35
|
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
|
|
36
|
+
/**
|
|
37
|
+
* Adjusts reservation timeout and batch size based on stream depth. Called periodically
|
|
38
|
+
* from the consume loop. When depth is high:
|
|
39
|
+
* - reservation timeout grows (prevents duplicate re-reservation)
|
|
40
|
+
* - batch size shrinks (reduces in-memory blocking, shares the stream)
|
|
41
|
+
* When depth drops, both restore toward configured defaults.
|
|
42
|
+
*/
|
|
43
|
+
private adjustConsumptionPressure;
|
|
30
44
|
createGroup(stream: string, group: string): Promise<void>;
|
|
31
45
|
publishMessage(topic: string, streamData: StreamData | StreamDataResponse, transaction?: ProviderTransaction): Promise<string | ProviderTransaction>;
|
|
32
46
|
consumeMessages(stream: string, group: string, consumer: string, callback: (streamData: StreamData) => Promise<StreamDataResponse | void>): Promise<void>;
|
|
@@ -17,6 +17,14 @@ class ConsumptionManager {
|
|
|
17
17
|
get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
|
|
18
18
|
set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
|
|
19
19
|
constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
|
|
20
|
+
// Adaptive consumption pressure — scales reservation timeout AND batch
|
|
21
|
+
// size based on stream depth. Under load: timeout grows (prevents
|
|
22
|
+
// duplicate re-reservation) and batch size shrinks (reduces in-memory
|
|
23
|
+
// blocking, lets other consumers share the stream). When idle, both
|
|
24
|
+
// restore toward configured defaults.
|
|
25
|
+
this.adaptiveReservationTimeout = config_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
26
|
+
this.adaptiveBatchSize = config_1.HMSH_BATCH_SIZE;
|
|
27
|
+
this.lastDepthCheckAt = 0;
|
|
20
28
|
this.stream = stream;
|
|
21
29
|
this.logger = logger;
|
|
22
30
|
this.throttleManager = throttleManager;
|
|
@@ -29,6 +37,57 @@ class ConsumptionManager {
|
|
|
29
37
|
this.router = router;
|
|
30
38
|
this.retry = retry;
|
|
31
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
42
|
+
* from the consume loop. When depth is high:
|
|
43
|
+
* - reservation timeout grows (prevents duplicate re-reservation)
|
|
44
|
+
* - batch size shrinks (reduces in-memory blocking, shares the stream)
|
|
45
|
+
* When depth drops, both restore toward configured defaults.
|
|
46
|
+
*/
|
|
47
|
+
async adjustConsumptionPressure(stream) {
|
|
48
|
+
const now = Date.now();
|
|
49
|
+
if (now - this.lastDepthCheckAt < ConsumptionManager.DEPTH_CHECK_INTERVAL_MS) {
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
this.lastDepthCheckAt = now;
|
|
53
|
+
try {
|
|
54
|
+
const depth = await this.stream.getStreamDepth(stream);
|
|
55
|
+
const prevTimeout = this.adaptiveReservationTimeout;
|
|
56
|
+
const prevBatch = this.adaptiveBatchSize;
|
|
57
|
+
if (depth > ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD) {
|
|
58
|
+
// Scale up timeout, scale down batch size
|
|
59
|
+
this.adaptiveReservationTimeout = Math.min(this.adaptiveReservationTimeout * 2, config_1.HMSH_RESERVATION_TIMEOUT_MAX_S);
|
|
60
|
+
this.adaptiveBatchSize = Math.max(Math.floor(this.adaptiveBatchSize / 2), config_1.HMSH_BATCH_SIZE_MIN);
|
|
61
|
+
}
|
|
62
|
+
else if (depth < ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD) {
|
|
63
|
+
// Scale down timeout, scale up batch size
|
|
64
|
+
this.adaptiveReservationTimeout = Math.max(Math.floor(this.adaptiveReservationTimeout / 2), config_1.HMSH_RESERVATION_TIMEOUT_S);
|
|
65
|
+
this.adaptiveBatchSize = Math.min(this.adaptiveBatchSize * 2, config_1.HMSH_BATCH_SIZE);
|
|
66
|
+
}
|
|
67
|
+
if (this.adaptiveReservationTimeout !== prevTimeout) {
|
|
68
|
+
this.stream.reservationTimeout = this.adaptiveReservationTimeout;
|
|
69
|
+
this.logger.info('stream-reservation-timeout-adjusted', {
|
|
70
|
+
stream,
|
|
71
|
+
depth,
|
|
72
|
+
previousTimeoutS: prevTimeout,
|
|
73
|
+
newTimeoutS: this.adaptiveReservationTimeout,
|
|
74
|
+
configuredDefaultS: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
if (this.adaptiveBatchSize !== prevBatch) {
|
|
78
|
+
this.logger.info('stream-batch-size-adjusted', {
|
|
79
|
+
stream,
|
|
80
|
+
depth,
|
|
81
|
+
previousBatchSize: prevBatch,
|
|
82
|
+
newBatchSize: this.adaptiveBatchSize,
|
|
83
|
+
configuredDefaultBatchSize: config_1.HMSH_BATCH_SIZE,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
catch {
|
|
88
|
+
// Stream depth check is best-effort; don't fail the consume loop
|
|
89
|
+
}
|
|
90
|
+
}
|
|
32
91
|
async createGroup(stream, group) {
|
|
33
92
|
try {
|
|
34
93
|
await this.stream.createConsumerGroup(stream, group);
|
|
@@ -107,6 +166,8 @@ class ConsumptionManager {
|
|
|
107
166
|
if (this.lifecycleManager.isStopped(group, consumer, stream)) {
|
|
108
167
|
return;
|
|
109
168
|
}
|
|
169
|
+
// Adapt reservation timeout based on stream depth
|
|
170
|
+
await this.adjustConsumptionPressure(stream);
|
|
110
171
|
await this.throttleManager.customSleep(); // respect throttle
|
|
111
172
|
if (this.lifecycleManager.isStopped(group, consumer, stream) ||
|
|
112
173
|
this.throttleManager.isPaused()) {
|
|
@@ -183,6 +244,7 @@ class ConsumptionManager {
|
|
|
183
244
|
enableNotifications: true,
|
|
184
245
|
notificationCallback,
|
|
185
246
|
blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
|
|
247
|
+
reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
186
248
|
});
|
|
187
249
|
// Don't block here - let the worker initialization complete
|
|
188
250
|
// The notification system will handle message processing asynchronously
|
|
@@ -225,14 +287,17 @@ class ConsumptionManager {
|
|
|
225
287
|
const streamDuration = config_1.HMSH_BLOCK_TIME_MS + Math.round(config_1.HMSH_BLOCK_TIME_MS * Math.random());
|
|
226
288
|
try {
|
|
227
289
|
let messages = [];
|
|
290
|
+
// Adapt reservation timeout based on stream depth
|
|
291
|
+
await this.adjustConsumptionPressure(stream);
|
|
228
292
|
if (!this.hasReachedMaxBackoff) {
|
|
229
293
|
// Normal mode: try with backoff and finite retries
|
|
230
294
|
const features = this.stream.getProviderSpecificFeatures();
|
|
231
295
|
const isPostgres = features.supportsParallelProcessing;
|
|
232
|
-
const batchSize = isPostgres ?
|
|
296
|
+
const batchSize = isPostgres ? this.adaptiveBatchSize : 1;
|
|
233
297
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
234
298
|
blockTimeout: streamDuration,
|
|
235
299
|
batchSize,
|
|
300
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
236
301
|
enableBackoff: true,
|
|
237
302
|
initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
|
|
238
303
|
maxBackoff: config_1.MAX_STREAM_BACKOFF,
|
|
@@ -243,10 +308,11 @@ class ConsumptionManager {
|
|
|
243
308
|
// Fallback mode: just try once, no backoff
|
|
244
309
|
const features = this.stream.getProviderSpecificFeatures();
|
|
245
310
|
const isPostgres = features.supportsParallelProcessing;
|
|
246
|
-
const batchSize = isPostgres ?
|
|
311
|
+
const batchSize = isPostgres ? this.adaptiveBatchSize : 1;
|
|
247
312
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
248
313
|
blockTimeout: streamDuration,
|
|
249
314
|
batchSize,
|
|
315
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
250
316
|
enableBackoff: false,
|
|
251
317
|
maxRetries: 1,
|
|
252
318
|
});
|
|
@@ -542,4 +608,7 @@ class ConsumptionManager {
|
|
|
542
608
|
return Array.isArray(result) && Array.isArray(result[0]);
|
|
543
609
|
}
|
|
544
610
|
}
|
|
611
|
+
ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
|
|
612
|
+
ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
|
|
613
|
+
ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
|
|
545
614
|
exports.ConsumptionManager = ConsumptionManager;
|
|
@@ -22,6 +22,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
22
22
|
batchSize?: number;
|
|
23
23
|
blockTimeout?: number;
|
|
24
24
|
autoAck?: boolean;
|
|
25
|
+
reservationTimeout?: number;
|
|
25
26
|
enableBackoff?: boolean;
|
|
26
27
|
initialBackoff?: number;
|
|
27
28
|
maxBackoff?: number;
|
|
@@ -41,6 +42,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
41
42
|
maxRetries?: number;
|
|
42
43
|
limit?: number;
|
|
43
44
|
}): Promise<StreamMessage[]>;
|
|
45
|
+
reservationTimeout: number;
|
|
44
46
|
abstract getStreamStats(streamName: string): Promise<StreamStats>;
|
|
45
47
|
abstract getStreamDepth(streamName: string): Promise<number>;
|
|
46
48
|
abstract getStreamDepths(streamName: {
|
|
@@ -3,6 +3,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.StreamService = void 0;
|
|
4
4
|
class StreamService {
|
|
5
5
|
constructor(streamClient, storeClient, config = {}) {
|
|
6
|
+
// Adaptive reservation timeout — set by the consumption manager
|
|
7
|
+
// based on stream depth. Providers read this when reserving messages.
|
|
8
|
+
this.reservationTimeout = 30;
|
|
6
9
|
this.streamClient = streamClient;
|
|
7
10
|
this.storeClient = storeClient;
|
|
8
11
|
this.config = config;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.retryMessages = exports.deadLetterMessages = exports.ackAndDelete = exports.deleteMessages = exports.acknowledgeMessages = exports.fetchMessages = exports.buildPublishSQL = exports.publishMessages = void 0;
|
|
4
|
+
const enums_1 = require("../../../../modules/enums");
|
|
4
5
|
const utils_1 = require("../../../../modules/utils");
|
|
5
6
|
/**
|
|
6
7
|
* Publish messages to a stream. Can be used within a transaction.
|
|
@@ -205,7 +206,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
|
|
|
205
206
|
while (retries < maxRetries) {
|
|
206
207
|
retries++;
|
|
207
208
|
const batchSize = options?.batchSize || 1;
|
|
208
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
209
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
209
210
|
const res = await client.query(`UPDATE ${tableName}
|
|
210
211
|
SET reserved_at = NOW(), reserved_by = $3
|
|
211
212
|
WHERE id IN (
|
|
@@ -79,12 +79,12 @@ class PostgresStreamService extends index_1.StreamService {
|
|
|
79
79
|
}
|
|
80
80
|
async checkForMissedMessages() {
|
|
81
81
|
await this.notificationManager.checkForMissedMessages(async (instance, consumer) => {
|
|
82
|
-
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
82
|
+
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: instance.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
83
83
|
});
|
|
84
84
|
}
|
|
85
85
|
async fetchAndDeliverMessages(consumer) {
|
|
86
86
|
try {
|
|
87
|
-
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
87
|
+
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
88
88
|
if (messages.length > 0) {
|
|
89
89
|
consumer.callback(messages);
|
|
90
90
|
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
10
|
exports.publishMessagesSecured = exports.deadLetterMessagesSecured = exports.ackAndDeleteSecured = exports.fetchMessagesSecured = void 0;
|
|
11
|
+
const enums_1 = require("../../../../modules/enums");
|
|
11
12
|
const utils_1 = require("../../../../modules/utils");
|
|
12
13
|
const utils_2 = require("../../../../modules/utils");
|
|
13
14
|
/**
|
|
@@ -19,7 +20,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
|
|
|
19
20
|
const maxBackoff = options?.maxBackoff ?? 3000;
|
|
20
21
|
const maxRetries = options?.maxRetries ?? 3;
|
|
21
22
|
const batchSize = options?.batchSize || 1;
|
|
22
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
23
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
23
24
|
let backoff = initialBackoff;
|
|
24
25
|
let retries = 0;
|
|
25
26
|
try {
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.5",
|
|
3
|
+
"version": "0.14.7",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|