@hotmeshio/hotmesh 0.16.3 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.16.3",
3
+ "version": "0.16.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -72,13 +72,10 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
72
72
  }
73
73
  catch (error) {
74
74
  if (error instanceof errors_1.CollationError) {
75
- //FORBIDDEN: Leg1 not complete — signal arrived in the window
76
- //between registerHook (standalone) and Leg1 transaction commit.
77
- //Rethrow so the stream message is retried with backoff; by then
78
- //Leg1 will have committed and Leg2 processing will succeed.
79
- //The GUID marker was already committed by notarizeLeg2Entry;
80
- //on retry, collateLeg2Entry's SETNX is a no-op for the same
81
- //GUID, and verifySyntheticInteger sees no steps done → allowed.
75
+ //FORBIDDEN: Leg1 not complete — should not occur after the fix
76
+ //that moved setHookSignal to post-commit. If seen, it indicates
77
+ //a new race window not covered by the fix. Rethrow so the inline
78
+ //retry in processWebHookEvent can attempt recovery.
82
79
  if (error.fault === collator_1.CollationFaultType.FORBIDDEN) {
83
80
  instance.logger.warn('process-event-forbidden-retry', {
84
81
  jid: instance.context.metadata.jid,
@@ -160,6 +160,28 @@ declare class Hook extends Activity {
160
160
  private redeliverPendingSignal;
161
161
  doPassThrough(telemetry: TelemetryService): Promise<void>;
162
162
  getHookRule(topic: string): Promise<HookRule | undefined>;
163
+ /**
164
+ * Register the time hook (sleep) inside the Leg1 transaction.
165
+ * Time hooks don't participate in the signal race — they're
166
+ * purely internal timeout registrations.
167
+ */
168
+ registerTimeHook(transaction: ProviderTransaction): Promise<void>;
169
+ /**
170
+ * Register the web hook signal AFTER the Leg1 transaction commits.
171
+ * This ensures the hook signal is never visible before Leg1
172
+ * completion, eliminating the FORBIDDEN window where Leg2 could
173
+ * find the hook but fail on the collation check.
174
+ *
175
+ * If a pending signal was stored by an early-arriving Leg2,
176
+ * setHookSignal atomically detects and returns it.
177
+ */
178
+ registerWebHookSignal(): Promise<{
179
+ pending?: string;
180
+ } | void>;
181
+ /**
182
+ * @deprecated Use registerTimeHook + registerWebHookSignal instead.
183
+ * Kept for backward compatibility with tests that monkey-patch this method.
184
+ */
163
185
  registerHook(transaction?: ProviderTransaction): Promise<{
164
186
  jobId?: string;
165
187
  pending?: string;
@@ -6,6 +6,8 @@ const pipe_1 = require("../pipe");
6
6
  const task_1 = require("../task");
7
7
  const telemetry_1 = require("../telemetry");
8
8
  const stream_1 = require("../../types/stream");
9
+ const errors_1 = require("../../modules/errors");
10
+ const collator_2 = require("../../types/collator");
9
11
  const utils_1 = require("../../modules/utils");
10
12
  const activity_1 = require("./activity");
11
13
  /**
@@ -169,6 +171,17 @@ class Hook extends activity_1.Activity {
169
171
  if (!isResume) {
170
172
  await this.doHook(telemetry);
171
173
  }
174
+ else if (this.config.hook?.topic) {
175
+ //DUPLICATE: Leg1 completed previously but hook registration
176
+ //may not have happened (crash between transaction.exec and
177
+ //registerWebHookSignal). Attempt registration — setHookSignal
178
+ //is idempotent (returns success:false if hook already exists).
179
+ const hookResult = await this.registerWebHookSignal();
180
+ const pending = hookResult && hookResult.pending;
181
+ if (pending) {
182
+ await this.redeliverPendingSignal(pending);
183
+ }
184
+ }
172
185
  }
173
186
  else {
174
187
  //Category B: passthrough with crash-safe step protocol + GUID ledger
@@ -204,7 +217,9 @@ class Hook extends activity_1.Activity {
204
217
  }
205
218
  async doHook(telemetry) {
206
219
  const transaction = this.store.transact();
207
- const hookResult = await this.registerHook(transaction);
220
+ //register time hooks (sleep) inside the transaction — these
221
+ //don't participate in the signal race
222
+ await this.registerTimeHook(transaction);
208
223
  this.mapOutputData();
209
224
  this.mapJobData();
210
225
  await this.setState(transaction);
@@ -212,11 +227,15 @@ class Hook extends activity_1.Activity {
212
227
  await this.setStatus(0, transaction);
213
228
  await transaction.exec();
214
229
  telemetry.mapActivityAttributes();
215
- //if a pending signal was detected (signal arrived before hook
216
- //registered), re-publish the WEBHOOK so leg2 processes it
217
- //now that the hook signal is committed and state is saved
218
- if (hookResult && hookResult.pending) {
219
- await this.redeliverPendingSignal(hookResult.pending);
230
+ //register the web hook signal AFTER the transaction commits.
231
+ //this eliminates the FORBIDDEN window: the hook signal is never
232
+ //visible before Leg1 completion. If Leg2 arrives before this
233
+ //point, getHookSignal finds no hook and stores $pending, which
234
+ //setHookSignal will detect and return for redelivery.
235
+ const hookResult = await this.registerWebHookSignal();
236
+ const pending = hookResult && hookResult.pending;
237
+ if (pending) {
238
+ await this.redeliverPendingSignal(pending);
220
239
  }
221
240
  }
222
241
  /**
@@ -257,12 +276,44 @@ class Hook extends activity_1.Activity {
257
276
  const rules = await this.store.getHookRules();
258
277
  return rules?.[topic]?.[0];
259
278
  }
279
+ /**
280
+ * Register the time hook (sleep) inside the Leg1 transaction.
281
+ * Time hooks don't participate in the signal race — they're
282
+ * purely internal timeout registrations.
283
+ */
284
+ async registerTimeHook(transaction) {
285
+ if (this.config.sleep) {
286
+ const duration = pipe_1.Pipe.resolve(this.config.sleep, this.context);
287
+ if (!isNaN(duration) && Number(duration) > 0) {
288
+ await this.engine.taskService.registerTimeHook(this.context.metadata.jid, this.context.metadata.gid, `${this.metadata.aid}${this.metadata.dad || ''}`, 'sleep', duration, this.metadata.dad || '', transaction);
289
+ }
290
+ }
291
+ }
292
+ /**
293
+ * Register the web hook signal AFTER the Leg1 transaction commits.
294
+ * This ensures the hook signal is never visible before Leg1
295
+ * completion, eliminating the FORBIDDEN window where Leg2 could
296
+ * find the hook but fail on the collation check.
297
+ *
298
+ * If a pending signal was stored by an early-arriving Leg2,
299
+ * setHookSignal atomically detects and returns it.
300
+ */
301
+ async registerWebHookSignal() {
302
+ if (this.config.hook?.topic) {
303
+ const hookResult = await this.engine.taskService.registerWebHook(this.config.hook.topic, this.context, this.resolveDad(), this.context.metadata.expire);
304
+ if (hookResult.pending) {
305
+ return { pending: hookResult.pending };
306
+ }
307
+ }
308
+ }
309
+ /**
310
+ * @deprecated Use registerTimeHook + registerWebHookSignal instead.
311
+ * Kept for backward compatibility with tests that monkey-patch this method.
312
+ */
260
313
  async registerHook(transaction) {
261
314
  let jobId;
262
315
  let pending;
263
316
  if (this.config.hook?.topic) {
264
- //hook signal is set standalone (not in the transaction) so the
265
- //single CTE query can atomically detect a pending signal collision
266
317
  const hookResult = await this.engine.taskService.registerWebHook(this.config.hook.topic, this.context, this.resolveDad(), this.context.metadata.expire);
267
318
  jobId = hookResult.jobId;
268
319
  pending = hookResult.pending;
@@ -293,9 +344,38 @@ class Hook extends activity_1.Activity {
293
344
  this.context.metadata.jid = jobId;
294
345
  this.context.metadata.gid = gId;
295
346
  this.context.metadata.dad = dad;
296
- await this.processEvent(status, code, 'hook');
297
- if (code === 200) {
298
- await taskService.deleteWebHookSignal(this.config.hook.topic, data);
347
+ // Inline retry for FORBIDDEN: Leg2 saw the hook signal before Leg1's
348
+ // commit (unexpected now that doHook registers it post-commit). The 100B
349
+ // ledger digit is not yet visible. Leg1 needs only milliseconds to
350
+ // commit — retry here, inside the message processing loop, before
351
+ // consumeOne's finally block acks the message. Stream-level retry
352
+ // won't help: ENGINE consumers have no retry policy, so shouldRetry
353
+ // returns [false, 0] and the message is ack'd with no retry.
354
+ const MAX_FORBIDDEN_RETRIES = 5;
355
+ const FORBIDDEN_RETRY_DELAY_MS = 50;
356
+ for (let attempt = 0; attempt <= MAX_FORBIDDEN_RETRIES; attempt++) {
357
+ try {
358
+ await this.processEvent(status, code, 'hook');
359
+ if (code === 200) {
360
+ await taskService.deleteWebHookSignal(this.config.hook.topic, data);
361
+ }
362
+ return;
363
+ }
364
+ catch (error) {
365
+ if (error instanceof errors_1.CollationError &&
366
+ error.fault === collator_2.CollationFaultType.FORBIDDEN &&
367
+ attempt < MAX_FORBIDDEN_RETRIES) {
368
+ this.logger.warn('hook-webhook-forbidden-inline-retry', {
369
+ attempt: attempt + 1,
370
+ maxAttempts: MAX_FORBIDDEN_RETRIES,
371
+ jid: this.context.metadata.jid,
372
+ aid: this.metadata.aid,
373
+ });
374
+ await (0, utils_1.sleepFor)(FORBIDDEN_RETRY_DELAY_MS * (attempt + 1));
375
+ continue;
376
+ }
377
+ throw error;
378
+ }
299
379
  }
300
380
  }
301
381
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.16.3",
3
+ "version": "0.16.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",