@hotmeshio/hotmesh 0.14.4 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,12 @@ export declare const HMSH_TELEMETRY: "debug" | "info";
55
55
  * Default cleanup time for signal in the db when its associated job is completed.
56
56
  */
57
57
  export declare const HMSH_SIGNAL_EXPIRE = 3600;
58
+ /**
59
+ * Default TTL for pending signals (signals that arrived before the hook registered).
60
+ * The signaler can override this via the `$expire` field in the signal data
61
+ * using a natural-language duration (e.g., '1h', '24h').
62
+ */
63
+ export declare const HMSH_PENDING_SIGNAL_EXPIRE = 600;
58
64
  export declare const HMSH_CODE_SUCCESS = 200;
59
65
  export declare const HMSH_CODE_PENDING = 202;
60
66
  export declare const HMSH_CODE_NOTFOUND = 404;
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
4
- exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = void 0;
3
+ exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
4
+ exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = void 0;
5
5
  /**
6
6
  * Determines the log level for the application. The default is 'info'.
7
7
  */
@@ -58,6 +58,12 @@ exports.HMSH_TELEMETRY = process.env.HMSH_TELEMETRY || 'info';
58
58
  * Default cleanup time for signal in the db when its associated job is completed.
59
59
  */
60
60
  exports.HMSH_SIGNAL_EXPIRE = 3600; //seconds
61
+ /**
62
+ * Default TTL for pending signals (signals that arrived before the hook registered).
63
+ * The signaler can override this via the `$expire` field in the signal data
64
+ * using a natural-language duration (e.g., '1h', '24h').
65
+ */
66
+ exports.HMSH_PENDING_SIGNAL_EXPIRE = 600; //seconds (10 minutes)
61
67
  // HOTMESH STATUS CODES
62
68
  exports.HMSH_CODE_SUCCESS = 200;
63
69
  exports.HMSH_CODE_PENDING = 202;
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.14.4",
3
+ "version": "0.14.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -152,9 +152,18 @@ declare class Hook extends Activity {
152
152
  isConfiguredAsHook(): boolean;
153
153
  doesHook(): boolean;
154
154
  doHook(telemetry: TelemetryService): Promise<void>;
155
+ /**
156
+ * Re-publishes a pending signal as a WEBHOOK stream message so the
157
+ * normal leg2 dispatch path processes it. Called when leg1's
158
+ * setHookSignal atomically detected and consumed a pending signal.
159
+ */
160
+ private redeliverPendingSignal;
155
161
  doPassThrough(telemetry: TelemetryService): Promise<void>;
156
162
  getHookRule(topic: string): Promise<HookRule | undefined>;
157
- registerHook(transaction?: ProviderTransaction): Promise<string | void>;
163
+ registerHook(transaction?: ProviderTransaction): Promise<{
164
+ jobId?: string;
165
+ pending?: string;
166
+ } | void>;
158
167
  processWebHookEvent(status?: StreamStatus, code?: StreamCode): Promise<JobStatus | void>;
159
168
  processTimeHookEvent(jobId: string): Promise<JobStatus | void>;
160
169
  }
@@ -6,6 +6,7 @@ const pipe_1 = require("../pipe");
6
6
  const task_1 = require("../task");
7
7
  const telemetry_1 = require("../telemetry");
8
8
  const stream_1 = require("../../types/stream");
9
+ const utils_1 = require("../../modules/utils");
9
10
  const activity_1 = require("./activity");
10
11
  /**
11
12
  * A versatile pause/resume activity that supports three distinct patterns:
@@ -203,7 +204,7 @@ class Hook extends activity_1.Activity {
203
204
  }
204
205
  async doHook(telemetry) {
205
206
  const transaction = this.store.transact();
206
- await this.registerHook(transaction);
207
+ const hookResult = await this.registerHook(transaction);
207
208
  this.mapOutputData();
208
209
  this.mapJobData();
209
210
  await this.setState(transaction);
@@ -211,6 +212,38 @@ class Hook extends activity_1.Activity {
211
212
  await this.setStatus(0, transaction);
212
213
  await transaction.exec();
213
214
  telemetry.mapActivityAttributes();
215
+ //if a pending signal was detected (signal arrived before hook
216
+ //registered), re-publish the WEBHOOK so leg2 processes it
217
+ //now that the hook signal is committed and state is saved
218
+ if (hookResult && hookResult.pending) {
219
+ await this.redeliverPendingSignal(hookResult.pending);
220
+ }
221
+ }
222
+ /**
223
+ * Re-publishes a pending signal as a WEBHOOK stream message so the
224
+ * normal leg2 dispatch path processes it. Called when leg1's
225
+ * setHookSignal atomically detected and consumed a pending signal.
226
+ */
227
+ async redeliverPendingSignal(pendingJson) {
228
+ const data = JSON.parse(pendingJson);
229
+ const hookRule = await this.getHookRule(this.config.hook.topic);
230
+ this.logger.warn('hook-pending-signal-redelivery', {
231
+ topic: this.config.hook.topic,
232
+ aid: hookRule?.to || this.metadata.aid,
233
+ jid: this.context.metadata.jid,
234
+ });
235
+ const streamData = {
236
+ type: stream_1.StreamDataType.WEBHOOK,
237
+ status: stream_1.StreamStatus.SUCCESS,
238
+ code: 200,
239
+ metadata: {
240
+ guid: (0, utils_1.guid)(),
241
+ aid: hookRule?.to || this.metadata.aid,
242
+ topic: this.config.hook.topic,
243
+ },
244
+ data,
245
+ };
246
+ await this.engine.router?.publishMessage(null, streamData);
214
247
  }
215
248
  async doPassThrough(telemetry) {
216
249
  this.adjacencyList = await this.filterAdjacent();
@@ -225,19 +258,25 @@ class Hook extends activity_1.Activity {
225
258
  return rules?.[topic]?.[0];
226
259
  }
227
260
  async registerHook(transaction) {
228
- let result;
261
+ let jobId;
262
+ let pending;
229
263
  if (this.config.hook?.topic) {
230
- result = await this.engine.taskService.registerWebHook(this.config.hook.topic, this.context, this.resolveDad(), this.context.metadata.expire, transaction);
264
+ //hook signal is set standalone (not in the transaction) so the
265
+ //single CTE query can atomically detect a pending signal collision
266
+ const hookResult = await this.engine.taskService.registerWebHook(this.config.hook.topic, this.context, this.resolveDad(), this.context.metadata.expire);
267
+ jobId = hookResult.jobId;
268
+ pending = hookResult.pending;
231
269
  }
232
270
  if (this.config.sleep) {
233
271
  const duration = pipe_1.Pipe.resolve(this.config.sleep, this.context);
234
272
  if (!isNaN(duration) && Number(duration) > 0) {
235
273
  await this.engine.taskService.registerTimeHook(this.context.metadata.jid, this.context.metadata.gid, `${this.metadata.aid}${this.metadata.dad || ''}`, 'sleep', duration, this.metadata.dad || '', transaction);
236
- if (!result)
237
- result = this.context.metadata.jid;
274
+ if (!jobId)
275
+ jobId = this.context.metadata.jid;
238
276
  }
239
277
  }
240
- return result;
278
+ if (jobId)
279
+ return { jobId, pending };
241
280
  }
242
281
  async processWebHookEvent(status = stream_1.StreamStatus.SUCCESS, code = 200) {
243
282
  this.logger.debug('hook-process-web-hook-event', {
@@ -14,6 +14,7 @@ import { PostgresClientType } from '../../types/postgres';
14
14
  * | `{appId}.jobs_attributes` | Execution artifacts (`adata`, `hmark`, `status`, `other`) that are only needed during workflow execution |
15
15
  * | `{appId}.engine_streams` | Processed engine stream messages with `expired_at` set |
16
16
  * | `{appId}.worker_streams` | Processed worker stream messages with `expired_at` set |
17
+ * | `{appId}.signal_registry` | Consumed hook signals and stale pending signals with `expiry` set |
17
18
  *
18
19
  * The `DBA` service addresses this with two methods:
19
20
  *
@@ -17,6 +17,7 @@ const postgres_1 = require("../connector/providers/postgres");
17
17
  * | `{appId}.jobs_attributes` | Execution artifacts (`adata`, `hmark`, `status`, `other`) that are only needed during workflow execution |
18
18
  * | `{appId}.engine_streams` | Processed engine stream messages with `expired_at` set |
19
19
  * | `{appId}.worker_streams` | Processed worker stream messages with `expired_at` set |
20
+ * | `{appId}.signal_registry` | Consumed hook signals and stale pending signals with `expiry` set |
20
21
  *
21
22
  * The `DBA` service addresses this with two methods:
22
23
  *
@@ -186,7 +187,8 @@ class DBA {
186
187
  prune_engine_streams BOOLEAN DEFAULT NULL,
187
188
  prune_worker_streams BOOLEAN DEFAULT NULL,
188
189
  engine_streams_retention INTERVAL DEFAULT NULL,
189
- worker_streams_retention INTERVAL DEFAULT NULL
190
+ worker_streams_retention INTERVAL DEFAULT NULL,
191
+ prune_signals BOOLEAN DEFAULT TRUE
190
192
  )
191
193
  RETURNS TABLE(
192
194
  deleted_jobs BIGINT,
@@ -195,7 +197,8 @@ class DBA {
195
197
  deleted_worker_streams BIGINT,
196
198
  stripped_attributes BIGINT,
197
199
  deleted_transient BIGINT,
198
- marked_pruned BIGINT
200
+ marked_pruned BIGINT,
201
+ deleted_signals BIGINT
199
202
  )
200
203
  LANGUAGE plpgsql
201
204
  AS $$
@@ -206,6 +209,7 @@ class DBA {
206
209
  v_stripped_attributes BIGINT := 0;
207
210
  v_deleted_transient BIGINT := 0;
208
211
  v_marked_pruned BIGINT := 0;
212
+ v_deleted_signals BIGINT := 0;
209
213
  v_do_engine BOOLEAN;
210
214
  v_do_worker BOOLEAN;
211
215
  v_engine_retention INTERVAL;
@@ -287,6 +291,15 @@ class DBA {
287
291
  GET DIAGNOSTICS v_marked_pruned = ROW_COUNT;
288
292
  END IF;
289
293
 
294
+ -- 6. Hard-delete expired signal_registry rows.
295
+ -- Includes consumed hook signals and stale pending signals.
296
+ IF prune_signals THEN
297
+ DELETE FROM ${schema}.signal_registry
298
+ WHERE expiry IS NOT NULL
299
+ AND expiry <= NOW();
300
+ GET DIAGNOSTICS v_deleted_signals = ROW_COUNT;
301
+ END IF;
302
+
290
303
  deleted_jobs := v_deleted_jobs;
291
304
  deleted_streams := v_deleted_engine_streams + v_deleted_worker_streams;
292
305
  deleted_engine_streams := v_deleted_engine_streams;
@@ -294,6 +307,7 @@ class DBA {
294
307
  stripped_attributes := v_stripped_attributes;
295
308
  deleted_transient := v_deleted_transient;
296
309
  marked_pruned := v_marked_pruned;
310
+ deleted_signals := v_deleted_signals;
297
311
  RETURN NEXT;
298
312
  END;
299
313
  $$;
@@ -391,12 +405,14 @@ class DBA {
391
405
  const workerStreams = options.workerStreams ?? null;
392
406
  const engineStreamsExpire = options.engineStreamsExpire ?? null;
393
407
  const workerStreamsExpire = options.workerStreamsExpire ?? null;
408
+ const signals = options.signals ?? true;
394
409
  await DBA.deploy(options.connection, options.appId);
395
410
  const { client, release } = await DBA.getClient(options.connection);
396
411
  try {
397
- const result = await client.query(`SELECT * FROM ${schema}.prune($1::interval, $2::boolean, $3::boolean, $4::boolean, $5::text[], $6::boolean, $7::boolean, $8::boolean, $9::boolean, $10::interval, $11::interval)`, [
412
+ const result = await client.query(`SELECT * FROM ${schema}.prune($1::interval, $2::boolean, $3::boolean, $4::boolean, $5::text[], $6::boolean, $7::boolean, $8::boolean, $9::boolean, $10::interval, $11::interval, $12::boolean)`, [
398
413
  expire, jobs, streams, attributes, entities, pruneTransient, keepHmark,
399
414
  engineStreams, workerStreams, engineStreamsExpire, workerStreamsExpire,
415
+ signals,
400
416
  ]);
401
417
  const row = result.rows[0];
402
418
  return {
@@ -407,6 +423,7 @@ class DBA {
407
423
  attributes: Number(row.stripped_attributes),
408
424
  transient: Number(row.deleted_transient),
409
425
  marked: Number(row.marked_pruned),
426
+ signals: Number(row.deleted_signals),
410
427
  };
411
428
  }
412
429
  finally {
@@ -156,11 +156,21 @@ class ClientService {
156
156
  return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, jobId);
157
157
  },
158
158
  /**
159
- * Sends a message payload to a running workflow that is paused and awaiting the signal
159
+ * Sends a message payload to a running workflow that is paused and awaiting the signal.
160
+ *
161
+ * If the signal arrives before the workflow has registered its hook
162
+ * (race condition under load), it is buffered as a pending signal
163
+ * for up to `expire` (default 10 minutes). Use a longer duration
164
+ * when signaling "early on purpose" (e.g., depositing a payload
165
+ * hours before the workflow starts).
160
166
  */
161
- signal: async (signalId, data, namespace) => {
167
+ signal: async (signalId, data, namespace, expire) => {
162
168
  const topic = `${namespace ?? factory_1.APP_ID}.wfs.signal`;
163
- return await (await this.getHotMeshClient(topic, namespace)).signal(topic, { id: signalId, data });
169
+ return await (await this.getHotMeshClient(topic, namespace)).signal(topic, {
170
+ id: signalId,
171
+ data,
172
+ ...(expire ? { $expire: expire } : {}),
173
+ });
164
174
  },
165
175
  /**
166
176
  * Spawns a new, isolated execution cycle within the same job.
@@ -57,10 +57,17 @@ export declare class WorkflowHandleService {
57
57
  * on `Durable.workflow.condition(signalId)`, it resumes with the
58
58
  * provided data.
59
59
  *
60
+ * If the signal arrives before the workflow has registered its hook
61
+ * (race condition under load), it is buffered as a pending signal
62
+ * for up to `expire` (default 10 minutes). Use a longer duration
63
+ * when signaling "early on purpose" (e.g., depositing a payload
64
+ * hours before the workflow starts).
65
+ *
60
66
  * @param signalId - Matches the `signalId` passed to `condition()`.
61
67
  * @param data - Payload delivered to the waiting workflow.
68
+ * @param expire - Optional pending signal TTL (e.g., '1h', '30d'). Default '10m'.
62
69
  */
63
- signal(signalId: string, data: Record<any, any>): Promise<void>;
70
+ signal(signalId: string, data: Record<any, any>, expire?: string): Promise<void>;
64
71
  /**
65
72
  * Returns the current workflow state. For a completed workflow this
66
73
  * is the final output; for a running workflow it reflects the latest
@@ -58,13 +58,21 @@ class WorkflowHandleService {
58
58
  * on `Durable.workflow.condition(signalId)`, it resumes with the
59
59
  * provided data.
60
60
  *
61
+ * If the signal arrives before the workflow has registered its hook
62
+ * (race condition under load), it is buffered as a pending signal
63
+ * for up to `expire` (default 10 minutes). Use a longer duration
64
+ * when signaling "early on purpose" (e.g., depositing a payload
65
+ * hours before the workflow starts).
66
+ *
61
67
  * @param signalId - Matches the `signalId` passed to `condition()`.
62
68
  * @param data - Payload delivered to the waiting workflow.
69
+ * @param expire - Optional pending signal TTL (e.g., '1h', '30d'). Default '10m'.
63
70
  */
64
- async signal(signalId, data) {
71
+ async signal(signalId, data, expire) {
65
72
  await this.hotMesh.signal(`${this.hotMesh.appId}.wfs.signal`, {
66
73
  id: signalId,
67
74
  data,
75
+ ...(expire ? { $expire: expire } : {}),
68
76
  });
69
77
  }
70
78
  /**
@@ -55,4 +55,4 @@
55
55
  * @param {Record<any, any>} data - The payload to deliver to the waiting workflow.
56
56
  * @returns {Promise<string>} The resulting hook/stream ID.
57
57
  */
58
- export declare function signal(signalId: string, data: Record<any, any>): Promise<string>;
58
+ export declare function signal(signalId: string, data: Record<any, any>, expire?: string): Promise<string>;
@@ -60,7 +60,7 @@ const isSideEffectAllowed_1 = require("./isSideEffectAllowed");
60
60
  * @param {Record<any, any>} data - The payload to deliver to the waiting workflow.
61
61
  * @returns {Promise<string>} The resulting hook/stream ID.
62
62
  */
63
- async function signal(signalId, data) {
63
+ async function signal(signalId, data, expire) {
64
64
  const store = common_1.asyncLocalStorage.getStore();
65
65
  const workflowTopic = store.get('workflowTopic');
66
66
  const connection = store.get('connection');
@@ -73,6 +73,7 @@ async function signal(signalId, data) {
73
73
  return await hotMeshClient.signal(`${namespace}.wfs.signal`, {
74
74
  id: signalId,
75
75
  data,
76
+ ...(expire ? { $expire: expire } : {}),
76
77
  });
77
78
  }
78
79
  }
@@ -64,8 +64,21 @@ declare abstract class StoreService<Provider extends ProviderClient, Transaction
64
64
  abstract setHookRules(hookRules: Record<string, HookRule[]>): Promise<any>;
65
65
  abstract getHookRules(): Promise<Record<string, HookRule[]>>;
66
66
  abstract getAllSymbols(): Promise<Symbols>;
67
- abstract setHookSignal(hook: HookSignal, transaction?: TransactionProvider): Promise<any>;
68
- abstract getHookSignal(topic: string, resolved: string): Promise<string | undefined>;
67
+ /**
68
+ * Leg1: Attempts to set the hook signal. If a pending signal occupies
69
+ * the key (race condition), overwrites it and returns the pending data.
70
+ * When called with a transaction, queues the setnxex (no pending detection).
71
+ */
72
+ abstract setHookSignal(hook: HookSignal, transaction?: TransactionProvider): Promise<{
73
+ success: boolean;
74
+ pendingData?: string;
75
+ }>;
76
+ /**
77
+ * Leg2: Atomically gets the hook signal OR inserts a pending signal
78
+ * if no hook is registered yet (early signal). Returns the hook
79
+ * signal value, or undefined if we stored a pending signal instead.
80
+ */
81
+ abstract getHookSignal(topic: string, resolved: string, pendingData?: string, pendingExpire?: number): Promise<string | undefined>;
69
82
  abstract deleteHookSignal(topic: string, resolved: string): Promise<number | undefined>;
70
83
  abstract addTaskQueues(keys: string[]): Promise<void>;
71
84
  abstract getActiveTaskQueue(): Promise<string | null>;
@@ -10,6 +10,7 @@ export declare const KVTables: (context: PostgresStoreService) => {
10
10
  hashStringToInt(str: string): number;
11
11
  waitForTablesCreation(lockId: number, appName: string): Promise<void>;
12
12
  checkIfTablesExist(client: PostgresClientType, appName: string): Promise<boolean>;
13
+ migrate(client: PostgresClientType | PostgresPoolClientType, appName: string): Promise<void>;
13
14
  createTables(client: PostgresClientType | PostgresPoolClientType, appName: string): Promise<void>;
14
15
  getTableNames(appName: string): string[];
15
16
  getTableDefinitions(appName: string): Array<{
@@ -26,7 +26,8 @@ const KVTables = (context) => ({
26
26
  // First, check if tables already exist (no lock needed)
27
27
  const tablesExist = await this.checkIfTablesExist(client, appName);
28
28
  if (tablesExist) {
29
- // Tables already exist, no need to acquire lock or create tables
29
+ // Tables exist; apply any pending migrations
30
+ await this.migrate(client, appName);
30
31
  return;
31
32
  }
32
33
  // Tables don't exist, need to acquire lock and create them
@@ -128,6 +129,31 @@ const KVTables = (context) => ({
128
129
  const results = await Promise.all(checkTablePromises);
129
130
  return results.every((res) => res.rows[0].table !== null);
130
131
  },
132
+ async migrate(client, appName) {
133
+ const schemaName = context.storeClient.safeName(appName);
134
+ const jobsTable = `${schemaName}.jobs`;
135
+ // v0.14.5: track updated_at on job status changes
136
+ const { rows } = await client.query(`SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_jobs_updated_at' LIMIT 1`);
137
+ if (rows.length === 0) {
138
+ await client.query(`
139
+ CREATE OR REPLACE FUNCTION ${schemaName}.update_jobs_updated_at()
140
+ RETURNS TRIGGER AS $$
141
+ BEGIN
142
+ IF NEW.status <> OLD.status THEN
143
+ NEW.updated_at = NOW();
144
+ END IF;
145
+ RETURN NEW;
146
+ END;
147
+ $$ LANGUAGE plpgsql;
148
+ `);
149
+ await client.query(`
150
+ DROP TRIGGER IF EXISTS trg_update_jobs_updated_at ON ${jobsTable};
151
+ CREATE TRIGGER trg_update_jobs_updated_at
152
+ BEFORE UPDATE ON ${jobsTable}
153
+ FOR EACH ROW EXECUTE FUNCTION ${schemaName}.update_jobs_updated_at();
154
+ `);
155
+ }
156
+ },
131
157
  async createTables(client, appName) {
132
158
  try {
133
159
  await client.query('BEGIN');
@@ -302,6 +328,25 @@ const KVTables = (context) => ({
302
328
  CREATE TRIGGER trg_enforce_live_job_uniqueness
303
329
  BEFORE INSERT OR UPDATE ON ${fullTableName}
304
330
  FOR EACH ROW EXECUTE PROCEDURE ${schemaName}.enforce_live_job_uniqueness();
331
+ `);
332
+ // Create function to update updated_at on status changes
333
+ await client.query(`
334
+ CREATE OR REPLACE FUNCTION ${schemaName}.update_jobs_updated_at()
335
+ RETURNS TRIGGER AS $$
336
+ BEGIN
337
+ IF NEW.status <> OLD.status THEN
338
+ NEW.updated_at = NOW();
339
+ END IF;
340
+ RETURN NEW;
341
+ END;
342
+ $$ LANGUAGE plpgsql;
343
+ `);
344
+ // Create trigger for updated_at on job status changes
345
+ await client.query(`
346
+ DROP TRIGGER IF EXISTS trg_update_jobs_updated_at ON ${fullTableName};
347
+ CREATE TRIGGER trg_update_jobs_updated_at
348
+ BEFORE UPDATE ON ${fullTableName}
349
+ FOR EACH ROW EXECUTE FUNCTION ${schemaName}.update_jobs_updated_at();
305
350
  `);
306
351
  // Create the attributes table with partitioning
307
352
  const attributesTableName = `${fullTableName}_attributes`;
@@ -115,8 +115,31 @@ declare class PostgresStoreService extends StoreService<ProviderClient, Provider
115
115
  getTransitions(appVersion: AppVID): Promise<Transitions>;
116
116
  setHookRules(hookRules: Record<string, HookRule[]>): Promise<any>;
117
117
  getHookRules(): Promise<Record<string, HookRule[]>>;
118
- setHookSignal(hook: HookSignal, transaction?: ProviderTransaction): Promise<any>;
119
- getHookSignal(topic: string, resolved: string): Promise<string | undefined>;
118
+ /**
119
+ * Leg1: set hook signal, atomically detecting a pending signal.
120
+ *
121
+ * Standalone (no transaction): single CTE query that reads any existing
122
+ * pending value, then inserts the hook signal (overwriting pending or
123
+ * expired entries). Returns `{success, pendingData}` in one round trip.
124
+ *
125
+ * In a transaction: queues the setnxex; pending detection deferred.
126
+ */
127
+ setHookSignal(hook: HookSignal, transaction?: ProviderTransaction): Promise<{
128
+ success: boolean;
129
+ pendingData?: string;
130
+ }>;
131
+ /**
132
+ * Leg2: get hook signal OR atomically set a pending signal.
133
+ *
134
+ * When `pendingData` is provided and no hook signal exists, the
135
+ * pending value is inserted in the SAME SQL statement — no second
136
+ * round trip. This is the transactional edge that prevents the
137
+ * signal from being lost: by the time the query returns, the
138
+ * pending key is already visible to leg1's setnxex.
139
+ *
140
+ * When `pendingData` is omitted, behaves as a plain read.
141
+ */
142
+ getHookSignal(topic: string, resolved: string, pendingData?: string, pendingExpire?: number): Promise<string | undefined>;
120
143
  deleteHookSignal(topic: string, resolved: string): Promise<number | undefined>;
121
144
  addTaskQueues(keys: string[]): Promise<void>;
122
145
  getActiveTaskQueue(): Promise<string | null>;
@@ -754,16 +754,133 @@ class PostgresStoreService extends __1.StoreService {
754
754
  return patterns;
755
755
  }
756
756
  }
757
+ /**
758
+ * Leg1: set hook signal, atomically detecting a pending signal.
759
+ *
760
+ * Standalone (no transaction): single CTE query that reads any existing
761
+ * pending value, then inserts the hook signal (overwriting pending or
762
+ * expired entries). Returns `{success, pendingData}` in one round trip.
763
+ *
764
+ * In a transaction: queues the setnxex; pending detection deferred.
765
+ */
757
766
  async setHookSignal(hook, transaction) {
758
767
  const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
759
768
  const { topic, resolved, jobId } = hook;
760
769
  const signalKey = `${topic}:${resolved}`;
761
- await this.kvsql(transaction).setnxex(`${key}:${signalKey}`, jobId, Math.max(hook.expire, enums_1.HMSH_SIGNAL_EXPIRE));
770
+ const fullKey = `${key}:${signalKey}`;
771
+ const delay = Math.max(hook.expire, enums_1.HMSH_SIGNAL_EXPIRE);
772
+ if (transaction) {
773
+ await this.kvsql(transaction).setnxex(fullKey, jobId, delay);
774
+ return { success: true };
775
+ }
776
+ const kv = this.kvsql();
777
+ const tableName = kv.tableForKey(fullKey);
778
+ const storedKey = kv.storageKey(fullKey);
779
+ const sql = `
780
+ WITH pre AS (
781
+ SELECT value FROM ${tableName}
782
+ WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
783
+ ),
784
+ ins AS (
785
+ INSERT INTO ${tableName} (key, value, expiry)
786
+ VALUES ($1, $2, NOW() + INTERVAL '${delay} seconds')
787
+ ON CONFLICT (key) DO UPDATE
788
+ SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
789
+ WHERE ${tableName}.expiry IS NULL
790
+ OR ${tableName}.expiry <= NOW()
791
+ OR ${tableName}.value LIKE '$pending::%'
792
+ RETURNING true as success
793
+ )
794
+ SELECT
795
+ COALESCE((SELECT success FROM ins), false) as success,
796
+ (SELECT value FROM pre) as existing_value
797
+ `;
798
+ try {
799
+ const res = await this.pgClient.query(sql, [storedKey, jobId]);
800
+ const row = res.rows[0] || {};
801
+ const success = row.success === true;
802
+ const existing = row.existing_value;
803
+ if (success && existing?.startsWith('$pending::')) {
804
+ return {
805
+ success: true,
806
+ pendingData: existing.slice('$pending::'.length),
807
+ };
808
+ }
809
+ return { success };
810
+ }
811
+ catch (error) {
812
+ if (error?.message?.includes('closed') ||
813
+ error?.message?.includes('queryable')) {
814
+ return { success: false };
815
+ }
816
+ throw error;
817
+ }
762
818
  }
763
- async getHookSignal(topic, resolved) {
819
+ /**
820
+ * Leg2: get hook signal OR atomically set a pending signal.
821
+ *
822
+ * When `pendingData` is provided and no hook signal exists, the
823
+ * pending value is inserted in the SAME SQL statement — no second
824
+ * round trip. This is the transactional edge that prevents the
825
+ * signal from being lost: by the time the query returns, the
826
+ * pending key is already visible to leg1's setnxex.
827
+ *
828
+ * When `pendingData` is omitted, behaves as a plain read.
829
+ */
830
+ async getHookSignal(topic, resolved, pendingData, pendingExpire) {
764
831
  const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
765
- const response = await this.kvsql().get(`${key}:${topic}:${resolved}`);
766
- return response ? response.toString() : undefined;
832
+ const fullKey = `${key}:${topic}:${resolved}`;
833
+ if (!pendingData) {
834
+ //plain read (used by deleteWebHookSignal path, tests, etc.)
835
+ const response = await this.kvsql().get(fullKey);
836
+ if (!response)
837
+ return undefined;
838
+ const value = response.toString();
839
+ if (value.startsWith('$pending::'))
840
+ return undefined;
841
+ return value;
842
+ }
843
+ //atomic get-or-set-pending: one round trip
844
+ const kv = this.kvsql();
845
+ const tableName = kv.tableForKey(fullKey);
846
+ const storedKey = kv.storageKey(fullKey);
847
+ const expire = pendingExpire || enums_1.HMSH_PENDING_SIGNAL_EXPIRE;
848
+ const pendingValue = `$pending::${pendingData}`;
849
+ const sql = `
850
+ WITH existing AS (
851
+ SELECT value FROM ${tableName}
852
+ WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
853
+ ),
854
+ pending AS (
855
+ INSERT INTO ${tableName} (key, value, expiry)
856
+ SELECT $1, $2, NOW() + INTERVAL '${expire} seconds'
857
+ WHERE NOT EXISTS (SELECT 1 FROM existing)
858
+ ON CONFLICT (key) DO UPDATE
859
+ SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
860
+ WHERE ${tableName}.expiry IS NULL OR ${tableName}.expiry <= NOW()
861
+ RETURNING true as inserted
862
+ )
863
+ SELECT
864
+ (SELECT value FROM existing) as hook_value,
865
+ (SELECT inserted FROM pending) as pending_inserted
866
+ `;
867
+ try {
868
+ const res = await this.pgClient.query(sql, [storedKey, pendingValue]);
869
+ const row = res.rows[0] || {};
870
+ const hookValue = row.hook_value;
871
+ if (hookValue && !hookValue.startsWith('$pending::')) {
872
+ return hookValue;
873
+ }
874
+ //no hook signal; pending was inserted (or already existed)
875
+ return undefined;
876
+ }
877
+ catch (error) {
878
+ if (error?.message?.includes('closed') ||
879
+ error?.message?.includes('queryable')) {
880
+ return undefined;
881
+ }
882
+ throw error;
883
+ }
767
884
  }
768
885
  async deleteHookSignal(topic, resolved) {
769
886
  const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
@@ -29,7 +29,10 @@ declare class TaskService {
29
29
  processTimeHooks(timeEventCallback: (jobId: string, gId: string, activityId: string, type: WorkListTaskType) => Promise<void>, listKey?: string): Promise<void>;
30
30
  cancelCleanup(): void;
31
31
  getHookRule(topic: string): Promise<HookRule | undefined>;
32
- registerWebHook(topic: string, context: JobState, dad: string, expire: number, transaction?: ProviderTransaction): Promise<string>;
32
+ registerWebHook(topic: string, context: JobState, dad: string, expire: number): Promise<{
33
+ jobId: string;
34
+ pending?: string;
35
+ }>;
33
36
  processWebHookSignal(topic: string, data: Record<string, unknown>): Promise<[string, string, string, string] | undefined>;
34
37
  deleteWebHookSignal(topic: string, data: Record<string, unknown>): Promise<number>;
35
38
  /**
@@ -134,7 +134,7 @@ class TaskService {
134
134
  const rules = await this.store.getHookRules();
135
135
  return rules?.[topic]?.[0];
136
136
  }
137
- async registerWebHook(topic, context, dad, expire, transaction) {
137
+ async registerWebHook(topic, context, dad, expire) {
138
138
  const hookRule = await this.getHookRule(topic);
139
139
  if (hookRule) {
140
140
  const mapExpression = hookRule.conditions.match[0].expected;
@@ -150,8 +150,27 @@ class TaskService {
150
150
  jobId: compositeJobKey,
151
151
  expire,
152
152
  };
153
- await this.store.setHookSignal(hook, transaction);
154
- return jobId;
153
+ //called standalone (no transaction) so the single CTE query can
154
+ //atomically detect and return pending signal data on collision
155
+ const result = await this.store.setHookSignal(hook);
156
+ if (result.pendingData) {
157
+ this.logger.warn('task-signal-race-pending-consumed', {
158
+ topic,
159
+ resolved,
160
+ jobId,
161
+ });
162
+ return { jobId, pending: result.pendingData };
163
+ }
164
+ if (!result.success) {
165
+ //setnxex failed but no pending signal; likely a retry where
166
+ //our own hook signal was already set. continue normally.
167
+ this.logger.debug('task-signal-hook-already-set', {
168
+ topic,
169
+ resolved,
170
+ jobId,
171
+ });
172
+ }
173
+ return { jobId };
155
174
  }
156
175
  else {
157
176
  throw new Error('signaler.registerWebHook:error: hook rule not found');
@@ -165,10 +184,19 @@ class TaskService {
165
184
  const context = { $self: { hook: { data } }, $hook: { data } };
166
185
  const mapExpression = hookRule.conditions.match[0].actual;
167
186
  const resolved = pipe_1.Pipe.resolve(mapExpression, context);
168
- const hookSignalId = await this.store.getHookSignal(topic, resolved);
187
+ //resolve $expire override from the signal data (e.g., '1h', '30d')
188
+ const pendingExpire = typeof data.$expire === 'string'
189
+ ? (0, utils_1.s)(data.$expire)
190
+ : enums_1.HMSH_PENDING_SIGNAL_EXPIRE;
191
+ //atomic: returns the hook signal, or stores a pending signal
192
+ //in the same SQL statement if no hook is registered yet
193
+ const hookSignalId = await this.store.getHookSignal(topic, resolved, JSON.stringify(data), pendingExpire);
169
194
  if (!hookSignalId) {
170
- //messages can be double-processed; not an issue; return `undefined`
171
- //users can also provide a bogus topic; not an issue; return `undefined`
195
+ this.logger.warn('task-signal-race-pending-stored', {
196
+ topic,
197
+ resolved,
198
+ expire: pendingExpire,
199
+ });
172
200
  return undefined;
173
201
  }
174
202
  //`aid` is part of composite key, but the hook `topic` is its public interface;
@@ -103,6 +103,15 @@ export interface PruneOptions {
103
103
  * @default false
104
104
  */
105
105
  keepHmark?: boolean;
106
+ /**
107
+ * If true, hard-deletes expired rows from `signal_registry`.
108
+ * These include consumed hook signals and stale pending signals
109
+ * (signals that arrived before hook registration but were never
110
+ * claimed). All signal_registry entries have a natural `expiry`
111
+ * column; this operation removes rows whose expiry has passed.
112
+ * @default true
113
+ */
114
+ signals?: boolean;
106
115
  }
107
116
  /**
108
117
  * Result returned by `DBA.prune()`, providing deletion
@@ -123,4 +132,6 @@ export interface PruneResult {
123
132
  transient: number;
124
133
  /** Number of jobs marked as pruned (pruned_at set) */
125
134
  marked: number;
135
+ /** Number of expired signal_registry rows hard-deleted */
136
+ signals: number;
126
137
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.14.4",
3
+ "version": "0.14.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",