@hotmeshio/hotmesh 0.22.6 → 0.22.8

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.22.6",
3
+ "version": "0.22.8",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -167,7 +167,7 @@ class ClientService {
167
167
  pending: options?.pending,
168
168
  entity: options?.entity,
169
169
  });
170
- return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, jobId);
170
+ return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, jobId, this.escalations);
171
171
  },
172
172
  /**
173
173
  * Sends a message payload to a running workflow that is paused and awaiting the signal.
@@ -267,7 +267,7 @@ class ClientService {
267
267
  getHandle: async (taskQueue, workflowName, workflowId, namespace) => {
268
268
  const workflowTopic = `${taskQueue}-${workflowName}`;
269
269
  const hotMeshClient = await this.getHotMeshClient(taskQueue, namespace);
270
- return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, workflowId);
270
+ return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, workflowId, this.escalations);
271
271
  },
272
272
  /**
273
273
  * Provides direct access to the SEARCH backend
@@ -3,6 +3,7 @@ import { DurableJobExport, ExportOptions, ExecutionExportOptions, WorkflowExecut
3
3
  import { JobInterruptOptions } from '../../types/job';
4
4
  import { StreamError } from '../../types/stream';
5
5
  import { ExporterService } from './exporter';
6
+ import { EscalationClientService } from '../escalations/client';
6
7
  /**
7
8
  * Handle to a running or completed workflow execution. Returned by
8
9
  * `client.workflow.start()` and `client.workflow.getHandle()`.
@@ -32,10 +33,12 @@ export declare class WorkflowHandleService {
32
33
  hotMesh: HotMesh;
33
34
  workflowTopic: string;
34
35
  workflowId: string;
36
+ /** @private */
37
+ escalationClient?: EscalationClientService;
35
38
  /**
36
39
  * @private
37
40
  */
38
- constructor(hotMesh: HotMesh, workflowTopic: string, workflowId: string);
41
+ constructor(hotMesh: HotMesh, workflowTopic: string, workflowId: string, escalationClient?: EscalationClientService);
39
42
  /**
40
43
  * Export the raw workflow state as a {@link DurableJobExport} with five sections:
41
44
  *
@@ -126,6 +129,12 @@ export declare class WorkflowHandleService {
126
129
  * subscribers are notified, and the job hash is expired. Unlike
127
130
  * {@link cancel}, this does **not** give the workflow a chance to
128
131
  * run cleanup code.
132
+ *
133
+ * Any pending escalations for this workflow are cancelled in the same
134
+ * Postgres transaction that decrements the job semaphore — one atomic
135
+ * write, no TOCTOU. A `system.escalation.*.cancelled` event is emitted
136
+ * locally for each cancelled row via the configured `events.publish`
137
+ * sink — instance-local only, never broadcast.
129
138
  */
130
139
  terminate(options?: JobInterruptOptions): Promise<string>;
131
140
  /**
@@ -27,10 +27,11 @@ class WorkflowHandleService {
27
27
  /**
28
28
  * @private
29
29
  */
30
- constructor(hotMesh, workflowTopic, workflowId) {
30
+ constructor(hotMesh, workflowTopic, workflowId, escalationClient) {
31
31
  this.workflowTopic = workflowTopic;
32
32
  this.workflowId = workflowId;
33
33
  this.hotMesh = hotMesh;
34
+ this.escalationClient = escalationClient;
34
35
  this.exporter = new exporter_1.ExporterService(this.hotMesh.appId, this.hotMesh.engine.store, this.hotMesh.engine.logger);
35
36
  }
36
37
  /**
@@ -157,9 +158,23 @@ class WorkflowHandleService {
157
158
  * subscribers are notified, and the job hash is expired. Unlike
158
159
  * {@link cancel}, this does **not** give the workflow a chance to
159
160
  * run cleanup code.
161
+ *
162
+ * Any pending escalations for this workflow are cancelled in the same
163
+ * Postgres transaction that decrements the job semaphore — one atomic
164
+ * write, no TOCTOU. A `system.escalation.*.cancelled` event is emitted
165
+ * locally for each cancelled row via the configured `events.publish`
166
+ * sink — instance-local only, never broadcast.
160
167
  */
161
168
  async terminate(options) {
162
- return await this.hotMesh.interrupt(`${this.hotMesh.appId}.execute`, this.workflowId, options);
169
+ let cancelledEntries = [];
170
+ const result = await this.hotMesh.interrupt(`${this.hotMesh.appId}.execute`, this.workflowId, {
171
+ ...options,
172
+ onEscalationsCancelled: (entries) => { cancelledEntries = entries; },
173
+ });
174
+ if (this.escalationClient && cancelledEntries.length > 0) {
175
+ this.escalationClient.emitCancelledBatch(cancelledEntries);
176
+ }
177
+ return result;
163
178
  }
164
179
  /**
165
180
  * Requests cooperative cancellation of the workflow. Unlike
@@ -97,6 +97,7 @@ import { ConditionQueueConfig } from '../../../types/hmsh_escalations';
97
97
  * {@link ConditionQueueConfig} that writes one row to `public.hmsh_escalations`
98
98
  * atomically at suspension time. Cannot specify both; use the config object's
99
99
  * `expiresAt` field for deadline enforcement when an escalation is involved.
100
- * @returns The signal payload, or `false` if a timeout string was given and it expired.
100
+ * @returns The signal payload, `false` if a timeout string was given and it expired,
101
+ * or `null` if the escalation was cancelled via `client.escalations.cancel()`.
101
102
  */
102
- export declare function condition<T>(signalId: string, timeoutOrConfig?: string | ConditionQueueConfig): Promise<T | false>;
103
+ export declare function condition<T>(signalId: string, timeoutOrConfig?: string | ConditionQueueConfig): Promise<T | false | null>;
@@ -102,7 +102,8 @@ const didRun_1 = require("./didRun");
102
102
  * {@link ConditionQueueConfig} that writes one row to `public.hmsh_escalations`
103
103
  * atomically at suspension time. Cannot specify both; use the config object's
104
104
  * `expiresAt` field for deadline enforcement when an escalation is involved.
105
- * @returns The signal payload, or `false` if a timeout string was given and it expired.
105
+ * @returns The signal payload, `false` if a timeout string was given and it expired,
106
+ * or `null` if the escalation was cancelled via `client.escalations.cancel()`.
106
107
  */
107
108
  async function condition(signalId, timeoutOrConfig) {
108
109
  const timeout = typeof timeoutOrConfig === 'string' ? timeoutOrConfig : undefined;
@@ -127,7 +128,13 @@ async function condition(signalId, timeoutOrConfig) {
127
128
  if (result?.timedOut) {
128
129
  return false;
129
130
  }
130
- return result.data.data;
131
+ const signalData = result.data?.data;
132
+ // If the escalation was cancelled via cancel(), the signal carries this marker.
133
+ // Return null so the workflow can distinguish cancellation from a real resolution.
134
+ if (signalData && typeof signalData === 'object' && signalData.__escalation_cancelled === true) {
135
+ return null;
136
+ }
137
+ return signalData;
131
138
  }
132
139
  const store = common_1.asyncLocalStorage.getStore();
133
140
  // Emit DISPATCH span in debug mode
@@ -49,10 +49,21 @@ async function getVID(instance, vid) {
49
49
  }
50
50
  return { id: instance.appId, version: app.version };
51
51
  }
52
- else if (!instance.apps && vid) {
53
- instance.apps = {};
54
- instance.apps[instance.appId] = vid;
55
- return vid;
52
+ else if (!instance.apps) {
53
+ // First call — always DB-refresh to avoid locking in a stale version.
54
+ // The `vid` parameter originates from store.getApp() (which may be
55
+ // cached from before the last activation). If a worker missed the
56
+ // nocache NOTIFY (startup race: LISTEN not yet established when the
57
+ // NOTIFY fired), it would lock in the pre-activation version for its
58
+ // entire lifetime, silently loading the old schema on every request.
59
+ // One extra DB query here, once per engine lifetime, eliminates the
60
+ // race regardless of NOTIFY delivery.
61
+ const id = vid?.id ?? instance.appId;
62
+ const freshApp = await instance.store.getApp(id, true);
63
+ if (!instance.apps)
64
+ instance.apps = {};
65
+ instance.apps[instance.appId] = freshApp;
66
+ return { id: freshApp.id, version: freshApp.version };
56
67
  }
57
68
  else {
58
69
  return await fetchAndVerifyVID(instance, {
@@ -120,10 +120,22 @@ export declare class EscalationClientService {
120
120
  */
121
121
  escalateToRole(params: EscalateToRoleParams): Promise<EscalationEntry | null>;
122
122
  /**
123
- * Cancels a pending escalation without delivering a signal. Terminal rows
124
- * return `already-terminal`.
123
+ * Cancels a pending escalation and delivers a cancellation signal to the
124
+ * waiting workflow so that `condition()` returns `null`. Terminal rows
125
+ * return `already-terminal`. Signal delivery is best-effort post-commit —
126
+ * the committed cancelled row is the durable record; any missed delivery
127
+ * can be detected via a sweep of rows with `status = 'cancelled'` and a
128
+ * non-null `signal_key`.
125
129
  */
126
130
  cancel(id: string, namespace?: string): Promise<CancelEscalationResult>;
131
+ /**
132
+ * Emits local `cancelled` events for a batch of already-cancelled escalation
133
+ * entries. Called by `WorkflowHandleService.terminate()` after the single
134
+ * atomic transaction that interrupts the workflow and cancels its escalations
135
+ * has committed. Fire-and-forget via the configured `events.publish` sink
136
+ * (e.g. NATS) — instance-local, never broadcast via Postgres LISTEN/NOTIFY.
137
+ */
138
+ emitCancelledBatch(entries: EscalationEntry[]): void;
127
139
  /**
128
140
  * Resolves a pending escalation by UUID. Uses an explicit Postgres transaction
129
141
  * with FOR UPDATE + WHERE guard: only one concurrent caller can commit the
@@ -242,16 +242,38 @@ class EscalationClientService {
242
242
  return entry;
243
243
  }
244
244
  /**
245
- * Cancels a pending escalation without delivering a signal. Terminal rows
246
- * return `already-terminal`.
245
+ * Cancels a pending escalation and delivers a cancellation signal to the
246
+ * waiting workflow so that `condition()` returns `null`. Terminal rows
247
+ * return `already-terminal`. Signal delivery is best-effort post-commit —
248
+ * the committed cancelled row is the durable record; any missed delivery
249
+ * can be detected via a sweep of rows with `status = 'cancelled'` and a
250
+ * non-null `signal_key`.
247
251
  */
248
252
  async cancel(id, namespace) {
249
- const hm = await this._engine(null, namespace);
253
+ const ns = namespace ?? factory_1.APP_ID;
254
+ const hm = await this._engine(null, ns);
250
255
  const result = await hm.engine.store.cancelEscalation(id, namespace);
251
- if (result.ok === true)
256
+ if (result.ok === true) {
252
257
  this._emit('cancelled', result.entry);
258
+ if (result.entry.signal_key) {
259
+ await this._deliverEscalationSignal(ns, result.entry.topic, {
260
+ id: result.entry.signal_key,
261
+ data: { __escalation_cancelled: true },
262
+ });
263
+ }
264
+ }
253
265
  return result;
254
266
  }
267
+ /**
268
+ * Emits local `cancelled` events for a batch of already-cancelled escalation
269
+ * entries. Called by `WorkflowHandleService.terminate()` after the single
270
+ * atomic transaction that interrupts the workflow and cancels its escalations
271
+ * has committed. Fire-and-forget via the configured `events.publish` sink
272
+ * (e.g. NATS) — instance-local, never broadcast via Postgres LISTEN/NOTIFY.
273
+ */
274
+ emitCancelledBatch(entries) {
275
+ this._emitMany('cancelled', entries);
276
+ }
255
277
  /**
256
278
  * Resolves a pending escalation by UUID. Uses an explicit Postgres transaction
257
279
  * with FOR UPDATE + WHERE guard: only one concurrent caller can commit the
@@ -1009,32 +1009,23 @@ class PostgresStoreService extends __1.StoreService {
1009
1009
  */
1010
1010
  async interrupt(topic, jobId, options = {}) {
1011
1011
  try {
1012
- //verify job exists
1012
+ //pre-flight: bail early if already inactive (optimization; hincrbyfloat is the real guard)
1013
1013
  const status = await this.getStatus(jobId, this.appId);
1014
1014
  if (status <= 0) {
1015
- //verify still active; job already completed
1016
1015
  throw new Error(`Job ${jobId} already completed`);
1017
1016
  }
1018
- //decrement job status (:) by 1bil
1019
1017
  const amount = -1000000000;
1020
- const jobKey = this.mintKey(key_1.KeyType.JOB_STATE, {
1021
- appId: this.appId,
1022
- jobId,
1023
- });
1024
- const result = await this.kvsql().hincrbyfloat(jobKey, ':', amount);
1025
- if (result <= amount) {
1026
- //verify active state; job already interrupted
1027
- throw new Error(`Job ${jobId} already completed`);
1028
- }
1029
- //persist the error unless specifically told not to
1018
+ const jobKey = this.mintKey(key_1.KeyType.JOB_STATE, { appId: this.appId, jobId });
1019
+ //build error symbol BEFORE opening the transaction — symbol lookup is read-only
1020
+ let errSymbol;
1021
+ let err;
1030
1022
  if (options.throw !== false) {
1031
- const errKey = `metadata/err`; //job errors are stored at the path `metadata/err`
1032
- const symbolNames = [`$${topic}`]; //the symbol for `metadata/err` is in the backend
1023
+ const errKey = `metadata/err`;
1024
+ const symbolNames = [`$${topic}`];
1033
1025
  const symKeys = await this.getSymbolKeys(symbolNames);
1034
1026
  const symVals = await this.getSymbolValues();
1035
1027
  this.serializer.resetSymbols(symKeys, symVals, {});
1036
- //persists the standard 410 error (job is `gone`)
1037
- const err = JSON.stringify({
1028
+ err = JSON.stringify({
1038
1029
  code: options.code ?? enums_1.HMSH_CODE_INTERRUPT,
1039
1030
  message: options.reason ?? `job [${jobId}] interrupted`,
1040
1031
  stack: options.stack ?? '',
@@ -1042,9 +1033,32 @@ class PostgresStoreService extends __1.StoreService {
1042
1033
  });
1043
1034
  const payload = { [errKey]: amount.toString() };
1044
1035
  const hashData = this.serializer.package(payload, symbolNames);
1045
- const errSymbol = Object.keys(hashData)[0];
1046
- await this.kvsql().hset(jobKey, { [errSymbol]: err });
1036
+ errSymbol = Object.keys(hashData)[0];
1037
+ }
1038
+ //single transaction: status decrement + optional error write + escalation cancel.
1039
+ //WHERE guard on the escalation UPDATE prevents double-cancel;
1040
+ //hincrbyfloat is the atomic idempotency proof checked post-commit.
1041
+ const txn = this.kvsql(this.transact());
1042
+ txn.hincrbyfloat(jobKey, ':', amount);
1043
+ if (errSymbol && err) {
1044
+ txn.hset(jobKey, { [errSymbol]: err });
1045
+ }
1046
+ txn.addCommand(`UPDATE public.hmsh_escalations
1047
+ SET status = 'cancelled', updated_at = NOW()
1048
+ WHERE workflow_id = $1
1049
+ AND app_id = $2
1050
+ AND status = 'pending'
1051
+ RETURNING *`, [jobId, this.appId], 'array', (rows) => rows);
1052
+ const results = await txn.exec();
1053
+ //results[0] = new status after hincrbyfloat — the atomic idempotency guard
1054
+ const newStatus = results[0];
1055
+ if (newStatus <= amount) {
1056
+ throw new Error(`Job ${jobId} already completed`);
1047
1057
  }
1058
+ //fire the callback with any escalation rows cancelled in this same transaction;
1059
+ //no second query, no second transaction
1060
+ const cancelledEntries = (results[results.length - 1] || []);
1061
+ options.onEscalationsCancelled?.(cancelledEntries);
1048
1062
  }
1049
1063
  catch (e) {
1050
1064
  if (!options.suppress) {
@@ -130,7 +130,10 @@ type JobInterruptOptions = {
130
130
  */
131
131
  throw?: boolean;
132
132
  /**
133
- * interrupt child/descendant jobs
133
+ * Reserved for future use: interrupt child/descendant jobs.
134
+ * This flag is parsed and threaded through the engine call chain
135
+ * but is **not yet implemented** — child workflows are not interrupted
136
+ * when this is set. Do not rely on it.
134
137
  * @default false
135
138
  */
136
139
  descend?: boolean;
@@ -154,6 +157,13 @@ type JobInterruptOptions = {
154
157
  * Optional stack trace
155
158
  */
156
159
  stack?: string;
160
+ /**
161
+ * Fires synchronously inside `store.interrupt()` after the single
162
+ * transaction that decrements the job semaphore AND cancels pending
163
+ * escalations commits. Only used by `WorkflowHandleService.terminate()`
164
+ * to emit local events without a second transaction or a separate query.
165
+ */
166
+ onEscalationsCancelled?: (entries: any[]) => void;
157
167
  };
158
168
  /**
159
169
  * format when publishing job meta/data on the wire when it completes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.22.6",
3
+ "version": "0.22.8",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",