@hotmeshio/hotmesh 0.16.1 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.16.1",
3
+ "version": "0.16.2",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -3,7 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.processEvent = void 0;
4
4
  const enums_1 = require("../../../modules/enums");
5
5
  const errors_1 = require("../../../modules/errors");
6
- const collator_1 = require("../../collator");
6
+ const collator_1 = require("../../../types/collator");
7
+ const collator_2 = require("../../collator");
7
8
  const telemetry_1 = require("../../telemetry");
8
9
  const stream_1 = require("../../../types/stream");
9
10
  // Per-instance collation error tracking for reservation timeout detection
@@ -34,7 +35,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
34
35
  try {
35
36
  const collationKey = await instance.verifyReentry();
36
37
  instance.adjacentIndex =
37
- collator_1.CollatorService.getDimensionalIndex(collationKey);
38
+ collator_2.CollatorService.getDimensionalIndex(collationKey);
38
39
  telemetry = new telemetry_1.TelemetryService(instance.engine.appId, instance.config, instance.metadata, instance.context);
39
40
  telemetry.startActivitySpan(instance.leg);
40
41
  //bind data per status type
@@ -71,10 +72,27 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
71
72
  }
72
73
  catch (error) {
73
74
  if (error instanceof errors_1.CollationError) {
74
- // INACTIVE is legitimate duplicate detection — the Postgres atomic
75
- // CTE (collateLeg2Entry) serializes via row locks, so the GUID
76
- // ledger value is correct. Silent ack is the right behavior:
77
- // the work was already done by a prior delivery of this message.
75
+ //FORBIDDEN: Leg1 not complete — signal arrived in the window
76
+ //between registerHook (standalone) and Leg1 transaction commit.
77
+ //Rethrow so the stream message is retried with backoff; by then
78
+ //Leg1 will have committed and Leg2 processing will succeed.
79
+ //The GUID marker was already committed by notarizeLeg2Entry;
80
+ //on retry, collateLeg2Entry's SETNX is a no-op for the same
81
+ //GUID, and verifySyntheticInteger sees no steps done → allowed.
82
+ if (error.fault === collator_1.CollationFaultType.FORBIDDEN) {
83
+ instance.logger.warn('process-event-forbidden-retry', {
84
+ jid: instance.context.metadata.jid,
85
+ aid: instance.metadata.aid,
86
+ message: 'Leg1 not committed yet; rethrowing for stream retry',
87
+ error,
88
+ });
89
+ throw error;
90
+ }
91
+ // INACTIVE/DUPLICATE: legitimate duplicate detection — the
92
+ // Postgres atomic CTE (collateLeg2Entry) serializes via row
93
+ // locks, so the GUID ledger value is correct. Silent ack is
94
+ // the right behavior: the work was already done by a prior
95
+ // delivery of this message.
78
96
  const now = Date.now();
79
97
  if (now - collationWindowStart > COLLATION_WINDOW_MS) {
80
98
  collationErrorCount = 0;
@@ -118,9 +118,14 @@ declare class PostgresStoreService extends StoreService<ProviderClient, Provider
118
118
  /**
119
119
  * Leg1: set hook signal, atomically detecting a pending signal.
120
120
  *
121
- * Standalone (no transaction): single CTE query that reads any existing
122
- * pending value, then inserts the hook signal (overwriting pending or
123
- * expired entries). Returns `{success, pendingData}` in one round trip.
121
+ * Standalone (no transaction): acquires a per-key advisory lock to
122
+ * serialize with concurrent getHookSignal calls, then reads any
123
+ * existing pending value and inserts the hook signal.
124
+ *
125
+ * The advisory lock prevents a race where the CTE's read snapshot
126
+ * misses a concurrently inserted pending signal — under READ
127
+ * COMMITTED, ON CONFLICT sees committed writes but the SELECT CTE
128
+ * does not, causing the pending data to be silently overwritten.
124
129
  *
125
130
  * In a transaction: queues the setnxex; pending detection deferred.
126
131
  */
@@ -132,10 +137,13 @@ declare class PostgresStoreService extends StoreService<ProviderClient, Provider
132
137
  * Leg2: get hook signal OR atomically set a pending signal.
133
138
  *
134
139
  * When `pendingData` is provided and no hook signal exists, the
135
- * pending value is inserted in the SAME SQL statement — no second
136
- * round trip. This is the transactional edge that prevents the
137
- * signal from being lost: by the time the query returns, the
138
- * pending key is already visible to leg1's setnxex.
140
+ * pending value is stored so leg1's setHookSignal can detect it.
141
+ *
142
+ * Uses a per-key advisory lock to serialize with concurrent
143
+ * setHookSignal calls. Without the lock, a CTE race exists where
144
+ * the read snapshot misses a concurrently inserted hook signal AND
145
+ * the pending INSERT fails on conflict (the hook has valid expiry),
146
+ * silently losing the signal.
139
147
  *
140
148
  * When `pendingData` is omitted, behaves as a plain read.
141
149
  */
@@ -755,9 +755,14 @@ class PostgresStoreService extends __1.StoreService {
755
755
  /**
756
756
  * Leg1: set hook signal, atomically detecting a pending signal.
757
757
  *
758
- * Standalone (no transaction): single CTE query that reads any existing
759
- * pending value, then inserts the hook signal (overwriting pending or
760
- * expired entries). Returns `{success, pendingData}` in one round trip.
758
+ * Standalone (no transaction): acquires a per-key advisory lock to
759
+ * serialize with concurrent getHookSignal calls, then reads any
760
+ * existing pending value and inserts the hook signal.
761
+ *
762
+ * The advisory lock prevents a race where the CTE's read snapshot
763
+ * misses a concurrently inserted pending signal — under READ
764
+ * COMMITTED, ON CONFLICT sees committed writes but the SELECT CTE
765
+ * does not, causing the pending data to be silently overwritten.
761
766
  *
762
767
  * In a transaction: queues the setnxex; pending detection deferred.
763
768
  */
@@ -774,37 +779,30 @@ class PostgresStoreService extends __1.StoreService {
774
779
  const kv = this.kvsql();
775
780
  const tableName = kv.tableForKey(fullKey);
776
781
  const storedKey = kv.storageKey(fullKey);
777
- const sql = `
778
- WITH pre AS (
779
- SELECT value FROM ${tableName}
780
- WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
781
- ),
782
- ins AS (
783
- INSERT INTO ${tableName} (key, value, expiry)
784
- VALUES ($1, $2, NOW() + INTERVAL '${delay} seconds')
785
- ON CONFLICT (key) DO UPDATE
786
- SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
787
- WHERE ${tableName}.expiry IS NULL
788
- OR ${tableName}.expiry <= NOW()
789
- OR ${tableName}.value LIKE '$pending::%'
790
- RETURNING true as success
791
- )
792
- SELECT
793
- COALESCE((SELECT success FROM ins), false) as success,
794
- (SELECT value FROM pre) as existing_value
795
- `;
782
+ //acquire per-key advisory lock (session-level) to serialize
783
+ //with concurrent getHookSignal for the same signal key
784
+ await this.pgClient.query('SELECT pg_advisory_lock(901, hashtext($1))', [storedKey]);
796
785
  try {
797
- const res = await this.pgClient.query(sql, [storedKey, jobId]);
798
- const row = res.rows[0] || {};
799
- const success = row.success === true;
800
- const existing = row.existing_value;
801
- if (success && existing?.startsWith('$pending::')) {
802
- return {
803
- success: true,
804
- pendingData: existing.slice('$pending::'.length),
805
- };
786
+ //read existing value under lock
787
+ const readRes = await this.pgClient.query(`SELECT value FROM ${tableName}
788
+ WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())`, [storedKey]);
789
+ let pendingData;
790
+ if (readRes.rows.length > 0) {
791
+ const existing = readRes.rows[0].value;
792
+ if (existing?.startsWith('$pending::')) {
793
+ pendingData = existing.slice('$pending::'.length);
794
+ }
795
+ else {
796
+ //hook already set (retry) — no change needed
797
+ return { success: false };
798
+ }
806
799
  }
807
- return { success };
800
+ //insert hook value (or overwrite pending)
801
+ await this.pgClient.query(`INSERT INTO ${tableName} (key, value, expiry)
802
+ VALUES ($1, $2, NOW() + INTERVAL '${delay} seconds')
803
+ ON CONFLICT (key) DO UPDATE
804
+ SET value = EXCLUDED.value, expiry = EXCLUDED.expiry`, [storedKey, jobId]);
805
+ return { success: true, pendingData };
808
806
  }
809
807
  catch (error) {
810
808
  if (error?.message?.includes('closed') ||
@@ -813,15 +811,26 @@ class PostgresStoreService extends __1.StoreService {
813
811
  }
814
812
  throw error;
815
813
  }
814
+ finally {
815
+ try {
816
+ await this.pgClient.query('SELECT pg_advisory_unlock(901, hashtext($1))', [storedKey]);
817
+ }
818
+ catch {
819
+ //lock auto-releases on session close
820
+ }
821
+ }
816
822
  }
817
823
  /**
818
824
  * Leg2: get hook signal OR atomically set a pending signal.
819
825
  *
820
826
  * When `pendingData` is provided and no hook signal exists, the
821
- * pending value is inserted in the SAME SQL statement — no second
822
- * round trip. This is the transactional edge that prevents the
823
- * signal from being lost: by the time the query returns, the
824
- * pending key is already visible to leg1's setnxex.
827
+ * pending value is stored so leg1's setHookSignal can detect it.
828
+ *
829
+ * Uses a per-key advisory lock to serialize with concurrent
830
+ * setHookSignal calls. Without the lock, a CTE race exists where
831
+ * the read snapshot misses a concurrently inserted hook signal AND
832
+ * the pending INSERT fails on conflict (the hook has valid expiry),
833
+ * silently losing the signal.
825
834
  *
826
835
  * When `pendingData` is omitted, behaves as a plain read.
827
836
  */
@@ -838,38 +847,30 @@ class PostgresStoreService extends __1.StoreService {
838
847
  return undefined;
839
848
  return value;
840
849
  }
841
- //atomic get-or-set-pending: one round trip
842
850
  const kv = this.kvsql();
843
851
  const tableName = kv.tableForKey(fullKey);
844
852
  const storedKey = kv.storageKey(fullKey);
845
853
  const expire = pendingExpire || enums_1.HMSH_PENDING_SIGNAL_EXPIRE;
846
854
  const pendingValue = `$pending::${pendingData}`;
847
- const sql = `
848
- WITH existing AS (
849
- SELECT value FROM ${tableName}
850
- WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
851
- ),
852
- pending AS (
853
- INSERT INTO ${tableName} (key, value, expiry)
854
- SELECT $1, $2, NOW() + INTERVAL '${expire} seconds'
855
- WHERE NOT EXISTS (SELECT 1 FROM existing)
856
- ON CONFLICT (key) DO UPDATE
857
- SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
858
- WHERE ${tableName}.expiry IS NULL OR ${tableName}.expiry <= NOW()
859
- RETURNING true as inserted
860
- )
861
- SELECT
862
- (SELECT value FROM existing) as hook_value,
863
- (SELECT inserted FROM pending) as pending_inserted
864
- `;
855
+ //acquire per-key advisory lock (session-level) to serialize
856
+ //with concurrent setHookSignal for the same signal key
857
+ await this.pgClient.query('SELECT pg_advisory_lock(901, hashtext($1))', [storedKey]);
865
858
  try {
866
- const res = await this.pgClient.query(sql, [storedKey, pendingValue]);
867
- const row = res.rows[0] || {};
868
- const hookValue = row.hook_value;
869
- if (hookValue && !hookValue.startsWith('$pending::')) {
870
- return hookValue;
859
+ //read existing value under lock
860
+ const readRes = await this.pgClient.query(`SELECT value FROM ${tableName}
861
+ WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())`, [storedKey]);
862
+ if (readRes.rows.length > 0) {
863
+ const value = readRes.rows[0].value;
864
+ if (value && !value.startsWith('$pending::')) {
865
+ //hook found — return it
866
+ return value;
867
+ }
871
868
  }
872
- //no hook signal; pending was inserted (or already existed)
869
+ //no hook signal — store pending
870
+ await this.pgClient.query(`INSERT INTO ${tableName} (key, value, expiry)
871
+ VALUES ($1, $2, NOW() + INTERVAL '${expire} seconds')
872
+ ON CONFLICT (key) DO UPDATE
873
+ SET value = EXCLUDED.value, expiry = EXCLUDED.expiry`, [storedKey, pendingValue]);
873
874
  return undefined;
874
875
  }
875
876
  catch (error) {
@@ -879,6 +880,14 @@ class PostgresStoreService extends __1.StoreService {
879
880
  }
880
881
  throw error;
881
882
  }
883
+ finally {
884
+ try {
885
+ await this.pgClient.query('SELECT pg_advisory_unlock(901, hashtext($1))', [storedKey]);
886
+ }
887
+ catch {
888
+ //lock auto-releases on session close
889
+ }
890
+ }
882
891
  }
883
892
  async deleteHookSignal(topic, resolved) {
884
893
  const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.16.1",
3
+ "version": "0.16.2",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",