@hogsend/engine 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hogsend/engine",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -35,11 +35,11 @@
35
35
  "resend": "^6.12.3",
36
36
  "winston": "^3.19.0",
37
37
  "zod": "^4.4.3",
38
- "@hogsend/core": "^0.3.0",
39
- "@hogsend/email": "^0.3.0",
40
- "@hogsend/db": "^0.3.0",
41
- "@hogsend/plugin-posthog": "^0.3.0",
42
- "@hogsend/plugin-resend": "^0.3.0"
38
+ "@hogsend/core": "^0.4.0",
39
+ "@hogsend/db": "^0.4.0",
40
+ "@hogsend/email": "^0.4.0",
41
+ "@hogsend/plugin-posthog": "^0.4.0",
42
+ "@hogsend/plugin-resend": "^0.4.0"
43
43
  },
44
44
  "devDependencies": {
45
45
  "@types/node": "^22.15.3",
package/src/index.ts CHANGED
@@ -54,6 +54,7 @@ export {
54
54
  type DefinedJourney,
55
55
  defineJourney,
56
56
  } from "./journeys/define-journey.js";
57
+ export { JourneyExitedError } from "./journeys/errors.js";
57
58
  export { createJourneyContext } from "./journeys/journey-context.js";
58
59
  export {
59
60
  buildJourneyRegistry,
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Max active execution time configured on every journey durable task
3
+ * (`executionTimeout`). It is the single source of truth shared by
4
+ * `define-journey` (the task config) and `journey-context` (the `waitForEvent`
5
+ * timeout ceiling) so the two never drift.
6
+ *
7
+ * NOTE: on eviction-capable Hatchet engines (>= v0.80.0) a durable wait evicts
8
+ * the task and frees the worker slot, so a very long wall-clock wait MAY exceed
9
+ * this. We still treat it as our ceiling: `waitForEvent` rejects timeouts beyond
10
+ * it so they fail fast at authoring time rather than risk a mid-wait
11
+ * termination. Raise this to allow longer waits.
12
+ */
13
+ export const JOURNEY_EXECUTION_TIMEOUT_HOURS = 720;
14
+ export const JOURNEY_EXECUTION_TIMEOUT = `${JOURNEY_EXECUTION_TIMEOUT_HOURS}h`;
@@ -6,7 +6,7 @@ import type {
6
6
  JourneyUser,
7
7
  } from "@hogsend/core/types";
8
8
  import { contacts, journeyConfigs, journeyStates } from "@hogsend/db";
9
- import { and, eq, inArray } from "drizzle-orm";
9
+ import { and, eq, inArray, notInArray } from "drizzle-orm";
10
10
  import { getDb } from "../lib/db.js";
11
11
  import {
12
12
  checkEmailPreferences,
@@ -17,7 +17,9 @@ import { createLogger } from "../lib/logger.js";
17
17
  import { getPostHog } from "../lib/posthog.js";
18
18
  import { resolveTimezoneWithSource } from "../lib/timezone.js";
19
19
  import { getClientScheduleDefaults } from "./client-defaults-singleton.js";
20
- import { createJourneyContext } from "./journey-context.js";
20
+ import { JOURNEY_EXECUTION_TIMEOUT } from "./constants.js";
21
+ import { JourneyExitedError } from "./errors.js";
22
+ import { createJourneyContext, TERMINAL_STATUSES } from "./journey-context.js";
21
23
  import { getJourneyRegistrySingleton } from "./registry-singleton.js";
22
24
 
23
25
  const logger = createLogger(process.env.LOG_LEVEL);
@@ -43,7 +45,7 @@ export function defineJourney(options: {
43
45
  const task = hatchet.durableTask({
44
46
  name: `journey-${meta.id}`,
45
47
  onEvents: [meta.trigger.event],
46
- executionTimeout: "720h",
48
+ executionTimeout: JOURNEY_EXECUTION_TIMEOUT,
47
49
  retries: 0,
48
50
  fn: async (input: EventPayloadInput, hatchetCtx) => {
49
51
  const db = getDb();
@@ -203,17 +205,42 @@ export function defineJourney(options: {
203
205
 
204
206
  return { stateId, status: "completed" };
205
207
  } catch (err) {
208
+ // The journey reached a terminal state (exitOn / cancel) while suspended
209
+ // in a durable wait. The state row is already terminal — stop gracefully
210
+ // without marking it "failed" or re-pushing a journey:failed event.
211
+ if (err instanceof JourneyExitedError) {
212
+ return { stateId, status: "exited" };
213
+ }
214
+
206
215
  const message =
207
216
  err instanceof Error ? err.message : "Unknown error during journey";
208
217
 
209
- await db
218
+ // Mark "failed" ONLY if the row isn't already terminal. A run cancelled
219
+ // by exitOn (ingestEvent sets "exited" then `runs.cancel`) or by the
220
+ // admin route surfaces here as a Hatchet AbortError thrown from the
221
+ // suspended waitFor/sleepFor — NOT a JourneyExitedError. Guarding on a
222
+ // non-terminal status prevents clobbering that "exited" row to "failed"
223
+ // and emitting a spurious journey:failed event.
224
+ const [failed] = await db
210
225
  .update(journeyStates)
211
226
  .set({
212
227
  status: "failed",
213
228
  errorMessage: message,
214
229
  updatedAt: new Date(),
215
230
  })
216
- .where(eq(journeyStates.id, stateId));
231
+ .where(
232
+ and(
233
+ eq(journeyStates.id, stateId),
234
+ notInArray(journeyStates.status, [...TERMINAL_STATUSES]),
235
+ ),
236
+ )
237
+ .returning({ id: journeyStates.id });
238
+
239
+ if (!failed) {
240
+ // Already terminal (cancelled after exit) — swallow the cancellation
241
+ // so the run doesn't double-report as failed.
242
+ return { stateId, status: "exited" };
243
+ }
217
244
 
218
245
  await hatchet.events.push("journey:failed", {
219
246
  journeyId: meta.id,
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Thrown by durable wait primitives (e.g. `ctx.waitForEvent`) when the journey
3
+ * reached a terminal state — exited via `exitOn`, or cancelled — while it was
4
+ * suspended. It is a CONTROL-FLOW SIGNAL, not a failure: `defineJourney` catches
5
+ * it and stops the run gracefully WITHOUT marking the state `"failed"` (the row
6
+ * is already terminal). Consumers generally never observe it; it simply aborts
7
+ * `run()` before any post-wait side effect can fire.
8
+ */
9
+ export class JourneyExitedError extends Error {
10
+ readonly stateId: string;
11
+
12
+ constructor(stateId: string) {
13
+ super(`Journey state ${stateId} is no longer active (exited or cancelled)`);
14
+ this.name = "JourneyExitedError";
15
+ this.stateId = stateId;
16
+ }
17
+ }
@@ -1,6 +1,14 @@
1
- import type { HatchetClient } from "@hatchet-dev/typescript-sdk/v1/index.js";
1
+ import type {
2
+ Conditions,
3
+ HatchetClient,
4
+ } from "@hatchet-dev/typescript-sdk/v1/index.js";
5
+ import {
6
+ Or,
7
+ SleepCondition,
8
+ UserEventCondition,
9
+ } from "@hatchet-dev/typescript-sdk/v1/index.js";
2
10
  import type { DurationObject } from "@hogsend/core";
3
- import { evaluateEventCondition } from "@hogsend/core";
11
+ import { durationToMs, evaluateEventCondition } from "@hogsend/core";
4
12
  import type { JourneyRegistry } from "@hogsend/core/registry";
5
13
  import {
6
14
  isValidTimeZone,
@@ -19,10 +27,32 @@ import type {
19
27
  } from "@hogsend/core/types";
20
28
  import { type Database, emailSends, journeyStates } from "@hogsend/db";
21
29
  import type { PostHogService } from "@hogsend/plugin-posthog";
22
- import { and, count, eq, max } from "drizzle-orm";
30
+ import { and, count, eq, max, notInArray } from "drizzle-orm";
23
31
  import { checkEmailPreferences } from "../lib/enrollment-guards.js";
24
32
  import { ingestEvent } from "../lib/ingestion.js";
25
33
  import type { Logger } from "../lib/logger.js";
34
+ import {
35
+ JOURNEY_EXECUTION_TIMEOUT,
36
+ JOURNEY_EXECUTION_TIMEOUT_HOURS,
37
+ } from "./constants.js";
38
+ import { JourneyExitedError } from "./errors.js";
39
+
40
+ /** Journey statuses that are terminal — a journey in any of these must never be
41
+ * resurrected back to "active" by a wait resuming. Exported so the durable task
42
+ * runner can avoid clobbering a terminal row to "failed" on a cancel. */
43
+ export const TERMINAL_STATUSES = ["completed", "failed", "exited"] as const;
44
+
45
+ /** Upper bound for a `waitForEvent` timeout — the journey task's executionTimeout. */
46
+ const MAX_WAIT_MS = durationToMs({ hours: JOURNEY_EXECUTION_TIMEOUT_HOURS });
47
+
48
+ /**
49
+ * Quote a string as a CEL single-quoted string literal, escaping backslashes
50
+ * then single quotes. Used to embed an externally-supplied userId into a CEL
51
+ * filter expression without breaking it or allowing injection.
52
+ */
53
+ function celStringLiteral(value: string): string {
54
+ return `'${value.replace(/\\/g, "\\\\").replace(/'/g, "\\'")}'`;
55
+ }
26
56
 
27
57
  interface JourneyContextConfig {
28
58
  db: Database;
@@ -31,6 +61,12 @@ interface JourneyContextConfig {
31
61
  // Hatchet's real `sleepFor` accepts a number (milliseconds) in addition to
32
62
  // duration strings/objects; we use the number-ms form for `sleepUntil`.
33
63
  sleepFor: (duration: DurationObject | number) => Promise<unknown>;
64
+ // The forwarded object is the real Hatchet `DurableContext`, which also has
65
+ // `waitFor` (used by `waitForEvent`). Param mirrors the SDK signature so the
66
+ // real context is assignable; we read back the envelope as a plain record.
67
+ waitFor: (
68
+ conditions: Conditions | Conditions[],
69
+ ) => Promise<Record<string, unknown>>;
34
70
  };
35
71
  registry: JourneyRegistry;
36
72
  logger: Logger;
@@ -114,30 +150,103 @@ export function createJourneyContext(
114
150
  defaultSendWindow,
115
151
  } = config;
116
152
 
117
- // Shared wait lifecycle: mark the state "waiting", durably sleep, mark it
118
- // "active" again. `sleep` passes a DurationObject; `sleepUntil` passes a
119
- // precomputed ms delay — Hatchet's `sleepFor` accepts both.
153
+ // Enter a durable wait: flip "active" → "waiting", but ONLY if the journey
154
+ // hasn't already reached a terminal state (e.g. exitOn fired before we got
155
+ // here). A no-op update means the journey is already done — abort the run.
156
+ const enterWait = async (nodeId: string): Promise<void> => {
157
+ const entered = await db
158
+ .update(journeyStates)
159
+ .set({ status: "waiting", currentNodeId: nodeId, updatedAt: new Date() })
160
+ .where(
161
+ and(
162
+ eq(journeyStates.id, stateId),
163
+ notInArray(journeyStates.status, [...TERMINAL_STATUSES]),
164
+ ),
165
+ )
166
+ .returning({ id: journeyStates.id });
167
+
168
+ if (entered.length === 0) {
169
+ throw new JourneyExitedError(stateId);
170
+ }
171
+ };
172
+
173
+ // Resume from a durable wait: flip "waiting" → "active", but ONLY if the row
174
+ // is still "waiting". If an exit/cancel landed during the wait the row is no
175
+ // longer "waiting" — abort instead of reviving a terminated journey to active
176
+ // (which would let a post-wait side effect fire after the journey exited).
177
+ const resumeFromWait = async (): Promise<void> => {
178
+ const resumed = await db
179
+ .update(journeyStates)
180
+ .set({ status: "active", updatedAt: new Date() })
181
+ .where(
182
+ and(eq(journeyStates.id, stateId), eq(journeyStates.status, "waiting")),
183
+ )
184
+ .returning({ id: journeyStates.id });
185
+
186
+ if (resumed.length === 0) {
187
+ throw new JourneyExitedError(stateId);
188
+ }
189
+ };
190
+
191
+ // Durable sleep with the guarded waiting → active lifecycle. `sleep` passes a
192
+ // DurationObject; `sleepUntil` passes a precomputed ms delay — Hatchet's
193
+ // `sleepFor` accepts both.
120
194
  const performSleep = async (
121
195
  durationOrMs: DurationObject | number,
122
196
  nodeId: string,
123
197
  ): Promise<{ sleptAt: string; resumedAt: string }> => {
124
198
  const sleptAt = new Date().toISOString();
199
+ await enterWait(nodeId);
200
+ await hatchetCtx.sleepFor(durationOrMs);
201
+ const resumedAt = new Date().toISOString();
202
+ await resumeFromWait();
203
+ return { sleptAt, resumedAt };
204
+ };
125
205
 
126
- await db
127
- .update(journeyStates)
128
- .set({ status: "waiting", currentNodeId: nodeId, updatedAt: new Date() })
129
- .where(eq(journeyStates.id, stateId));
206
+ // Durably wait for THIS user's `event` OR `timeout`, whichever fires first,
207
+ // sharing the same guarded lifecycle as `performSleep`.
208
+ const performWaitForEvent = async (
209
+ event: string,
210
+ timeout: DurationObject,
211
+ nodeId: string,
212
+ ): Promise<{ timedOut: boolean }> => {
213
+ // Reject a timeout longer than the journey task's executionTimeout up front
214
+ // so it fails fast at authoring time. (Eviction-capable engines may allow
215
+ // longer wall-clock waits, but we cap to the configured ceiling — raise
216
+ // JOURNEY_EXECUTION_TIMEOUT to lift it.)
217
+ if (durationToMs(timeout) > MAX_WAIT_MS) {
218
+ throw new RangeError(
219
+ `waitForEvent timeout exceeds the journey execution limit (${JOURNEY_EXECUTION_TIMEOUT})`,
220
+ );
221
+ }
130
222
 
131
- await hatchetCtx.sleepFor(durationOrMs);
223
+ await enterWait(nodeId);
132
224
 
133
- const resumedAt = new Date().toISOString();
225
+ // Wait for the user-scoped event or the timeout. The event branch filters on
226
+ // the pushed payload's top-level `userId` (see `ingestEvent`); the SDK turns
227
+ // the ms number into a Go duration string at serialization time.
228
+ const result = await hatchetCtx.waitFor(
229
+ Or(
230
+ new UserEventCondition(
231
+ event,
232
+ `input.userId == ${celStringLiteral(userId)}`,
233
+ "event",
234
+ ),
235
+ new SleepCondition(durationToMs(timeout), "timeout"),
236
+ ),
237
+ );
134
238
 
135
- await db
136
- .update(journeyStates)
137
- .set({ status: "active", updatedAt: new Date() })
138
- .where(eq(journeyStates.id, stateId));
239
+ // Discriminate on which branch's readableDataKey ("event"/"timeout") is
240
+ // present. The eviction-capable path returns the `{ CREATE: { … } }`
241
+ // envelope; the pre-eviction path returns the inner object UN-wrapped — so
242
+ // strip an optional `CREATE` layer first to handle both shapes identically.
243
+ const fired = (("CREATE" in result ? result.CREATE : result) ??
244
+ {}) as Record<string, unknown>;
245
+ const timedOut = !("event" in fired);
139
246
 
140
- return { sleptAt, resumedAt };
247
+ await resumeFromWait();
248
+
249
+ return { timedOut };
141
250
  };
142
251
 
143
252
  return {
@@ -169,6 +278,14 @@ export function createJourneyContext(
169
278
  );
170
279
  },
171
280
 
281
+ async waitForEvent({ event, timeout, label }) {
282
+ return performWaitForEvent(
283
+ event,
284
+ timeout,
285
+ label ?? `wait-event:${event}`,
286
+ );
287
+ },
288
+
172
289
  async checkpoint(label) {
173
290
  await db
174
291
  .update(journeyStates)
@@ -76,7 +76,7 @@ export async function ingestEvent(opts: {
76
76
  userEmail: event.userEmail,
77
77
  properties: serializableProperties,
78
78
  }),
79
- checkExits(db, registry, {
79
+ checkExits(db, registry, hatchet, logger, {
80
80
  userId: event.userId,
81
81
  eventName: event.event,
82
82
  properties: event.properties,
@@ -130,6 +130,8 @@ export async function ingestEvent(opts: {
130
130
  async function checkExits(
131
131
  db: Database,
132
132
  registry: JourneyRegistry,
133
+ hatchet: HatchetClient,
134
+ logger: Logger,
133
135
  event: {
134
136
  userId: string;
135
137
  eventName: string;
@@ -147,6 +149,7 @@ async function checkExits(
147
149
  });
148
150
 
149
151
  const statesToExit: string[] = [];
152
+ const runIdsToCancel: string[] = [];
150
153
 
151
154
  for (const state of activeStates) {
152
155
  const journey = registry.get(state.journeyId);
@@ -163,6 +166,9 @@ async function checkExits(
163
166
 
164
167
  if (shouldExit) {
165
168
  statesToExit.push(state.id);
169
+ if (state.hatchetRunId) {
170
+ runIdsToCancel.push(state.hatchetRunId);
171
+ }
166
172
  }
167
173
 
168
174
  results.push({
@@ -181,6 +187,21 @@ async function checkExits(
181
187
  updatedAt: new Date(),
182
188
  })
183
189
  .where(inArray(journeyStates.id, statesToExit));
190
+
191
+ // Cancel the live durable runs so a journey suspended in a sleep or
192
+ // `waitForEvent` can't resume and fire after it has exited. Best-effort: a
193
+ // run may have already finished, and the in-run resume guard
194
+ // (JourneyExitedError) is the backstop if a cancel races a resume.
195
+ if (runIdsToCancel.length > 0) {
196
+ try {
197
+ await hatchet.runs.cancel({ ids: runIdsToCancel });
198
+ } catch (err) {
199
+ logger.warn("Failed to cancel exited journey runs", {
200
+ count: runIdsToCancel.length,
201
+ error: err instanceof Error ? err.message : String(err),
202
+ });
203
+ }
204
+ }
184
205
  }
185
206
 
186
207
  return results;