@hogsend/engine 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/index.ts +1 -0
- package/src/journeys/constants.ts +14 -0
- package/src/journeys/define-journey.ts +32 -5
- package/src/journeys/errors.ts +17 -0
- package/src/journeys/journey-context.ts +134 -17
- package/src/lib/ingestion.ts +22 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hogsend/engine",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -35,11 +35,11 @@
|
|
|
35
35
|
"resend": "^6.12.3",
|
|
36
36
|
"winston": "^3.19.0",
|
|
37
37
|
"zod": "^4.4.3",
|
|
38
|
-
"@hogsend/core": "^0.
|
|
39
|
-
"@hogsend/
|
|
40
|
-
"@hogsend/
|
|
41
|
-
"@hogsend/plugin-posthog": "^0.
|
|
42
|
-
"@hogsend/plugin-resend": "^0.
|
|
38
|
+
"@hogsend/core": "^0.4.0",
|
|
39
|
+
"@hogsend/db": "^0.4.0",
|
|
40
|
+
"@hogsend/email": "^0.4.0",
|
|
41
|
+
"@hogsend/plugin-posthog": "^0.4.0",
|
|
42
|
+
"@hogsend/plugin-resend": "^0.4.0"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@types/node": "^22.15.3",
|
package/src/index.ts
CHANGED
|
@@ -54,6 +54,7 @@ export {
|
|
|
54
54
|
type DefinedJourney,
|
|
55
55
|
defineJourney,
|
|
56
56
|
} from "./journeys/define-journey.js";
|
|
57
|
+
export { JourneyExitedError } from "./journeys/errors.js";
|
|
57
58
|
export { createJourneyContext } from "./journeys/journey-context.js";
|
|
58
59
|
export {
|
|
59
60
|
buildJourneyRegistry,
|
|
package/src/journeys/constants.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Max active execution time configured on every journey durable task
|
|
3
|
+
* (`executionTimeout`). It is the single source of truth shared by
|
|
4
|
+
* `define-journey` (the task config) and `journey-context` (the `waitForEvent`
|
|
5
|
+
* timeout ceiling) so the two never drift.
|
|
6
|
+
*
|
|
7
|
+
* NOTE: on eviction-capable Hatchet engines (>= v0.80.0) a durable wait evicts
|
|
8
|
+
* the task and frees the worker slot, so a very long wall-clock wait MAY exceed
|
|
9
|
+
* this. We still treat it as our ceiling: `waitForEvent` rejects timeouts beyond
|
|
10
|
+
* it so they fail fast at authoring time rather than risk a mid-wait
|
|
11
|
+
* termination. Raise this to allow longer waits.
|
|
12
|
+
*/
|
|
13
|
+
export const JOURNEY_EXECUTION_TIMEOUT_HOURS = 720;
|
|
14
|
+
export const JOURNEY_EXECUTION_TIMEOUT = `${JOURNEY_EXECUTION_TIMEOUT_HOURS}h`;
|
|
package/src/journeys/define-journey.ts
CHANGED
|
@@ -6,7 +6,7 @@ import type {
|
|
|
6
6
|
JourneyUser,
|
|
7
7
|
} from "@hogsend/core/types";
|
|
8
8
|
import { contacts, journeyConfigs, journeyStates } from "@hogsend/db";
|
|
9
|
-
import { and, eq, inArray } from "drizzle-orm";
|
|
9
|
+
import { and, eq, inArray, notInArray } from "drizzle-orm";
|
|
10
10
|
import { getDb } from "../lib/db.js";
|
|
11
11
|
import {
|
|
12
12
|
checkEmailPreferences,
|
|
@@ -17,7 +17,9 @@ import { createLogger } from "../lib/logger.js";
|
|
|
17
17
|
import { getPostHog } from "../lib/posthog.js";
|
|
18
18
|
import { resolveTimezoneWithSource } from "../lib/timezone.js";
|
|
19
19
|
import { getClientScheduleDefaults } from "./client-defaults-singleton.js";
|
|
20
|
-
import {
|
|
20
|
+
import { JOURNEY_EXECUTION_TIMEOUT } from "./constants.js";
|
|
21
|
+
import { JourneyExitedError } from "./errors.js";
|
|
22
|
+
import { createJourneyContext, TERMINAL_STATUSES } from "./journey-context.js";
|
|
21
23
|
import { getJourneyRegistrySingleton } from "./registry-singleton.js";
|
|
22
24
|
|
|
23
25
|
const logger = createLogger(process.env.LOG_LEVEL);
|
|
@@ -43,7 +45,7 @@ export function defineJourney(options: {
|
|
|
43
45
|
const task = hatchet.durableTask({
|
|
44
46
|
name: `journey-${meta.id}`,
|
|
45
47
|
onEvents: [meta.trigger.event],
|
|
46
|
-
executionTimeout:
|
|
48
|
+
executionTimeout: JOURNEY_EXECUTION_TIMEOUT,
|
|
47
49
|
retries: 0,
|
|
48
50
|
fn: async (input: EventPayloadInput, hatchetCtx) => {
|
|
49
51
|
const db = getDb();
|
|
@@ -203,17 +205,42 @@ export function defineJourney(options: {
|
|
|
203
205
|
|
|
204
206
|
return { stateId, status: "completed" };
|
|
205
207
|
} catch (err) {
|
|
208
|
+
// The journey reached a terminal state (exitOn / cancel) while suspended
|
|
209
|
+
// in a durable wait. The state row is already terminal — stop gracefully
|
|
210
|
+
// without marking it "failed" or re-pushing a journey:failed event.
|
|
211
|
+
if (err instanceof JourneyExitedError) {
|
|
212
|
+
return { stateId, status: "exited" };
|
|
213
|
+
}
|
|
214
|
+
|
|
206
215
|
const message =
|
|
207
216
|
err instanceof Error ? err.message : "Unknown error during journey";
|
|
208
217
|
|
|
209
|
-
|
|
218
|
+
// Mark "failed" ONLY if the row isn't already terminal. A run cancelled
|
|
219
|
+
// by exitOn (ingestEvent sets "exited" then `runs.cancel`) or by the
|
|
220
|
+
// admin route surfaces here as a Hatchet AbortError thrown from the
|
|
221
|
+
// suspended waitFor/sleepFor — NOT a JourneyExitedError. Guarding on a
|
|
222
|
+
// non-terminal status prevents clobbering that "exited" row to "failed"
|
|
223
|
+
// and emitting a spurious journey:failed event.
|
|
224
|
+
const [failed] = await db
|
|
210
225
|
.update(journeyStates)
|
|
211
226
|
.set({
|
|
212
227
|
status: "failed",
|
|
213
228
|
errorMessage: message,
|
|
214
229
|
updatedAt: new Date(),
|
|
215
230
|
})
|
|
216
|
-
.where(
|
|
231
|
+
.where(
|
|
232
|
+
and(
|
|
233
|
+
eq(journeyStates.id, stateId),
|
|
234
|
+
notInArray(journeyStates.status, [...TERMINAL_STATUSES]),
|
|
235
|
+
),
|
|
236
|
+
)
|
|
237
|
+
.returning({ id: journeyStates.id });
|
|
238
|
+
|
|
239
|
+
if (!failed) {
|
|
240
|
+
// Already terminal (cancelled after exit) — swallow the cancellation
|
|
241
|
+
// so the run doesn't double-report as failed.
|
|
242
|
+
return { stateId, status: "exited" };
|
|
243
|
+
}
|
|
217
244
|
|
|
218
245
|
await hatchet.events.push("journey:failed", {
|
|
219
246
|
journeyId: meta.id,
|
|
package/src/journeys/errors.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thrown by durable wait primitives (e.g. `ctx.waitForEvent`) when the journey
|
|
3
|
+
* reached a terminal state — exited via `exitOn`, or cancelled — while it was
|
|
4
|
+
* suspended. It is a CONTROL-FLOW SIGNAL, not a failure: `defineJourney` catches
|
|
5
|
+
* it and stops the run gracefully WITHOUT marking the state `"failed"` (the row
|
|
6
|
+
* is already terminal). Consumers generally never observe it; it simply aborts
|
|
7
|
+
* `run()` before any post-wait side effect can fire.
|
|
8
|
+
*/
|
|
9
|
+
export class JourneyExitedError extends Error {
|
|
10
|
+
readonly stateId: string;
|
|
11
|
+
|
|
12
|
+
constructor(stateId: string) {
|
|
13
|
+
super(`Journey state ${stateId} is no longer active (exited or cancelled)`);
|
|
14
|
+
this.name = "JourneyExitedError";
|
|
15
|
+
this.stateId = stateId;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
package/src/journeys/journey-context.ts
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
Conditions,
|
|
3
|
+
HatchetClient,
|
|
4
|
+
} from "@hatchet-dev/typescript-sdk/v1/index.js";
|
|
5
|
+
import {
|
|
6
|
+
Or,
|
|
7
|
+
SleepCondition,
|
|
8
|
+
UserEventCondition,
|
|
9
|
+
} from "@hatchet-dev/typescript-sdk/v1/index.js";
|
|
2
10
|
import type { DurationObject } from "@hogsend/core";
|
|
3
|
-
import { evaluateEventCondition } from "@hogsend/core";
|
|
11
|
+
import { durationToMs, evaluateEventCondition } from "@hogsend/core";
|
|
4
12
|
import type { JourneyRegistry } from "@hogsend/core/registry";
|
|
5
13
|
import {
|
|
6
14
|
isValidTimeZone,
|
|
@@ -19,10 +27,32 @@ import type {
|
|
|
19
27
|
} from "@hogsend/core/types";
|
|
20
28
|
import { type Database, emailSends, journeyStates } from "@hogsend/db";
|
|
21
29
|
import type { PostHogService } from "@hogsend/plugin-posthog";
|
|
22
|
-
import { and, count, eq, max } from "drizzle-orm";
|
|
30
|
+
import { and, count, eq, max, notInArray } from "drizzle-orm";
|
|
23
31
|
import { checkEmailPreferences } from "../lib/enrollment-guards.js";
|
|
24
32
|
import { ingestEvent } from "../lib/ingestion.js";
|
|
25
33
|
import type { Logger } from "../lib/logger.js";
|
|
34
|
+
import {
|
|
35
|
+
JOURNEY_EXECUTION_TIMEOUT,
|
|
36
|
+
JOURNEY_EXECUTION_TIMEOUT_HOURS,
|
|
37
|
+
} from "./constants.js";
|
|
38
|
+
import { JourneyExitedError } from "./errors.js";
|
|
39
|
+
|
|
40
|
+
/** Journey statuses that are terminal — a journey in any of these must never be
|
|
41
|
+
* resurrected back to "active" by a wait resuming. Exported so the durable task
|
|
42
|
+
* runner can avoid clobbering a terminal row to "failed" on a cancel. */
|
|
43
|
+
export const TERMINAL_STATUSES = ["completed", "failed", "exited"] as const;
|
|
44
|
+
|
|
45
|
+
/** Upper bound for a `waitForEvent` timeout — the journey task's executionTimeout. */
|
|
46
|
+
const MAX_WAIT_MS = durationToMs({ hours: JOURNEY_EXECUTION_TIMEOUT_HOURS });
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Quote a string as a CEL single-quoted string literal, escaping backslashes
|
|
50
|
+
* then single quotes. Used to embed an externally-supplied userId into a CEL
|
|
51
|
+
* filter expression without breaking it or allowing injection.
|
|
52
|
+
*/
|
|
53
|
+
function celStringLiteral(value: string): string {
|
|
54
|
+
return `'${value.replace(/\\/g, "\\\\").replace(/'/g, "\\'")}'`;
|
|
55
|
+
}
|
|
26
56
|
|
|
27
57
|
interface JourneyContextConfig {
|
|
28
58
|
db: Database;
|
|
@@ -31,6 +61,12 @@ interface JourneyContextConfig {
|
|
|
31
61
|
// Hatchet's real `sleepFor` accepts a number (milliseconds) in addition to
|
|
32
62
|
// duration strings/objects; we use the number-ms form for `sleepUntil`.
|
|
33
63
|
sleepFor: (duration: DurationObject | number) => Promise<unknown>;
|
|
64
|
+
// The forwarded object is the real Hatchet `DurableContext`, which also has
|
|
65
|
+
// `waitFor` (used by `waitForEvent`). Param mirrors the SDK signature so the
|
|
66
|
+
// real context is assignable; we read back the envelope as a plain record.
|
|
67
|
+
waitFor: (
|
|
68
|
+
conditions: Conditions | Conditions[],
|
|
69
|
+
) => Promise<Record<string, unknown>>;
|
|
34
70
|
};
|
|
35
71
|
registry: JourneyRegistry;
|
|
36
72
|
logger: Logger;
|
|
@@ -114,30 +150,103 @@ export function createJourneyContext(
|
|
|
114
150
|
defaultSendWindow,
|
|
115
151
|
} = config;
|
|
116
152
|
|
|
117
|
-
//
|
|
118
|
-
//
|
|
119
|
-
//
|
|
153
|
+
// Enter a durable wait: flip "active" → "waiting", but ONLY if the journey
|
|
154
|
+
// hasn't already reached a terminal state (e.g. exitOn fired before we got
|
|
155
|
+
// here). A no-op update means the journey is already done — abort the run.
|
|
156
|
+
const enterWait = async (nodeId: string): Promise<void> => {
|
|
157
|
+
const entered = await db
|
|
158
|
+
.update(journeyStates)
|
|
159
|
+
.set({ status: "waiting", currentNodeId: nodeId, updatedAt: new Date() })
|
|
160
|
+
.where(
|
|
161
|
+
and(
|
|
162
|
+
eq(journeyStates.id, stateId),
|
|
163
|
+
notInArray(journeyStates.status, [...TERMINAL_STATUSES]),
|
|
164
|
+
),
|
|
165
|
+
)
|
|
166
|
+
.returning({ id: journeyStates.id });
|
|
167
|
+
|
|
168
|
+
if (entered.length === 0) {
|
|
169
|
+
throw new JourneyExitedError(stateId);
|
|
170
|
+
}
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
// Resume from a durable wait: flip "waiting" → "active", but ONLY if the row
|
|
174
|
+
// is still "waiting". If an exit/cancel landed during the wait the row is no
|
|
175
|
+
// longer "waiting" — abort instead of reviving a terminated journey to active
|
|
176
|
+
// (which would let a post-wait side effect fire after the journey exited).
|
|
177
|
+
const resumeFromWait = async (): Promise<void> => {
|
|
178
|
+
const resumed = await db
|
|
179
|
+
.update(journeyStates)
|
|
180
|
+
.set({ status: "active", updatedAt: new Date() })
|
|
181
|
+
.where(
|
|
182
|
+
and(eq(journeyStates.id, stateId), eq(journeyStates.status, "waiting")),
|
|
183
|
+
)
|
|
184
|
+
.returning({ id: journeyStates.id });
|
|
185
|
+
|
|
186
|
+
if (resumed.length === 0) {
|
|
187
|
+
throw new JourneyExitedError(stateId);
|
|
188
|
+
}
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
// Durable sleep with the guarded waiting → active lifecycle. `sleep` passes a
|
|
192
|
+
// DurationObject; `sleepUntil` passes a precomputed ms delay — Hatchet's
|
|
193
|
+
// `sleepFor` accepts both.
|
|
120
194
|
const performSleep = async (
|
|
121
195
|
durationOrMs: DurationObject | number,
|
|
122
196
|
nodeId: string,
|
|
123
197
|
): Promise<{ sleptAt: string; resumedAt: string }> => {
|
|
124
198
|
const sleptAt = new Date().toISOString();
|
|
199
|
+
await enterWait(nodeId);
|
|
200
|
+
await hatchetCtx.sleepFor(durationOrMs);
|
|
201
|
+
const resumedAt = new Date().toISOString();
|
|
202
|
+
await resumeFromWait();
|
|
203
|
+
return { sleptAt, resumedAt };
|
|
204
|
+
};
|
|
125
205
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
206
|
+
// Durably wait for THIS user's `event` OR `timeout`, whichever fires first,
|
|
207
|
+
// sharing the same guarded lifecycle as `performSleep`.
|
|
208
|
+
const performWaitForEvent = async (
|
|
209
|
+
event: string,
|
|
210
|
+
timeout: DurationObject,
|
|
211
|
+
nodeId: string,
|
|
212
|
+
): Promise<{ timedOut: boolean }> => {
|
|
213
|
+
// Reject a timeout longer than the journey task's executionTimeout up front
|
|
214
|
+
// so it fails fast at authoring time. (Eviction-capable engines may allow
|
|
215
|
+
// longer wall-clock waits, but we cap to the configured ceiling — raise
|
|
216
|
+
// JOURNEY_EXECUTION_TIMEOUT to lift it.)
|
|
217
|
+
if (durationToMs(timeout) > MAX_WAIT_MS) {
|
|
218
|
+
throw new RangeError(
|
|
219
|
+
`waitForEvent timeout exceeds the journey execution limit (${JOURNEY_EXECUTION_TIMEOUT})`,
|
|
220
|
+
);
|
|
221
|
+
}
|
|
130
222
|
|
|
131
|
-
await
|
|
223
|
+
await enterWait(nodeId);
|
|
132
224
|
|
|
133
|
-
|
|
225
|
+
// Wait for the user-scoped event or the timeout. The event branch filters on
|
|
226
|
+
// the pushed payload's top-level `userId` (see `ingestEvent`); the SDK turns
|
|
227
|
+
// the ms number into a Go duration string at serialization time.
|
|
228
|
+
const result = await hatchetCtx.waitFor(
|
|
229
|
+
Or(
|
|
230
|
+
new UserEventCondition(
|
|
231
|
+
event,
|
|
232
|
+
`input.userId == ${celStringLiteral(userId)}`,
|
|
233
|
+
"event",
|
|
234
|
+
),
|
|
235
|
+
new SleepCondition(durationToMs(timeout), "timeout"),
|
|
236
|
+
),
|
|
237
|
+
);
|
|
134
238
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
239
|
+
// Discriminate on which branch's readableDataKey ("event"/"timeout") is
|
|
240
|
+
// present. The eviction-capable path returns the `{ CREATE: { … } }`
|
|
241
|
+
// envelope; the pre-eviction path returns the inner object UN-wrapped — so
|
|
242
|
+
// strip an optional `CREATE` layer first to handle both shapes identically.
|
|
243
|
+
const fired = (("CREATE" in result ? result.CREATE : result) ??
|
|
244
|
+
{}) as Record<string, unknown>;
|
|
245
|
+
const timedOut = !("event" in fired);
|
|
139
246
|
|
|
140
|
-
|
|
247
|
+
await resumeFromWait();
|
|
248
|
+
|
|
249
|
+
return { timedOut };
|
|
141
250
|
};
|
|
142
251
|
|
|
143
252
|
return {
|
|
@@ -169,6 +278,14 @@ export function createJourneyContext(
|
|
|
169
278
|
);
|
|
170
279
|
},
|
|
171
280
|
|
|
281
|
+
async waitForEvent({ event, timeout, label }) {
|
|
282
|
+
return performWaitForEvent(
|
|
283
|
+
event,
|
|
284
|
+
timeout,
|
|
285
|
+
label ?? `wait-event:${event}`,
|
|
286
|
+
);
|
|
287
|
+
},
|
|
288
|
+
|
|
172
289
|
async checkpoint(label) {
|
|
173
290
|
await db
|
|
174
291
|
.update(journeyStates)
|
package/src/lib/ingestion.ts
CHANGED
|
@@ -76,7 +76,7 @@ export async function ingestEvent(opts: {
|
|
|
76
76
|
userEmail: event.userEmail,
|
|
77
77
|
properties: serializableProperties,
|
|
78
78
|
}),
|
|
79
|
-
checkExits(db, registry, {
|
|
79
|
+
checkExits(db, registry, hatchet, logger, {
|
|
80
80
|
userId: event.userId,
|
|
81
81
|
eventName: event.event,
|
|
82
82
|
properties: event.properties,
|
|
@@ -130,6 +130,8 @@ export async function ingestEvent(opts: {
|
|
|
130
130
|
async function checkExits(
|
|
131
131
|
db: Database,
|
|
132
132
|
registry: JourneyRegistry,
|
|
133
|
+
hatchet: HatchetClient,
|
|
134
|
+
logger: Logger,
|
|
133
135
|
event: {
|
|
134
136
|
userId: string;
|
|
135
137
|
eventName: string;
|
|
@@ -147,6 +149,7 @@ async function checkExits(
|
|
|
147
149
|
});
|
|
148
150
|
|
|
149
151
|
const statesToExit: string[] = [];
|
|
152
|
+
const runIdsToCancel: string[] = [];
|
|
150
153
|
|
|
151
154
|
for (const state of activeStates) {
|
|
152
155
|
const journey = registry.get(state.journeyId);
|
|
@@ -163,6 +166,9 @@ async function checkExits(
|
|
|
163
166
|
|
|
164
167
|
if (shouldExit) {
|
|
165
168
|
statesToExit.push(state.id);
|
|
169
|
+
if (state.hatchetRunId) {
|
|
170
|
+
runIdsToCancel.push(state.hatchetRunId);
|
|
171
|
+
}
|
|
166
172
|
}
|
|
167
173
|
|
|
168
174
|
results.push({
|
|
@@ -181,6 +187,21 @@ async function checkExits(
|
|
|
181
187
|
updatedAt: new Date(),
|
|
182
188
|
})
|
|
183
189
|
.where(inArray(journeyStates.id, statesToExit));
|
|
190
|
+
|
|
191
|
+
// Cancel the live durable runs so a journey suspended in a sleep or
|
|
192
|
+
// `waitForEvent` can't resume and fire after it has exited. Best-effort: a
|
|
193
|
+
// run may have already finished, and the in-run resume guard
|
|
194
|
+
// (JourneyExitedError) is the backstop if a cancel races a resume.
|
|
195
|
+
if (runIdsToCancel.length > 0) {
|
|
196
|
+
try {
|
|
197
|
+
await hatchet.runs.cancel({ ids: runIdsToCancel });
|
|
198
|
+
} catch (err) {
|
|
199
|
+
logger.warn("Failed to cancel exited journey runs", {
|
|
200
|
+
count: runIdsToCancel.length,
|
|
201
|
+
error: err instanceof Error ? err.message : String(err),
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
}
|
|
184
205
|
}
|
|
185
206
|
|
|
186
207
|
return results;
|