@hogsend/engine 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/buckets/check-membership.ts +499 -0
- package/src/buckets/define-bucket.ts +29 -0
- package/src/buckets/registry-singleton.ts +21 -0
- package/src/buckets/registry.ts +62 -0
- package/src/container.ts +27 -1
- package/src/env.ts +6 -0
- package/src/index.ts +39 -1
- package/src/lib/bucket-emit.ts +107 -0
- package/src/lib/bucket-posthog-sync.ts +63 -0
- package/src/lib/ingestion.ts +25 -0
- package/src/routes/admin/buckets.ts +464 -0
- package/src/routes/admin/index.ts +2 -0
- package/src/routes/admin/metrics.ts +255 -0
- package/src/worker.ts +35 -0
- package/src/workflows/bucket-backfill.ts +556 -0
- package/src/workflows/bucket-reconcile.ts +721 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import type { JsonObject } from "@hatchet-dev/typescript-sdk/v1/types.js";
|
|
3
|
+
import {
|
|
4
|
+
type BucketMeta,
|
|
5
|
+
type ConditionEval,
|
|
6
|
+
type DurationObject,
|
|
7
|
+
durationToMs,
|
|
8
|
+
evaluateCondition,
|
|
9
|
+
} from "@hogsend/core";
|
|
10
|
+
import {
|
|
11
|
+
bucketConfigs,
|
|
12
|
+
bucketMemberships,
|
|
13
|
+
contacts,
|
|
14
|
+
createDatabase,
|
|
15
|
+
type Database,
|
|
16
|
+
importJobs,
|
|
17
|
+
userEvents,
|
|
18
|
+
} from "@hogsend/db";
|
|
19
|
+
import { and, eq, gte, inArray, isNull, sql } from "drizzle-orm";
|
|
20
|
+
import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
|
|
21
|
+
import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
|
|
22
|
+
import { emitBucketTransition } from "../lib/bucket-emit.js";
|
|
23
|
+
import { hatchet } from "../lib/hatchet.js";
|
|
24
|
+
import type { Logger } from "../lib/logger.js";
|
|
25
|
+
import { createLogger } from "../lib/logger.js";
|
|
26
|
+
|
|
27
|
+
/**
 * How many rows are written / updated per statement while chunking. The value
 * follows the import-contacts precedent (Section 6.6).
 */
const BATCH_SIZE = 500;

/**
 * Discriminator values stored in `import_jobs.format` for the reused status
 * record (Section 6.6): one for a first-time backfill, one for a
 * criteria-change re-evaluation.
 */
const FIRST_TIME_FORMAT = "bucket-backfill";
const REEVAL_FORMAT = "bucket-reeval";
|
|
33
|
+
|
|
34
|
+
/**
 * Produces a stable fingerprint for a bucket's criteria (Section 6.6 B).
 *
 * The `ConditionEval` tree is serialized with object keys in sorted order, so
 * two trees that differ only in key order yield the same fingerprint, and the
 * serialized text is then hashed with SHA-256. The value is persisted on
 * `bucket_configs.criteriaHash` and compared on the next boot to detect a
 * criteria change and enqueue re-evaluation.
 *
 * @param criteria The bucket's criteria; `undefined` is hashed as JSON `null`.
 * @returns The SHA-256 digest as a lowercase hex string.
 */
export function computeCriteriaHash(
  criteria: ConditionEval | undefined,
): string {
  const canonical = stableStringify(criteria ?? null);
  const hasher = createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
}
|
|
47
|
+
|
|
48
|
+
/**
 * Serializes a JSON-like value to text with object keys in sorted order, so the
 * output does not depend on key insertion order.
 *
 * Values that JSON cannot represent follow `JSON.stringify` semantics:
 * `undefined`, functions and symbols are dropped from objects and rendered as
 * `null` inside arrays (array holes included). This keeps `[undefined]` and
 * `[]` distinct and guarantees the function really returns a string. Output for
 * JSON-representable input is unchanged.
 *
 * @param value Any JSON-like value (primitive, array, or plain object).
 * @returns The canonical serialized text.
 */
function stableStringify(value: unknown): string {
  if (value === null || typeof value !== "object") {
    // JSON.stringify yields `undefined` (not a string) for undefined, functions
    // and symbols; fall back to "null" as JSON does for such array elements.
    return JSON.stringify(value) ?? "null";
  }
  if (Array.isArray(value)) {
    // Array.from visits holes as `undefined`, unlike Array.prototype.map.
    const items = Array.from(value, (item) => stableStringify(item));
    return `[${items.join(",")}]`;
  }
  const entries = Object.entries(value as Record<string, unknown>)
    .filter(
      ([, v]) =>
        v !== undefined && typeof v !== "function" && typeof v !== "symbol",
    )
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    .map(([k, v]) => `${JSON.stringify(k)}:${stableStringify(v)}`);
  return `{${entries.join(",")}}`;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Input for the engine-owned backfill / criteria-change re-evaluation task
 * (Section 6.6). The task runs in one of two modes:
 *
 *  - `"first-time"` — a NEW bucket id appeared. The full member set is
 *    materialized via a set-based query per criteria shape and inserted as
 *    `active` rows (`source:"backfill"`, onConflictDoNothing on the
 *    partial-active unique index). Live join emission is SUPPRESSED: historical
 *    matches must not fire `bucket:entered` into live journeys (the Customer.io
 *    rule).
 *  - `"reeval"` — an EXISTING bucket's criteria changed (detected via a
 *    `criteriaHash` diff at boot). This is a FULL diff: new matchers are
 *    inserted as active rows (joins, NO emit) and active members who no longer
 *    match are transitioned to `left` via CAS (leaves EMIT `bucket:left` so
 *    in-flight journeys exit).
 *
 * Progress lives in `import_jobs` (the precedent), discriminated by `format`
 * (`bucket-backfill` / `bucket-reeval`) with `fileName` carrying the bucketId,
 * so the Studio "building / live" badge derives from a real status record
 * (Section 11.3). Set-based, chunked, idempotent, resumable — never run in a
 * migration.
 */
export interface BucketBackfillInput extends JsonObject {
  /** Id of the `import_jobs` row that tracks this run's status and progress. */
  jobId: string;
  /** Id of the bucket to backfill or re-evaluate. */
  bucketId: string;
  /** Which of the two modes described above to run. */
  mode: "first-time" | "reeval";
}
|
|
86
|
+
|
|
87
|
+
/**
 * Hatchet task that backfills (or re-evaluates) one bucket and mirrors its
 * progress onto the `import_jobs` row named by `input.jobId`.
 *
 * Flow: resolve the bucket → mark the job `processing` → insert joins →
 * (reeval only) transition leavers → persist the criteria hash → mark the job
 * `completed`. Any error thrown after the job is marked `processing` is
 * recorded on the job as `failed` and reported in the returned object instead
 * of being rethrown.
 */
export const bucketBackfillTask = hatchet.task({
  name: "bucket-backfill",
  retries: 0,
  executionTimeout: "600s",
  fn: async (input: BucketBackfillInput) => {
    const { jobId, bucketId, mode } = input;

    const { db } = createDatabase({ url: process.env.DATABASE_URL ?? "" });
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");
    const bucketRegistry = getBucketRegistrySingleton();
    const journeyRegistry = getJourneyRegistrySingleton();

    // Builds a fresh predicate selecting this run's status row.
    const whereThisJob = () => eq(importJobs.id, jobId);

    const bucket = bucketRegistry.get(bucketId);

    // Only registered, non-manual buckets that carry criteria can be backfilled.
    if (!(bucket && bucket.kind !== "manual" && bucket.criteria)) {
      const reason = "bucket_unregistered_or_manual";
      await db
        .update(importJobs)
        .set({
          status: "failed",
          errors: [{ row: 0, error: reason }],
          updatedAt: new Date(),
        })
        .where(whereThisJob());
      return { status: "failed", reason };
    }

    await db
      .update(importJobs)
      .set({ status: "processing", updatedAt: new Date() })
      .where(whereThisJob());

    try {
      // (A/B) JOINS — new matchers become active rows. Both modes suppress join
      // emission (Section 6.6).
      const joined = await backfillJoins({ db, logger, bucket, jobId });

      // (B only) LEAVES — active members who no longer match move to `left` via
      // CAS and EMIT bucket:left (so in-flight journeys exit).
      const left =
        mode === "reeval"
          ? await reevalLeaves({ db, logger, journeyRegistry, bucket })
          : 0;

      // Store the current criteria hash so the next boot diff is a no-op until
      // the criteria actually change again (Section 6.6 B).
      await persistCriteriaHash(db, bucket);

      await db
        .update(importJobs)
        .set({
          status: "completed",
          processedRows: joined + left,
          updatedAt: new Date(),
        })
        .where(whereThisJob());

      logger.info("Bucket backfill complete", {
        bucketId: bucket.id,
        mode,
        joined,
        left,
      });
      return { status: "completed", joined, left };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      await db
        .update(importJobs)
        .set({
          status: "failed",
          errors: [{ row: 0, error: message }],
          updatedAt: new Date(),
        })
        .where(whereThisJob());
      logger.error("Bucket backfill failed", { bucketId: bucket.id, message });
      return { status: "failed", reason: message };
    }
  },
});
|
|
172
|
+
|
|
173
|
+
/**
 * Inserts an `active` membership row for every user who currently matches the
 * bucket's criteria and returns how many rows were NEWLY inserted.
 *
 * Matchers come from `selectEventMatchers` when the root criterion is an event
 * condition, otherwise from `selectCompositeMatchers` (the documented O(P)
 * per-contact exception). Rows are written in BATCH_SIZE chunks with
 * `source:"backfill"` and onConflictDoNothing, so existing rows are untouched
 * and re-runs are idempotent. No live join event is emitted (Section 6.6).
 *
 * Side effects on the `import_jobs` row: `totalRows` is set to the matcher
 * count up front and `processedRows` is refreshed after every chunk.
 */
async function backfillJoins(opts: {
  db: Database;
  logger: Logger;
  bucket: BucketMeta;
  jobId: string;
}): Promise<number> {
  const { db, bucket, jobId } = opts;
  const criteria = bucket.criteria as ConditionEval;

  let matcherIds: string[];
  if (criteria.type === "event") {
    matcherIds = await selectEventMatchers(db, criteria);
  } else {
    matcherIds = await selectCompositeMatchers(db, criteria);
  }

  await db
    .update(importJobs)
    .set({ totalRows: matcherIds.length, updatedAt: new Date() })
    .where(eq(importJobs.id, jobId));

  let insertedTotal = 0;
  let offset = 0;
  while (offset < matcherIds.length) {
    const batchIds = matcherIds.slice(offset, offset + BATCH_SIZE);
    offset += BATCH_SIZE;

    // Look up emails so userEmail can be filled from the contacts row where a
    // live contact exists; users without one get null.
    const contactRows = await db
      .select({ externalId: contacts.externalId, email: contacts.email })
      .from(contacts)
      .where(
        and(inArray(contacts.externalId, batchIds), isNull(contacts.deletedAt)),
      );
    const emailByUser = new Map(
      contactRows.map(({ externalId, email }) => [externalId, email] as const),
    );

    const membershipRows = batchIds.map((userId) => {
      const userEmail = emailByUser.get(userId) ?? null;
      return {
        userId,
        userEmail,
        bucketId: bucket.id,
        status: "active" as const,
        source: "backfill" as const,
        entryCount: 1,
        expiresAt: computeBackfillExpiresAt(bucket),
        lastEvaluatedAt: new Date(),
      };
    });

    // `returning` only yields rows that were actually inserted, so conflicts
    // are not counted.
    const insertedRows = await db
      .insert(bucketMemberships)
      .values(membershipRows)
      .onConflictDoNothing()
      .returning({ id: bucketMemberships.id });

    insertedTotal += insertedRows.length;

    await db
      .update(importJobs)
      .set({ processedRows: insertedTotal, updatedAt: new Date() })
      .where(eq(importJobs.id, jobId));
  }

  return insertedTotal;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Re-eval LEAVES (mode:"reeval" only). Active members of the bucket who no
 * longer satisfy the (changed) criteria are flipped to `left` and a
 * `bucket:left` transition is emitted for each of them (Section 6.6 B
 * asymmetry: criteria-change LEAVES emit).
 *
 * Only members whose contact row exists and is not soft-deleted are
 * considered. The UPDATE re-checks `status = "active"`, so a row that changed
 * in the meantime is neither flipped nor emitted (the CAS).
 *
 * @returns The number of memberships actually transitioned to `left`.
 */
async function reevalLeaves(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket } = opts;
  const criteria = bucket.criteria as ConditionEval;

  // Users who STILL match; every other active member is a leaver.
  let stillMatching: string[];
  if (criteria.type === "event") {
    stillMatching = await selectEventMatchers(db, criteria);
  } else {
    stillMatching = await selectCompositeMatchers(db, criteria);
  }
  const keepers = new Set(stillMatching);

  const activeMembers = await db
    .select({
      id: bucketMemberships.id,
      userId: bucketMemberships.userId,
      userEmail: bucketMemberships.userEmail,
      entryCount: bucketMemberships.entryCount,
    })
    .from(bucketMemberships)
    .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
    .where(
      and(
        eq(bucketMemberships.bucketId, bucket.id),
        eq(bucketMemberships.status, "active"),
        isNull(bucketMemberships.deletedAt),
        isNull(contacts.deletedAt),
      ),
    );

  const leavers = activeMembers.filter(({ userId }) => !keepers.has(userId));
  if (leavers.length === 0) return 0;

  let leftCount = 0;
  let offset = 0;
  while (offset < leavers.length) {
    const batchIds = leavers
      .slice(offset, offset + BATCH_SIZE)
      .map((member) => member.id);
    offset += BATCH_SIZE;

    const flipped = await db
      .update(bucketMemberships)
      .set({
        status: "left",
        leftAt: new Date(),
        lastEvaluatedAt: new Date(),
        updatedAt: new Date(),
      })
      .where(
        and(
          eq(bucketMemberships.bucketId, bucket.id),
          eq(bucketMemberships.status, "active"),
          isNull(bucketMemberships.deletedAt),
          inArray(bucketMemberships.id, batchIds),
        ),
      )
      .returning({
        userId: bucketMemberships.userId,
        userEmail: bucketMemberships.userEmail,
        entryCount: bucketMemberships.entryCount,
      });

    // Emit one at a time, and only for rows the UPDATE actually flipped.
    for (const { userId, userEmail, entryCount } of flipped) {
      await emitBucketTransition({
        db,
        registry: journeyRegistry,
        hatchet,
        logger,
        kind: "left",
        bucket,
        userId,
        userEmail,
        epoch: entryCount,
        source: "backfill",
      });
    }
    leftCount += flipped.length;
  }

  return leftCount;
}
|
|
335
|
+
|
|
336
|
+
/**
 * Set-based matcher user-ids for a single-event criterion (Section 6.6).
 *
 * - `not_exists`: live contacts with NO such event in the window (anti-join).
 * - `exists` / `count`: events are grouped per user and each count is checked
 *   with `matchesCount`.
 *
 * Users with zero matching events never appear in the grouped query. When the
 * criterion is satisfied by a count of 0 (e.g. `count lt 3`, `count eq 0`),
 * live contacts without any matching event are therefore added explicitly.
 * Without this, those contacts would be missed on backfill and wrongly treated
 * as leavers on re-evaluation.
 *
 * @param db Database handle.
 * @param criteria The event criterion; `within` (when set) bounds the window
 *   to `now - within`.
 * @returns The ids of the users that satisfy the criterion.
 */
async function selectEventMatchers(
  db: Database,
  criteria: Extract<ConditionEval, { type: "event" }>,
): Promise<string[]> {
  const cutoff = criteria.within
    ? new Date(Date.now() - durationToMs(criteria.within))
    : null;

  if (criteria.check === "not_exists") {
    // Users that DO have the event in the window; anti-joined below.
    const present = db
      .select({ userId: userEvents.userId })
      .from(userEvents)
      .where(
        and(
          eq(userEvents.event, criteria.eventName),
          cutoff ? gte(userEvents.occurredAt, cutoff) : undefined,
        ),
      )
      .groupBy(userEvents.userId)
      .as("present");

    const rows = await db
      .select({ userId: contacts.externalId })
      .from(contacts)
      .leftJoin(present, eq(present.userId, contacts.externalId))
      .where(and(isNull(contacts.deletedAt), isNull(present.userId)));
    return rows.map((r) => r.userId);
  }

  // exists / count: group counts then filter by the operator.
  const rows = await db
    .select({
      userId: userEvents.userId,
      cnt: sql<number>`count(*)::int`,
    })
    .from(userEvents)
    .where(
      and(
        eq(userEvents.event, criteria.eventName),
        cutoff ? gte(userEvents.occurredAt, cutoff) : undefined,
      ),
    )
    .groupBy(userEvents.userId);

  const matchers = rows
    .filter((r) => matchesCount(criteria, Number(r.cnt)))
    .map((r) => r.userId);

  // A count of 0 does not satisfy the criterion: the grouped rows are complete.
  if (!matchesCount(criteria, 0)) return matchers;

  // A count of 0 satisfies the criterion: add live contacts that have no
  // matching event at all (they are absent from the grouped rows).
  const seen = new Set(rows.map((r) => r.userId));
  const liveContacts = await db
    .select({ userId: contacts.externalId })
    .from(contacts)
    .where(isNull(contacts.deletedAt));
  for (const contact of liveContacts) {
    if (seen.has(contact.userId)) continue;
    seen.add(contact.userId);
    matchers.push(contact.userId);
  }
  return matchers;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Decides whether a windowed event count satisfies an `exists` / `count`
 * criterion.
 *
 * - `exists`: true when at least one event was counted.
 * - `count`: compares `count` against `criteria.value` using
 *   `criteria.operator` (`gt`, `gte`, `lt`, `lte`, `eq`). If the operator or
 *   the value is missing, it falls back to the `exists` rule.
 * - any other check, or an unrecognised operator: false.
 */
function matchesCount(
  criteria: Extract<ConditionEval, { type: "event" }>,
  count: number,
): boolean {
  const hasAny = count > 0;

  if (criteria.check === "exists") return hasAny;
  if (criteria.check !== "count") return false;

  const { operator, value } = criteria;
  if (!operator || value === undefined) return hasAny;

  if (operator === "gt") return count > value;
  if (operator === "gte") return count >= value;
  if (operator === "lt") return count < value;
  if (operator === "lte") return count <= value;
  if (operator === "eq") return count === value;
  return false;
}
|
|
417
|
+
|
|
418
|
+
/**
 * Composite / multi-condition fallback (the documented O(P) exception, Section
 * 6.6). Every live (not soft-deleted) contact is loaded, then
 * `evaluateCondition` is awaited for each contact in turn, with the contact's
 * `properties` supplied as the journey context (an empty object when the
 * contact has none).
 *
 * @returns The external ids of the contacts for which the criteria evaluated
 *   truthy.
 */
async function selectCompositeMatchers(
  db: Database,
  criteria: ConditionEval,
): Promise<string[]> {
  const liveContacts = await db
    .select({
      externalId: contacts.externalId,
      properties: contacts.properties,
    })
    .from(contacts)
    .where(isNull(contacts.deletedAt));

  const matchedIds: string[] = [];
  for (const { externalId, properties } of liveContacts) {
    const journeyContext =
      (properties as Record<string, unknown> | null) ?? {};
    const matched = await evaluateCondition({
      condition: criteria,
      ctx: { db, userId: externalId, journeyContext },
    });
    if (matched) {
      matchedIds.push(externalId);
    }
  }
  return matchedIds;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Expiry timestamp for a backfilled membership.
 *
 * Returns `now + within` when the bucket is time-based or fastExpiry AND its
 * criteria tree contains an event condition with a `within` window (the first
 * one found is used). Returns null in every other case.
 */
function computeBackfillExpiresAt(bucket: BucketMeta): Date | null {
  const { criteria } = bucket;
  if (!criteria) return null;

  const expires = bucket.timeBased || bucket.fastExpiry;
  if (!expires) return null;

  const ttl = firstWithin(criteria);
  return ttl ? new Date(Date.now() + durationToMs(ttl)) : null;
}
|
|
459
|
+
|
|
460
|
+
/**
 * Returns the first `within` window found on an event condition in a criteria
 * tree, visiting nodes depth-first in declaration order, or null when there is
 * none. Only `composite` nodes are descended into.
 */
function firstWithin(criteria: ConditionEval): DurationObject | null {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending: ConditionEval[] = [criteria];

  while (pending.length > 0) {
    // The loop condition guarantees the stack is non-empty here.
    const node = pending.pop() as ConditionEval;

    if (node.type === "event") {
      if (node.within) return node.within;
      continue;
    }
    if (node.type === "composite") {
      pending.push(...[...node.conditions].reverse());
    }
  }

  return null;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Stores the bucket's current criteria fingerprint on `bucket_configs` (Section
 * 6.6 B). Inserts a row for the bucket, or — when one already exists for that
 * `bucketId` — updates its `criteriaHash` and `updatedAt`. Mirrors the admin
 * enable/disable onConflictDoUpdate target.
 */
async function persistCriteriaHash(
  db: Database,
  bucket: BucketMeta,
): Promise<void> {
  const bucketId = bucket.id;
  const criteriaHash = computeCriteriaHash(bucket.criteria);

  await db
    .insert(bucketConfigs)
    .values({ bucketId, criteriaHash })
    .onConflictDoUpdate({
      target: bucketConfigs.bucketId,
      set: { criteriaHash, updatedAt: new Date() },
    });
}
|
|
491
|
+
|
|
492
|
+
/**
 * Detect first-time / criteria-changed buckets at worker boot and enqueue a
 * backfill / re-eval job per bucket (Section 6.6 B). For each enabled dynamic
 * bucket: read the stored `bucket_configs.criteriaHash`; if absent → first-time
 * backfill; if present but different → re-eval; if equal → no-op. Creates an
 * `import_jobs` status record (discriminated by `format`) and enqueues
 * `bucketBackfillTask` for it.
 *
 * The task is enqueued with `runNoWait`, so this function returns once the
 * runs are queued instead of blocking boot until every backfill has finished.
 *
 * Safe to call on every boot — equal hashes are skipped. Best-effort: a
 * failure for one bucket is logged and does not stop the others or crash
 * worker boot. If the enqueue itself fails, the freshly created status record
 * is marked `failed` so it does not stay `pending` forever.
 *
 * NOTE(review): the stored hash is only written when a run completes, so a
 * boot that happens while a run is still in flight enqueues another job for
 * the same bucket. Inserts are idempotent, but confirm this is acceptable.
 */
export async function enqueueBucketBackfills(opts: {
  db: Database;
  logger: Logger;
}): Promise<void> {
  const { db, logger } = opts;
  const registry = getBucketRegistrySingleton();

  for (const bucket of registry.getEnabled()) {
    // Manual buckets and buckets without criteria have nothing to evaluate.
    if (bucket.kind === "manual" || !bucket.criteria) continue;

    try {
      const config = await db.query.bucketConfigs.findFirst({
        where: eq(bucketConfigs.bucketId, bucket.id),
      });
      const currentHash = computeCriteriaHash(bucket.criteria);

      let mode: BucketBackfillInput["mode"] | null = null;
      if (!config || config.criteriaHash == null) {
        mode = "first-time";
      } else if (config.criteriaHash !== currentHash) {
        mode = "reeval";
      }
      if (!mode) continue;

      const [job] = await db
        .insert(importJobs)
        .values({
          fileName: bucket.id,
          format: mode === "first-time" ? FIRST_TIME_FORMAT : REEVAL_FORMAT,
          status: "pending",
        })
        .returning({ id: importJobs.id });

      if (!job) continue;

      try {
        // Enqueue only; do not wait for the task result.
        await bucketBackfillTask.runNoWait({
          jobId: job.id,
          bucketId: bucket.id,
          mode,
        });
      } catch (enqueueErr) {
        // No task will ever pick this record up — close it out as failed, then
        // let the outer handler log the original error.
        await db
          .update(importJobs)
          .set({
            status: "failed",
            errors: [
              {
                row: 0,
                error:
                  enqueueErr instanceof Error
                    ? enqueueErr.message
                    : String(enqueueErr),
              },
            ],
            updatedAt: new Date(),
          })
          .where(eq(importJobs.id, job.id));
        throw enqueueErr;
      }

      logger.info("Bucket backfill enqueued", {
        bucketId: bucket.id,
        mode,
        jobId: job.id,
      });
    } catch (err) {
      logger.warn("Bucket backfill enqueue failed", {
        bucketId: bucket.id,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
}
|