@hogsend/engine 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/buckets/check-membership.ts +57 -66
- package/src/buckets/define-bucket.ts +35 -12
- package/src/buckets/membership-epoch.ts +186 -0
- package/src/routes/admin/buckets.ts +5 -7
- package/src/worker.ts +7 -5
- package/src/workflows/bucket-backfill.ts +117 -80
- package/src/workflows/bucket-reconcile.ts +420 -131
|
@@ -3,7 +3,6 @@ import type { JsonObject } from "@hatchet-dev/typescript-sdk/v1/types.js";
|
|
|
3
3
|
import {
|
|
4
4
|
type BucketMeta,
|
|
5
5
|
type ConditionEval,
|
|
6
|
-
type DurationObject,
|
|
7
6
|
durationToMs,
|
|
8
7
|
evaluateCondition,
|
|
9
8
|
} from "@hogsend/core";
|
|
@@ -16,7 +15,12 @@ import {
|
|
|
16
15
|
importJobs,
|
|
17
16
|
userEvents,
|
|
18
17
|
} from "@hogsend/db";
|
|
19
|
-
import { and, eq, gte, inArray, isNull, sql } from "drizzle-orm";
|
|
18
|
+
import { and, eq, gt, gte, inArray, isNull, sql } from "drizzle-orm";
|
|
19
|
+
import {
|
|
20
|
+
computeExpiresAt,
|
|
21
|
+
computeMaxDwellAt,
|
|
22
|
+
matchesEventCount,
|
|
23
|
+
} from "../buckets/membership-epoch.js";
|
|
20
24
|
import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
|
|
21
25
|
import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
|
|
22
26
|
import { emitBucketTransition } from "../lib/bucket-emit.js";
|
|
@@ -28,7 +32,7 @@ import { createLogger } from "../lib/logger.js";
|
|
|
28
32
|
const BATCH_SIZE = 500;
|
|
29
33
|
|
|
30
34
|
/** import_jobs.format discriminator for the reused status record (Section 6.6). */
|
|
31
|
-
const FIRST_TIME_FORMAT = "bucket-backfill";
|
|
35
|
+
export const FIRST_TIME_FORMAT = "bucket-backfill";
|
|
32
36
|
const REEVAL_FORMAT = "bucket-reeval";
|
|
33
37
|
|
|
34
38
|
/**
|
|
@@ -199,6 +203,18 @@ async function backfillJoins(opts: {
|
|
|
199
203
|
.set({ totalRows: matcherIds.length, updatedAt: new Date() })
|
|
200
204
|
.where(eq(importJobs.id, jobId));
|
|
201
205
|
|
|
206
|
+
// Unconditional max-dwell TTL deadline, stamped once at insert (mirrors the
|
|
207
|
+
// live join, check-membership.ts). null when the bucket has no maxDwell; the
|
|
208
|
+
// TTL sweep (reconcileBucketTtlLeaves) filters isNotNull(maxDwellAt), so an
|
|
209
|
+
// unset value would never be force-left.
|
|
210
|
+
const maxDwellAt = computeMaxDwellAt(bucket);
|
|
211
|
+
|
|
212
|
+
// Fix C (DEFERRED): backfilled fastExpiry rows are NOT armed with a
|
|
213
|
+
// bucket:arm-expiry durable timer here — they are picked up by the next cron
|
|
214
|
+
// sweep instead (reconcileBucketLeaves / reconcileBucketTtlLeaves are the
|
|
215
|
+
// authoritative backstop). Conscious choice (cron cadence, default 5m), not an
|
|
216
|
+
// omission: arming at backfill would fan out one durable task per inserted row.
|
|
217
|
+
|
|
202
218
|
let inserted = 0;
|
|
203
219
|
for (let i = 0; i < matcherIds.length; i += BATCH_SIZE) {
|
|
204
220
|
const chunk = matcherIds.slice(i, i + BATCH_SIZE);
|
|
@@ -214,14 +230,38 @@ async function backfillJoins(opts: {
|
|
|
214
230
|
chunkContacts.map((c) => [c.externalId, c.email]),
|
|
215
231
|
);
|
|
216
232
|
|
|
233
|
+
// Fix A: entryCount = 1 + prior memberships for each (user, bucket), the
|
|
234
|
+
// same monotonic ordinal the live join computes (check-membership.ts). On a
|
|
235
|
+
// FIRST-TIME backfill priorCount is 0 → entryCount 1 (unchanged); on a
|
|
236
|
+
// REEVAL re-join of a user with historical "left" rows it advances the
|
|
237
|
+
// epoch correctly. ONE batched GROUP BY per chunk (never per-user — the set-
|
|
238
|
+
// based path must not reintroduce the O(P) serial-query trap).
|
|
239
|
+
const priorCounts = await db
|
|
240
|
+
.select({
|
|
241
|
+
userId: bucketMemberships.userId,
|
|
242
|
+
cnt: sql<number>`count(*)::int`,
|
|
243
|
+
})
|
|
244
|
+
.from(bucketMemberships)
|
|
245
|
+
.where(
|
|
246
|
+
and(
|
|
247
|
+
eq(bucketMemberships.bucketId, bucket.id),
|
|
248
|
+
inArray(bucketMemberships.userId, chunk),
|
|
249
|
+
),
|
|
250
|
+
)
|
|
251
|
+
.groupBy(bucketMemberships.userId);
|
|
252
|
+
const priorByUser = new Map(
|
|
253
|
+
priorCounts.map((r) => [r.userId, Number(r.cnt)]),
|
|
254
|
+
);
|
|
255
|
+
|
|
217
256
|
const rows = chunk.map((userId) => ({
|
|
218
257
|
userId,
|
|
219
258
|
userEmail: emailByUser.get(userId) ?? null,
|
|
220
259
|
bucketId: bucket.id,
|
|
221
260
|
status: "active" as const,
|
|
222
261
|
source: "backfill" as const,
|
|
223
|
-
entryCount: 1,
|
|
224
|
-
expiresAt:
|
|
262
|
+
entryCount: 1 + (priorByUser.get(userId) ?? 0),
|
|
263
|
+
expiresAt: computeExpiresAt(bucket),
|
|
264
|
+
maxDwellAt,
|
|
225
265
|
lastEvaluatedAt: new Date(),
|
|
226
266
|
}));
|
|
227
267
|
|
|
@@ -343,8 +383,22 @@ async function selectEventMatchers(
|
|
|
343
383
|
: null;
|
|
344
384
|
|
|
345
385
|
// count gte N / exists → SELECT user_id ... GROUP BY HAVING. not_exists
|
|
346
|
-
// (absence) → live contacts
|
|
386
|
+
// (absence) → live contacts who EVER fired the event but have NONE in the
|
|
387
|
+
// window (lapsed-only). A bare windowed `not_exists within W` is treated as
|
|
388
|
+
// LAPSED-ONLY (never-active EXCLUDED) in BOTH this backfill and the cron
|
|
389
|
+
// (bucket-reconcile.ts reconcileBucketJoins, the everFired floor), so the two
|
|
390
|
+
// writers agree: brand-new never-active signups are NOT materialized for an
|
|
391
|
+
// absence-within-window bucket — only users who once did X and then stopped.
|
|
347
392
|
if (criteria.check === "not_exists") {
|
|
393
|
+
// everFired floor: contacts who fired the event AT LEAST ONCE (no window),
|
|
394
|
+
// mirroring the cron's `ever_fired` semi-join. Excludes never-active
|
|
395
|
+
// contacts so the two writers select the same lapsed-only cohort.
|
|
396
|
+
const everFired = db
|
|
397
|
+
.selectDistinct({ userId: userEvents.userId })
|
|
398
|
+
.from(userEvents)
|
|
399
|
+
.where(eq(userEvents.event, criteria.eventName))
|
|
400
|
+
.as("ever_fired");
|
|
401
|
+
|
|
348
402
|
const present = db
|
|
349
403
|
.select({ userId: userEvents.userId })
|
|
350
404
|
.from(userEvents)
|
|
@@ -360,115 +414,94 @@ async function selectEventMatchers(
|
|
|
360
414
|
const rows = await db
|
|
361
415
|
.select({ userId: contacts.externalId })
|
|
362
416
|
.from(contacts)
|
|
417
|
+
.innerJoin(everFired, eq(everFired.userId, contacts.externalId))
|
|
363
418
|
.leftJoin(present, eq(present.userId, contacts.externalId))
|
|
364
419
|
.where(and(isNull(contacts.deletedAt), isNull(present.userId)));
|
|
365
420
|
return rows.map((r) => r.userId);
|
|
366
421
|
}
|
|
367
422
|
|
|
368
|
-
// exists / count: group counts then filter by the operator.
|
|
423
|
+
// exists / count: group counts then filter by the operator. Fix B: innerJoin
|
|
424
|
+
// live contacts (GDPR — only materialize memberships for non-deleted contacts
|
|
425
|
+
// that actually exist), mirroring selectEventLeavers in bucket-reconcile.ts.
|
|
426
|
+
// The not_exists branch above already filters contacts.deletedAt; without this
|
|
427
|
+
// join the positive-event path could materialize active rows for soft-deleted
|
|
428
|
+
// or orphan-event userIds, diverging from the live/reconcile paths.
|
|
369
429
|
const rows = await db
|
|
370
430
|
.select({
|
|
371
431
|
userId: userEvents.userId,
|
|
372
432
|
cnt: sql<number>`count(*)::int`,
|
|
373
433
|
})
|
|
374
434
|
.from(userEvents)
|
|
435
|
+
.innerJoin(contacts, eq(contacts.externalId, userEvents.userId))
|
|
375
436
|
.where(
|
|
376
437
|
and(
|
|
377
438
|
eq(userEvents.event, criteria.eventName),
|
|
439
|
+
isNull(contacts.deletedAt),
|
|
378
440
|
cutoff ? gte(userEvents.occurredAt, cutoff) : undefined,
|
|
379
441
|
),
|
|
380
442
|
)
|
|
381
443
|
.groupBy(userEvents.userId);
|
|
382
444
|
|
|
383
445
|
return rows
|
|
384
|
-
.filter((r) =>
|
|
446
|
+
.filter((r) => matchesEventCount(criteria, Number(r.cnt)))
|
|
385
447
|
.map((r) => r.userId);
|
|
386
448
|
}
|
|
387
449
|
|
|
388
|
-
/** True when a windowed count satisfies the (exists/count) criterion. */
|
|
389
|
-
function matchesCount(
|
|
390
|
-
criteria: Extract<ConditionEval, { type: "event" }>,
|
|
391
|
-
count: number,
|
|
392
|
-
): boolean {
|
|
393
|
-
switch (criteria.check) {
|
|
394
|
-
case "exists":
|
|
395
|
-
return count > 0;
|
|
396
|
-
case "count": {
|
|
397
|
-
if (!criteria.operator || criteria.value === undefined) return count > 0;
|
|
398
|
-
switch (criteria.operator) {
|
|
399
|
-
case "gt":
|
|
400
|
-
return count > criteria.value;
|
|
401
|
-
case "gte":
|
|
402
|
-
return count >= criteria.value;
|
|
403
|
-
case "lt":
|
|
404
|
-
return count < criteria.value;
|
|
405
|
-
case "lte":
|
|
406
|
-
return count <= criteria.value;
|
|
407
|
-
case "eq":
|
|
408
|
-
return count === criteria.value;
|
|
409
|
-
default:
|
|
410
|
-
return false;
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
default:
|
|
414
|
-
return false;
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
|
|
418
450
|
/**
|
|
419
451
|
* Composite/multi-condition fallback (the documented O(P) exception, Section 6.6):
|
|
420
|
-
* a
|
|
452
|
+
* a per-contact `evaluateCondition` loop over live contacts. Property
|
|
421
453
|
* sub-conditions evaluate against the contact's merged properties.
|
|
454
|
+
*
|
|
455
|
+
* KEYSET PAGINATION by `contacts.externalId` in BATCH_SIZE pages (mirrors
|
|
456
|
+
* reconcileBucketJoins' `externalId asc` paging): each page selects
|
|
457
|
+
* `WHERE externalId > :cursor ORDER BY externalId ASC LIMIT BATCH_SIZE`,
|
|
458
|
+
* evaluates the criteria per contact, then advances the cursor to the last
|
|
459
|
+
* externalId of the page — repeating until a short page ends the scan. The whole
|
|
460
|
+
* contacts table is never held in memory at once.
|
|
422
461
|
*/
|
|
423
462
|
async function selectCompositeMatchers(
|
|
424
463
|
db: Database,
|
|
425
464
|
criteria: ConditionEval,
|
|
426
465
|
): Promise<string[]> {
|
|
427
|
-
const liveContacts = await db
|
|
428
|
-
.select({
|
|
429
|
-
externalId: contacts.externalId,
|
|
430
|
-
properties: contacts.properties,
|
|
431
|
-
})
|
|
432
|
-
.from(contacts)
|
|
433
|
-
.where(isNull(contacts.deletedAt));
|
|
434
|
-
|
|
435
466
|
const matchers: string[] = [];
|
|
436
|
-
for (const contact of liveContacts) {
|
437
|
-
const isMember = await evaluateCondition({
|
|
438
|
-
condition: criteria,
|
|
439
|
-
ctx: {
|
|
440
|
-
db,
|
|
441
|
-
userId: contact.externalId,
|
|
442
|
-
journeyContext:
|
|
443
|
-
(contact.properties as Record<string, unknown> | null) ?? {},
|
|
444
|
-
},
|
|
445
|
-
});
|
|
446
|
-
if (isMember) matchers.push(contact.externalId);
|
|
447
|
-
}
|
|
448
|
-
return matchers;
|
|
449
|
-
}
|
|
467
|
+
let cursor: string | null = null;
|
|
450
468
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
469
|
+
for (;;) {
|
|
470
|
+
const page = await db
|
|
471
|
+
.select({
|
|
472
|
+
externalId: contacts.externalId,
|
|
473
|
+
properties: contacts.properties,
|
|
474
|
+
})
|
|
475
|
+
.from(contacts)
|
|
476
|
+
.where(
|
|
477
|
+
and(
|
|
478
|
+
isNull(contacts.deletedAt),
|
|
479
|
+
cursor != null ? gt(contacts.externalId, cursor) : undefined,
|
|
480
|
+
),
|
|
481
|
+
)
|
|
482
|
+
.orderBy(sql`${contacts.externalId} asc`)
|
|
483
|
+
.limit(BATCH_SIZE);
|
|
459
484
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
485
|
+
for (const contact of page) {
|
|
486
|
+
const isMember = await evaluateCondition({
|
|
487
|
+
condition: criteria,
|
|
488
|
+
ctx: {
|
|
489
|
+
db,
|
|
490
|
+
userId: contact.externalId,
|
|
491
|
+
journeyContext:
|
|
492
|
+
(contact.properties as Record<string, unknown> | null) ?? {},
|
|
493
|
+
},
|
|
494
|
+
});
|
|
495
|
+
if (isMember) matchers.push(contact.externalId);
|
|
469
496
|
}
|
|
497
|
+
|
|
498
|
+
// A short page (fewer than a full batch) means the scan is exhausted.
|
|
499
|
+
if (page.length < BATCH_SIZE) break;
|
|
500
|
+
cursor = page[page.length - 1]?.externalId ?? null;
|
|
501
|
+
if (cursor == null) break;
|
|
470
502
|
}
|
|
471
|
-
|
|
503
|
+
|
|
504
|
+
return matchers;
|
|
472
505
|
}
|
|
473
506
|
|
|
474
507
|
/**
|
|
@@ -535,7 +568,11 @@ export async function enqueueBucketBackfills(opts: {
|
|
|
535
568
|
|
|
536
569
|
if (!job) continue;
|
|
537
570
|
|
|
538
|
-
await bucketBackfillTask.run({
|
571
|
+
// runNoWait (fire-and-forget): this is called from worker boot BEFORE the
|
|
572
|
+
// listener starts, so awaiting the run would deadlock (the run needs the
|
|
573
|
+
// listener that `_worker.start()` brings up). The triggered run queues and
|
|
574
|
+
// executes once listening; the task itself persists the criteriaHash.
|
|
575
|
+
await bucketBackfillTask.runNoWait({
|
|
539
576
|
jobId: job.id,
|
|
540
577
|
bucketId: bucket.id,
|
|
541
578
|
mode,
|