@reachy/audience-module 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/.gitlab/merge_request_templates/Default.md +31 -0
  2. package/.gitlab-ci.yml +59 -49
  3. package/CLAUDE.md +134 -0
  4. package/dist/AudienceModule.d.ts.map +1 -1
  5. package/dist/AudienceModule.js +1 -0
  6. package/dist/AudienceModule.js.map +1 -1
  7. package/dist/engine/V2AudienceEngine.d.ts +5 -0
  8. package/dist/engine/V2AudienceEngine.d.ts.map +1 -1
  9. package/dist/engine/V2AudienceEngine.js +210 -72
  10. package/dist/engine/V2AudienceEngine.js.map +1 -1
  11. package/dist/executors/ClickHouseEventQueryExecutor.d.ts +23 -0
  12. package/dist/executors/ClickHouseEventQueryExecutor.d.ts.map +1 -0
  13. package/dist/executors/ClickHouseEventQueryExecutor.js +803 -0
  14. package/dist/executors/ClickHouseEventQueryExecutor.js.map +1 -0
  15. package/dist/repositories/SupabaseContactRepository.d.ts +1 -0
  16. package/dist/repositories/SupabaseContactRepository.d.ts.map +1 -1
  17. package/dist/repositories/SupabaseContactRepository.js +1 -0
  18. package/dist/repositories/SupabaseContactRepository.js.map +1 -1
  19. package/dist/types/index.d.ts +1 -0
  20. package/dist/types/index.d.ts.map +1 -1
  21. package/jest.config.js +8 -0
  22. package/package.json +7 -2
  23. package/src/AudienceModule.ts +1 -0
  24. package/src/__tests__/AudienceModule.test.ts +382 -0
  25. package/src/__tests__/CriteriaParser.test.ts +130 -0
  26. package/src/__tests__/QueryBuilder.test.ts +198 -0
  27. package/src/__tests__/RfmEngine.test.ts +284 -0
  28. package/src/__tests__/RfmSegmentBuilder.test.ts +210 -0
  29. package/src/__tests__/StaticAudienceExecutor.test.ts +134 -0
  30. package/src/__tests__/SupabaseContactRepository.test.ts +81 -0
  31. package/src/engine/V2AudienceEngine.ts +240 -85
  32. package/src/executors/ClickHouseEventQueryExecutor.ts +853 -0
  33. package/src/repositories/SupabaseContactRepository.ts +2 -1
  34. package/src/types/index.ts +6 -0
@@ -1,5 +1,6 @@
1
1
  import { CriteriaParser } from '../builders/CriteriaParser'
2
2
  import { AudienceCriteria } from '../types'
3
+ import { ClickHouseEventQueryExecutor } from '../executors/ClickHouseEventQueryExecutor'
3
4
 
4
5
  type Logger = {
5
6
  log: (...args: any[]) => void
@@ -281,23 +282,27 @@ export class V2AudienceEngine {
281
282
  private supabase: any
282
283
  private debug: boolean
283
284
  private logger: Logger
285
+ private chExecutor: ClickHouseEventQueryExecutor | null = null
284
286
 
285
- constructor(params: { supabaseClient: any; debug?: boolean; logger?: Logger }) {
287
+ private static _tzCache = new Map<string, { tz: string; fetchedAt: number }>()
288
+ private static TZ_CACHE_TTL = 5 * 60 * 1000 // 5 minutes
289
+
290
+ constructor(params: { supabaseClient: any; clickhouseClient?: any; debug?: boolean; logger?: Logger }) {
286
291
  this.supabase = params.supabaseClient
287
292
  this.debug = !!params.debug
288
293
  this.logger = params.logger || console
294
+ if (params.clickhouseClient) {
295
+ this.chExecutor = new ClickHouseEventQueryExecutor(params.clickhouseClient, params.logger || console)
296
+ if (this.debug) this.logger.log('[V2AudienceEngine] ClickHouse executor initialized')
297
+ }
289
298
  }
290
299
 
291
- async getContactIdsByAudienceCriteriaV2(
292
- organizationId: string,
293
- projectId: string,
294
- criteriaRaw: any
295
- ): Promise<Set<string>> {
296
- let criteria: AudienceCriteria = CriteriaParser.parse(criteriaRaw as any) as any
297
-
298
- // Timezone do projeto: usado em filtros de tempo (time_of_day/day_of_week/day_of_month)
299
- // Fonte: projects.settings.timezone (mesma tela de Settings > Project no frontend)
300
- let projectTimezone = 'UTC'
300
+ private async getProjectTimezone(organizationId: string, projectId: string): Promise<string> {
301
+ const cacheKey = `${organizationId}:${projectId}`
302
+ const cached = V2AudienceEngine._tzCache.get(cacheKey)
303
+ if (cached && Date.now() - cached.fetchedAt < V2AudienceEngine.TZ_CACHE_TTL) {
304
+ return cached.tz
305
+ }
301
306
  try {
302
307
  const { data } = await this.supabase
303
308
  .from('projects')
@@ -306,12 +311,25 @@ export class V2AudienceEngine {
306
311
  .eq('id', projectId)
307
312
  .single()
308
313
  const tzCandidate = (data as any)?.settings?.timezone
309
- if (typeof tzCandidate === 'string' && isValidIanaTimezone(tzCandidate)) {
310
- projectTimezone = tzCandidate.trim()
311
- }
314
+ const tz = (typeof tzCandidate === 'string' && isValidIanaTimezone(tzCandidate))
315
+ ? tzCandidate.trim()
316
+ : 'UTC'
317
+ V2AudienceEngine._tzCache.set(cacheKey, { tz, fetchedAt: Date.now() })
318
+ return tz
312
319
  } catch {
313
- // fallback silencioso
320
+ V2AudienceEngine._tzCache.set(cacheKey, { tz: 'UTC', fetchedAt: Date.now() })
321
+ return 'UTC'
314
322
  }
323
+ }
324
+
325
+ async getContactIdsByAudienceCriteriaV2(
326
+ organizationId: string,
327
+ projectId: string,
328
+ criteriaRaw: any
329
+ ): Promise<Set<string>> {
330
+ let criteria: AudienceCriteria = CriteriaParser.parse(criteriaRaw as any) as any
331
+
332
+ const projectTimezone = await this.getProjectTimezone(organizationId, projectId)
315
333
 
316
334
  // Compat: permitir critérios legados (filters/conditions) sem groups,
317
335
  // convertendo para groups V2 para que o engine avalie sozinho.
@@ -336,7 +354,8 @@ export class V2AudienceEngine {
336
354
 
337
355
  const typeId = String((criteria as any)?.type || '')
338
356
 
339
- const fetchAll = async (baseQuery: any, pageSize: number): Promise<any[]> => {
357
+ const MAX_FETCH_ROWS = 500_000
358
+ const fetchAll = async (baseQuery: any, pageSize: number, maxRows: number = MAX_FETCH_ROWS): Promise<any[]> => {
340
359
  let offset = 0
341
360
  let out: any[] = []
342
361
  while (true) {
@@ -344,41 +363,72 @@ export class V2AudienceEngine {
344
363
  if (error) throw error
345
364
  if (!data || data.length === 0) break
346
365
  out = out.concat(data)
366
+ if (out.length >= maxRows) {
367
+ this.dlog('fetchAll: max rows reached', { maxRows, fetched: out.length })
368
+ break
369
+ }
347
370
  if (data.length < pageSize) break
348
371
  offset += pageSize
349
372
  }
350
373
  return out
351
374
  }
352
375
 
353
- // Carregar contatos do projeto (necessário para negate e mapeamento de identidade)
354
- const allContacts = await fetchAll(
355
- this.supabase
356
- .from('contacts')
357
- .select('id, reachy_id, email')
358
- .eq('organization_id', organizationId)
359
- .eq('project_id', projectId),
360
- 1000
361
- )
362
-
363
- const allContactIds = new Set<string>((allContacts || []).map((c: any) => c.id as string))
364
- const reachyIdToContactId = new Map<string, string>()
365
- const contactIdToReachyId = new Map<string, string>()
366
- const emailToContactId = new Map<string, string>()
367
-
368
- for (const c of allContacts || []) {
369
- const rid = c?.reachy_id ? String(c.reachy_id).trim() : ''
370
- const cid = c?.id ? String(c.id).trim() : ''
371
- if (rid && cid) {
372
- reachyIdToContactId.set(rid, cid)
373
- contactIdToReachyId.set(cid, rid)
376
+ // Lazy loading: só carrega contatos quando necessário (negate ou event rules que precisam de identity maps)
377
+ const groups = (criteria as any).groups as any[]
378
+ const hasNegate = groups.some((g: any) => (g.rules || []).some((r: any) => r.negate))
379
+ const hasEventRules = groups.some((g: any) => (g.rules || []).some((r: any) => r.kind === 'event'))
380
+
381
+ let allContactIds = new Set<string>()
382
+ let reachyIdToContactId = new Map<string, string>()
383
+ let contactIdToReachyId = new Map<string, string>()
384
+ let emailToContactId = new Map<string, string>()
385
+ let _contactsLoaded = false
386
+
387
+ const ensureContactsLoaded = async () => {
388
+ if (_contactsLoaded) return
389
+ _contactsLoaded = true
390
+ const allContacts = await fetchAll(
391
+ this.supabase
392
+ .from('contacts')
393
+ .select('id, reachy_id, email')
394
+ .eq('organization_id', organizationId)
395
+ .eq('project_id', projectId),
396
+ 1000
397
+ )
398
+ allContactIds = new Set<string>((allContacts || []).map((c: any) => c.id as string))
399
+ for (const c of allContacts || []) {
400
+ const rid = c?.reachy_id ? String(c.reachy_id).trim() : ''
401
+ const cid = c?.id ? String(c.id).trim() : ''
402
+ if (rid && cid) {
403
+ reachyIdToContactId.set(rid, cid)
404
+ contactIdToReachyId.set(cid, rid)
405
+ }
406
+ const email = c?.email ? String(c.email).trim().toLowerCase() : ''
407
+ if (email && cid) emailToContactId.set(email, cid)
374
408
  }
375
- const email = c?.email ? String(c.email).trim().toLowerCase() : ''
376
- if (email && cid) emailToContactId.set(email, cid)
377
409
  }
378
410
 
379
- const union = (a: Set<string>, b: Set<string>) => new Set([...a, ...b])
380
- const intersect = (a: Set<string>, b: Set<string>) => new Set([...a].filter(x => b.has(x)))
381
- const diff = (a: Set<string>, b: Set<string>) => new Set([...a].filter(x => !b.has(x)))
411
+ // Preload contatos se sabemos que será necessário
412
+ if (hasNegate || hasEventRules) {
413
+ await ensureContactsLoaded()
414
+ }
415
+
416
+ const union = (a: Set<string>, b: Set<string>): Set<string> => {
417
+ const result = new Set(a)
418
+ for (const x of b) result.add(x)
419
+ return result
420
+ }
421
+ const intersect = (a: Set<string>, b: Set<string>): Set<string> => {
422
+ const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a]
423
+ const result = new Set<string>()
424
+ for (const x of smaller) { if (larger.has(x)) result.add(x) }
425
+ return result
426
+ }
427
+ const diff = (a: Set<string>, b: Set<string>): Set<string> => {
428
+ const result = new Set<string>()
429
+ for (const x of a) { if (!b.has(x)) result.add(x) }
430
+ return result
431
+ }
382
432
 
383
433
  const resolveEventContactId = (row: any): string | null => {
384
434
  const rawReachyId = row?.reachy_id ? String(row.reachy_id).trim() : ''
@@ -405,14 +455,45 @@ export class V2AudienceEngine {
405
455
 
406
456
  const evalEventRule = async (rule: any): Promise<Set<string>> => {
407
457
  const cfg = (criteria as any)?.config || {}
458
+
459
+ // Route to ClickHouse when available (replaces N paginated Supabase queries + JS aggregation)
460
+ if (this.chExecutor) {
461
+ try {
462
+ return await this.chExecutor.execute(
463
+ rule, criteria, organizationId, projectId, projectTimezone,
464
+ reachyIdToContactId, contactIdToReachyId, allContactIds
465
+ )
466
+ } catch (chErr) {
467
+ this.logger.warn('[V2AudienceEngine] ClickHouse evalEventRule failed, falling back to Supabase:', chErr)
468
+ // Fall through to Supabase path
469
+ }
470
+ }
471
+
408
472
  const effectiveEventName =
409
473
  cfg.eventType && String(cfg.eventType).trim() !== ''
410
474
  ? String(cfg.eventType)
411
475
  : String(rule.eventName)
412
476
 
477
+ // Determine minimal SELECT fields based on what's needed
478
+ const ruleFiltersPrecheck = Array.isArray((rule as any).filters) ? (rule as any).filters : []
479
+ const needsEventData = !!(
480
+ (rule.interest && rule.interest.key) ||
481
+ ruleFiltersPrecheck.some((f: any) => ['event_property', 'time_of_day', 'day_of_week', 'day_of_month'].includes(String(f?.type || '').trim())) ||
482
+ (rule.frequency && rule.frequency.type && rule.frequency.type !== 'count') ||
483
+ typeId === 'live-page-count'
484
+ )
485
+ const needsTimestamp = !!(
486
+ ruleFiltersPrecheck.some((f: any) => ['first_time', 'last_time', 'time_of_day', 'day_of_week', 'day_of_month'].includes(String(f?.type || '').trim()))
487
+ )
488
+
489
+ let selectFields = 'contact_id, reachy_id'
490
+ if (needsEventData) selectFields += ', event_data'
491
+ if (needsTimestamp) selectFields += ', event_timestamp'
492
+ if (needsEventData || needsTimestamp) selectFields += ', event_name'
493
+
413
494
  let query = this.supabase
414
495
  .from('contact_events')
415
- .select('contact_id, reachy_id, event_data, event_timestamp, event_name')
496
+ .select(selectFields)
416
497
  .eq('organization_id', organizationId)
417
498
  .eq('project_id', projectId)
418
499
  .eq('event_name', effectiveEventName)
@@ -597,7 +678,8 @@ export class V2AudienceEngine {
597
678
  if (this.debug) this.logger.warn('[AUDIENCE_MODULE_ENGINE] event attributes filter error')
598
679
  }
599
680
 
600
- const data = await fetchAll(query, 500)
681
+ const MAX_EVENT_ROWS = 200_000
682
+ const data = await fetchAll(query, 1000, MAX_EVENT_ROWS)
601
683
  if (!data || data.length === 0) return new Set<string>()
602
684
 
603
685
  // Event advanced filters (ex.: event_property dentro do DID)
@@ -1173,35 +1255,81 @@ export class V2AudienceEngine {
1173
1255
  return new Set<string>(data.map((r: any) => r.id as string))
1174
1256
  }
1175
1257
 
1258
+ const evalRule = (rule: any) => rule.kind === 'event' ? evalEventRule(rule) : evalPropertyRule(rule)
1259
+
1176
1260
  const evalGroup = async (group: any): Promise<Set<string>> => {
1261
+ const rules = group.rules || []
1262
+ if (rules.length === 0) return new Set<string>()
1263
+
1264
+ const op = group.operator
1265
+
1266
+ // OR: run all rules in parallel then union
1267
+ if (op === 'OR') {
1268
+ const results = await Promise.all(rules.map(async (rule: any) => {
1269
+ const set = await evalRule(rule)
1270
+ return rule.negate ? diff(allContactIds, set) : set
1271
+ }))
1272
+ let acc = results[0]!
1273
+ for (let i = 1; i < results.length; i++) {
1274
+ acc = union(acc, results[i]!)
1275
+ }
1276
+ return acc
1277
+ }
1278
+
1279
+ // AND / NOT: sequential with early exit on empty set
1177
1280
  let acc: Set<string> | null = null
1178
- for (const rule of group.rules || []) {
1179
- const set = rule.kind === 'event' ? await evalEventRule(rule) : await evalPropertyRule(rule)
1180
- const shouldNegate = rule.negate
1181
- const s = shouldNegate ? diff(allContactIds, set) : set
1281
+ for (const rule of rules) {
1282
+ const set = await evalRule(rule)
1283
+ const s = rule.negate ? diff(allContactIds, set) : set
1182
1284
 
1183
1285
  if (acc == null) {
1184
1286
  acc = s
1185
1287
  } else {
1186
- if (group.operator === 'AND') acc = intersect(acc, s)
1187
- else if (group.operator === 'OR') acc = union(acc, s)
1188
- else if (group.operator === 'NOT') acc = diff(acc, s)
1288
+ if (op === 'AND') acc = intersect(acc, s)
1289
+ else if (op === 'NOT') acc = diff(acc, s)
1189
1290
  }
1291
+
1292
+ // Early exit: AND/NOT with empty set can't recover
1293
+ if (acc.size === 0) return acc
1190
1294
  }
1191
1295
  return acc || new Set<string>()
1192
1296
  }
1193
1297
 
1298
+ // Evaluate groups — detect if we can parallelize
1299
+ const allGroups = (criteria as any).groups as any[]
1194
1300
  let result: Set<string> | null = null
1195
- for (let i = 0; i < (criteria as any).groups.length; i++) {
1196
- const group = (criteria as any).groups[i]
1197
- const gset = await evalGroup(group)
1198
- if (result == null) {
1199
- result = gset
1200
- } else {
1201
- const betweenOp = String((group as any).combineOperator || group.operator || 'AND').toUpperCase()
1202
- if (betweenOp === 'AND') result = intersect(result, gset)
1203
- else if (betweenOp === 'OR') result = union(result, gset)
1204
- else if (betweenOp === 'NOT') result = diff(result, gset)
1301
+
1302
+ // Check if all groups combine with OR → can run in parallel
1303
+ const allCombineWithOr = allGroups.length > 1 && allGroups.every((g: any, i: number) => {
1304
+ if (i === 0) return true
1305
+ return String((g as any).combineOperator || g.operator || 'AND').toUpperCase() === 'OR'
1306
+ })
1307
+
1308
+ if (allCombineWithOr) {
1309
+ const groupResults = await Promise.all(allGroups.map((g: any) => evalGroup(g)))
1310
+ result = groupResults[0]!
1311
+ for (let i = 1; i < groupResults.length; i++) {
1312
+ result = union(result, groupResults[i]!)
1313
+ }
1314
+ } else {
1315
+ for (let i = 0; i < allGroups.length; i++) {
1316
+ const group = allGroups[i]
1317
+ const gset = await evalGroup(group)
1318
+ if (result == null) {
1319
+ result = gset
1320
+ } else {
1321
+ const betweenOp = String((group as any).combineOperator || group.operator || 'AND').toUpperCase()
1322
+ if (betweenOp === 'AND') result = intersect(result, gset)
1323
+ else if (betweenOp === 'OR') result = union(result, gset)
1324
+ else if (betweenOp === 'NOT') result = diff(result, gset)
1325
+ }
1326
+ // Early exit for AND between groups
1327
+ if (result.size === 0) {
1328
+ const nextOp = i + 1 < allGroups.length
1329
+ ? String(((allGroups[i + 1] as any).combineOperator || (allGroups[i + 1] as any).operator || 'AND')).toUpperCase()
1330
+ : ''
1331
+ if (nextOp === 'AND' || nextOp === 'NOT') break
1332
+ }
1205
1333
  }
1206
1334
  }
1207
1335
 
@@ -1235,22 +1363,7 @@ export class V2AudienceEngine {
1235
1363
 
1236
1364
  const typeId = String((criteria as any)?.type || '')
1237
1365
 
1238
- // Timezone do projeto (para filtros de tempo em eventos)
1239
- let projectTimezone = 'UTC'
1240
- try {
1241
- const { data } = await this.supabase
1242
- .from('projects')
1243
- .select('settings')
1244
- .eq('organization_id', organizationId)
1245
- .eq('id', projectId)
1246
- .single()
1247
- const tzCandidate = (data as any)?.settings?.timezone
1248
- if (typeof tzCandidate === 'string' && isValidIanaTimezone(tzCandidate)) {
1249
- projectTimezone = tzCandidate.trim()
1250
- }
1251
- } catch {
1252
- // fallback silencioso
1253
- }
1366
+ const projectTimezone = await this.getProjectTimezone(organizationId, projectId)
1254
1367
 
1255
1368
  // Carregar identidade do contato (para reachy_id)
1256
1369
  const { data: contactRow } = await this.supabase
@@ -1265,6 +1378,29 @@ export class V2AudienceEngine {
1265
1378
 
1266
1379
  const evalEventRuleForContact = async (rule: any): Promise<boolean> => {
1267
1380
  const cfg = (criteria as any)?.config || {}
1381
+
1382
+ // Route to ClickHouse when available — use batch engine scoped to 1 contact
1383
+ if (this.chExecutor) {
1384
+ try {
1385
+ // Build single-contact identity maps for the executor
1386
+ const singleContactIdToReachyId = new Map<string, string>()
1387
+ const singleReachyIdToContactId = new Map<string, string>()
1388
+ const singleAllContactIds = new Set<string>([contactId])
1389
+ if (reachyId) {
1390
+ singleReachyIdToContactId.set(reachyId, contactId)
1391
+ singleContactIdToReachyId.set(contactId, reachyId)
1392
+ }
1393
+
1394
+ const matchSet = await this.chExecutor.execute(
1395
+ rule, criteria, organizationId, projectId, projectTimezone,
1396
+ singleReachyIdToContactId, singleContactIdToReachyId, singleAllContactIds
1397
+ )
1398
+ return matchSet.has(contactId)
1399
+ } catch (chErr) {
1400
+ this.logger.warn('[V2AudienceEngine] ClickHouse evalEventRuleForContact failed, falling back to Supabase:', chErr)
1401
+ }
1402
+ }
1403
+
1268
1404
  const effectiveEventName =
1269
1405
  cfg.eventType && String(cfg.eventType).trim() !== ''
1270
1406
  ? String(cfg.eventType)
@@ -1871,15 +2007,23 @@ export class V2AudienceEngine {
1871
2007
  }
1872
2008
 
1873
2009
  const evalGroupForContact = async (group: any): Promise<boolean> => {
2010
+ const rules = group.rules || []
2011
+ if (rules.length === 0) return false
2012
+ const op = group.operator
2013
+
1874
2014
  let acc: boolean | null = null
1875
- for (const rule of group.rules || []) {
2015
+ for (const rule of rules) {
1876
2016
  const r = await evalRuleForContact(rule)
1877
- if (acc == null) acc = r
1878
- else {
1879
- if (group.operator === 'AND') acc = acc && r
1880
- else if (group.operator === 'OR') acc = acc || r
1881
- else if (group.operator === 'NOT') acc = acc && !r
2017
+ if (acc == null) {
2018
+ acc = r
2019
+ } else {
2020
+ if (op === 'AND') acc = acc && r
2021
+ else if (op === 'OR') acc = acc || r
2022
+ else if (op === 'NOT') acc = acc && !r
1882
2023
  }
2024
+ // Early exit: AND false can't become true; OR true can't become false
2025
+ if (op === 'AND' && acc === false) return false
2026
+ if (op === 'OR' && acc === true) return true
1883
2027
  }
1884
2028
  return !!acc
1885
2029
  }
@@ -1887,13 +2031,24 @@ export class V2AudienceEngine {
1887
2031
  let result: boolean | null = null
1888
2032
  for (const group of (criteria as any).groups) {
1889
2033
  const g = await evalGroupForContact(group)
1890
- if (result == null) result = g
1891
- else {
2034
+ if (result == null) {
2035
+ result = g
2036
+ } else {
1892
2037
  const betweenOp = String((group as any).combineOperator || group.operator || 'AND').toUpperCase()
1893
2038
  if (betweenOp === 'AND') result = result && g
1894
2039
  else if (betweenOp === 'OR') result = result || g
1895
2040
  else if (betweenOp === 'NOT') result = result && !g
1896
2041
  }
2042
+ // Early exit between groups
2043
+ if (result === false) {
2044
+ // If all remaining groups are AND/NOT, result stays false
2045
+ const remaining = (criteria as any).groups.slice((criteria as any).groups.indexOf(group) + 1)
2046
+ const allAnd = remaining.every((g2: any) => {
2047
+ const op2 = String(g2.combineOperator || g2.operator || 'AND').toUpperCase()
2048
+ return op2 === 'AND' || op2 === 'NOT'
2049
+ })
2050
+ if (allAnd) return false
2051
+ }
1897
2052
  }
1898
2053
 
1899
2054
  return !!result