@reachy/audience-module 1.0.18 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,853 @@
1
+ /**
2
+ * ClickHouseEventQueryExecutor
3
+ *
4
+ * Evaluates audience rules of kind='event' using ClickHouse instead of
5
+ * Supabase + JavaScript aggregation. This replaces the pattern of:
6
+ * - fetchAll() in pages of 500 rows
7
+ * - JS reduce for frequency/sum/avg/dedup
8
+ * - extra round-trips for first_time / last_time
9
+ *
10
+ * Property rules (kind='property') still run against Supabase contacts table
11
+ * and are NOT handled here.
12
+ *
13
+ * Security: All user-controlled values (orgId, projId, eventName, datetime bounds,
14
+ * attribute values) are passed via ClickHouse query_params to prevent SQL injection.
15
+ * Only field expressions (JSON paths derived from known key formats) are inlined.
16
+ */
17
+
18
+ type Logger = {
19
+ log: (...args: any[]) => void
20
+ warn: (...args: any[]) => void
21
+ error: (...args: any[]) => void
22
+ }
23
+
24
+ // Maximum rows returned by fetch queries to prevent OOM
25
+ const MAX_FETCH_ROWS = 200_000
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Internal helpers
29
+ // ---------------------------------------------------------------------------
30
+
31
/**
 * Coerces any value to a trimmed string.
 * `null` and `undefined` become the empty string; everything else goes
 * through `String(...)` before trimming.
 */
function safeStr(v: any): string {
  if (v === null || v === undefined) {
    return ''
  }
  const text = String(v)
  return text.trim()
}
34
+
35
/**
 * Maps a ClickHouse result row to a single contact UUID.
 *
 * Resolution order:
 *  1. the row's trimmed `contact_id`, when it is a member of `allContactIds`;
 *  2. otherwise the contact mapped from the row's trimmed `reachy_id`.
 *
 * @returns the resolved contact id, or `null` when neither lookup succeeds.
 */
function resolveContactId(
  row: { contact_id?: string | null; reachy_id?: string | null },
  reachyIdToContactId: Map<string, string>,
  allContactIds: Set<string>
): string | null {
  // Same normalisation as safeStr: null/undefined -> '', otherwise trimmed string.
  const normalise = (raw: string | null | undefined): string =>
    raw === null || raw === undefined ? '' : String(raw).trim()

  const contactId = normalise(row.contact_id)
  if (contactId !== '' && allContactIds.has(contactId)) {
    return contactId
  }

  const reachyId = normalise(row.reachy_id)
  if (reachyId === '') {
    return null
  }
  const viaReachy = reachyIdToContactId.get(reachyId)
  return viaReachy ? viaReachy : null
}
54
+
55
/**
 * Builds a ClickHouse DateTime-compatible string (`YYYY-MM-DD HH:MM:SS`) from
 * an ISO-8601 date string.
 *
 * - A trailing `Z` and any fractional seconds are dropped, as before.
 * - Inputs that carry an explicit numeric UTC offset (e.g. `+02:00`) are first
 *   normalised to UTC. Previously the offset was left in the output
 *   (`2024-01-01 10:00:00+02:00`), which is not a valid DateTime literal.
 * - Inputs without any zone designator are passed through unchanged apart from
 *   the `T` separator, so their interpretation is left to ClickHouse.
 *
 * NOTE(review): stripping `Z` assumes the server/column timezone is UTC — confirm.
 */
function chDateTime(iso: string): string {
  let text = String(iso).trim()

  // Only rewrite when a time-of-day is followed by a numeric offset; date-only
  // strings such as `2024-01-01` must not be mistaken for an offset.
  const hasNumericOffset = /\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?[+-]\d{2}:?\d{2}$/.test(text)
  if (hasNumericOffset) {
    const parsed = new Date(text)
    if (!Number.isNaN(parsed.getTime())) {
      text = parsed.toISOString()
    }
  }

  const spaced = text.replace('T', ' ').replace('Z', '')
  const dot = spaced.indexOf('.')
  return dot === -1 ? spaced : spaced.slice(0, dot)
}
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // JSON field accessor helpers
62
+ // ---------------------------------------------------------------------------
63
+
64
+ type JsonFieldRef = { expr: string; kind: 'string' | 'float' }
65
+
66
+ const COLUMN_FIELDS = new Set([
67
+ 'current_url', 'domain', 'path', 'referrer', 'page_title',
68
+ 'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
69
+ 'session_id', 'session_is_new',
70
+ ])
71
+
72
// Field names that get inlined into JSON path expressions may only consist of
// ASCII letters, digits and underscores.
const SAFE_FIELD_RE = /^[a-zA-Z0-9_]+$/

/** Returns true when `name` is non-empty and made up solely of allow-listed characters. */
function isSafeFieldName(name: string): boolean {
  const matched = SAFE_FIELD_RE.exec(name)
  return matched !== null
}
78
+
79
/**
 * Translates a rule attribute key into a ClickHouse expression that reads the
 * corresponding value as a string.
 *
 * - Keys listed in `COLUMN_FIELDS` map directly to the column of the same name.
 * - `event_data.<path>` reads from the `event_data` JSON column.
 * - `custom_data.` / `url_data.` / `utm_data.` / `session_data.` prefixes read
 *   from the sub-object of that name inside `event_data`.
 * - Any other dotted key is looked up under `event_data.custom_data`.
 * - A plain key is read from the top level of `event_data`.
 *
 * Every path segment must pass `isSafeFieldName`; otherwise (or for an empty
 * key) the expression is the SQL empty-string literal `''`.
 */
function resolveEventFieldExpr(keyRaw: string): JsonFieldRef {
  const asString = (expr: string): JsonFieldRef => ({ expr, kind: 'string' })

  const key = safeStr(keyRaw)
  if (key === '') return asString("''")
  if (COLUMN_FIELDS.has(key)) return asString(key)

  // Builds JSONExtractString(JSONExtractRaw(...base..., 'a'), 'b') for path "a.b".
  const jsonStringAt = (base: string, dotted: string): string => {
    const segments = dotted.split('.').filter((segment) => segment.length > 0)
    if (segments.some((segment) => !isSafeFieldName(segment))) return "''"
    if (segments.length === 0) return `JSONExtractString(${base}, 'value')`
    const leaf = segments[segments.length - 1]
    const container = segments
      .slice(0, -1)
      .reduce((inner, segment) => `JSONExtractRaw(${inner}, '${segment}')`, base)
    return `JSONExtractString(${container}, '${leaf}')`
  }

  const customDataBase = "JSONExtractRaw(event_data, 'custom_data')"
  const prefixBases: Array<[string, string]> = [
    ['event_data.', 'event_data'],
    ['custom_data.', customDataBase],
    ['url_data.', "JSONExtractRaw(event_data, 'url_data')"],
    ['utm_data.', "JSONExtractRaw(event_data, 'utm_data')"],
    ['session_data.', "JSONExtractRaw(event_data, 'session_data')"],
  ]
  for (const [prefix, base] of prefixBases) {
    if (key.startsWith(prefix)) {
      return asString(jsonStringAt(base, key.slice(prefix.length)))
    }
  }

  // Dotted key without a recognised prefix: treated as a custom_data path.
  if (key.includes('.')) return asString(jsonStringAt(customDataBase, key))

  return asString(isSafeFieldName(key) ? `JSONExtractString(event_data, '${key}')` : "''")
}
108
+
109
+ // ---------------------------------------------------------------------------
110
+ // Attribute WHERE clause builder (parameterized)
111
+ // ---------------------------------------------------------------------------
112
+
113
+ type ParamBag = Record<string, string | number>
114
+
115
/**
 * Escapes the LIKE metacharacters (`%`, `_`) and the escape character itself
 * (`\`) so that a user-supplied value is matched literally inside a pattern.
 */
function escapeLikeLiteral(text: string): string {
  return text.replace(/[\\%_]/g, (ch) => `\\${ch}`)
}

/**
 * Builds the AND-joined WHERE fragment for a rule's attribute filters.
 *
 * Each attribute contributes one condition on the expression returned by
 * `resolveEventFieldExpr(attr.key)`. The compared value is never inlined: it
 * is stored in `params` under a fresh `attr_v_<n>` name and referenced as a
 * `{name:String}` placeholder.
 *
 * Supported `op` values: `equals`, `not_equals`, `starts_with`, `ends_with`,
 * `contains`, `not_contains`. A missing or unrecognised op is treated as
 * `contains`.
 *
 * Attributes are skipped when they are null, have no key, have a
 * null/undefined value, or resolve to the empty expression `''`.
 *
 * @param attributes attribute filter objects (`{ key, op, value }`)
 * @param params     parameter bag, mutated with the generated values
 * @param paramIdx   shared counter used to generate unique parameter names
 * @returns the WHERE fragment, or `''` when no condition was produced
 */
function buildAttributeWhereClause(attributes: any[], params: ParamBag, paramIdx: { n: number }): string {
  if (!Array.isArray(attributes) || attributes.length === 0) return ''

  const clauses: string[] = []
  for (const attr of attributes) {
    // Optional chaining: a null/undefined entry is skipped instead of throwing.
    const key = safeStr(attr?.key)
    if (!key) continue
    const op = safeStr(attr?.op) || 'contains'
    const value = attr?.value
    if (value === null || value === undefined) continue

    const { expr } = resolveEventFieldExpr(key)
    if (expr === "''") continue

    const pName = `attr_v_${paramIdx.n++}`
    const literal = String(value)
    // Wildcards typed by the user must match literally; only the wildcards
    // added below are meant to act as patterns.
    const likeLiteral = escapeLikeLiteral(literal)

    switch (op) {
      case 'equals':
        params[pName] = literal
        clauses.push(`${expr} = {${pName}:String}`)
        break
      case 'not_equals':
        params[pName] = literal
        clauses.push(`${expr} != {${pName}:String}`)
        break
      case 'starts_with':
        params[pName] = `${likeLiteral}%`
        clauses.push(`${expr} LIKE {${pName}:String}`)
        break
      case 'ends_with':
        params[pName] = `%${likeLiteral}`
        clauses.push(`${expr} LIKE {${pName}:String}`)
        break
      case 'not_contains':
        // Previously fell through to the default and was evaluated as `contains`.
        params[pName] = `%${likeLiteral}%`
        clauses.push(`${expr} NOT LIKE {${pName}:String}`)
        break
      case 'contains':
      default:
        params[pName] = `%${likeLiteral}%`
        clauses.push(`${expr} LIKE {${pName}:String}`)
        break
    }
  }
  return clauses.join(' AND ')
}
156
+
157
+ // ---------------------------------------------------------------------------
158
+ // Time window builder (parameterized)
159
+ // ---------------------------------------------------------------------------
160
+
161
type TimeWindowUnit = 'minutes' | 'hours' | 'days' | 'weeks' | 'months'

/** Length of each rolling-window unit in milliseconds (a month counts as 30 days). */
const TIME_WINDOW_UNIT_MS: Record<TimeWindowUnit, number> = {
  minutes: 60000,
  hours: 3600000,
  days: 86400000,
  weeks: 604800000,
  months: 2592000000,
}

/**
 * Returns the ISO timestamp `value` units before now.
 * Unknown units fall back to days. A non-numeric `value` raises a descriptive
 * RangeError instead of the opaque one `toISOString()` would throw.
 */
function rollingWindowStartIso(value: number, unit: string): string {
  // Own-property check so names such as "constructor" cannot hit the prototype.
  const msPerUnit = Object.prototype.hasOwnProperty.call(TIME_WINDOW_UNIT_MS, unit)
    ? TIME_WINDOW_UNIT_MS[unit as TimeWindowUnit]
    : TIME_WINDOW_UNIT_MS.days
  const start = new Date(Date.now() - value * msPerUnit)
  if (Number.isNaN(start.getTime())) {
    throw new RangeError(`Invalid rolling time window (value=${String(value)}, unit=${unit})`)
  }
  return start.toISOString()
}

/**
 * Parses a `timeFrame` setting such as `30`, `15m`, `6h`, `7d`, `2w` or `3mo`.
 * A bare number means days; anything unparseable means 7 days.
 */
function parseTimeFrameWindow(timeFrame: unknown): { value: number; unit: TimeWindowUnit } {
  const match = /^(\d+)(?:\s*(mo|m|h|d|w))?$/.exec(String(timeFrame).trim())
  if (!match) return { value: 7, unit: 'days' }
  const value = Number(match[1])
  switch (match[2]) {
    case 'm': return { value, unit: 'minutes' }
    case 'h': return { value, unit: 'hours' }
    case 'w': return { value, unit: 'weeks' }
    case 'mo': return { value, unit: 'months' }
    default: return { value, unit: 'days' }
  }
}

/**
 * Builds the parameterized `event_timestamp` condition for a rule.
 *
 * Precedence:
 *  1. `rule.time` with `unit` + `value`: rolling window ending now (lower bound only).
 *  2. `rule.time` with `from` / `to`: absolute bounds, each optional.
 *  3. `cfg.timeFrame` (only when `rule.time` is absent): rolling window.
 *
 * Bound values are written to `params` as `time_from_<n>` / `time_to_<n>` and
 * referenced as `{name:DateTime64(3)}` placeholders.
 *
 * @returns the AND-joined condition, or `''` when no time constraint applies
 * @throws RangeError when a rolling window value is not numeric
 */
function buildTimeWhereClause(rule: any, cfg: any, params: ParamBag, paramIdx: { n: number }): string {
  const parts: string[] = []

  const addLowerBound = (iso: string): void => {
    const pName = `time_from_${paramIdx.n++}`
    params[pName] = chDateTime(iso)
    parts.push(`event_timestamp >= {${pName}:DateTime64(3)}`)
  }
  const addUpperBound = (iso: string): void => {
    const pName = `time_to_${paramIdx.n++}`
    params[pName] = chDateTime(iso)
    parts.push(`event_timestamp <= {${pName}:DateTime64(3)}`)
  }

  if (rule.time) {
    if (rule.time.unit && rule.time.value != null) {
      addLowerBound(rollingWindowStartIso(Number(rule.time.value), String(rule.time.unit)))
    } else {
      if (rule.time.from) addLowerBound(String(rule.time.from))
      if (rule.time.to) addUpperBound(String(rule.time.to))
    }
  } else if (cfg && cfg.timeFrame) {
    const { value, unit } = parseTimeFrameWindow(cfg.timeFrame)
    addLowerBound(rollingWindowStartIso(value, unit))
  }

  return parts.join(' AND ')
}
210
+
211
+ // ---------------------------------------------------------------------------
212
+ // Executor
213
+ // ---------------------------------------------------------------------------
214
+
215
/**
 * Evaluates event audience rules against the ClickHouse `contact_events` table.
 *
 * `execute` picks one of four strategies:
 *  - first_time / last_time filters: GROUP BY contact with min/max HAVING
 *  - frequency without in-memory filters: GROUP BY contact with an aggregate
 *  - everything else: fetch matching rows (capped at MAX_FETCH_ROWS) and
 *    post-process them in JavaScript
 *
 * All values are sent through `query_params`. The only text inlined into SQL
 * is field expressions from `resolveEventFieldExpr` and comparison operators
 * taken from a fixed allow-list.
 */
export class ClickHouseEventQueryExecutor {
  /** Comparison operators that may be inlined into the frequency query. */
  private static readonly FREQUENCY_OPERATORS: ReadonlySet<string> = new Set(['>=', '>', '=', '!=', '<=', '<'])

  /** Length of each rolling-window unit in milliseconds (a month counts as 30 days). */
  private static readonly WINDOW_UNIT_MS: Readonly<Record<string, number>> = {
    minutes: 60000,
    hours: 3600000,
    days: 86400000,
    weeks: 604800000,
    months: 2592000000,
  }

  /**
   * @param client object exposing `query({ query, query_params, format })` whose
   *               result has an async `json()` method
   * @param logger sink for warnings and errors; defaults to `console`
   */
  constructor(
    private client: any,
    private logger: Logger = console
  ) {}

  /**
   * Main entry point — mirrors the return type of V2AudienceEngine.evalEventRule.
   *
   * Any error raised while evaluating the rule is logged and results in an
   * empty set. The strategy calls are awaited inside the `try` so that
   * rejected queries actually reach the `catch`.
   *
   * Note: when a first_time / last_time filter is present, the remaining rule
   * filters, frequency and live presets are not applied.
   *
   * @returns the Set of contact UUIDs that match the rule
   */
  async execute(
    rule: any,
    criteria: any,
    organizationId: string,
    projectId: string,
    projectTimezone: string,
    reachyIdToContactId: Map<string, string>,
    _contactIdToReachyId: Map<string, string>,
    allContactIds: Set<string>
  ): Promise<Set<string>> {
    try {
      const cfg = criteria?.config || {}
      const typeId = String(criteria?.type || '')
      const effectiveEventName =
        cfg.eventType && String(cfg.eventType).trim() !== ''
          ? String(cfg.eventType)
          : String(rule.eventName)

      const filterType = (f: any): string => String(f?.type || '').trim()
      const ruleFiltersAll: any[] = Array.isArray(rule.filters) ? rule.filters : []
      const hasFirstTime = ruleFiltersAll.some((f) => filterType(f) === 'first_time')
      const hasLastTime = ruleFiltersAll.some((f) => filterType(f) === 'last_time')
      const ruleFilters = ruleFiltersAll.filter((f) => {
        const t = filterType(f)
        return t !== '' && t !== 'first_time' && t !== 'last_time'
      })

      // Filters that can only be evaluated on fetched rows.
      const inMemoryTypes = ['time_of_day', 'day_of_week', 'day_of_month', 'event_property']
      const needsInMemory =
        ruleFilters.some((f) => inMemoryTypes.includes(filterType(f))) ||
        !!(rule.interest && rule.interest.key)
      const hasPureFrequency = rule.frequency && rule.frequency.value != null

      // --- first_time / last_time: GROUP BY + HAVING min/max ---
      if (hasFirstTime && hasLastTime) {
        return await this._executeFirstAndLastTime(rule, cfg, organizationId, projectId, effectiveEventName, reachyIdToContactId, allContactIds)
      }
      if (hasFirstTime) {
        return await this._executeFirstTime(rule, cfg, organizationId, projectId, effectiveEventName, reachyIdToContactId, allContactIds)
      }
      if (hasLastTime) {
        return await this._executeLastTime(rule, cfg, organizationId, projectId, effectiveEventName, reachyIdToContactId, allContactIds)
      }

      // --- Pure frequency: aggregate in ClickHouse ---
      if (hasPureFrequency && !needsInMemory) {
        return await this._executeFrequency(rule, cfg, organizationId, projectId, effectiveEventName, typeId, reachyIdToContactId, allContactIds)
      }

      // --- Fallback: fetch rows and post-process in JS ---
      return await this._executeFetchAndProcess(
        rule, cfg, organizationId, projectId, effectiveEventName, typeId,
        projectTimezone, ruleFilters, reachyIdToContactId, allContactIds
      )
    } catch (err) {
      this.logger.error('[ClickHouseEventQueryExecutor] execute error:', err)
      return new Set()
    }
  }

  // ---------------------------------------------------------------------------
  // first_time: contacts whose FIRST occurrence of the event is within the window
  // ---------------------------------------------------------------------------
  private async _executeFirstTime(
    rule: any, cfg: any, orgId: string, projId: string,
    eventName: string, reachyIdToContactId: Map<string, string>, allContactIds: Set<string>
  ): Promise<Set<string>> {
    const params: ParamBag = {}
    const paramIdx = { n: 0 }
    const { start, end } = this._resolveWindowBounds(rule, cfg)
    let whereClause = this._baseWhere(orgId, projId, eventName, params)

    const attrWhere = this._buildAttributeWhere(rule, params, paramIdx)
    if (attrWhere) whereClause += ` AND ${attrWhere}`

    // Events after the window end are excluded up front; the earliest of the
    // remaining events must not precede the window start.
    let havingClause = ''
    if (start) {
      params['ft_window_start'] = chDateTime(start)
      havingClause = `min(event_timestamp) >= {ft_window_start:DateTime64(3)}`
    }
    if (end) {
      params['ft_window_end'] = chDateTime(end)
      whereClause += ` AND event_timestamp <= {ft_window_end:DateTime64(3)}`
    }

    const query = this._groupedContactsQuery(whereClause, havingClause)
    return this._queryToContactIdSet(query, params, reachyIdToContactId, allContactIds)
  }

  // last_time: contacts whose LAST occurrence is within (or before) the window end
  private async _executeLastTime(
    rule: any, cfg: any, orgId: string, projId: string,
    eventName: string, reachyIdToContactId: Map<string, string>, allContactIds: Set<string>
  ): Promise<Set<string>> {
    const params: ParamBag = {}
    const paramIdx = { n: 0 }
    const { start, end } = this._resolveWindowBounds(rule, cfg)
    let whereClause = this._baseWhere(orgId, projId, eventName, params)

    const attrWhere = this._buildAttributeWhere(rule, params, paramIdx)
    if (attrWhere) whereClause += ` AND ${attrWhere}`
    if (start) {
      params['lt_window_start'] = chDateTime(start)
      whereClause += ` AND event_timestamp >= {lt_window_start:DateTime64(3)}`
    }

    let havingClause = ''
    if (end) {
      params['lt_window_end'] = chDateTime(end)
      havingClause = `max(event_timestamp) <= {lt_window_end:DateTime64(3)}`
    }

    const query = this._groupedContactsQuery(whereClause, havingClause)
    return this._queryToContactIdSet(query, params, reachyIdToContactId, allContactIds)
  }

  // first_time + last_time: every occurrence of the event lies inside the window
  private async _executeFirstAndLastTime(
    rule: any, cfg: any, orgId: string, projId: string,
    eventName: string, reachyIdToContactId: Map<string, string>, allContactIds: Set<string>
  ): Promise<Set<string>> {
    const params: ParamBag = {}
    const paramIdx = { n: 0 }
    const { start, end } = this._resolveWindowBounds(rule, cfg)
    let whereClause = this._baseWhere(orgId, projId, eventName, params)

    // Attribute filters apply here exactly as in the single first/last paths.
    const attrWhere = this._buildAttributeWhere(rule, params, paramIdx)
    if (attrWhere) whereClause += ` AND ${attrWhere}`

    const havingParts: string[] = []
    if (start) {
      params['flt_window_start'] = chDateTime(start)
      havingParts.push(`min(event_timestamp) >= {flt_window_start:DateTime64(3)}`)
    }
    if (end) {
      params['flt_window_end'] = chDateTime(end)
      havingParts.push(`max(event_timestamp) <= {flt_window_end:DateTime64(3)}`)
    }

    const query = this._groupedContactsQuery(whereClause, havingParts.join(' AND '))
    return this._queryToContactIdSet(query, params, reachyIdToContactId, allContactIds)
  }

  // ---------------------------------------------------------------------------
  // Frequency: GROUP BY contact + filter on count/sum/avg
  // ---------------------------------------------------------------------------
  private async _executeFrequency(
    rule: any, cfg: any, orgId: string, projId: string, eventName: string, typeId: string,
    reachyIdToContactId: Map<string, string>, allContactIds: Set<string>
  ): Promise<Set<string>> {
    const { op, value, value2, type: freqType = 'count', field = 'value' } = rule.frequency

    // `op` is inlined into SQL below, so it must come from the allow-list.
    // Unknown operators match nobody, like the in-memory comparison.
    const isBetween = op === 'between'
    if (!isBetween && !ClickHouseEventQueryExecutor.FREQUENCY_OPERATORS.has(op)) {
      this.logger.warn('[ClickHouseEventQueryExecutor] unsupported frequency operator:', op)
      return new Set()
    }

    const params: ParamBag = {}
    const paramIdx = { n: 0 }
    let whereClause = this._baseWhere(orgId, projId, eventName, params)
    const timeWhere = buildTimeWhereClause(rule, cfg, params, paramIdx)
    if (timeWhere) whereClause += ` AND ${timeWhere}`

    const attrWhere = this._buildAttributeWhere(rule, params, paramIdx)
    if (attrWhere) whereClause += ` AND ${attrWhere}`

    const liveWhere = this._buildLivePresetWhere(cfg, typeId, params, paramIdx)
    if (liveWhere) whereClause += ` AND ${liveWhere}`

    let aggExpr = 'count()'
    if (freqType === 'sum' || freqType === 'avg') {
      // resolveEventFieldExpr validates the field name before it is inlined.
      const { expr } = resolveEventFieldExpr(field)
      aggExpr = `${freqType}(toFloat64OrZero(${expr}))`
    }

    params['freq_val'] = Number(value)
    let having: string
    if (isBetween) {
      params['freq_val2'] = Number(value2)
      having = `agg_val >= {freq_val:Float64} AND agg_val <= {freq_val2:Float64}`
    } else {
      having = `agg_val ${op} {freq_val:Float64}`
    }

    const query = `
      SELECT contact_id, reachy_id
      FROM (
        SELECT contact_id, reachy_id, ${aggExpr} AS agg_val
        FROM contact_events
        WHERE ${whereClause}
        GROUP BY contact_id, reachy_id
      )
      WHERE ${having}
    `
    return this._queryToContactIdSet(query, params, reachyIdToContactId, allContactIds)
  }

  // ---------------------------------------------------------------------------
  // Generic fetch + optional JS post-processing
  // ---------------------------------------------------------------------------
  private async _executeFetchAndProcess(
    rule: any, cfg: any, orgId: string, projId: string, eventName: string, typeId: string,
    projectTimezone: string, ruleFilters: any[],
    reachyIdToContactId: Map<string, string>, allContactIds: Set<string>
  ): Promise<Set<string>> {
    const params: ParamBag = {}
    const paramIdx = { n: 0 }
    let whereClause = this._baseWhere(orgId, projId, eventName, params)
    const timeWhere = buildTimeWhereClause(rule, cfg, params, paramIdx)
    if (timeWhere) whereClause += ` AND ${timeWhere}`

    const attrWhere = this._buildAttributeWhere(rule, params, paramIdx)
    if (attrWhere) whereClause += ` AND ${attrWhere}`

    const liveWhere = this._buildLivePresetWhere(cfg, typeId, params, paramIdx)
    if (liveWhere) whereClause += ` AND ${liveWhere}`

    // Select only the columns the post-processing steps will read.
    const filterType = (f: any): string => String(f?.type || '').trim()
    const needsEventData = !!(
      (rule.interest && rule.interest.key) ||
      ruleFilters.some((f) => filterType(f) === 'event_property') ||
      (rule.frequency && rule.frequency.type && rule.frequency.type !== 'count') ||
      typeId === 'live-page-count'
    )
    const needsTimestamp = ruleFilters.some((f) =>
      ['time_of_day', 'day_of_week', 'day_of_month'].includes(filterType(f))
    )

    let selectFields = 'contact_id, reachy_id'
    if (needsEventData) selectFields += ', event_data'
    if (needsTimestamp) selectFields += ', event_timestamp'
    if (needsEventData || needsTimestamp) selectFields += ', event_name'

    const query = `
      SELECT ${selectFields}
      FROM contact_events
      WHERE ${whereClause}
      LIMIT ${MAX_FETCH_ROWS}
    `

    let rows: any[] = []
    try {
      const result = await this.client.query({ query, query_params: params, format: 'JSONEachRow' })
      rows = await result.json()
    } catch (err) {
      this.logger.error('[ClickHouseEventQueryExecutor] fetch error:', err)
      return new Set()
    }

    if (!rows || rows.length === 0) return new Set()

    // Hitting the cap means rows were dropped, so counts and matches below may
    // be incomplete. Surface that instead of failing silently.
    if (rows.length >= MAX_FETCH_ROWS) {
      this.logger.warn(
        `[ClickHouseEventQueryExecutor] fetch hit the ${MAX_FETCH_ROWS}-row limit; results may be incomplete`
      )
    }

    // event_data arrives as a JSON string; unparseable payloads become {}.
    if (needsEventData) {
      for (const r of rows) {
        if (typeof r.event_data === 'string') {
          try { r.event_data = JSON.parse(r.event_data) } catch { r.event_data = {} }
        }
      }
    }

    // In-memory filters (time_of_day, day_of_week, event_property, ...)
    if (ruleFilters.length > 0) {
      rows = applyEventRuleFilters(rows, ruleFilters, { timezone: projectTimezone })
    }

    const toContactId = (row: any): string | null =>
      resolveContactId(row, reachyIdToContactId, allContactIds)

    // User Interest
    const interest = rule.interest
    if (interest && interest.key) {
      return computeInterestContactIds(rows, interest, (row: any) => toContactId(row))
    }

    // live-page-count threshold
    if (typeId === 'live-page-count' && cfg && cfg.pageCount != null) {
      const threshold = Number(cfg.pageCount)
      const counts = new Map<string, number>()
      for (const row of rows) {
        const cid = toContactId(row)
        if (cid) counts.set(cid, (counts.get(cid) || 0) + 1)
      }
      const res = new Set<string>()
      counts.forEach((cnt, id) => { if (cnt >= threshold) res.add(id) })
      return res
    }

    // Frequency with in-memory post-processing
    if (rule.frequency && rule.frequency.value != null) {
      const { op, value, value2, type: freqType = 'count', field = 'value' } = rule.frequency
      const totals = new Map<string, number>()
      const samples = new Map<string, number>()
      for (const row of rows) {
        const cid = toContactId(row)
        if (!cid) continue
        if (freqType === 'count') {
          totals.set(cid, (totals.get(cid) || 0) + 1)
          continue
        }
        const raw = getEventPropertyValue(row.event_data, field) ?? row.event_data?.value ?? row.event_data?.amount
        const numVal = Number(raw)
        if (Number.isFinite(numVal)) {
          totals.set(cid, (totals.get(cid) || 0) + numVal)
          if (freqType === 'avg') samples.set(cid, (samples.get(cid) || 0) + 1)
        }
      }
      const res = new Set<string>()
      totals.forEach((total, id) => {
        let finalVal = total
        if (freqType === 'avg') {
          const n = samples.get(id) || 0
          finalVal = n > 0 ? total / n : 0
        }
        if (this._frequencyMatches(op, finalVal, value, value2)) res.add(id)
      })
      return res
    }

    // Default: any match
    const res = new Set<string>()
    for (const row of rows) {
      const cid = toContactId(row)
      if (cid) res.add(cid)
    }
    return res
  }

  /**
   * Applies a frequency comparison in memory. Bounds are coerced with
   * `Number(...)` so that `=` / `!=` behave the same whether the rule stores
   * the threshold as a number or a numeric string. Unknown operators never match.
   */
  private _frequencyMatches(op: any, actual: number, value: any, value2: any): boolean {
    const bound = Number(value)
    switch (op) {
      case 'between': return actual >= bound && actual <= Number(value2)
      case '>=': return actual >= bound
      case '>': return actual > bound
      case '=': return actual === bound
      case '!=': return actual !== bound
      case '<=': return actual <= bound
      case '<': return actual < bound
      default: return false
    }
  }

  // ---------------------------------------------------------------------------
  // Query helpers — all parameterized
  // ---------------------------------------------------------------------------

  /** Builds the per-contact GROUP BY query shared by the first/last-time strategies. */
  private _groupedContactsQuery(whereClause: string, havingClause: string): string {
    return `
      SELECT contact_id, reachy_id
      FROM contact_events
      WHERE ${whereClause}
      GROUP BY contact_id, reachy_id
      ${havingClause ? `HAVING ${havingClause}` : ''}
    `
  }

  /** Registers the organization / project / event-name parameters and returns the matching condition. */
  private _baseWhere(orgId: string, projId: string, eventName: string, params: ParamBag): string {
    params['p_org_id'] = orgId
    params['p_proj_id'] = projId
    params['p_event_name'] = eventName
    return `organization_id = {p_org_id:String} AND project_id = {p_proj_id:String} AND event_name = {p_event_name:String}`
  }

  /**
   * Builds the attribute condition for a rule. Attributes whose key equals
   * the rule's interest key are left out of the SQL filter.
   */
  private _buildAttributeWhere(rule: any, params: ParamBag, paramIdx: { n: number }): string {
    let attributes = Array.isArray(rule.attributes) ? rule.attributes : (rule.attributes ? [rule.attributes] : [])
    const interestKey = rule.interest?.key ? String(rule.interest.key).trim() : ''
    if (interestKey) {
      attributes = attributes.filter((a: any) => String(a?.key || '').trim() !== interestKey)
    }
    return buildAttributeWhereClause(attributes, params, paramIdx)
  }

  /**
   * Wraps a value in `%...%` for a LIKE "contains" match, escaping `%`, `_`
   * and `\` so that they match literally (underscores are common in URLs).
   */
  private static _likeContains(value: unknown): string {
    const escaped = String(value).replace(/[\\%_]/g, (ch) => `\\${ch}`)
    return `%${escaped}%`
  }

  /** Builds the extra conditions for the `live-page-visit` and `live-referrer` presets. */
  private _buildLivePresetWhere(cfg: any, typeId: string, params: ParamBag, paramIdx: { n: number }): string {
    const parts: string[] = []
    const like = ClickHouseEventQueryExecutor._likeContains

    if (typeId === 'live-page-visit') {
      const v = cfg.pageUrl
      const d = cfg.domain
      const ors: string[] = []
      if (v && String(v).trim()) {
        const pPath = `lp_path_${paramIdx.n++}`
        const pUrl = `lp_url_${paramIdx.n++}`
        params[pPath] = like(v)
        params[pUrl] = like(v)
        ors.push(`path LIKE {${pPath}:String}`)
        ors.push(`current_url LIKE {${pUrl}:String}`)
      }
      if (d && String(d).trim()) {
        const pDomain = `lp_domain_${paramIdx.n++}`
        params[pDomain] = like(d)
        ors.push(`domain LIKE {${pDomain}:String}`)
      }
      if (ors.length > 0) parts.push(`(${ors.join(' OR ')})`)
    }

    if (typeId === 'live-referrer') {
      const v = cfg.referrerUrl
      if (v && String(v).trim()) {
        const pRef = `lr_ref_${paramIdx.n++}`
        params[pRef] = like(v)
        parts.push(`referrer LIKE {${pRef}:String}`)
      }
      const utmFilters: Array<[string, string, any]> = [
        ['lr_src', 'utm_source', cfg.utm_source],
        ['lr_med', 'utm_medium', cfg.utm_medium],
        ['lr_camp', 'utm_campaign', cfg.utm_campaign],
      ]
      for (const [paramPrefix, column, raw] of utmFilters) {
        if (!raw) continue
        const p = `${paramPrefix}_${paramIdx.n++}`
        params[p] = like(raw)
        parts.push(`${column} LIKE {${p}:String}`)
      }
    }
    return parts.join(' AND ')
  }

  /**
   * Returns the window `[now - value * unit, now]` as ISO strings.
   * Unknown units fall back to days.
   * @throws RangeError when `value` is not numeric
   */
  private _rollingWindow(value: number, unit: string): { start: string; end: string } {
    const table = ClickHouseEventQueryExecutor.WINDOW_UNIT_MS
    // Own-property check so names such as "constructor" cannot hit the prototype.
    const msPerUnit = Object.prototype.hasOwnProperty.call(table, unit) ? Number(table[unit]) : 86400000
    const now = Date.now()
    const from = new Date(now - value * msPerUnit)
    if (Number.isNaN(from.getTime())) {
      throw new RangeError(`Invalid rolling time window (value=${String(value)}, unit=${unit})`)
    }
    return { start: from.toISOString(), end: new Date(now).toISOString() }
  }

  /**
   * Resolves the rule's time window to ISO bounds.
   *
   * `rule.time` takes precedence: `unit` + `value` gives a rolling window
   * ending now, otherwise `from` / `to` are used as given. Without
   * `rule.time`, `cfg.timeFrame` (`30`, `15m`, `6h`, `7d`, `2w`, `3mo`;
   * unparseable means 7 days) gives a rolling window. With neither, both
   * bounds are absent.
   */
  private _resolveWindowBounds(rule: any, cfg: any): { start?: string; end?: string } {
    if (rule.time) {
      if (rule.time.unit && rule.time.value != null) {
        return this._rollingWindow(Number(rule.time.value), String(rule.time.unit))
      }
      return {
        start: rule.time.from ? String(rule.time.from) : undefined,
        end: rule.time.to ? String(rule.time.to) : undefined,
      }
    }

    if (cfg?.timeFrame) {
      const match = /^(\d+)(?:\s*(mo|m|h|d|w))?$/.exec(String(cfg.timeFrame).trim())
      if (!match) return this._rollingWindow(7, 'days')
      const suffixToUnit: Record<string, string> = { m: 'minutes', h: 'hours', d: 'days', w: 'weeks', mo: 'months' }
      const unit = match[2] ? String(suffixToUnit[match[2]]) : 'days'
      return this._rollingWindow(Number(match[1]), unit)
    }
    return {}
  }

  /** Runs a query returning `contact_id` / `reachy_id` rows and resolves them to a set of contact UUIDs. */
  private async _queryToContactIdSet(
    query: string,
    params: ParamBag,
    reachyIdToContactId: Map<string, string>,
    allContactIds: Set<string>
  ): Promise<Set<string>> {
    const result = await this.client.query({ query, query_params: params, format: 'JSONEachRow' })
    const rows: Array<{ contact_id: string | null; reachy_id: string | null }> = await result.json()
    const res = new Set<string>()
    for (const row of rows) {
      const cid = resolveContactId(row, reachyIdToContactId, allContactIds)
      if (cid) res.add(cid)
    }
    return res
  }
}
686
+
687
+ // ---------------------------------------------------------------------------
688
+ // In-memory helpers (copied from V2AudienceEngine to maintain parity)
689
+ // ---------------------------------------------------------------------------
690
+
691
/**
 * Reads a property from an event payload.
 *
 * - `event_data.<path>` is resolved from the payload root.
 * - `custom_data.<path>` is resolved from the payload's `custom_data` object.
 * - Any other key is tried at the root first, then under `custom_data`.
 *
 * @returns the value found, or `undefined` when the key is blank or missing
 */
function getEventPropertyValue(eventData: any, keyRaw: any): any {
  const key = String(keyRaw || '').trim()
  if (key === '') return undefined

  const payload = eventData || {}
  const eventPrefix = 'event_data.'
  const customPrefix = 'custom_data.'

  if (key.startsWith(eventPrefix)) {
    return getByPath(payload, key.slice(eventPrefix.length))
  }
  if (key.startsWith(customPrefix)) {
    return getByPath(payload?.custom_data, key.slice(customPrefix.length))
  }

  const atRoot = getByPath(payload, key)
  if (atRoot !== null && atRoot !== undefined) return atRoot
  return getByPath(payload?.custom_data, key)
}
699
+
700
/**
 * Follows a dot-separated path through nested objects.
 * Empty path segments are ignored. Returns `undefined` when `obj` or the path
 * is falsy, or when a null/undefined value is met before the path is used up.
 */
function getByPath(obj: any, dottedPath: string): any {
  if (!obj || !dottedPath) return undefined

  const segments = dottedPath.split('.').filter((segment) => segment.length > 0)
  let node: any = obj
  for (const segment of segments) {
    if (node === null || node === undefined) return undefined
    node = node[segment]
  }
  return node
}
709
+
710
/**
 * Tests a single event property value against a filter operator.
 *
 * - `is_empty` / `is_not_empty` look only at `raw` (null, undefined or blank string).
 * - A null/undefined `raw` fails every other operator.
 * - `equals` / `not_equals` compare string forms exactly.
 * - `contains`, `not_contains`, `starts_with`, `ends_with` ignore case.
 * - `gt`, `gte`, `lt`, `lte` compare numerically and fail unless both sides are finite numbers.
 * - Any other operator yields `false`.
 */
function matchesEventPropertyFilter(raw: any, opRaw: any, expected: any): boolean {
  const op = String(opRaw ?? '').trim()
  const isBlank = (): boolean => raw === null || raw === undefined || String(raw).trim() === ''

  if (op === 'is_empty') return isBlank()
  if (op === 'is_not_empty') return !isBlank()
  if (raw === null || raw === undefined) return false

  const actual = String(raw)
  const wanted = String(expected ?? '')
  switch (op) {
    case 'equals':
      return actual === wanted
    case 'not_equals':
      return actual !== wanted
    case 'contains':
      return actual.toLowerCase().includes(wanted.toLowerCase())
    case 'not_contains':
      return !actual.toLowerCase().includes(wanted.toLowerCase())
    case 'starts_with':
      return actual.toLowerCase().startsWith(wanted.toLowerCase())
    case 'ends_with':
      return actual.toLowerCase().endsWith(wanted.toLowerCase())
  }

  const lhs = Number(raw)
  const rhs = Number(expected)
  if (!Number.isFinite(lhs) || !Number.isFinite(rhs)) return false
  switch (op) {
    case 'gt':
      return lhs > rhs
    case 'gte':
      return lhs >= rhs
    case 'lt':
      return lhs < rhs
    case 'lte':
      return lhs <= rhs
    default:
      return false
  }
}
731
+
732
/**
 * Converts a clock time such as `9:30`, `14:05` or `1:15 PM` into minutes
 * since midnight. An AM/PM suffix is optional and case-insensitive.
 * No range validation is done on the hour or minute digits.
 *
 * @returns the minute count, or `null` when the text does not look like a time
 */
function parseTimeToMinutes(raw: any): number | null {
  const text = String(raw || '').trim()
  const match = /^(\d{1,2}):(\d{2})\s*(AM|PM)?$/i.exec(text)
  if (match === null) return null

  const minutes = Number(match[2])
  const meridiem = match[3] ? String(match[3]).toUpperCase() : null

  let hours = Number(match[1])
  if (meridiem === 'AM' && hours === 12) {
    hours = 0 // 12:xx AM is the first hour of the day
  } else if (meridiem === 'PM' && hours !== 12) {
    hours += 12
  }
  return hours * 60 + minutes
}
742
+
743
// One formatter per timezone: constructing Intl.DateTimeFormat is costly and
// getLocalTimeParts is called once per row by the time-based filters.
const localTimePartsFormatters = new Map<string, Intl.DateTimeFormat>()

/**
 * Converts a timestamp into wall-clock parts for the given IANA timezone.
 *
 * @param iso - Timestamp text accepted by `new Date(...)`.
 * @param tz - Timezone passed to `Intl.DateTimeFormat` as `timeZone`.
 * @returns `{ minutes, weekday, dayOfMonth }` where `minutes` is minutes since
 *   local midnight (0..1439), `weekday` is 0 (Sunday) .. 6 (Saturday) and
 *   `dayOfMonth` is the local calendar day; `null` when the timestamp cannot
 *   be parsed or the formatted parts are unusable.
 */
function getLocalTimeParts(iso: any, tz: string) {
  const instant = new Date(String(iso || ''))
  if (Number.isNaN(instant.getTime())) return null

  let formatter = localTimePartsFormatters.get(tz)
  if (!formatter) {
    formatter = new Intl.DateTimeFormat('en-US', { timeZone: tz, hour: '2-digit', minute: '2-digit', hour12: false, weekday: 'short', day: '2-digit' })
    localTimePartsFormatters.set(tz, formatter)
  }

  let hourStr = ''
  let minStr = ''
  let weekdayStr = ''
  let dayStr = ''
  for (const part of formatter.formatToParts(instant)) {
    if (part.type === 'hour') hourStr = part.value
    else if (part.type === 'minute') minStr = part.value
    else if (part.type === 'weekday') weekdayStr = part.value
    else if (part.type === 'day') dayStr = part.value
  }

  // With `hour12: false`, some runtimes render midnight as hour "24" for
  // en-US; fold it back to 0 so `minutes` always stays within 0..1439.
  const hour = Number(hourStr) % 24
  const minute = Number(minStr)
  const dayOfMonth = Number(dayStr)
  if (!Number.isFinite(hour) || !Number.isFinite(minute) || !Number.isFinite(dayOfMonth)) return null

  const weekdayMap: Record<string, number> = { Sun: 0, Mon: 1, Tue: 2, Wed: 3, Thu: 4, Fri: 5, Sat: 6 }
  const weekday = weekdayMap[weekdayStr] ?? -1
  if (weekday < 0) return null

  return { minutes: hour * 60 + minute, weekday, dayOfMonth }
}
763
+
764
// Full and three-letter English weekday names, lower-cased, mapped to 0 (Sunday) .. 6 (Saturday).
// A Map is used so names like "constructor" cannot resolve to inherited object members.
const weekdayNameToIndex = new Map<string, number>([
  ['sunday', 0], ['sun', 0],
  ['monday', 1], ['mon', 1],
  ['tuesday', 2], ['tue', 2],
  ['wednesday', 3], ['wed', 3],
  ['thursday', 4], ['thu', 4],
  ['friday', 5], ['fri', 5],
  ['saturday', 6], ['sat', 6],
])

/**
 * Converts an English weekday name to its index.
 *
 * @param raw - Full (`monday`) or three-letter (`mon`) weekday name; case and
 *   surrounding whitespace are ignored.
 * @returns 0 (Sunday) .. 6 (Saturday), or `null` for anything unrecognized.
 */
function parseWeekdayToIndex(raw: any): number | null {
  const name = String(raw || '').trim().toLowerCase()
  return weekdayNameToIndex.get(name) ?? null
}
769
+
770
/**
 * Narrows event rows by applying each rule filter in order.
 *
 * Filter types handled:
 * - `event_property`: keeps rows whose property at `key` satisfies `op`/`value`;
 *   skipped when `key` is blank.
 * - `time_of_day`: keeps rows whose local time lies within
 *   `timeStart`..`timeEnd` (inclusive). An unparseable bound, or a start
 *   later than the end, empties the result.
 * - `day_of_week`: keeps rows whose local weekday is listed in `days`.
 * - `day_of_month`: keeps rows whose local day of month (1..31) is listed in `days`.
 *
 * The three time-based filters are skipped when no timezone is supplied, and
 * the two day-based filters are skipped when `days` has no usable entry.
 * Rows whose timestamp cannot be converted are dropped by time-based filters.
 * Unknown filter types are ignored.
 *
 * @param rows - Event rows exposing `event_data` and `event_timestamp`.
 * @param filters - Filter descriptors as described above.
 * @param opts - Optional settings; `timezone` is the zone used for local time.
 * @returns The surviving rows; the input array itself when nothing was applied.
 */
function applyEventRuleFilters(rows: any[], filters: any[], opts?: { timezone?: string }): any[] {
  const tz = String(opts?.timezone || '').trim()

  // Keeps rows whose local-time parts satisfy `accept`; rows without usable parts are dropped.
  const keepByLocalTime = (
    input: any[],
    accept: (parts: { minutes: number; weekday: number; dayOfMonth: number }) => boolean
  ): any[] =>
    input.filter((row) => {
      const parts = getLocalTimeParts(row?.event_timestamp, tz)
      return parts ? accept(parts) : false
    })

  let out = rows
  for (const f of filters) {
    switch (String(f?.type || '').trim()) {
      case 'event_property': {
        const key = String(f?.key || '').trim()
        if (!key) continue
        out = out.filter((row) => matchesEventPropertyFilter(getEventPropertyValue(row?.event_data, key), f?.op, f?.value))
        break
      }
      case 'time_of_day': {
        if (!tz) continue
        const startMin = parseTimeToMinutes(f?.timeStart)
        const endMin = parseTimeToMinutes(f?.timeEnd)
        if (startMin == null || endMin == null || startMin > endMin) {
          out = []
          continue
        }
        out = keepByLocalTime(out, (parts) => parts.minutes >= startMin && parts.minutes <= endMin)
        break
      }
      case 'day_of_week': {
        if (!tz) continue
        const weekdays = new Set<number>()
        const requested: any[] = Array.isArray(f?.days) ? f.days : []
        for (const entry of requested) {
          const idx = parseWeekdayToIndex(entry)
          if (idx != null) weekdays.add(idx)
        }
        if (weekdays.size === 0) continue
        out = keepByLocalTime(out, (parts) => weekdays.has(parts.weekday))
        break
      }
      case 'day_of_month': {
        if (!tz) continue
        const monthDays = new Set<number>()
        const requested: any[] = Array.isArray(f?.days) ? f.days : []
        for (const entry of requested) {
          const n = Number(entry)
          if (Number.isFinite(n) && n >= 1 && n <= 31) monthDays.add(Math.floor(n))
        }
        if (monthDays.size === 0) continue
        out = keepByLocalTime(out, (parts) => monthDays.has(parts.dayOfMonth))
        break
      }
    }
  }
  return out
}
801
+
802
/**
 * Tests whether an event property value matches an interest condition.
 *
 * Both sides are trimmed before comparison. `equals` / `not_equals` are
 * case-sensitive; `contains`, `not_contains`, `starts_with` and `ends_with`
 * are case-insensitive. A blank operator means `equals`; any other
 * unrecognized operator is treated as `contains`.
 *
 * @param raw - Property value read from the event.
 * @param op - Operator name.
 * @param expected - Value configured on the interest.
 * @returns `false` when `raw` is null/undefined or `expected` is blank;
 *   otherwise the result of the comparison.
 */
function matchesInterest(raw: any, op: string, expected: any): boolean {
  if (raw === null || raw === undefined) return false
  const actual = String(raw).trim()
  const wanted = String(expected ?? '').trim()
  if (!wanted) return false

  const operator = String(op || 'equals').trim()
  if (operator === 'equals') return actual === wanted
  if (operator === 'not_equals') return actual !== wanted

  const haystack = actual.toLowerCase()
  const needle = wanted.toLowerCase()
  switch (operator) {
    case 'starts_with':
      return haystack.startsWith(needle)
    case 'ends_with':
      return haystack.endsWith(needle)
    case 'not_contains':
      // Handled explicitly so it does not fall through to the `contains` default below.
      return !haystack.includes(needle)
    default:
      // 'contains' and any unrecognized operator.
      return haystack.includes(needle)
  }
}
814
+
815
/**
 * Selects the contacts whose events show the given interest.
 *
 * Every row is attributed to a contact via `resolveId`; rows that resolve to
 * no contact are ignored. For each contact the property at `interest.key` is
 * read from `event_data` and tested with `matchesInterest`.
 *
 * - `occurrenceType === 'at_least'`: the contact qualifies when at least one
 *   event matches and the share of matching events reaches
 *   `occurrencePercentage` (clamped to 0..100).
 * - any other `occurrenceType` (default `'predominantly'`): the contact
 *   qualifies when at least one event matches and the number of matching
 *   events is not lower than the count of the most frequent non-matching
 *   value. Missing values are counted under the empty string.
 *
 * @param rows - Event rows exposing `event_data`.
 * @param interest - Interest definition (property key, operator, value, occurrence mode).
 * @param resolveId - Maps a row to a contact id, or `null` when it has none.
 * @returns The set of qualifying contact ids.
 */
function computeInterestContactIds(
  rows: any[],
  interest: { key: string; op: string; value: any; occurrenceType?: string; occurrencePercentage?: number },
  resolveId: (row: any) => string | null
): Set<string> {
  const { key, op, value, occurrenceType = 'predominantly', occurrencePercentage = 0 } = interest
  const atLeastMode = occurrenceType === 'at_least'

  // Coerce first: Number.isFinite does not convert, so a percentage that
  // arrives as a string (e.g. '50') would otherwise silently become 0.
  const percentage = Number(occurrencePercentage)
  const threshold = Number.isFinite(percentage) ? Math.max(0, Math.min(100, percentage)) / 100 : 0

  const totals = new Map<string, number>()
  const matchCounts = new Map<string, number>()
  // Per contact: how often each distinct property value occurred (only used outside 'at_least' mode).
  const valueCounts = new Map<string, Map<string, number>>()

  for (const row of rows) {
    const id = resolveId(row)
    if (!id) continue
    totals.set(id, (totals.get(id) || 0) + 1)

    const rawVal = getEventPropertyValue(row?.event_data, key)
    if (!atLeastMode) {
      const strVal = rawVal === null || rawVal === undefined ? '' : String(rawVal)
      let perValue = valueCounts.get(id)
      if (!perValue) {
        perValue = new Map<string, number>()
        valueCounts.set(id, perValue)
      }
      perValue.set(strVal, (perValue.get(strVal) || 0) + 1)
    }
    if (matchesInterest(rawVal, op, value)) matchCounts.set(id, (matchCounts.get(id) || 0) + 1)
  }

  const res = new Set<string>()
  totals.forEach((total, id) => {
    if (total <= 0) return
    const match = matchCounts.get(id) || 0
    if (match <= 0) return

    if (atLeastMode) {
      if (match / total >= threshold) res.add(id)
      return
    }

    const perValue = valueCounts.get(id)
    if (!perValue) return
    // Highest count among the values that do NOT match the interest.
    let maxOther = 0
    perValue.forEach((cnt, valStr) => {
      if (!matchesInterest(valStr, op, value) && cnt > maxOther) maxOther = cnt
    })
    if (match >= maxOther) res.add(id)
  })
  return res
}