ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -95,50 +95,99 @@ const isFinalData = (detectionMethod, dayThreshold) => {
95
95
  };
96
96
 
97
97
  /**
98
- * Checks whether a given column name is part of the standard/expected GA4 BigQuery export columns.
98
+ * The standard GA4 BigQuery export top-level column names, based on the official schema.
99
99
  *
100
- * The list of recognized GA4 export columns is based on the official schema as of 2026-02-18.
101
- * This function can be used to filter or validate column names when processing GA4 data exports.
100
+ * list updated 2026-02-18
101
+ */
102
+ const ga4ExportColumns = [
103
+ "event_date",
104
+ "event_timestamp",
105
+ "event_name",
106
+ "event_params",
107
+ "event_previous_timestamp",
108
+ "event_value_in_usd",
109
+ "event_bundle_sequence_id",
110
+ "event_server_timestamp_offset",
111
+ "user_id",
112
+ "user_pseudo_id",
113
+ "privacy_info",
114
+ "user_properties",
115
+ "user_first_touch_timestamp",
116
+ "user_ltv",
117
+ "device",
118
+ "geo",
119
+ "app_info",
120
+ "traffic_source",
121
+ "stream_id",
122
+ "platform",
123
+ "event_dimensions",
124
+ "ecommerce",
125
+ "items",
126
+ "collected_traffic_source",
127
+ "is_active_user",
128
+ "batch_event_index",
129
+ "batch_page_id",
130
+ "batch_ordering_id",
131
+ "session_traffic_source_last_click",
132
+ "publisher"
133
+ ];
134
+
135
+ /**
136
+ * Checks whether a given column name is part of the standard GA4 BigQuery export columns.
102
137
  *
103
138
  * @param {string} columnName - The name of the column to check.
104
139
  * @returns {boolean} True if the column name is a GA4 export column, otherwise false.
105
140
  */
106
- const isGa4ExportColumn = (columnName) => {
107
- // list updated 2026-02-18
108
- const ga4ExportColumns = [
109
- "event_date",
110
- "event_timestamp",
111
- "event_name",
112
- "event_params",
113
- "event_previous_timestamp",
114
- "event_value_in_usd",
115
- "event_bundle_sequence_id",
116
- "event_server_timestamp_offset",
117
- "user_id",
118
- "user_pseudo_id",
119
- "privacy_info",
120
- "user_properties",
121
- "user_first_touch_timestamp",
122
- "user_ltv",
123
- "device",
124
- "geo",
125
- "app_info",
126
- "traffic_source",
127
- "stream_id",
128
- "platform",
129
- "event_dimensions",
130
- "ecommerce",
131
- "items",
132
- "collected_traffic_source",
133
- "is_active_user",
134
- "batch_event_index",
135
- "batch_page_id",
136
- "batch_ordering_id",
137
- "session_traffic_source_last_click",
138
- "publisher"
139
- ];
140
- return ga4ExportColumns.includes(columnName);
141
- };
141
+ const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
142
+
143
+ /**
144
+ * The standard GA4 BigQuery export items-struct field names, based on the official schema.
145
+ * Listed in GA4's source order — `items_rebuilt`'s explicit struct construction emits fields
146
+ * in this order, and consumers may reasonably depend on the items-struct schema field order
147
+ * matching GA4's own.
148
+ *
149
+ * `item_params` is a nested REPEATED RECORD and projects through as a single struct entry
150
+ * (no per-key handling).
151
+ *
152
+ * list updated 2026-05-12
153
+ */
154
+ const ga4ItemStructFields = [
155
+ "item_id",
156
+ "item_name",
157
+ "item_brand",
158
+ "item_variant",
159
+ "item_category",
160
+ "item_category2",
161
+ "item_category3",
162
+ "item_category4",
163
+ "item_category5",
164
+ "price_in_usd",
165
+ "price",
166
+ "quantity",
167
+ "item_revenue_in_usd",
168
+ "item_revenue",
169
+ "item_refund_in_usd",
170
+ "item_refund",
171
+ "coupon",
172
+ "affiliation",
173
+ "location_id",
174
+ "item_list_id",
175
+ "item_list_name",
176
+ "item_list_index",
177
+ "promotion_id",
178
+ "promotion_name",
179
+ "creative_name",
180
+ "creative_slot",
181
+ "item_params"
182
+ ];
183
+
184
+ /**
185
+ * Checks whether a given field name is part of the standard GA4 BigQuery export items struct.
186
+ *
187
+ * @param {string} fieldName - The name of the field to check.
188
+ * @returns {boolean} True if the field name is a standard items-struct field, otherwise false.
189
+ */
190
+ const isGa4ItemStructField = (fieldName) => ga4ItemStructFields.includes(fieldName);
142
191
 
143
192
  /**
144
193
  * Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
@@ -255,7 +304,10 @@ module.exports = {
255
304
  sessionId,
256
305
  fixEcommerceStruct,
257
306
  isFinalData,
307
+ ga4ExportColumns,
258
308
  isGa4ExportColumn,
309
+ ga4ItemStructFields,
310
+ isGa4ItemStructField,
259
311
  getGa4ExportType,
260
312
  itemListAttributionExpr,
261
313
  itemRowId,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.1",
3
+ "version": "0.9.0-dev.11",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,7 @@
17
17
  "createTable.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js && node tests/eventDataColumns.test.js && node tests/utils.test.js",
21
21
  "test:summary": "node tests/testRunner.js",
22
22
  "test:docs": "node tests/documentation.test.js",
23
23
  "test:preops": "node tests/preOperations.test.js",
@@ -28,6 +28,9 @@
28
28
  "test:createTable": "node tests/createTable.test.js",
29
29
  "test:queryBuilder": "node tests/queryBuilder.test.js",
30
30
  "test:customSteps": "node tests/customSteps.test.js",
31
+ "test:enrichments": "node tests/enrichments.test.js",
32
+ "test:eventDataColumns": "node tests/eventDataColumns.test.js",
33
+ "test:utils": "node tests/utils.test.js",
31
34
  "test:integration": "node tests/integration/integration.test.js",
32
35
  "release:dev": "./scripts/release-dev.sh",
33
36
  "readme": "node scripts/updateReadme.js",
@@ -68,6 +68,10 @@ const ga4EventsEnhancedConfig = {
68
68
  // user-defined CTEs appended to the pipeline after enhanced_events
69
69
  // each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
70
70
  customSteps: [],
71
+ // declarative external-data enrichments joined into the pipeline
72
+ // each entry: { name, level: 'event' | 'item', source, joinKey, columns, dedupe? }
73
+ // 'item' level entries are LEFT JOINed inside items_rebuilt; their joinKey must be a GA4 items-struct field or an event_data column
74
+ enrichments: [],
71
75
  };
72
76
 
73
77
  module.exports = { ga4EventsEnhancedConfig };
@@ -197,51 +197,46 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
197
197
  return excludedColumns;
198
198
  };
199
199
 
200
- // initial step: extract data from the export tables
200
+ // initial step: extract data from the export tables.
201
+ // Explicit columns first (transforms + package-promoted + user-excluded sentinels);
202
+ // then pass-through entries for every GA4 export column not already accounted for.
203
+ // After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
204
+ const eventDataExplicitColumns = {
205
+ // exclude default export columns that are not needed
206
+ // do this first so that the columns defined later are not excluded
207
+ ...getExcludedColumns(),
208
+ event_date: helpers.eventDate,
209
+ event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
210
+ event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
211
+ session_id: helpers.sessionId,
212
+ // page details
213
+ page_location: helpers.unnestEventParam('page_location', 'string'),
214
+ page: helpers.extractPageDetails(),
215
+ // promote event params to columns
216
+ ...promotedEventParameters(),
217
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
218
+ // rename traffic_source for clarity
219
+ user_traffic_source: 'traffic_source',
220
+ // ecommerce
221
+ ecommerce: helpers.fixEcommerceStruct('ecommerce'),
222
+ // assign a unique row id, used for handling item-level attribution and enrichment
223
+ _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
224
+ // flag if the data is "final" and is not expected to change anymore
225
+ data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
226
+ export_type: helpers.getGa4ExportType('_table_suffix'),
227
+ // prep columns for later steps
228
+ entrances: helpers.unnestEventParam('entrances', 'int'),
229
+ session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
230
+ };
231
+ // Pass through every GA4 export column not already covered by an explicit transform,
232
+ // promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
233
+ const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
201
234
  const eventDataStep = {
202
235
  name: 'event_data',
203
236
  select: {
204
237
  columns: {
205
- // exclude default export columns that are not needed
206
- // do this first so that the columns defined later are not excluded
207
- ...getExcludedColumns(),
208
- // date and time
209
- event_date: helpers.eventDate,
210
- event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
- event_timestamp: 'event_timestamp',
212
- event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
- // event name
214
- event_name: 'event_name',
215
- // identifiers
216
- session_id: helpers.sessionId,
217
- user_pseudo_id: 'user_pseudo_id',
218
- user_id: 'user_id',
219
- // page
220
- page_location: helpers.unnestEventParam('page_location', 'string'),
221
- page: helpers.extractPageDetails(),
222
- // event parameters and user properties
223
- ...promotedEventParameters(),
224
- event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
- user_properties: 'user_properties',
226
- // traffic source
227
- collected_traffic_source: 'collected_traffic_source',
228
- session_traffic_source_last_click: 'session_traffic_source_last_click',
229
- user_traffic_source: 'traffic_source',
230
- // ecommerce
231
- ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
- items: 'items',
233
- _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
234
- // flag if the data is "final" and is not expected to change anymore
235
- data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
- export_type: helpers.getGa4ExportType('_table_suffix'),
237
- // prep columns for later steps
238
- entrances: helpers.unnestEventParam('entrances', 'int'),
239
- session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
- // include all other columns from the export data
241
- get '[sql]other_columns'() {
242
- const definedColumns = Object.keys(this);
243
- return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
244
- },
238
+ ...eventDataExplicitColumns,
239
+ ...eventDataPassThroughs,
245
240
  },
246
241
  },
247
242
  from: mergedConfig.sourceTable,
@@ -268,64 +263,191 @@ ${excludedEventsSQL}`,
268
263
  'group by': 'session_id',
269
264
  };
270
265
 
271
- // Shared item-array CTEs (currently used by item-list attribution; will also be used by
272
- // item-level data enrichments see design_docs/planned/data-enrichments.md, Q16):
273
- // 1. items_unnested: unnest items from ecommerce events, compute attribution via window function
274
- // 2. items_rebuilt: re-aggregate items with attributed list fields
275
- const itemListSteps = itemListAttribution ? (() => {
276
- const attrExpr = helpers.itemListAttributionExpr(
277
- itemListAttribution.lookbackType,
278
- timestampColumn,
279
- itemListAttribution.lookbackTimeMs
280
- );
266
+ // Build enrichment-source CTEs and gather per-level join/column data. The utility routes
267
+ // event-level and item-level entries through separate output channels.
268
+ const { steps: enrichmentSteps, event: eventEnrichments, item: itemEnrichments }
269
+ = utils.buildEnrichments(mergedConfig.enrichments);
270
+
271
+ // Validate item-level joinKey columns and collect any event_data columns that need to
272
+ // be carried up to items_unnested as top-level columns (so the LEFT JOIN inside
273
+ // items_rebuilt can USING(...) on them). Item-struct fields are already top-level on
274
+ // items_unnested and need no extension.
275
+ const itemJoinKeysFromEventData = new Set();
276
+ for (const [i, e] of (mergedConfig.enrichments ?? []).entries()) {
277
+ const level = e.level ?? 'event';
278
+ if (level !== 'item') continue;
279
+ const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
280
+ for (const c of joinKeys) {
281
+ if (helpers.ga4ItemStructFields.includes(c)) {
282
+ // Already a top-level column on items_unnested.
283
+ } else if (c in eventDataStep.select.columns && eventDataStep.select.columns[c] !== undefined) {
284
+ itemJoinKeysFromEventData.add(c);
285
+ } else {
286
+ throw new Error(
287
+ `config.enrichments[${i}] (name: '${e.name}') uses item-level joinKey '${c}', ` +
288
+ `which is neither a field on the GA4 items struct (helpers.ga4ItemStructFields) ` +
289
+ `nor a column on event_data. Valid item-level joinKeys are item-struct fields ` +
290
+ `(e.g. item_id, item_category) or any event_data column (e.g. user_pseudo_id, event_date).`
291
+ );
292
+ }
293
+ }
294
+ }
295
+
296
+ // Shared item-array CTEs:
297
+ // 1. items_unnested: unnest items from ecommerce events; LAST_VALUE attribution window
298
+ // is emitted only when itemListAttribution is configured.
299
+ // 2. items_rebuilt: re-aggregate items via explicit struct(...) construction;
300
+ // LEFT JOIN enrich_<name> for each item-level enrichment.
301
+ // Activation: emitted when EITHER itemListAttribution is configured OR at least one
302
+ // item-level enrichment is present.
303
+ const itemEnrichmentsActive = itemEnrichments.joins.length > 0;
304
+ const itemsScaffoldActive = !!itemListAttribution || itemEnrichmentsActive;
305
+ const itemListSteps = itemsScaffoldActive ? (() => {
281
306
  const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
282
307
 
308
+ // Flatten the item struct: every standard items-struct field is selected as a
309
+ // top-level column of items_unnested. This makes downstream joins simpler
310
+ // (LEFT JOIN ... USING(item_id) works without aliasing tricks) and lets items_rebuilt
311
+ // reference fields as bare column names instead of `item.<col>`.
312
+ const itemFieldColumns = {};
313
+ for (const f of helpers.ga4ItemStructFields) {
314
+ itemFieldColumns[f] = `item.${f}`;
315
+ }
316
+
317
+ // Carry up any event_data joinKey columns used by item-level enrichments so the
318
+ // USING(...) clause in items_rebuilt can bind against top-level identifiers.
319
+ // Skip ones already in the base columns above
320
+ const baseColumnNames = new Set(['_item_row_id', 'event_name', ...Object.keys(itemFieldColumns)]);
321
+ const extraJoinKeyColumns = {};
322
+ for (const c of itemJoinKeysFromEventData) {
323
+ if (!baseColumnNames.has(c)) {
324
+ extraJoinKeyColumns[c] = c;
325
+ }
326
+ }
327
+
328
+ // items_unnested base columns. The _item_list_attr struct (LAST_VALUE window) is
329
+ // added only when itemListAttribution is configured — when only item enrichments
330
+ // are active, the window function is omitted entirely for cleaner SQL.
331
+ const unnestedSelectColumns = {
332
+ '_item_row_id': '_item_row_id',
333
+ 'event_name': 'event_name',
334
+ ...itemFieldColumns,
335
+ ...extraJoinKeyColumns,
336
+ };
337
+ if (itemListAttribution) {
338
+ unnestedSelectColumns._item_list_attr = helpers.itemListAttributionExpr(
339
+ itemListAttribution.lookbackType,
340
+ timestampColumn,
341
+ itemListAttribution.lookbackTimeMs
342
+ );
343
+ }
344
+
283
345
  const unnestedStep = {
284
346
  name: 'items_unnested',
285
- select: {
286
- columns: {
287
- '_item_row_id': '_item_row_id',
288
- 'event_name': 'event_name',
289
- // event_date is carried forward for ability to use it in data enrichment joins
290
- 'event_date': 'event_date',
291
- 'item': 'item',
292
- '_item_list_attr': attrExpr,
293
- },
294
- },
347
+ select: { columns: unnestedSelectColumns },
295
348
  from: 'event_data, unnest(items) as item',
296
349
  where: `event_name in (${ecommerceEventsFilter})`,
297
350
  };
298
351
 
352
+ // Build the per-field expression map for the items struct. Seed with the canonical
353
+ // GA4 items-struct fields — each references the matching top-level column on
354
+ // items_unnested. When itemListAttribution is configured, override the three
355
+ // attribution entries with their package-generated coalesce-with-passthrough
356
+ // expressions. Item-level enrichment columns layer on top via the spread below.
357
+ const preItemExpressions = {};
358
+ for (const f of helpers.ga4ItemStructFields) {
359
+ preItemExpressions[f] = f;
360
+ }
361
+ if (itemListAttribution) {
362
+ preItemExpressions.item_list_name = `coalesce(if(${passthroughEvents}, item_list_name, _item_list_attr.item_list_name), '(not set)')`;
363
+ preItemExpressions.item_list_id = `coalesce(if(${passthroughEvents}, item_list_id, _item_list_attr.item_list_id), '(not set)')`;
364
+ preItemExpressions.item_list_index = `coalesce(if(${passthroughEvents}, item_list_index, _item_list_attr.item_list_index))`;
365
+ }
366
+
367
+ // Wrap overlapping item-level enrichment columns in coalesce(<enrichExpr>, <originalExpr>)
368
+ // so a missed JOIN falls back to the existing item field value. Purely additive
369
+ // columns (no overlap) pass through unchanged.
370
+ const wrappedItemEnrichmentColumns = {};
371
+ for (const [col, enrichExpr] of Object.entries(itemEnrichments.columns)) {
372
+ const originalExpr = preItemExpressions[col];
373
+ wrappedItemEnrichmentColumns[col] = originalExpr
374
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
375
+ : enrichExpr;
376
+ }
377
+
378
+ // Final struct: standard fields first, then enrichment overrides spread on top
379
+ // (overlapping keys replace preItemExpressions entries; additive keys are appended).
380
+ const finalItemStructFields = { ...preItemExpressions, ...wrappedItemEnrichmentColumns };
381
+
382
+ const itemStructClauses = Object.entries(finalItemStructFields)
383
+ .map(([col, expr]) => `${expr} as ${col}`)
384
+ .join(',\n ');
385
+
299
386
  const rebuiltStep = {
300
387
  name: 'items_rebuilt',
301
388
  select: {
302
389
  columns: {
303
390
  '_item_row_id': '_item_row_id',
304
- 'items': `array_agg(
305
- (select as struct item.* replace(
306
- coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
307
- coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
308
- coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
309
- ))
310
- )`,
391
+ 'items': `array_agg(struct(
392
+ ${itemStructClauses}
393
+ ))`,
311
394
  },
312
395
  },
313
396
  from: 'items_unnested',
314
397
  'group by': '_item_row_id',
315
398
  };
399
+ // Item-level enrichment joins (only attach when present). Each enrichment's LEFT JOIN
400
+ // binds against top-level columns on items_unnested (item-struct fields, or event_data
401
+ // joinKey columns carried up via extraJoinKeyColumns above).
402
+ if (itemEnrichmentsActive) {
403
+ rebuiltStep.joins = itemEnrichments.joins;
404
+ }
316
405
 
317
406
  return [unnestedStep, rebuiltStep];
318
407
  })() : null;
319
408
 
320
409
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
321
410
 
322
- // When item list attribution is enabled, override the items column and exclude _item_row_id
411
+ // When the items scaffold is active, override the items column and exclude _item_row_id
323
412
  // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
324
413
  const itemListOverrides = itemListSteps ? {
325
414
  items: 'coalesce(items_rebuilt.items, event_data.items)',
326
415
  } : {};
327
416
  const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
328
417
 
418
+ // Wrap overlapping event-level enrichment columns in coalesce(enrich_<name>.<col>, <original>)
419
+ // so a missed JOIN falls back to the existing value. Purely additive columns (no overlap)
420
+ // pass through unchanged. Source-of-original precedence matches the final SELECT's spread
421
+ // order: itemListOverrides first (overrides finalColumnOrder for `items`), then
422
+ // session_data (wins over event_data in getFinalColumnOrder when both have the column).
423
+ const wrappedEventEnrichmentColumns = {};
424
+ for (const [col, enrichExpr] of Object.entries(eventEnrichments.columns)) {
425
+ let originalExpr;
426
+ if (col in itemListOverrides) {
427
+ originalExpr = itemListOverrides[col];
428
+ } else if (col in sessionDataStep.select.columns) {
429
+ originalExpr = `session_data.${col}`;
430
+ } else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
431
+ originalExpr = `event_data.${col}`;
432
+ }
433
+ wrappedEventEnrichmentColumns[col] = originalExpr
434
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
435
+ : enrichExpr;
436
+ }
437
+
438
+ // List all column names that have already been defined or should be left out
439
+ // Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
440
+ const alreadyMapped = [
441
+ ...Object.keys(finalColumnOrder),
442
+ ...Object.keys(itemListOverrides),
443
+ ...eventEnrichments.columnNames,
444
+ 'entrances',
445
+ mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
446
+ 'data_is_final',
447
+ 'export_type',
448
+ ...itemListExcludedColumns,
449
+ ];
450
+
329
451
  // Join event_data and session_data, include additional logic
330
452
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
331
453
  const enhancedEventsStep = {
@@ -335,24 +457,11 @@ ${excludedEventsSQL}`,
335
457
  // get the most important columns in the correct order
336
458
  ...finalColumnOrder,
337
459
  ...itemListOverrides,
338
- // get the rest of the event_data columns
339
- '[sql]event_data': utils.selectOtherColumns(
340
- eventDataStep,
341
- Object.keys(finalColumnOrder),
342
- [
343
- 'entrances',
344
- mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
345
- 'data_is_final',
346
- 'export_type',
347
- ...itemListExcludedColumns,
348
- ]
349
- ),
350
- // get the rest of the session_data columns
351
- '[sql]session_data': utils.selectOtherColumns(
352
- sessionDataStep,
353
- Object.keys(finalColumnOrder),
354
- []
355
- ),
460
+ // event-level enrichment columns: coalesce with the original when overlapping; otherwise add.
461
+ ...wrappedEventEnrichmentColumns,
462
+ // explicit pass-throughs for the rest of event_data and session_data
463
+ ...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
464
+ ...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
356
465
  // include additional columns
357
466
  row_inserted_timestamp: 'current_timestamp()',
358
467
  data_is_final: 'data_is_final',
@@ -370,21 +479,22 @@ ${excludedEventsSQL}`,
370
479
  type: 'left',
371
480
  table: 'session_data',
372
481
  on: 'using(session_id)'
373
- }
482
+ },
483
+ // The left joins for the event-level enrichment CTEs
484
+ ...eventEnrichments.joins,
374
485
  ],
375
486
  where: helpers.incrementalDateFilter(mergedConfig)
376
487
  };
377
488
 
378
489
  const packageSteps = [
490
+ ...enrichmentSteps,
379
491
  eventDataStep,
380
492
  ...(itemListSteps ?? []),
381
493
  sessionDataStep,
382
494
  enhancedEventsStep,
383
495
  ];
384
496
 
385
- // Layer 2 validation: customSteps name must not collide with package step names.
386
- // Reserved set is derived from packageSteps at runtime (single source of truth) — what
387
- // is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on).
497
+ // Ensure that the custom step names don't collide with the default or data enrichment step names
388
498
  const customSteps = mergedConfig.customSteps ?? [];
389
499
  if (customSteps.length > 0) {
390
500
  const reservedNames = new Set(packageSteps.map(s => s.name));
@@ -398,6 +508,7 @@ ${excludedEventsSQL}`,
398
508
  }
399
509
  }
400
510
 
511
+ // Include custom steps last in the list
401
512
  const steps = [...packageSteps, ...customSteps];
402
513
 
403
514
  return utils.queryBuilder(steps);