ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -4
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +91 -39
- package/package.json +5 -2
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +204 -91
- package/tables/ga4EventsEnhanced/validation.js +99 -4
- package/utils.js +163 -26
package/helpers/ga4Transforms.js
CHANGED
|
@@ -95,50 +95,99 @@ const isFinalData = (detectionMethod, dayThreshold) => {
|
|
|
95
95
|
};
|
|
96
96
|
|
|
97
97
|
/**
|
|
98
|
-
*
|
|
98
|
+
* The standard GA4 BigQuery export top-level column names, based on the official schema.
|
|
99
99
|
*
|
|
100
|
-
*
|
|
101
|
-
|
|
100
|
+
* list updated 2026-02-18
|
|
101
|
+
*/
|
|
102
|
+
const ga4ExportColumns = [
|
|
103
|
+
"event_date",
|
|
104
|
+
"event_timestamp",
|
|
105
|
+
"event_name",
|
|
106
|
+
"event_params",
|
|
107
|
+
"event_previous_timestamp",
|
|
108
|
+
"event_value_in_usd",
|
|
109
|
+
"event_bundle_sequence_id",
|
|
110
|
+
"event_server_timestamp_offset",
|
|
111
|
+
"user_id",
|
|
112
|
+
"user_pseudo_id",
|
|
113
|
+
"privacy_info",
|
|
114
|
+
"user_properties",
|
|
115
|
+
"user_first_touch_timestamp",
|
|
116
|
+
"user_ltv",
|
|
117
|
+
"device",
|
|
118
|
+
"geo",
|
|
119
|
+
"app_info",
|
|
120
|
+
"traffic_source",
|
|
121
|
+
"stream_id",
|
|
122
|
+
"platform",
|
|
123
|
+
"event_dimensions",
|
|
124
|
+
"ecommerce",
|
|
125
|
+
"items",
|
|
126
|
+
"collected_traffic_source",
|
|
127
|
+
"is_active_user",
|
|
128
|
+
"batch_event_index",
|
|
129
|
+
"batch_page_id",
|
|
130
|
+
"batch_ordering_id",
|
|
131
|
+
"session_traffic_source_last_click",
|
|
132
|
+
"publisher"
|
|
133
|
+
];
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Checks whether a given column name is part of the standard GA4 BigQuery export columns.
|
|
102
137
|
*
|
|
103
138
|
* @param {string} columnName - The name of the column to check.
|
|
104
139
|
* @returns {boolean} True if the column name is a GA4 export column, otherwise false.
|
|
105
140
|
*/
|
|
106
|
-
const isGa4ExportColumn = (columnName) =>
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
141
|
+
const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* The standard GA4 BigQuery export items-struct field names, based on the official schema.
|
|
145
|
+
* Listed in GA4's source order — `items_rebuilt`'s explicit struct construction emits fields
|
|
146
|
+
* in this order, and consumers may reasonably depend on the items-struct schema field order
|
|
147
|
+
* matching GA4's own.
|
|
148
|
+
*
|
|
149
|
+
* `item_params` is a nested REPEATED RECORD and projects through as a single struct entry
|
|
150
|
+
* (no per-key handling).
|
|
151
|
+
*
|
|
152
|
+
* list updated 2026-05-12
|
|
153
|
+
*/
|
|
154
|
+
const ga4ItemStructFields = [
|
|
155
|
+
"item_id",
|
|
156
|
+
"item_name",
|
|
157
|
+
"item_brand",
|
|
158
|
+
"item_variant",
|
|
159
|
+
"item_category",
|
|
160
|
+
"item_category2",
|
|
161
|
+
"item_category3",
|
|
162
|
+
"item_category4",
|
|
163
|
+
"item_category5",
|
|
164
|
+
"price_in_usd",
|
|
165
|
+
"price",
|
|
166
|
+
"quantity",
|
|
167
|
+
"item_revenue_in_usd",
|
|
168
|
+
"item_revenue",
|
|
169
|
+
"item_refund_in_usd",
|
|
170
|
+
"item_refund",
|
|
171
|
+
"coupon",
|
|
172
|
+
"affiliation",
|
|
173
|
+
"location_id",
|
|
174
|
+
"item_list_id",
|
|
175
|
+
"item_list_name",
|
|
176
|
+
"item_list_index",
|
|
177
|
+
"promotion_id",
|
|
178
|
+
"promotion_name",
|
|
179
|
+
"creative_name",
|
|
180
|
+
"creative_slot",
|
|
181
|
+
"item_params"
|
|
182
|
+
];
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Checks whether a given field name is part of the standard GA4 BigQuery export items struct.
|
|
186
|
+
*
|
|
187
|
+
* @param {string} fieldName - The name of the field to check.
|
|
188
|
+
* @returns {boolean} True if the field name is a standard items-struct field, otherwise false.
|
|
189
|
+
*/
|
|
190
|
+
const isGa4ItemStructField = (fieldName) => ga4ItemStructFields.includes(fieldName);
|
|
142
191
|
|
|
143
192
|
/**
|
|
144
193
|
* Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
|
|
@@ -255,7 +304,10 @@ module.exports = {
|
|
|
255
304
|
sessionId,
|
|
256
305
|
fixEcommerceStruct,
|
|
257
306
|
isFinalData,
|
|
307
|
+
ga4ExportColumns,
|
|
258
308
|
isGa4ExportColumn,
|
|
309
|
+
ga4ItemStructFields,
|
|
310
|
+
isGa4ItemStructField,
|
|
259
311
|
getGa4ExportType,
|
|
260
312
|
itemListAttributionExpr,
|
|
261
313
|
itemRowId,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.9.0-dev.1",
|
|
3
|
+
"version": "0.9.0-dev.10",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js",
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js && node tests/eventDataColumns.test.js && node tests/utils.test.js",
|
|
21
21
|
"test:summary": "node tests/testRunner.js",
|
|
22
22
|
"test:docs": "node tests/documentation.test.js",
|
|
23
23
|
"test:preops": "node tests/preOperations.test.js",
|
|
@@ -28,6 +28,9 @@
|
|
|
28
28
|
"test:createTable": "node tests/createTable.test.js",
|
|
29
29
|
"test:queryBuilder": "node tests/queryBuilder.test.js",
|
|
30
30
|
"test:customSteps": "node tests/customSteps.test.js",
|
|
31
|
+
"test:enrichments": "node tests/enrichments.test.js",
|
|
32
|
+
"test:eventDataColumns": "node tests/eventDataColumns.test.js",
|
|
33
|
+
"test:utils": "node tests/utils.test.js",
|
|
31
34
|
"test:integration": "node tests/integration/integration.test.js",
|
|
32
35
|
"release:dev": "./scripts/release-dev.sh",
|
|
33
36
|
"readme": "node scripts/updateReadme.js",
|
|
@@ -68,6 +68,10 @@ const ga4EventsEnhancedConfig = {
|
|
|
68
68
|
// user-defined CTEs appended to the pipeline after enhanced_events
|
|
69
69
|
// each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
|
|
70
70
|
customSteps: [],
|
|
71
|
+
// declarative external-data enrichments joined into the pipeline
|
|
72
|
+
// each entry: { name, level: 'event' | 'item', source, joinKey, columns, dedupe? }
|
|
73
|
+
// 'item' level is accepted at config time but throws at SQL gen — not yet implemented
|
|
74
|
+
enrichments: [],
|
|
71
75
|
};
|
|
72
76
|
|
|
73
77
|
module.exports = { ga4EventsEnhancedConfig };
|
|
@@ -197,51 +197,46 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
197
197
|
return excludedColumns;
|
|
198
198
|
};
|
|
199
199
|
|
|
200
|
-
// initial step: extract data from the export tables
|
|
200
|
+
// initial step: extract data from the export tables.
|
|
201
|
+
// Explicit columns first (transforms + package-promoted + user-excluded sentinels);
|
|
202
|
+
// then pass-through entries for every GA4 export column not already accounted for.
|
|
203
|
+
// After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
|
|
204
|
+
const eventDataExplicitColumns = {
|
|
205
|
+
// exclude default export columns that are not needed
|
|
206
|
+
// do this first so that the columns defined later are not excluded
|
|
207
|
+
...getExcludedColumns(),
|
|
208
|
+
event_date: helpers.eventDate,
|
|
209
|
+
event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
|
|
210
|
+
event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
|
|
211
|
+
session_id: helpers.sessionId,
|
|
212
|
+
// page details
|
|
213
|
+
page_location: helpers.unnestEventParam('page_location', 'string'),
|
|
214
|
+
page: helpers.extractPageDetails(),
|
|
215
|
+
// promote event params to columns
|
|
216
|
+
...promotedEventParameters(),
|
|
217
|
+
event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
|
|
218
|
+
// rename traffic_source for clarity
|
|
219
|
+
user_traffic_source: 'traffic_source',
|
|
220
|
+
// ecommerce
|
|
221
|
+
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
222
|
+
// assign a unique row id, used for handling item-level attribution and enrichment
|
|
223
|
+
_item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
224
|
+
// flag if the data is "final" and is not expected to change anymore
|
|
225
|
+
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
226
|
+
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
227
|
+
// prep columns for later steps
|
|
228
|
+
entrances: helpers.unnestEventParam('entrances', 'int'),
|
|
229
|
+
session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
|
|
230
|
+
};
|
|
231
|
+
// Pass through every GA4 export column not already covered by an explicit transform,
|
|
232
|
+
// promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
|
|
233
|
+
const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
|
|
201
234
|
const eventDataStep = {
|
|
202
235
|
name: 'event_data',
|
|
203
236
|
select: {
|
|
204
237
|
columns: {
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
...getExcludedColumns(),
|
|
208
|
-
// date and time
|
|
209
|
-
event_date: helpers.eventDate,
|
|
210
|
-
event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
|
|
211
|
-
event_timestamp: 'event_timestamp',
|
|
212
|
-
event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
|
|
213
|
-
// event name
|
|
214
|
-
event_name: 'event_name',
|
|
215
|
-
// identifiers
|
|
216
|
-
session_id: helpers.sessionId,
|
|
217
|
-
user_pseudo_id: 'user_pseudo_id',
|
|
218
|
-
user_id: 'user_id',
|
|
219
|
-
// page
|
|
220
|
-
page_location: helpers.unnestEventParam('page_location', 'string'),
|
|
221
|
-
page: helpers.extractPageDetails(),
|
|
222
|
-
// event parameters and user properties
|
|
223
|
-
...promotedEventParameters(),
|
|
224
|
-
event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
|
|
225
|
-
user_properties: 'user_properties',
|
|
226
|
-
// traffic source
|
|
227
|
-
collected_traffic_source: 'collected_traffic_source',
|
|
228
|
-
session_traffic_source_last_click: 'session_traffic_source_last_click',
|
|
229
|
-
user_traffic_source: 'traffic_source',
|
|
230
|
-
// ecommerce
|
|
231
|
-
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
232
|
-
items: 'items',
|
|
233
|
-
_item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
234
|
-
// flag if the data is "final" and is not expected to change anymore
|
|
235
|
-
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
236
|
-
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
237
|
-
// prep columns for later steps
|
|
238
|
-
entrances: helpers.unnestEventParam('entrances', 'int'),
|
|
239
|
-
session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
|
|
240
|
-
// include all other columns from the export data
|
|
241
|
-
get '[sql]other_columns'() {
|
|
242
|
-
const definedColumns = Object.keys(this);
|
|
243
|
-
return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
|
|
244
|
-
},
|
|
238
|
+
...eventDataExplicitColumns,
|
|
239
|
+
...eventDataPassThroughs,
|
|
245
240
|
},
|
|
246
241
|
},
|
|
247
242
|
from: mergedConfig.sourceTable,
|
|
@@ -268,64 +263,193 @@ ${excludedEventsSQL}`,
|
|
|
268
263
|
'group by': 'session_id',
|
|
269
264
|
};
|
|
270
265
|
|
|
271
|
-
//
|
|
272
|
-
// item-level
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
266
|
+
// Build enrichment-source CTEs and gather per-level join/column data. The utility routes
|
|
267
|
+
// event-level and item-level entries through separate output channels.
|
|
268
|
+
const { steps: enrichmentSteps, event: eventEnrichments, item: itemEnrichments }
|
|
269
|
+
= utils.buildEnrichments(mergedConfig.enrichments);
|
|
270
|
+
|
|
271
|
+
// Validate item-level joinKey columns and collect any event_data columns that need to
|
|
272
|
+
// be carried up to items_unnested as top-level columns (so the LEFT JOIN inside
|
|
273
|
+
// items_rebuilt can USING(...) on them). Item-struct fields are already top-level on
|
|
274
|
+
// items_unnested and need no extension.
|
|
275
|
+
const itemJoinKeysFromEventData = new Set();
|
|
276
|
+
for (const [i, e] of (mergedConfig.enrichments ?? []).entries()) {
|
|
277
|
+
const level = e.level ?? 'event';
|
|
278
|
+
if (level !== 'item') continue;
|
|
279
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
280
|
+
for (const c of joinKeys) {
|
|
281
|
+
if (helpers.ga4ItemStructFields.includes(c)) {
|
|
282
|
+
// Already a top-level column on items_unnested.
|
|
283
|
+
} else if (c in eventDataStep.select.columns && eventDataStep.select.columns[c] !== undefined) {
|
|
284
|
+
itemJoinKeysFromEventData.add(c);
|
|
285
|
+
} else {
|
|
286
|
+
throw new Error(
|
|
287
|
+
`config.enrichments[${i}] (name: '${e.name}') uses item-level joinKey '${c}', ` +
|
|
288
|
+
`which is neither a field on the GA4 items struct (helpers.ga4ItemStructFields) ` +
|
|
289
|
+
`nor a column on event_data. Valid item-level joinKeys are item-struct fields ` +
|
|
290
|
+
`(e.g. item_id, item_category) or any event_data column (e.g. user_pseudo_id, event_date).`
|
|
291
|
+
);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Shared item-array CTEs:
|
|
297
|
+
// 1. items_unnested: unnest items from ecommerce events; LAST_VALUE attribution window
|
|
298
|
+
// is emitted only when itemListAttribution is configured.
|
|
299
|
+
// 2. items_rebuilt: re-aggregate items via explicit struct(...) construction;
|
|
300
|
+
// LEFT JOIN enrich_<name> for each item-level enrichment.
|
|
301
|
+
// Activation: emitted when EITHER itemListAttribution is configured OR at least one
|
|
302
|
+
// item-level enrichment is present.
|
|
303
|
+
const itemEnrichmentsActive = itemEnrichments.joins.length > 0;
|
|
304
|
+
const itemsScaffoldActive = !!itemListAttribution || itemEnrichmentsActive;
|
|
305
|
+
const itemListSteps = itemsScaffoldActive ? (() => {
|
|
281
306
|
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
282
307
|
|
|
308
|
+
// Flatten the item struct: every standard items-struct field is selected as a
|
|
309
|
+
// top-level column of items_unnested. This makes downstream joins simpler
|
|
310
|
+
// (LEFT JOIN ... USING(item_id) works without aliasing tricks) and lets items_rebuilt
|
|
311
|
+
// reference fields as bare column names instead of `item.<col>`.
|
|
312
|
+
const itemFieldColumns = {};
|
|
313
|
+
for (const f of helpers.ga4ItemStructFields) {
|
|
314
|
+
itemFieldColumns[f] = `item.${f}`;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Carry up any event_data joinKey columns used by item-level enrichments so the
|
|
318
|
+
// USING(...) clause in items_rebuilt can bind against top-level identifiers.
|
|
319
|
+
// Skip ones already in the base columns above (e.g. event_date is always carried).
|
|
320
|
+
const baseColumnNames = new Set(['_item_row_id', 'event_name', 'event_date', ...Object.keys(itemFieldColumns)]);
|
|
321
|
+
const extraJoinKeyColumns = {};
|
|
322
|
+
for (const c of itemJoinKeysFromEventData) {
|
|
323
|
+
if (!baseColumnNames.has(c)) {
|
|
324
|
+
extraJoinKeyColumns[c] = c;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// items_unnested base columns. The _item_list_attr struct (LAST_VALUE window) is
|
|
329
|
+
// added only when itemListAttribution is configured — when only item enrichments
|
|
330
|
+
// are active, the window function is omitted entirely for cleaner SQL.
|
|
331
|
+
const unnestedSelectColumns = {
|
|
332
|
+
'_item_row_id': '_item_row_id',
|
|
333
|
+
'event_name': 'event_name',
|
|
334
|
+
// event_date is carried forward for ability to use it in data enrichment joins
|
|
335
|
+
'event_date': 'event_date',
|
|
336
|
+
...itemFieldColumns,
|
|
337
|
+
...extraJoinKeyColumns,
|
|
338
|
+
};
|
|
339
|
+
if (itemListAttribution) {
|
|
340
|
+
unnestedSelectColumns._item_list_attr = helpers.itemListAttributionExpr(
|
|
341
|
+
itemListAttribution.lookbackType,
|
|
342
|
+
timestampColumn,
|
|
343
|
+
itemListAttribution.lookbackTimeMs
|
|
344
|
+
);
|
|
345
|
+
}
|
|
346
|
+
|
|
283
347
|
const unnestedStep = {
|
|
284
348
|
name: 'items_unnested',
|
|
285
|
-
select: {
|
|
286
|
-
columns: {
|
|
287
|
-
'_item_row_id': '_item_row_id',
|
|
288
|
-
'event_name': 'event_name',
|
|
289
|
-
// event_date is carried forward for ability to use it in data enrichment joins
|
|
290
|
-
'event_date': 'event_date',
|
|
291
|
-
'item': 'item',
|
|
292
|
-
'_item_list_attr': attrExpr,
|
|
293
|
-
},
|
|
294
|
-
},
|
|
349
|
+
select: { columns: unnestedSelectColumns },
|
|
295
350
|
from: 'event_data, unnest(items) as item',
|
|
296
351
|
where: `event_name in (${ecommerceEventsFilter})`,
|
|
297
352
|
};
|
|
298
353
|
|
|
354
|
+
// Build the per-field expression map for the items struct. Seed with the canonical
|
|
355
|
+
// GA4 items-struct fields — each references the matching top-level column on
|
|
356
|
+
// items_unnested. When itemListAttribution is configured, override the three
|
|
357
|
+
// attribution entries with their package-generated coalesce-with-passthrough
|
|
358
|
+
// expressions. Item-level enrichment columns layer on top via the spread below.
|
|
359
|
+
const preItemExpressions = {};
|
|
360
|
+
for (const f of helpers.ga4ItemStructFields) {
|
|
361
|
+
preItemExpressions[f] = f;
|
|
362
|
+
}
|
|
363
|
+
if (itemListAttribution) {
|
|
364
|
+
preItemExpressions.item_list_name = `coalesce(if(${passthroughEvents}, item_list_name, _item_list_attr.item_list_name), '(not set)')`;
|
|
365
|
+
preItemExpressions.item_list_id = `coalesce(if(${passthroughEvents}, item_list_id, _item_list_attr.item_list_id), '(not set)')`;
|
|
366
|
+
preItemExpressions.item_list_index = `coalesce(if(${passthroughEvents}, item_list_index, _item_list_attr.item_list_index))`;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// Wrap overlapping item-level enrichment columns in coalesce(<enrichExpr>, <originalExpr>)
|
|
370
|
+
// so a missed JOIN falls back to the existing item field value. Purely additive
|
|
371
|
+
// columns (no overlap) pass through unchanged.
|
|
372
|
+
const wrappedItemEnrichmentColumns = {};
|
|
373
|
+
for (const [col, enrichExpr] of Object.entries(itemEnrichments.columns)) {
|
|
374
|
+
const originalExpr = preItemExpressions[col];
|
|
375
|
+
wrappedItemEnrichmentColumns[col] = originalExpr
|
|
376
|
+
? `coalesce(${enrichExpr}, ${originalExpr})`
|
|
377
|
+
: enrichExpr;
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// Final struct: standard fields first, then enrichment overrides spread on top
|
|
381
|
+
// (overlapping keys replace preItemExpressions entries; additive keys are appended).
|
|
382
|
+
const finalItemStructFields = { ...preItemExpressions, ...wrappedItemEnrichmentColumns };
|
|
383
|
+
|
|
384
|
+
const itemStructClauses = Object.entries(finalItemStructFields)
|
|
385
|
+
.map(([col, expr]) => `${expr} as ${col}`)
|
|
386
|
+
.join(',\n ');
|
|
387
|
+
|
|
299
388
|
const rebuiltStep = {
|
|
300
389
|
name: 'items_rebuilt',
|
|
301
390
|
select: {
|
|
302
391
|
columns: {
|
|
303
392
|
'_item_row_id': '_item_row_id',
|
|
304
|
-
'items': `array_agg(
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
|
|
308
|
-
coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
|
|
309
|
-
))
|
|
310
|
-
)`,
|
|
393
|
+
'items': `array_agg(struct(
|
|
394
|
+
${itemStructClauses}
|
|
395
|
+
))`,
|
|
311
396
|
},
|
|
312
397
|
},
|
|
313
398
|
from: 'items_unnested',
|
|
314
399
|
'group by': '_item_row_id',
|
|
315
400
|
};
|
|
401
|
+
// Item-level enrichment joins (only attach when present). Each enrichment's LEFT JOIN
|
|
402
|
+
// binds against top-level columns on items_unnested (item-struct fields, or event_data
|
|
403
|
+
// joinKey columns carried up via extraJoinKeyColumns above).
|
|
404
|
+
if (itemEnrichmentsActive) {
|
|
405
|
+
rebuiltStep.joins = itemEnrichments.joins;
|
|
406
|
+
}
|
|
316
407
|
|
|
317
408
|
return [unnestedStep, rebuiltStep];
|
|
318
409
|
})() : null;
|
|
319
410
|
|
|
320
411
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
321
412
|
|
|
322
|
-
// When
|
|
413
|
+
// When the items scaffold is active, override the items column and exclude _item_row_id
|
|
323
414
|
// COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
|
|
324
415
|
const itemListOverrides = itemListSteps ? {
|
|
325
416
|
items: 'coalesce(items_rebuilt.items, event_data.items)',
|
|
326
417
|
} : {};
|
|
327
418
|
const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
|
|
328
419
|
|
|
420
|
+
// Wrap overlapping event-level enrichment columns in coalesce(enrich_<name>.<col>, <original>)
|
|
421
|
+
// so a missed JOIN falls back to the existing value. Purely additive columns (no overlap)
|
|
422
|
+
// pass through unchanged. Source-of-original precedence matches the final SELECT's spread
|
|
423
|
+
// order: itemListOverrides first (overrides finalColumnOrder for `items`), then
|
|
424
|
+
// session_data (wins over event_data in getFinalColumnOrder when both have the column).
|
|
425
|
+
const wrappedEventEnrichmentColumns = {};
|
|
426
|
+
for (const [col, enrichExpr] of Object.entries(eventEnrichments.columns)) {
|
|
427
|
+
let originalExpr;
|
|
428
|
+
if (col in itemListOverrides) {
|
|
429
|
+
originalExpr = itemListOverrides[col];
|
|
430
|
+
} else if (col in sessionDataStep.select.columns) {
|
|
431
|
+
originalExpr = `session_data.${col}`;
|
|
432
|
+
} else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
|
|
433
|
+
originalExpr = `event_data.${col}`;
|
|
434
|
+
}
|
|
435
|
+
wrappedEventEnrichmentColumns[col] = originalExpr
|
|
436
|
+
? `coalesce(${enrichExpr}, ${originalExpr})`
|
|
437
|
+
: enrichExpr;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// List all column names that have already been defined or should be left out
|
|
441
|
+
// Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
|
|
442
|
+
const alreadyMapped = [
|
|
443
|
+
...Object.keys(finalColumnOrder),
|
|
444
|
+
...Object.keys(itemListOverrides),
|
|
445
|
+
...eventEnrichments.columnNames,
|
|
446
|
+
'entrances',
|
|
447
|
+
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
448
|
+
'data_is_final',
|
|
449
|
+
'export_type',
|
|
450
|
+
...itemListExcludedColumns,
|
|
451
|
+
];
|
|
452
|
+
|
|
329
453
|
// Join event_data and session_data, include additional logic
|
|
330
454
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
331
455
|
const enhancedEventsStep = {
|
|
@@ -335,24 +459,11 @@ ${excludedEventsSQL}`,
|
|
|
335
459
|
// get the most important columns in the correct order
|
|
336
460
|
...finalColumnOrder,
|
|
337
461
|
...itemListOverrides,
|
|
338
|
-
//
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
'entrances',
|
|
344
|
-
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
345
|
-
'data_is_final',
|
|
346
|
-
'export_type',
|
|
347
|
-
...itemListExcludedColumns,
|
|
348
|
-
]
|
|
349
|
-
),
|
|
350
|
-
// get the rest of the session_data columns
|
|
351
|
-
'[sql]session_data': utils.selectOtherColumns(
|
|
352
|
-
sessionDataStep,
|
|
353
|
-
Object.keys(finalColumnOrder),
|
|
354
|
-
[]
|
|
355
|
-
),
|
|
462
|
+
// event-level enrichment columns: coalesce with the original when overlapping; otherwise add.
|
|
463
|
+
...wrappedEventEnrichmentColumns,
|
|
464
|
+
// explicit pass-throughs for the rest of event_data and session_data
|
|
465
|
+
...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
|
|
466
|
+
...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
|
|
356
467
|
// include additional columns
|
|
357
468
|
row_inserted_timestamp: 'current_timestamp()',
|
|
358
469
|
data_is_final: 'data_is_final',
|
|
@@ -370,21 +481,22 @@ ${excludedEventsSQL}`,
|
|
|
370
481
|
type: 'left',
|
|
371
482
|
table: 'session_data',
|
|
372
483
|
on: 'using(session_id)'
|
|
373
|
-
}
|
|
484
|
+
},
|
|
485
|
+
// The left joins for the event-level enrichment ctes
|
|
486
|
+
...eventEnrichments.joins,
|
|
374
487
|
],
|
|
375
488
|
where: helpers.incrementalDateFilter(mergedConfig)
|
|
376
489
|
};
|
|
377
490
|
|
|
378
491
|
const packageSteps = [
|
|
492
|
+
...enrichmentSteps,
|
|
379
493
|
eventDataStep,
|
|
380
494
|
...(itemListSteps ?? []),
|
|
381
495
|
sessionDataStep,
|
|
382
496
|
enhancedEventsStep,
|
|
383
497
|
];
|
|
384
498
|
|
|
385
|
-
//
|
|
386
|
-
// Reserved set is derived from packageSteps at runtime (single source of truth) — what
|
|
387
|
-
// is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on).
|
|
499
|
+
// Ensure that the custom step names don't collide with the default or data enrichment step names
|
|
388
500
|
const customSteps = mergedConfig.customSteps ?? [];
|
|
389
501
|
if (customSteps.length > 0) {
|
|
390
502
|
const reservedNames = new Set(packageSteps.map(s => s.name));
|
|
@@ -398,6 +510,7 @@ ${excludedEventsSQL}`,
|
|
|
398
510
|
}
|
|
399
511
|
}
|
|
400
512
|
|
|
513
|
+
// Include custom steps last in the list
|
|
401
514
|
const steps = [...packageSteps, ...customSteps];
|
|
402
515
|
|
|
403
516
|
return utils.queryBuilder(steps);
|