ga4-export-fixer 0.9.0-dev.3 → 0.9.0-dev.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helpers/ga4Transforms.js +40 -39
- package/package.json +4 -2
- package/tables/ga4EventsEnhanced/index.js +52 -53
- package/utils.js +40 -0
package/helpers/ga4Transforms.js
CHANGED
|
@@ -95,50 +95,50 @@ const isFinalData = (detectionMethod, dayThreshold) => {
|
|
|
95
95
|
};
|
|
96
96
|
|
|
97
97
|
/**
|
|
98
|
-
*
|
|
98
|
+
* The standard GA4 BigQuery export top-level column names, based on the official schema.
|
|
99
99
|
*
|
|
100
|
-
*
|
|
101
|
-
|
|
100
|
+
* list updated 2026-02-18
|
|
101
|
+
*/
|
|
102
|
+
const ga4ExportColumns = [
|
|
103
|
+
"event_date",
|
|
104
|
+
"event_timestamp",
|
|
105
|
+
"event_name",
|
|
106
|
+
"event_params",
|
|
107
|
+
"event_previous_timestamp",
|
|
108
|
+
"event_value_in_usd",
|
|
109
|
+
"event_bundle_sequence_id",
|
|
110
|
+
"event_server_timestamp_offset",
|
|
111
|
+
"user_id",
|
|
112
|
+
"user_pseudo_id",
|
|
113
|
+
"privacy_info",
|
|
114
|
+
"user_properties",
|
|
115
|
+
"user_first_touch_timestamp",
|
|
116
|
+
"user_ltv",
|
|
117
|
+
"device",
|
|
118
|
+
"geo",
|
|
119
|
+
"app_info",
|
|
120
|
+
"traffic_source",
|
|
121
|
+
"stream_id",
|
|
122
|
+
"platform",
|
|
123
|
+
"event_dimensions",
|
|
124
|
+
"ecommerce",
|
|
125
|
+
"items",
|
|
126
|
+
"collected_traffic_source",
|
|
127
|
+
"is_active_user",
|
|
128
|
+
"batch_event_index",
|
|
129
|
+
"batch_page_id",
|
|
130
|
+
"batch_ordering_id",
|
|
131
|
+
"session_traffic_source_last_click",
|
|
132
|
+
"publisher"
|
|
133
|
+
];
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Checks whether a given column name is part of the standard GA4 BigQuery export columns.
|
|
102
137
|
*
|
|
103
138
|
* @param {string} columnName - The name of the column to check.
|
|
104
139
|
* @returns {boolean} True if the column name is a GA4 export column, otherwise false.
|
|
105
140
|
*/
|
|
106
|
-
const isGa4ExportColumn = (columnName) =>
|
|
107
|
-
// list updated 2026-02-18
|
|
108
|
-
const ga4ExportColumns = [
|
|
109
|
-
"event_date",
|
|
110
|
-
"event_timestamp",
|
|
111
|
-
"event_name",
|
|
112
|
-
"event_params",
|
|
113
|
-
"event_previous_timestamp",
|
|
114
|
-
"event_value_in_usd",
|
|
115
|
-
"event_bundle_sequence_id",
|
|
116
|
-
"event_server_timestamp_offset",
|
|
117
|
-
"user_id",
|
|
118
|
-
"user_pseudo_id",
|
|
119
|
-
"privacy_info",
|
|
120
|
-
"user_properties",
|
|
121
|
-
"user_first_touch_timestamp",
|
|
122
|
-
"user_ltv",
|
|
123
|
-
"device",
|
|
124
|
-
"geo",
|
|
125
|
-
"app_info",
|
|
126
|
-
"traffic_source",
|
|
127
|
-
"stream_id",
|
|
128
|
-
"platform",
|
|
129
|
-
"event_dimensions",
|
|
130
|
-
"ecommerce",
|
|
131
|
-
"items",
|
|
132
|
-
"collected_traffic_source",
|
|
133
|
-
"is_active_user",
|
|
134
|
-
"batch_event_index",
|
|
135
|
-
"batch_page_id",
|
|
136
|
-
"batch_ordering_id",
|
|
137
|
-
"session_traffic_source_last_click",
|
|
138
|
-
"publisher"
|
|
139
|
-
];
|
|
140
|
-
return ga4ExportColumns.includes(columnName);
|
|
141
|
-
};
|
|
141
|
+
const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
|
|
142
142
|
|
|
143
143
|
/**
|
|
144
144
|
* Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
|
|
@@ -255,6 +255,7 @@ module.exports = {
|
|
|
255
255
|
sessionId,
|
|
256
256
|
fixEcommerceStruct,
|
|
257
257
|
isFinalData,
|
|
258
|
+
ga4ExportColumns,
|
|
258
259
|
isGa4ExportColumn,
|
|
259
260
|
getGa4ExportType,
|
|
260
261
|
itemListAttributionExpr,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.9.0-dev.3",
|
|
3
|
+
"version": "0.9.0-dev.4",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js",
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js && node tests/eventDataColumns.test.js && node tests/utils.test.js",
|
|
21
21
|
"test:summary": "node tests/testRunner.js",
|
|
22
22
|
"test:docs": "node tests/documentation.test.js",
|
|
23
23
|
"test:preops": "node tests/preOperations.test.js",
|
|
@@ -29,6 +29,8 @@
|
|
|
29
29
|
"test:queryBuilder": "node tests/queryBuilder.test.js",
|
|
30
30
|
"test:customSteps": "node tests/customSteps.test.js",
|
|
31
31
|
"test:enrichments": "node tests/enrichments.test.js",
|
|
32
|
+
"test:eventDataColumns": "node tests/eventDataColumns.test.js",
|
|
33
|
+
"test:utils": "node tests/utils.test.js",
|
|
32
34
|
"test:integration": "node tests/integration/integration.test.js",
|
|
33
35
|
"release:dev": "./scripts/release-dev.sh",
|
|
34
36
|
"readme": "node scripts/updateReadme.js",
|
|
package/tables/ga4EventsEnhanced/index.js
CHANGED
|
@@ -197,51 +197,56 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
197
197
|
return excludedColumns;
|
|
198
198
|
};
|
|
199
199
|
|
|
200
|
-
// initial step: extract data from the export tables
|
|
200
|
+
// initial step: extract data from the export tables.
|
|
201
|
+
// Explicit columns first (transforms + package-promoted + user-excluded sentinels);
|
|
202
|
+
// then pass-through entries for every GA4 export column not already accounted for.
|
|
203
|
+
// After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
|
|
204
|
+
const eventDataExplicitColumns = {
|
|
205
|
+
// exclude default export columns that are not needed
|
|
206
|
+
// do this first so that the columns defined later are not excluded
|
|
207
|
+
...getExcludedColumns(),
|
|
208
|
+
// date and time
|
|
209
|
+
event_date: helpers.eventDate,
|
|
210
|
+
event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
|
|
211
|
+
event_timestamp: 'event_timestamp',
|
|
212
|
+
event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
|
|
213
|
+
// event name
|
|
214
|
+
event_name: 'event_name',
|
|
215
|
+
// identifiers
|
|
216
|
+
session_id: helpers.sessionId,
|
|
217
|
+
user_pseudo_id: 'user_pseudo_id',
|
|
218
|
+
user_id: 'user_id',
|
|
219
|
+
// page
|
|
220
|
+
page_location: helpers.unnestEventParam('page_location', 'string'),
|
|
221
|
+
page: helpers.extractPageDetails(),
|
|
222
|
+
// event parameters and user properties
|
|
223
|
+
...promotedEventParameters(),
|
|
224
|
+
event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
|
|
225
|
+
user_properties: 'user_properties',
|
|
226
|
+
// traffic source
|
|
227
|
+
collected_traffic_source: 'collected_traffic_source',
|
|
228
|
+
session_traffic_source_last_click: 'session_traffic_source_last_click',
|
|
229
|
+
user_traffic_source: 'traffic_source',
|
|
230
|
+
// ecommerce
|
|
231
|
+
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
232
|
+
items: 'items',
|
|
233
|
+
_item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
234
|
+
// flag if the data is "final" and is not expected to change anymore
|
|
235
|
+
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
236
|
+
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
237
|
+
// prep columns for later steps
|
|
238
|
+
entrances: helpers.unnestEventParam('entrances', 'int'),
|
|
239
|
+
session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
|
|
240
|
+
};
|
|
241
|
+
// Pass through every GA4 export column not already covered by an explicit transform,
|
|
242
|
+
// promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
|
|
243
|
+
const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
|
|
201
244
|
const eventDataStep = {
|
|
202
245
|
name: 'event_data',
|
|
203
246
|
select: {
|
|
204
247
|
columns: {
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
...getExcludedColumns(),
|
|
208
|
-
// date and time
|
|
209
|
-
event_date: helpers.eventDate,
|
|
210
|
-
event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
|
|
211
|
-
event_timestamp: 'event_timestamp',
|
|
212
|
-
event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
|
|
213
|
-
// event name
|
|
214
|
-
event_name: 'event_name',
|
|
215
|
-
// identifiers
|
|
216
|
-
session_id: helpers.sessionId,
|
|
217
|
-
user_pseudo_id: 'user_pseudo_id',
|
|
218
|
-
user_id: 'user_id',
|
|
219
|
-
// page
|
|
220
|
-
page_location: helpers.unnestEventParam('page_location', 'string'),
|
|
221
|
-
page: helpers.extractPageDetails(),
|
|
222
|
-
// event parameters and user properties
|
|
223
|
-
...promotedEventParameters(),
|
|
224
|
-
event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
|
|
225
|
-
user_properties: 'user_properties',
|
|
226
|
-
// traffic source
|
|
227
|
-
collected_traffic_source: 'collected_traffic_source',
|
|
228
|
-
session_traffic_source_last_click: 'session_traffic_source_last_click',
|
|
229
|
-
user_traffic_source: 'traffic_source',
|
|
230
|
-
// ecommerce
|
|
231
|
-
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
232
|
-
items: 'items',
|
|
233
|
-
_item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
234
|
-
// flag if the data is "final" and is not expected to change anymore
|
|
235
|
-
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
236
|
-
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
237
|
-
// prep columns for later steps
|
|
238
|
-
entrances: helpers.unnestEventParam('entrances', 'int'),
|
|
239
|
-
session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
|
|
240
|
-
// include all other columns from the export data
|
|
241
|
-
get '[sql]other_columns'() {
|
|
242
|
-
const definedColumns = Object.keys(this);
|
|
243
|
-
return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
|
|
244
|
-
},
|
|
248
|
+
...eventDataExplicitColumns,
|
|
249
|
+
...eventDataPassThroughs,
|
|
245
250
|
},
|
|
246
251
|
},
|
|
247
252
|
from: mergedConfig.sourceTable,
|
|
@@ -385,20 +390,14 @@ ${excludedEventsSQL}`,
|
|
|
385
390
|
}
|
|
386
391
|
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
387
392
|
|
|
388
|
-
// Only forward enrichment columns to each wildcard's EXCEPT input if they
|
|
389
|
-
//
|
|
390
|
-
//
|
|
391
|
-
//
|
|
392
|
-
// columns. selectOtherColumns dedupes between internalExcept and externalExcept.
|
|
393
|
+
// Only forward enrichment columns to each wildcard's EXCEPT input if they actually exist
|
|
394
|
+
// in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in SELECT *
|
|
395
|
+
// EXCEPT list does not exist". After M1, Object.keys(step.select.columns) is the complete
|
|
396
|
+
// column set of both event_data and session_data — so the same predicate works for both.
|
|
393
397
|
const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
|
|
394
398
|
const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
|
|
395
|
-
const
|
|
396
|
-
const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
|
|
397
|
-
eventDataExplicit.has(c) || (helpers.isGa4ExportColumn(c) && !userExcluded.has(c))
|
|
398
|
-
);
|
|
399
|
-
const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
|
|
400
|
-
sessionDataExplicit.has(c)
|
|
401
|
-
);
|
|
399
|
+
const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => eventDataExplicit.has(c));
|
|
400
|
+
const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => sessionDataExplicit.has(c));
|
|
402
401
|
|
|
403
402
|
// Join event_data and session_data, include additional logic
|
|
404
403
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
package/utils.js
CHANGED
|
@@ -521,6 +521,45 @@ const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns =
|
|
|
521
521
|
};
|
|
522
522
|
|
|
523
523
|
|
|
524
|
+
/**
|
|
525
|
+
* Builds a queryBuilder `select.columns` fragment that passes through every source column
|
|
526
|
+
* not already covered by an explicit columns object.
|
|
527
|
+
*
|
|
528
|
+
* A source column is considered "covered" — and skipped from pass-throughs — when it appears as:
|
|
529
|
+
* - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
|
|
530
|
+
* sentinel like `{ event_dimensions: undefined }`), OR
|
|
531
|
+
* - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
|
|
532
|
+
* rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
|
|
533
|
+
*
|
|
534
|
+
* Values that are SQL expressions, function calls, or non-strings never count as coverage —
|
|
535
|
+
* they reference the source column internally but the column itself is still available as a
|
|
536
|
+
* pass-through. (`.includes()` compares by strict equality, so 'extract(datetime from ...)'
|
|
537
|
+
* never matches a bare column name.)
|
|
538
|
+
*
|
|
539
|
+
* @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
|
|
540
|
+
* @param {Iterable<string>} sourceColumns - Column names available on the source schema.
|
|
541
|
+
* @returns {Object} A map of `{ column: column }` entries for every source column not covered.
|
|
542
|
+
*
|
|
543
|
+
* @example
|
|
544
|
+
* buildPassThroughs(
|
|
545
|
+
* { event_name: 'event_name', user_traffic_source: 'traffic_source' },
|
|
546
|
+
* ['event_name', 'traffic_source', 'device', 'geo']
|
|
547
|
+
* );
|
|
548
|
+
* // → { device: 'device', geo: 'geo' }
|
|
549
|
+
*/
|
|
550
|
+
const buildPassThroughs = (explicitColumns, sourceColumns) => {
|
|
551
|
+
const explicitKeys = Object.keys(explicitColumns);
|
|
552
|
+
const explicitValues = Object.values(explicitColumns);
|
|
553
|
+
const passThroughs = {};
|
|
554
|
+
for (const column of sourceColumns) {
|
|
555
|
+
if (!explicitKeys.includes(column) && !explicitValues.includes(column)) {
|
|
556
|
+
passThroughs[column] = column;
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
return passThroughs;
|
|
560
|
+
};
|
|
561
|
+
|
|
562
|
+
|
|
524
563
|
/**
|
|
525
564
|
* Processes a date input string and returns a corresponding SQL date casting expression,
|
|
526
565
|
* or passes through BigQuery SQL statements as-is.
|
|
@@ -596,6 +635,7 @@ module.exports = {
|
|
|
596
635
|
isDataformTableReferenceObject,
|
|
597
636
|
setDataformContext,
|
|
598
637
|
selectOtherColumns,
|
|
638
|
+
buildPassThroughs,
|
|
599
639
|
processDate,
|
|
600
640
|
getDatasetName
|
|
601
641
|
};
|