ga4-export-fixer 0.9.0-dev.3 → 0.9.0-dev.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -95,50 +95,50 @@ const isFinalData = (detectionMethod, dayThreshold) => {
95
95
  };
96
96
 
97
97
  /**
98
- * Checks whether a given column name is part of the standard/expected GA4 BigQuery export columns.
98
+ * The standard GA4 BigQuery export top-level column names, based on the official schema.
99
99
  *
100
- * The list of recognized GA4 export columns is based on the official schema as of 2026-02-18.
101
- * This function can be used to filter or validate column names when processing GA4 data exports.
100
+ * list updated 2026-02-18
101
+ */
102
+ const ga4ExportColumns = [
103
+ "event_date",
104
+ "event_timestamp",
105
+ "event_name",
106
+ "event_params",
107
+ "event_previous_timestamp",
108
+ "event_value_in_usd",
109
+ "event_bundle_sequence_id",
110
+ "event_server_timestamp_offset",
111
+ "user_id",
112
+ "user_pseudo_id",
113
+ "privacy_info",
114
+ "user_properties",
115
+ "user_first_touch_timestamp",
116
+ "user_ltv",
117
+ "device",
118
+ "geo",
119
+ "app_info",
120
+ "traffic_source",
121
+ "stream_id",
122
+ "platform",
123
+ "event_dimensions",
124
+ "ecommerce",
125
+ "items",
126
+ "collected_traffic_source",
127
+ "is_active_user",
128
+ "batch_event_index",
129
+ "batch_page_id",
130
+ "batch_ordering_id",
131
+ "session_traffic_source_last_click",
132
+ "publisher"
133
+ ];
134
+
135
+ /**
136
+ * Checks whether a given column name is part of the standard GA4 BigQuery export columns.
102
137
  *
103
138
  * @param {string} columnName - The name of the column to check.
104
139
  * @returns {boolean} True if the column name is a GA4 export column, otherwise false.
105
140
  */
106
- const isGa4ExportColumn = (columnName) => {
107
- // list updated 2026-02-18
108
- const ga4ExportColumns = [
109
- "event_date",
110
- "event_timestamp",
111
- "event_name",
112
- "event_params",
113
- "event_previous_timestamp",
114
- "event_value_in_usd",
115
- "event_bundle_sequence_id",
116
- "event_server_timestamp_offset",
117
- "user_id",
118
- "user_pseudo_id",
119
- "privacy_info",
120
- "user_properties",
121
- "user_first_touch_timestamp",
122
- "user_ltv",
123
- "device",
124
- "geo",
125
- "app_info",
126
- "traffic_source",
127
- "stream_id",
128
- "platform",
129
- "event_dimensions",
130
- "ecommerce",
131
- "items",
132
- "collected_traffic_source",
133
- "is_active_user",
134
- "batch_event_index",
135
- "batch_page_id",
136
- "batch_ordering_id",
137
- "session_traffic_source_last_click",
138
- "publisher"
139
- ];
140
- return ga4ExportColumns.includes(columnName);
141
- };
141
+ const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
142
142
 
143
143
  /**
144
144
  * Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
@@ -255,6 +255,7 @@ module.exports = {
255
255
  sessionId,
256
256
  fixEcommerceStruct,
257
257
  isFinalData,
258
+ ga4ExportColumns,
258
259
  isGa4ExportColumn,
259
260
  getGa4ExportType,
260
261
  itemListAttributionExpr,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.3",
3
+ "version": "0.9.0-dev.4",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,7 @@
17
17
  "createTable.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js && node tests/eventDataColumns.test.js && node tests/utils.test.js",
21
21
  "test:summary": "node tests/testRunner.js",
22
22
  "test:docs": "node tests/documentation.test.js",
23
23
  "test:preops": "node tests/preOperations.test.js",
@@ -29,6 +29,8 @@
29
29
  "test:queryBuilder": "node tests/queryBuilder.test.js",
30
30
  "test:customSteps": "node tests/customSteps.test.js",
31
31
  "test:enrichments": "node tests/enrichments.test.js",
32
+ "test:eventDataColumns": "node tests/eventDataColumns.test.js",
33
+ "test:utils": "node tests/utils.test.js",
32
34
  "test:integration": "node tests/integration/integration.test.js",
33
35
  "release:dev": "./scripts/release-dev.sh",
34
36
  "readme": "node scripts/updateReadme.js",
@@ -197,51 +197,56 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
197
197
  return excludedColumns;
198
198
  };
199
199
 
200
- // initial step: extract data from the export tables
200
+ // initial step: extract data from the export tables.
201
+ // Explicit columns first (transforms + package-promoted + user-excluded sentinels);
202
+ // then pass-through entries for every GA4 export column not already accounted for.
203
+ // After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
204
+ const eventDataExplicitColumns = {
205
+ // exclude default export columns that are not needed
206
+ // do this first so that the columns defined later are not excluded
207
+ ...getExcludedColumns(),
208
+ // date and time
209
+ event_date: helpers.eventDate,
210
+ event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
+ event_timestamp: 'event_timestamp',
212
+ event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
+ // event name
214
+ event_name: 'event_name',
215
+ // identifiers
216
+ session_id: helpers.sessionId,
217
+ user_pseudo_id: 'user_pseudo_id',
218
+ user_id: 'user_id',
219
+ // page
220
+ page_location: helpers.unnestEventParam('page_location', 'string'),
221
+ page: helpers.extractPageDetails(),
222
+ // event parameters and user properties
223
+ ...promotedEventParameters(),
224
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
+ user_properties: 'user_properties',
226
+ // traffic source
227
+ collected_traffic_source: 'collected_traffic_source',
228
+ session_traffic_source_last_click: 'session_traffic_source_last_click',
229
+ user_traffic_source: 'traffic_source',
230
+ // ecommerce
231
+ ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
+ items: 'items',
233
+ _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
234
+ // flag if the data is "final" and is not expected to change anymore
235
+ data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
+ export_type: helpers.getGa4ExportType('_table_suffix'),
237
+ // prep columns for later steps
238
+ entrances: helpers.unnestEventParam('entrances', 'int'),
239
+ session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
+ };
241
+ // Pass through every GA4 export column not already covered by an explicit transform,
242
+ // promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
243
+ const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
201
244
  const eventDataStep = {
202
245
  name: 'event_data',
203
246
  select: {
204
247
  columns: {
205
- // exclude default export columns that are not needed
206
- // do this first so that the columns defined later are not excluded
207
- ...getExcludedColumns(),
208
- // date and time
209
- event_date: helpers.eventDate,
210
- event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
- event_timestamp: 'event_timestamp',
212
- event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
- // event name
214
- event_name: 'event_name',
215
- // identifiers
216
- session_id: helpers.sessionId,
217
- user_pseudo_id: 'user_pseudo_id',
218
- user_id: 'user_id',
219
- // page
220
- page_location: helpers.unnestEventParam('page_location', 'string'),
221
- page: helpers.extractPageDetails(),
222
- // event parameters and user properties
223
- ...promotedEventParameters(),
224
- event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
- user_properties: 'user_properties',
226
- // traffic source
227
- collected_traffic_source: 'collected_traffic_source',
228
- session_traffic_source_last_click: 'session_traffic_source_last_click',
229
- user_traffic_source: 'traffic_source',
230
- // ecommerce
231
- ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
- items: 'items',
233
- _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
234
- // flag if the data is "final" and is not expected to change anymore
235
- data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
- export_type: helpers.getGa4ExportType('_table_suffix'),
237
- // prep columns for later steps
238
- entrances: helpers.unnestEventParam('entrances', 'int'),
239
- session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
- // include all other columns from the export data
241
- get '[sql]other_columns'() {
242
- const definedColumns = Object.keys(this);
243
- return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
244
- },
248
+ ...eventDataExplicitColumns,
249
+ ...eventDataPassThroughs,
245
250
  },
246
251
  },
247
252
  from: mergedConfig.sourceTable,
@@ -385,20 +390,14 @@ ${excludedEventsSQL}`,
385
390
  }
386
391
  const enrichmentExcludedColumns = [...enrichmentColumnNames];
387
392
 
388
- // Only forward enrichment columns to each wildcard's EXCEPT input if they would actually
389
- // appear in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in
390
- // SELECT * EXCEPT list does not exist". event_data.* expands to its explicit columns plus
391
- // GA4 export columns minus user-excluded ones; session_data.* expands only to its explicit
392
- // columns. selectOtherColumns dedupes between internalExcept and externalExcept.
393
+ // Only forward enrichment columns to each wildcard's EXCEPT input if they actually exist
394
+ // in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in SELECT *
395
+ // EXCEPT list does not exist". Now that event_data lists its pass-through columns explicitly, Object.keys(step.select.columns) is the complete
396
+ // column set of both event_data and session_data so the same predicate works for both.
393
397
  const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
394
398
  const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
395
- const userExcluded = new Set(mergedConfig.excludedColumns);
396
- const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
397
- eventDataExplicit.has(c) || (helpers.isGa4ExportColumn(c) && !userExcluded.has(c))
398
- );
399
- const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
400
- sessionDataExplicit.has(c)
401
- );
399
+ const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => eventDataExplicit.has(c));
400
+ const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => sessionDataExplicit.has(c));
402
401
 
403
402
  // Join event_data and session_data, include additional logic
404
403
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
package/utils.js CHANGED
@@ -521,6 +521,45 @@ const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns =
521
521
  };
522
522
 
523
523
 
524
/**
 * Builds a queryBuilder `select.columns` fragment that passes through every source column
 * not already covered by an explicit columns object.
 *
 * A source column is considered "covered" — and skipped from pass-throughs — when it appears as:
 *   - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
 *     sentinel like `{ event_dimensions: undefined }`), OR
 *   - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
 *     rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
 *
 * Values that are SQL expressions, function calls, or non-strings never count as coverage —
 * they reference the source column internally but the column itself is still available as a
 * pass-through. (Coverage is an exact whole-value match, so 'extract(datetime from ...)'
 * never matches a bare column name.)
 *
 * The returned object lists the uncovered columns in the iteration order of `sourceColumns`.
 *
 * @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
 * @param {Iterable<string>} sourceColumns - Column names available on the source schema.
 * @returns {Object} A map of `{ column: column }` entries for every source column not covered.
 *
 * @example
 * buildPassThroughs(
 *   { event_name: 'event_name', user_traffic_source: 'traffic_source' },
 *   ['event_name', 'traffic_source', 'device', 'geo']
 * );
 * // → { device: 'device', geo: 'geo' }
 */
const buildPassThroughs = (explicitColumns, sourceColumns) => {
  // Keys and values are pooled into a single Set, built once: a column is covered when it
  // matches either side, and Set#has uses the same equality as Array#includes (SameValueZero),
  // so this replaces two linear scans per source column with one constant-time lookup.
  const coveredColumns = new Set([
    ...Object.keys(explicitColumns),
    ...Object.values(explicitColumns),
  ]);
  const passThroughs = {};
  for (const column of sourceColumns) {
    if (!coveredColumns.has(column)) {
      passThroughs[column] = column;
    }
  }
  return passThroughs;
};
561
+
562
+
524
563
  /**
525
564
  * Processes a date input string and returns a corresponding SQL date casting expression,
526
565
  * or passes through BigQuery SQL statements as-is.
@@ -596,6 +635,7 @@ module.exports = {
596
635
  isDataformTableReferenceObject,
597
636
  setDataformContext,
598
637
  selectOtherColumns,
638
+ buildPassThroughs,
599
639
  processDate,
600
640
  getDatasetName
601
641
  };