ga4-export-fixer 0.9.0-dev.2 → 0.9.0-dev.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -314,7 +314,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
314
314
 
315
315
  | Field | Type | Default/Required | Description |
316
316
  | ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
317
- | `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in format ``project.dataset.table`` |
317
+ | `sourceTable` | Dataform ref / object / string | **required** | Source GA4 export table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.events_*` ``` string for an external table. |
318
318
  | `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
319
319
  | `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
320
320
  | `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
@@ -535,35 +535,35 @@ For typical use cases this is the right tool; reach for `customSteps` only when
535
535
  | --- | --- | --- | --- |
536
536
  | `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
537
537
  | `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
538
- | `source` | Dataform `ref()` / string | Yes | Source dim table. Use `ref()` in Dataform or a backtick-quoted ``` `project.dataset.table` ``` string. |
538
+ | `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
539
539
  | `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
540
540
  | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
541
541
  | `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Which row wins is non-deterministic; for strict requirements, pre-aggregate in the source SQL. |
542
542
 
543
543
  **Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
544
544
 
545
- **Example** — attach user cohort labels by `user_pseudo_id`:
545
+ **Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
546
546
 
547
547
  ```javascript
548
548
  enrichments: [
549
549
  {
550
550
  name: 'cohorts',
551
551
  level: 'event',
552
- source: ctx.ref('user_cohorts'),
552
+ source: { schema: 'analytics', name: 'user_cohorts' },
553
553
  joinKey: 'user_pseudo_id',
554
554
  columns: ['cohort_label', 'lifecycle_stage'],
555
555
  },
556
556
  ],
557
557
  ```
558
558
 
559
- **Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net:
559
+ **Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net (external table referenced by a backtick-quoted fully qualified name):
560
560
 
561
561
  ```javascript
562
562
  enrichments: [
563
563
  {
564
564
  name: 'segments',
565
565
  level: 'event',
566
- source: ctx.ref('daily_user_segments'),
566
+ source: '`my-project.analytics.daily_user_segments`',
567
567
  joinKey: ['event_date', 'user_pseudo_id'],
568
568
  columns: ['segment'],
569
569
  dedupe: true,
@@ -580,7 +580,7 @@ enrichments: [
580
580
  {
581
581
  name: 'titles',
582
582
  level: 'event',
583
- source: ctx.ref('page_title_overrides'),
583
+ source: { schema: 'analytics', name: 'page_title_overrides' },
584
584
  joinKey: 'page_location',
585
585
  columns: ['page_title'], // overlaps the promoted column → replaces it
586
586
  },
@@ -95,50 +95,50 @@ const isFinalData = (detectionMethod, dayThreshold) => {
95
95
  };
96
96
 
97
97
  /**
98
- * Checks whether a given column name is part of the standard/expected GA4 BigQuery export columns.
98
+ * The standard GA4 BigQuery export top-level column names, based on the official schema.
99
99
  *
100
- * The list of recognized GA4 export columns is based on the official schema as of 2026-02-18.
101
- * This function can be used to filter or validate column names when processing GA4 data exports.
100
+ * List last updated: 2026-02-18.
101
+ */
102
+ const ga4ExportColumns = [
103
+ "event_date",
104
+ "event_timestamp",
105
+ "event_name",
106
+ "event_params",
107
+ "event_previous_timestamp",
108
+ "event_value_in_usd",
109
+ "event_bundle_sequence_id",
110
+ "event_server_timestamp_offset",
111
+ "user_id",
112
+ "user_pseudo_id",
113
+ "privacy_info",
114
+ "user_properties",
115
+ "user_first_touch_timestamp",
116
+ "user_ltv",
117
+ "device",
118
+ "geo",
119
+ "app_info",
120
+ "traffic_source",
121
+ "stream_id",
122
+ "platform",
123
+ "event_dimensions",
124
+ "ecommerce",
125
+ "items",
126
+ "collected_traffic_source",
127
+ "is_active_user",
128
+ "batch_event_index",
129
+ "batch_page_id",
130
+ "batch_ordering_id",
131
+ "session_traffic_source_last_click",
132
+ "publisher"
133
+ ];
134
+
135
+ /**
136
+ * Checks whether a given column name is part of the standard GA4 BigQuery export columns.
102
137
  *
103
138
  * @param {string} columnName - The name of the column to check.
104
139
  * @returns {boolean} True if the column name is a GA4 export column, otherwise false.
105
140
  */
106
- const isGa4ExportColumn = (columnName) => {
107
- // list updated 2026-02-18
108
- const ga4ExportColumns = [
109
- "event_date",
110
- "event_timestamp",
111
- "event_name",
112
- "event_params",
113
- "event_previous_timestamp",
114
- "event_value_in_usd",
115
- "event_bundle_sequence_id",
116
- "event_server_timestamp_offset",
117
- "user_id",
118
- "user_pseudo_id",
119
- "privacy_info",
120
- "user_properties",
121
- "user_first_touch_timestamp",
122
- "user_ltv",
123
- "device",
124
- "geo",
125
- "app_info",
126
- "traffic_source",
127
- "stream_id",
128
- "platform",
129
- "event_dimensions",
130
- "ecommerce",
131
- "items",
132
- "collected_traffic_source",
133
- "is_active_user",
134
- "batch_event_index",
135
- "batch_page_id",
136
- "batch_ordering_id",
137
- "session_traffic_source_last_click",
138
- "publisher"
139
- ];
140
- return ga4ExportColumns.includes(columnName);
141
- };
141
+ const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
142
142
 
143
143
  /**
144
144
  * Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
@@ -255,6 +255,7 @@ module.exports = {
255
255
  sessionId,
256
256
  fixEcommerceStruct,
257
257
  isFinalData,
258
+ ga4ExportColumns,
258
259
  isGa4ExportColumn,
259
260
  getGa4ExportType,
260
261
  itemListAttributionExpr,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.2",
3
+ "version": "0.9.0-dev.4",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,7 @@
17
17
  "createTable.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js && node tests/eventDataColumns.test.js && node tests/utils.test.js",
21
21
  "test:summary": "node tests/testRunner.js",
22
22
  "test:docs": "node tests/documentation.test.js",
23
23
  "test:preops": "node tests/preOperations.test.js",
@@ -29,6 +29,8 @@
29
29
  "test:queryBuilder": "node tests/queryBuilder.test.js",
30
30
  "test:customSteps": "node tests/customSteps.test.js",
31
31
  "test:enrichments": "node tests/enrichments.test.js",
32
+ "test:eventDataColumns": "node tests/eventDataColumns.test.js",
33
+ "test:utils": "node tests/utils.test.js",
32
34
  "test:integration": "node tests/integration/integration.test.js",
33
35
  "release:dev": "./scripts/release-dev.sh",
34
36
  "readme": "node scripts/updateReadme.js",
@@ -197,51 +197,56 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
197
197
  return excludedColumns;
198
198
  };
199
199
 
200
- // initial step: extract data from the export tables
200
+ // initial step: extract data from the export tables.
201
+ // Explicit columns first (transforms + package-promoted + user-excluded sentinels);
202
+ // then pass-through entries for every GA4 export column not already accounted for.
203
+ // After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
204
+ const eventDataExplicitColumns = {
205
+ // exclude default export columns that are not needed
206
+ // do this first so that the columns defined later are not excluded
207
+ ...getExcludedColumns(),
208
+ // date and time
209
+ event_date: helpers.eventDate,
210
+ event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
+ event_timestamp: 'event_timestamp',
212
+ event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
+ // event name
214
+ event_name: 'event_name',
215
+ // identifiers
216
+ session_id: helpers.sessionId,
217
+ user_pseudo_id: 'user_pseudo_id',
218
+ user_id: 'user_id',
219
+ // page
220
+ page_location: helpers.unnestEventParam('page_location', 'string'),
221
+ page: helpers.extractPageDetails(),
222
+ // event parameters and user properties
223
+ ...promotedEventParameters(),
224
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
+ user_properties: 'user_properties',
226
+ // traffic source
227
+ collected_traffic_source: 'collected_traffic_source',
228
+ session_traffic_source_last_click: 'session_traffic_source_last_click',
229
+ user_traffic_source: 'traffic_source',
230
+ // ecommerce
231
+ ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
+ items: 'items',
233
+ _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
234
+ // flag if the data is "final" and is not expected to change anymore
235
+ data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
+ export_type: helpers.getGa4ExportType('_table_suffix'),
237
+ // prep columns for later steps
238
+ entrances: helpers.unnestEventParam('entrances', 'int'),
239
+ session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
+ };
241
+ // Pass through every GA4 export column not already covered by an explicit transform,
242
+ // promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
243
+ const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
201
244
  const eventDataStep = {
202
245
  name: 'event_data',
203
246
  select: {
204
247
  columns: {
205
- // exclude default export columns that are not needed
206
- // do this first so that the columns defined later are not excluded
207
- ...getExcludedColumns(),
208
- // date and time
209
- event_date: helpers.eventDate,
210
- event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
- event_timestamp: 'event_timestamp',
212
- event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
- // event name
214
- event_name: 'event_name',
215
- // identifiers
216
- session_id: helpers.sessionId,
217
- user_pseudo_id: 'user_pseudo_id',
218
- user_id: 'user_id',
219
- // page
220
- page_location: helpers.unnestEventParam('page_location', 'string'),
221
- page: helpers.extractPageDetails(),
222
- // event parameters and user properties
223
- ...promotedEventParameters(),
224
- event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
- user_properties: 'user_properties',
226
- // traffic source
227
- collected_traffic_source: 'collected_traffic_source',
228
- session_traffic_source_last_click: 'session_traffic_source_last_click',
229
- user_traffic_source: 'traffic_source',
230
- // ecommerce
231
- ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
- items: 'items',
233
- _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
234
- // flag if the data is "final" and is not expected to change anymore
235
- data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
- export_type: helpers.getGa4ExportType('_table_suffix'),
237
- // prep columns for later steps
238
- entrances: helpers.unnestEventParam('entrances', 'int'),
239
- session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
- // include all other columns from the export data
241
- get '[sql]other_columns'() {
242
- const definedColumns = Object.keys(this);
243
- return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
244
- },
248
+ ...eventDataExplicitColumns,
249
+ ...eventDataPassThroughs,
245
250
  },
246
251
  },
247
252
  from: mergedConfig.sourceTable,
@@ -385,6 +390,15 @@ ${excludedEventsSQL}`,
385
390
  }
386
391
  const enrichmentExcludedColumns = [...enrichmentColumnNames];
387
392
 
393
+ // Only forward enrichment columns to each wildcard's EXCEPT input if they actually exist
394
+ // in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in SELECT *
395
+ // EXCEPT list does not exist". Because pass-through columns are now listed explicitly, Object.keys(step.select.columns) is the complete
396
+ // column set of both event_data and session_data — so the same predicate works for both.
397
+ const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
398
+ const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
399
+ const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => eventDataExplicit.has(c));
400
+ const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => sessionDataExplicit.has(c));
401
+
388
402
  // Join event_data and session_data, include additional logic
389
403
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
390
404
  const enhancedEventsStep = {
@@ -407,14 +421,14 @@ ${excludedEventsSQL}`,
407
421
  'data_is_final',
408
422
  'export_type',
409
423
  ...itemListExcludedColumns,
410
- ...enrichmentExcludedColumns,
424
+ ...eventDataEnrichmentExcept,
411
425
  ]
412
426
  ),
413
427
  // get the rest of the session_data columns
414
428
  '[sql]session_data': utils.selectOtherColumns(
415
429
  sessionDataStep,
416
430
  Object.keys(finalColumnOrder),
417
- [...enrichmentExcludedColumns],
431
+ sessionDataEnrichmentExcept,
418
432
  ),
419
433
  // include additional columns
420
434
  row_inserted_timestamp: 'current_timestamp()',
package/utils.js CHANGED
@@ -521,6 +521,45 @@ const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns =
521
521
  };
522
522
 
523
523
 
524
+ /**
525
+ * Builds a queryBuilder `select.columns` fragment that passes through every source column
526
+ * not already covered by an explicit columns object.
527
+ *
528
+ * A source column is considered "covered" — and skipped from pass-throughs — when it appears as:
529
+ * - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
530
+ * sentinel like `{ event_dimensions: undefined }`), OR
531
+ * - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
532
+ * rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
533
+ *
534
+ * Values that are SQL expressions, function calls, or non-strings never count as coverage —
535
+ * they reference the source column internally but the column itself is still available as a
536
+ * pass-through. (`Array.prototype.includes()` matches whole values, not substrings, so 'extract(datetime from ...)'
537
+ * never matches a bare column name.)
538
+ *
539
+ * @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
540
+ * @param {Iterable<string>} sourceColumns - Column names available on the source schema.
541
+ * @returns {Object} A map of `{ column: column }` entries for every source column not covered.
542
+ *
543
+ * @example
544
+ * buildPassThroughs(
545
+ * { event_name: 'event_name', user_traffic_source: 'traffic_source' },
546
+ * ['event_name', 'traffic_source', 'device', 'geo']
547
+ * );
548
+ * // → { device: 'device', geo: 'geo' }
549
+ */
550
+ const buildPassThroughs = (explicitColumns, sourceColumns) => {
551
+ const explicitKeys = Object.keys(explicitColumns);
552
+ const explicitValues = Object.values(explicitColumns);
553
+ const passThroughs = {};
554
+ for (const column of sourceColumns) {
555
+ if (!explicitKeys.includes(column) && !explicitValues.includes(column)) {
556
+ passThroughs[column] = column;
557
+ }
558
+ }
559
+ return passThroughs;
560
+ };
561
+
562
+
524
563
  /**
525
564
  * Processes a date input string and returns a corresponding SQL date casting expression,
526
565
  * or passes through BigQuery SQL statements as-is.
@@ -596,6 +635,7 @@ module.exports = {
596
635
  isDataformTableReferenceObject,
597
636
  setDataformContext,
598
637
  selectOtherColumns,
638
+ buildPassThroughs,
599
639
  processDate,
600
640
  getDatasetName
601
641
  };