ga4-export-fixer 0.9.0-dev.2 → 0.9.0-dev.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -314,7 +314,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
314
314
 
315
315
  | Field | Type | Default/Required | Description |
316
316
  | ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
317
- | `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in format ``project.dataset.table`` |
317
+ | `sourceTable` | Dataform ref / object / string | **required** | Source GA4 export table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.events_*` ``` string for an external table. |
318
318
  | `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
319
319
  | `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
320
320
  | `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
@@ -535,35 +535,35 @@ For typical use cases this is the right tool; reach for `customSteps` only when
535
535
  | --- | --- | --- | --- |
536
536
  | `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
537
537
  | `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
538
- | `source` | Dataform `ref()` / string | Yes | Source dim table. Use `ref()` in Dataform or a backtick-quoted ``` `project.dataset.table` ``` string. |
538
+ | `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
539
539
  | `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
540
540
  | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
541
541
  | `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Which row wins is non-deterministic; for strict needs, pre-aggregate in source SQL. |
542
542
 
543
543
  **Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
544
544
 
545
- **Example** — attach user cohort labels by `user_pseudo_id`:
545
+ **Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
546
546
 
547
547
  ```javascript
548
548
  enrichments: [
549
549
  {
550
550
  name: 'cohorts',
551
551
  level: 'event',
552
- source: ctx.ref('user_cohorts'),
552
+ source: { schema: 'analytics', name: 'user_cohorts' },
553
553
  joinKey: 'user_pseudo_id',
554
554
  columns: ['cohort_label', 'lifecycle_stage'],
555
555
  },
556
556
  ],
557
557
  ```
558
558
 
559
- **Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net:
559
+ **Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net (external table referenced by backtick-FQN):
560
560
 
561
561
  ```javascript
562
562
  enrichments: [
563
563
  {
564
564
  name: 'segments',
565
565
  level: 'event',
566
- source: ctx.ref('daily_user_segments'),
566
+ source: '`my-project.analytics.daily_user_segments`',
567
567
  joinKey: ['event_date', 'user_pseudo_id'],
568
568
  columns: ['segment'],
569
569
  dedupe: true,
@@ -580,7 +580,7 @@ enrichments: [
580
580
  {
581
581
  name: 'titles',
582
582
  level: 'event',
583
- source: ctx.ref('page_title_overrides'),
583
+ source: { schema: 'analytics', name: 'page_title_overrides' },
584
584
  joinKey: 'page_location',
585
585
  columns: ['page_title'], // overlaps the promoted column → replaces it
586
586
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.2",
3
+ "version": "0.9.0-dev.3",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -385,6 +385,21 @@ ${excludedEventsSQL}`,
385
385
  }
386
386
  const enrichmentExcludedColumns = [...enrichmentColumnNames];
387
387
 
388
+ // Only forward enrichment columns to each wildcard's EXCEPT input if they would actually
389
+ // appear in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in
390
+ // SELECT * EXCEPT list does not exist". event_data.* expands to its explicit columns plus
391
+ // GA4 export columns minus user-excluded ones; session_data.* expands only to its explicit
392
+ // columns. selectOtherColumns dedupes between internalExcept and externalExcept.
393
+ const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
394
+ const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
395
+ const userExcluded = new Set(mergedConfig.excludedColumns);
396
+ const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
397
+ eventDataExplicit.has(c) || (helpers.isGa4ExportColumn(c) && !userExcluded.has(c))
398
+ );
399
+ const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
400
+ sessionDataExplicit.has(c)
401
+ );
402
+
388
403
  // Join event_data and session_data, include additional logic
389
404
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
390
405
  const enhancedEventsStep = {
@@ -407,14 +422,14 @@ ${excludedEventsSQL}`,
407
422
  'data_is_final',
408
423
  'export_type',
409
424
  ...itemListExcludedColumns,
410
- ...enrichmentExcludedColumns,
425
+ ...eventDataEnrichmentExcept,
411
426
  ]
412
427
  ),
413
428
  // get the rest of the session_data columns
414
429
  '[sql]session_data': utils.selectOtherColumns(
415
430
  sessionDataStep,
416
431
  Object.keys(finalColumnOrder),
417
- [...enrichmentExcludedColumns],
432
+ sessionDataEnrichmentExcept,
418
433
  ),
419
434
  // include additional columns
420
435
  row_inserted_timestamp: 'current_timestamp()',