ga4-export-fixer 0.9.0-dev.2 → 0.9.0-dev.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/index.js +17 -2
package/README.md
CHANGED
|
@@ -314,7 +314,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
314
314
|
|
|
315
315
|
| Field | Type | Default/Required | Description |
|
|
316
316
|
| ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
317
|
-
| `sourceTable` | Dataform ref
|
|
317
|
+
| `sourceTable` | Dataform ref / object / string | **required** | Source GA4 export table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.events_*` ``` string for an external table. |
|
|
318
318
|
| `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform. |
|
|
319
319
|
| `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform. |
|
|
320
320
|
| `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
|
|
@@ -535,35 +535,35 @@ For typical use cases this is the right tool; reach for `customSteps` only when
|
|
|
535
535
|
| --- | --- | --- | --- |
|
|
536
536
|
| `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Must be unique within `enrichments`. |
|
|
537
537
|
| `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
|
|
538
|
-
| `source` | Dataform
|
|
538
|
+
| `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
|
|
539
539
|
| `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
540
540
|
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
|
|
541
541
|
| `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` to handle dim sources whose join key is not unique. Which row wins is non-deterministic; if you need a specific row to win, pre-aggregate in the source SQL instead. |
|
|
542
542
|
|
|
543
543
|
**Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
|
|
544
544
|
|
|
545
|
-
**Example** — attach user cohort labels by `user_pseudo_id
|
|
545
|
+
**Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
|
|
546
546
|
|
|
547
547
|
```javascript
|
|
548
548
|
enrichments: [
|
|
549
549
|
{
|
|
550
550
|
name: 'cohorts',
|
|
551
551
|
level: 'event',
|
|
552
|
-
source:
|
|
552
|
+
source: { schema: 'analytics', name: 'user_cohorts' },
|
|
553
553
|
joinKey: 'user_pseudo_id',
|
|
554
554
|
columns: ['cohort_label', 'lifecycle_stage'],
|
|
555
555
|
},
|
|
556
556
|
],
|
|
557
557
|
```
|
|
558
558
|
|
|
559
|
-
**Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net:
|
|
559
|
+
**Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net (external table referenced by backtick-FQN):
|
|
560
560
|
|
|
561
561
|
```javascript
|
|
562
562
|
enrichments: [
|
|
563
563
|
{
|
|
564
564
|
name: 'segments',
|
|
565
565
|
level: 'event',
|
|
566
|
-
source:
|
|
566
|
+
source: '`my-project.analytics.daily_user_segments`',
|
|
567
567
|
joinKey: ['event_date', 'user_pseudo_id'],
|
|
568
568
|
columns: ['segment'],
|
|
569
569
|
dedupe: true,
|
|
@@ -580,7 +580,7 @@ enrichments: [
|
|
|
580
580
|
{
|
|
581
581
|
name: 'titles',
|
|
582
582
|
level: 'event',
|
|
583
|
-
source:
|
|
583
|
+
source: { schema: 'analytics', name: 'page_title_overrides' },
|
|
584
584
|
joinKey: 'page_location',
|
|
585
585
|
columns: ['page_title'], // overlaps the promoted column → replaces it
|
|
586
586
|
},
|
package/package.json
CHANGED
(+1 -1; hunk not included in this extract)
package/tables/ga4EventsEnhanced/index.js
CHANGED
|
@@ -385,6 +385,21 @@ ${excludedEventsSQL}`,
|
|
|
385
385
|
}
|
|
386
386
|
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
387
387
|
|
|
388
|
+
// Only forward enrichment columns to each wildcard's EXCEPT input if they would actually
|
|
389
|
+
// appear in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in
|
|
390
|
+
// SELECT * EXCEPT list does not exist". event_data.* expands to its explicit columns plus
|
|
391
|
+
// GA4 export columns minus user-excluded ones; session_data.* expands only to its explicit
|
|
392
|
+
// columns. selectOtherColumns dedupes between internalExcept and externalExcept.
|
|
393
|
+
const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
|
|
394
|
+
const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
|
|
395
|
+
const userExcluded = new Set(mergedConfig.excludedColumns);
|
|
396
|
+
const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
|
|
397
|
+
eventDataExplicit.has(c) || (helpers.isGa4ExportColumn(c) && !userExcluded.has(c))
|
|
398
|
+
);
|
|
399
|
+
const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c =>
|
|
400
|
+
sessionDataExplicit.has(c)
|
|
401
|
+
);
|
|
402
|
+
|
|
388
403
|
// Join event_data and session_data, include additional logic
|
|
389
404
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
390
405
|
const enhancedEventsStep = {
|
|
@@ -407,14 +422,14 @@ ${excludedEventsSQL}`,
|
|
|
407
422
|
'data_is_final',
|
|
408
423
|
'export_type',
|
|
409
424
|
...itemListExcludedColumns,
|
|
410
|
-
...
|
|
425
|
+
...eventDataEnrichmentExcept,
|
|
411
426
|
]
|
|
412
427
|
),
|
|
413
428
|
// get the rest of the session_data columns
|
|
414
429
|
'[sql]session_data': utils.selectOtherColumns(
|
|
415
430
|
sessionDataStep,
|
|
416
431
|
Object.keys(finalColumnOrder),
|
|
417
|
-
|
|
432
|
+
sessionDataEnrichmentExcept,
|
|
418
433
|
),
|
|
419
434
|
// include additional columns
|
|
420
435
|
row_inserted_timestamp: 'current_timestamp()',
|