ga4-export-fixer 0.9.0-dev.7 → 0.9.0-dev.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/documentation.js +1 -1
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/index.js +41 -36
- package/utils.js +33 -48
package/README.md
CHANGED
|
@@ -537,10 +537,10 @@ For typical use cases this is the right tool; reach for `customSteps` only when
|
|
|
537
537
|
| `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
|
|
538
538
|
| `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
|
|
539
539
|
| `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
540
|
-
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns
|
|
540
|
+
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
|
|
541
541
|
| `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
|
|
542
542
|
|
|
543
|
-
**
|
|
543
|
+
**Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`. There is no opt-out — for hard-replace semantics (NULL on missed JOIN), pre-aggregate or sentinel-fill in your source SQL.
|
|
544
544
|
|
|
545
545
|
**Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
|
|
546
546
|
|
|
@@ -571,7 +571,7 @@ enrichments: [
|
|
|
571
571
|
],
|
|
572
572
|
```
|
|
573
573
|
|
|
574
|
-
**Example** — fix a promoted event parameter via enrichment (
|
|
574
|
+
**Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
|
|
575
575
|
|
|
576
576
|
```javascript
|
|
577
577
|
{
|
|
@@ -582,7 +582,7 @@ enrichments: [
|
|
|
582
582
|
level: 'event',
|
|
583
583
|
source: { schema: 'analytics', name: 'page_title_overrides' },
|
|
584
584
|
joinKey: 'page_location',
|
|
585
|
-
columns: ['page_title'], // overlaps the promoted column →
|
|
585
|
+
columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
|
|
586
586
|
},
|
|
587
587
|
],
|
|
588
588
|
}
|
|
@@ -590,7 +590,7 @@ enrichments: [
|
|
|
590
590
|
|
|
591
591
|
> **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
|
|
592
592
|
|
|
593
|
-
> **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `
|
|
593
|
+
> **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
|
|
594
594
|
|
|
595
595
|
> **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
|
|
596
596
|
|
package/documentation.js
CHANGED
|
@@ -186,7 +186,7 @@ const getColumnDescriptions = (config, columnMetadata) => {
|
|
|
186
186
|
? existing.description
|
|
187
187
|
: null;
|
|
188
188
|
const newDesc = existingText
|
|
189
|
-
? `
|
|
189
|
+
? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
|
|
190
190
|
: `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
|
|
191
191
|
// If the original was a struct-shaped entry, preserve the structure but replace the description.
|
|
192
192
|
// Otherwise, set as a plain string.
|
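package/package.json
CHANGED
[hunk for package/package.json (+1 -1) is missing from this extract; the hunks below belong to package/tables/ga4EventsEnhanced/index.js]
package/tables/ga4EventsEnhanced/index.js
CHANGED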
|
@@ -324,16 +324,40 @@ ${excludedEventsSQL}`,
|
|
|
324
324
|
// enrichments throw "not yet supported" inside the utility — they will arrive in a later release.
|
|
325
325
|
const { steps: enrichmentSteps, joins: enrichmentJoins, columns: enrichmentColumns,
|
|
326
326
|
columnNames: enrichmentColumnNames } = utils.buildEnrichments(mergedConfig.enrichments);
|
|
327
|
-
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
328
327
|
|
|
329
|
-
//
|
|
330
|
-
//
|
|
331
|
-
//
|
|
332
|
-
//
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
const
|
|
336
|
-
const
|
|
328
|
+
// Wrap overlapping enrichment columns in coalesce(enrich_<name>.<col>, <original>) so a
|
|
329
|
+
// missed JOIN falls back to the existing value. Purely additive columns (no overlap)
|
|
330
|
+
// pass through unchanged. Source-of-original precedence matches the final SELECT's spread
|
|
331
|
+
// order: itemListOverrides first (overrides finalColumnOrder for `items`), then
|
|
332
|
+
// session_data (wins over event_data in getFinalColumnOrder when both have the column).
|
|
333
|
+
// See design_docs/planned/data-enrichments.md Q13.
|
|
334
|
+
const wrappedEnrichmentColumns = {};
|
|
335
|
+
for (const [col, enrichExpr] of Object.entries(enrichmentColumns)) {
|
|
336
|
+
let originalExpr;
|
|
337
|
+
if (col in itemListOverrides) {
|
|
338
|
+
originalExpr = itemListOverrides[col];
|
|
339
|
+
} else if (col in sessionDataStep.select.columns) {
|
|
340
|
+
originalExpr = `session_data.${col}`;
|
|
341
|
+
} else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
|
|
342
|
+
originalExpr = `event_data.${col}`;
|
|
343
|
+
}
|
|
344
|
+
wrappedEnrichmentColumns[col] = originalExpr
|
|
345
|
+
? `coalesce(${enrichExpr}, ${originalExpr})`
|
|
346
|
+
: enrichExpr;
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
// List all column names that have already been defined or should be left out
|
|
350
|
+
// Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
|
|
351
|
+
const alreadyMapped = [
|
|
352
|
+
...Object.keys(finalColumnOrder),
|
|
353
|
+
...Object.keys(itemListOverrides),
|
|
354
|
+
...enrichmentColumnNames,
|
|
355
|
+
'entrances',
|
|
356
|
+
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
357
|
+
'data_is_final',
|
|
358
|
+
'export_type',
|
|
359
|
+
...itemListExcludedColumns,
|
|
360
|
+
];
|
|
337
361
|
|
|
338
362
|
// Join event_data and session_data, include additional logic
|
|
339
363
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
@@ -344,28 +368,11 @@ ${excludedEventsSQL}`,
|
|
|
344
368
|
// get the most important columns in the correct order
|
|
345
369
|
...finalColumnOrder,
|
|
346
370
|
...itemListOverrides,
|
|
347
|
-
// event-level enrichment columns:
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
eventDataStep,
|
|
353
|
-
Object.keys(finalColumnOrder),
|
|
354
|
-
[
|
|
355
|
-
'entrances',
|
|
356
|
-
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
357
|
-
'data_is_final',
|
|
358
|
-
'export_type',
|
|
359
|
-
...itemListExcludedColumns,
|
|
360
|
-
...eventDataEnrichmentExcept,
|
|
361
|
-
]
|
|
362
|
-
),
|
|
363
|
-
// get the rest of the session_data columns
|
|
364
|
-
'[sql]session_data': utils.selectOtherColumns(
|
|
365
|
-
sessionDataStep,
|
|
366
|
-
Object.keys(finalColumnOrder),
|
|
367
|
-
sessionDataEnrichmentExcept,
|
|
368
|
-
),
|
|
371
|
+
// event-level enrichment columns: coalesce with the original when overlapping; otherwise add.
|
|
372
|
+
...wrappedEnrichmentColumns,
|
|
373
|
+
// explicit pass-throughs for the rest of event_data and session_data
|
|
374
|
+
...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
|
|
375
|
+
...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
|
|
369
376
|
// include additional columns
|
|
370
377
|
row_inserted_timestamp: 'current_timestamp()',
|
|
371
378
|
data_is_final: 'data_is_final',
|
|
@@ -384,7 +391,7 @@ ${excludedEventsSQL}`,
|
|
|
384
391
|
table: 'session_data',
|
|
385
392
|
on: 'using(session_id)'
|
|
386
393
|
},
|
|
387
|
-
//
|
|
394
|
+
// The left joins for the enrichment CTEs
|
|
388
395
|
...enrichmentJoins,
|
|
389
396
|
],
|
|
390
397
|
where: helpers.incrementalDateFilter(mergedConfig)
|
|
@@ -398,10 +405,7 @@ ${excludedEventsSQL}`,
|
|
|
398
405
|
enhancedEventsStep,
|
|
399
406
|
];
|
|
400
407
|
|
|
401
|
-
//
|
|
402
|
-
// Reserved set is derived from packageSteps at runtime (single source of truth) — what
|
|
403
|
-
// is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on,
|
|
404
|
-
// and enrich_* names exist only when enrichments are configured).
|
|
408
|
+
// Ensure that the custom step names don't collide with the default or data enrichment step names
|
|
405
409
|
const customSteps = mergedConfig.customSteps ?? [];
|
|
406
410
|
if (customSteps.length > 0) {
|
|
407
411
|
const reservedNames = new Set(packageSteps.map(s => s.name));
|
|
@@ -415,6 +419,7 @@ ${excludedEventsSQL}`,
|
|
|
415
419
|
}
|
|
416
420
|
}
|
|
417
421
|
|
|
422
|
+
// Include custom steps last in the list
|
|
418
423
|
const steps = [...packageSteps, ...customSteps];
|
|
419
424
|
|
|
420
425
|
return utils.queryBuilder(steps);
|
package/utils.js
CHANGED
|
@@ -474,53 +474,6 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
474
474
|
return deepMerge(defaultConfig, inputConfig);
|
|
475
475
|
};
|
|
476
476
|
|
|
477
|
-
/**
|
|
478
|
-
* Generates a SQL selection string for a given query step, excluding columns already defined elsewhere
|
|
479
|
-
* or columns that should be excluded.
|
|
480
|
-
*
|
|
481
|
-
* This utility is helpful when joining tables/CTEs to avoid selecting duplicate or already-present columns.
|
|
482
|
-
*
|
|
483
|
-
* @param {Object} step - A queryBuilder structured step containing a `name` (CTE/table alias) and a `select.columns` object.
|
|
484
|
-
* @param {string[]} [alreadyDefinedColumns=[]] - Columns that have already been defined and should be excluded from selection.
|
|
485
|
-
* @param {string[]} [excludedColumns=[]] - Additional columns to explicitly exclude from selection.
|
|
486
|
-
* @returns {string|undefined} A SQL select string (e.g. 'stepName.*' or 'stepName.* except (col1, col2)'), or undefined if all columns are excluded.
|
|
487
|
-
*/
|
|
488
|
-
const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns = []) => {
|
|
489
|
-
const stepName = step.name;
|
|
490
|
-
const stepColumns = Object.keys(step.select.columns);
|
|
491
|
-
|
|
492
|
-
// Columns in step.select.columns that should be excluded (already-defined or explicitly listed)
|
|
493
|
-
const internalExcept = stepColumns.filter(
|
|
494
|
-
column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
|
|
495
|
-
);
|
|
496
|
-
|
|
497
|
-
// Columns in excludedColumns that aren't enumerated in step.select.columns. These are
|
|
498
|
-
// wildcard-sourced columns (e.g. default GA4 export columns coming through `event_data.*`
|
|
499
|
-
// inside event_data's own select). The caller knows what to exclude; trust them.
|
|
500
|
-
// BigQuery throws at dry-run if the column doesn't exist in the source — surfaces typos.
|
|
501
|
-
// Filter out undefined/null entries (callers can pass conditional values like
|
|
502
|
-
// `cond ? 'col' : undefined` for ergonomics).
|
|
503
|
-
const externalExcept = excludedColumns.filter(
|
|
504
|
-
c => typeof c === 'string' && c.length > 0 && !stepColumns.includes(c)
|
|
505
|
-
);
|
|
506
|
-
|
|
507
|
-
const allExcept = [...internalExcept, ...externalExcept];
|
|
508
|
-
|
|
509
|
-
// If nothing is excluded, select everything
|
|
510
|
-
if (allExcept.length === 0) {
|
|
511
|
-
return `${stepName}.*`;
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
// If every enumerated column is excluded and there are no external excepts to apply,
|
|
515
|
-
// there's nothing to select via the wildcard
|
|
516
|
-
if (internalExcept.length === stepColumns.length && externalExcept.length === 0) {
|
|
517
|
-
return;
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
return `${stepName}.* except (${allExcept.join(', ')})`;
|
|
521
|
-
};
|
|
522
|
-
|
|
523
|
-
|
|
524
477
|
/**
|
|
525
478
|
* Builds a queryBuilder `select.columns` fragment that passes through every source column
|
|
526
479
|
* not already covered by an explicit columns object.
|
|
@@ -641,6 +594,38 @@ const buildEnrichments = (enrichments) => {
|
|
|
641
594
|
};
|
|
642
595
|
|
|
643
596
|
|
|
597
|
+
/**
|
|
598
|
+
* Builds a qualified pass-through fragment for spreading into a downstream SELECT's
|
|
599
|
+
* `select.columns`. For each column in `step.select.columns` not already in `alreadyCovered`,
|
|
600
|
+
* emits an entry of the form `{ <col>: '<step.name>.<col>' }`.
|
|
601
|
+
*
|
|
602
|
+
* Columns whose values in `step.select.columns` are `undefined` (the user-exclusion sentinel
|
|
603
|
+
* shape from getExcludedColumns) are skipped. Names in `alreadyCovered` that don't exist in
|
|
604
|
+
* `step.select.columns` are silently ignored — the loop only iterates `step.select.columns`,
|
|
605
|
+
* so unknown names cause no harm. This is the safety property that lets callers pass
|
|
606
|
+
* "everything that might collide" without pre-filtering.
|
|
607
|
+
*
|
|
608
|
+
* @param {Object} step - A queryBuilder step with a `name` and `select.columns` object.
|
|
609
|
+
* @param {Iterable<string>} alreadyCovered - Column names already mapped elsewhere in the
|
|
610
|
+
* downstream SELECT, plus any internal-only columns the downstream SELECT shouldn't re-emit.
|
|
611
|
+
* @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
|
|
612
|
+
*
|
|
613
|
+
* @example
|
|
614
|
+
* buildQualifiedPassThroughs(eventDataStep, ['event_date', 'session_id', 'entrances']);
|
|
615
|
+
* // → { event_name: 'event_data.event_name', user_pseudo_id: 'event_data.user_pseudo_id', ... }
|
|
616
|
+
*/
|
|
617
|
+
const buildQualifiedPassThroughs = (step, alreadyCovered) => {
|
|
618
|
+
const covered = new Set(alreadyCovered);
|
|
619
|
+
const passThroughs = {};
|
|
620
|
+
for (const [col, expr] of Object.entries(step.select.columns)) {
|
|
621
|
+
if (expr === undefined) continue;
|
|
622
|
+
if (covered.has(col)) continue;
|
|
623
|
+
passThroughs[col] = `${step.name}.${col}`;
|
|
624
|
+
}
|
|
625
|
+
return passThroughs;
|
|
626
|
+
};
|
|
627
|
+
|
|
628
|
+
|
|
644
629
|
/**
|
|
645
630
|
* Processes a date input string and returns a corresponding SQL date casting expression,
|
|
646
631
|
* or passes through BigQuery SQL statements as-is.
|
|
@@ -715,9 +700,9 @@ module.exports = {
|
|
|
715
700
|
queryBuilder,
|
|
716
701
|
isDataformTableReferenceObject,
|
|
717
702
|
setDataformContext,
|
|
718
|
-
selectOtherColumns,
|
|
719
703
|
buildPassThroughs,
|
|
720
704
|
buildEnrichments,
|
|
705
|
+
buildQualifiedPassThroughs,
|
|
721
706
|
processDate,
|
|
722
707
|
getDatasetName
|
|
723
708
|
};
|