ga4-export-fixer 0.9.0-dev.8 → 0.9.0-dev.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -537,10 +537,10 @@ For typical use cases this is the right tool; reach for `customSteps` only when
537
537
  | `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
538
538
  | `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
539
539
  | `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
540
- | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
540
+ | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
541
541
  | `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
542
542
 
543
- **Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
543
+ **Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`. There is no opt-out — for hard-replace semantics (NULL on missed JOIN), pre-aggregate or sentinel-fill in your source SQL.
544
544
 
545
545
  **Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
546
546
 
@@ -571,7 +571,7 @@ enrichments: [
571
571
  ],
572
572
  ```
573
573
 
574
- **Example** — fix a promoted event parameter via enrichment (replacement case):
574
+ **Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
575
575
 
576
576
  ```javascript
577
577
  {
@@ -582,7 +582,7 @@ enrichments: [
582
582
  level: 'event',
583
583
  source: { schema: 'analytics', name: 'page_title_overrides' },
584
584
  joinKey: 'page_location',
585
- columns: ['page_title'], // overlaps the promoted column → replaces it
585
+ columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
586
586
  },
587
587
  ],
588
588
  }
@@ -590,7 +590,7 @@ enrichments: [
590
590
 
591
591
  > **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
592
592
 
593
- > **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Replaced by enrichment '<name>' (...). Original: <description>` for replacements). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
593
+ > **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
594
594
 
595
595
  > **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
596
596
 
package/documentation.js CHANGED
@@ -186,7 +186,7 @@ const getColumnDescriptions = (config, columnMetadata) => {
186
186
  ? existing.description
187
187
  : null;
188
188
  const newDesc = existingText
189
- ? `Replaced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}). Original: ${existingText}`
189
+ ? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
190
190
  : `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
191
191
  // If the original was a struct-shaped entry, preserve the structure but replace the description.
192
192
  // Otherwise, set as a plain string.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.8",
3
+ "version": "0.9.0-dev.9",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -325,8 +325,29 @@ ${excludedEventsSQL}`,
325
325
  const { steps: enrichmentSteps, joins: enrichmentJoins, columns: enrichmentColumns,
326
326
  columnNames: enrichmentColumnNames } = utils.buildEnrichments(mergedConfig.enrichments);
327
327
 
328
- // Build the set of columns the outer SELECT already maps explicitly (so wildcards skip them)
329
- // plus internal-only columns that should never reach enhanced_events.
328
+ // Wrap overlapping enrichment columns in coalesce(enrich_<name>.<col>, <original>) so a
329
+ // missed JOIN falls back to the existing value. Purely additive columns (no overlap)
330
+ // pass through unchanged. Source-of-original precedence matches the final SELECT's spread
331
+ // order: itemListOverrides first (overrides finalColumnOrder for `items`), then
332
+ // session_data (wins over event_data in getFinalColumnOrder when both have the column).
333
+ // See design_docs/planned/data-enrichments.md Q13.
334
+ const wrappedEnrichmentColumns = {};
335
+ for (const [col, enrichExpr] of Object.entries(enrichmentColumns)) {
336
+ let originalExpr;
337
+ if (col in itemListOverrides) {
338
+ originalExpr = itemListOverrides[col];
339
+ } else if (col in sessionDataStep.select.columns) {
340
+ originalExpr = `session_data.${col}`;
341
+ } else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
342
+ originalExpr = `event_data.${col}`;
343
+ }
344
+ wrappedEnrichmentColumns[col] = originalExpr
345
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
346
+ : enrichExpr;
347
+ }
348
+
349
+ // List all column names that have already been defined or should be left out
350
+ // Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
330
351
  const alreadyMapped = [
331
352
  ...Object.keys(finalColumnOrder),
332
353
  ...Object.keys(itemListOverrides),
@@ -347,8 +368,8 @@ ${excludedEventsSQL}`,
347
368
  // get the most important columns in the correct order
348
369
  ...finalColumnOrder,
349
370
  ...itemListOverrides,
350
- // event-level enrichment columns: override matching explicit columns; new columns added.
351
- ...enrichmentColumns,
371
+ // event-level enrichment columns: coalesce with the original when overlapping; otherwise add.
372
+ ...wrappedEnrichmentColumns,
352
373
  // explicit pass-throughs for the rest of event_data and session_data
353
374
  ...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
354
375
  ...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
@@ -370,7 +391,7 @@ ${excludedEventsSQL}`,
370
391
  table: 'session_data',
371
392
  on: 'using(session_id)'
372
393
  },
373
- // Event-level enrichment joins go last so they apply on top of the package's own joins.
394
+ // The left joins for the enrichment CTEs
374
395
  ...enrichmentJoins,
375
396
  ],
376
397
  where: helpers.incrementalDateFilter(mergedConfig)
@@ -384,10 +405,7 @@ ${excludedEventsSQL}`,
384
405
  enhancedEventsStep,
385
406
  ];
386
407
 
387
- // Layer 2 validation: customSteps name must not collide with package step names.
388
- // Reserved set is derived from packageSteps at runtime (single source of truth) — what
389
- // is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on,
390
- // and enrich_* names exist only when enrichments are configured).
408
+ // Ensure that the custom step names don't collide with the default or data enrichment step names
391
409
  const customSteps = mergedConfig.customSteps ?? [];
392
410
  if (customSteps.length > 0) {
393
411
  const reservedNames = new Set(packageSteps.map(s => s.name));
@@ -401,6 +419,7 @@ ${excludedEventsSQL}`,
401
419
  }
402
420
  }
403
421
 
422
+ // Include custom steps last in the list
404
423
  const steps = [...packageSteps, ...customSteps];
405
424
 
406
425
  return utils.queryBuilder(steps);