ga4-export-fixer 0.8.0 → 0.9.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +129 -8
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +315 -262
- package/package.json +8 -5
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +216 -100
- package/tables/ga4EventsEnhanced/validation.js +99 -4
- package/utils.js +163 -26
package/utils.js
CHANGED
|
@@ -389,6 +389,16 @@ const setDataformContext = (ctx, config) => {
|
|
|
389
389
|
}
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
// resolve Dataform refs in enrichments[].source the same way as sourceTable
|
|
393
|
+
if (Array.isArray(config.enrichments)) {
|
|
394
|
+
config.enrichments = config.enrichments.map(e => {
|
|
395
|
+
if (isDataformTableReferenceObject(e.source)) {
|
|
396
|
+
return { ...e, source: ctx.ref(e.source) };
|
|
397
|
+
}
|
|
398
|
+
return e;
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
|
|
392
402
|
config.self = ctx.self();
|
|
393
403
|
config.incremental = ctx.incremental();
|
|
394
404
|
|
|
@@ -465,37 +475,162 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
465
475
|
};
|
|
466
476
|
|
|
467
477
|
/**
 * Builds a queryBuilder `select.columns` fragment that passes through every source column
 * not already covered by an explicit columns object.
 *
 * A source column is considered "covered" — and skipped from pass-throughs — when it appears as:
 *   - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
 *     sentinel like `{ event_dimensions: undefined }`), OR
 *   - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
 *     rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
 *
 * Values that are SQL expressions, function calls, or non-strings never count as coverage —
 * they reference the source column internally but the column itself is still available as a
 * pass-through. (Membership is an exact-value match, so 'extract(datetime from ...)' never
 * matches a bare column name.)
 *
 * @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
 * @param {Iterable<string>} sourceColumns - Column names available on the source schema.
 * @returns {Object} A map of `{ column: column }` entries for every source column not covered.
 *
 * @example
 * buildPassThroughs(
 *   { event_name: 'event_name', user_traffic_source: 'traffic_source' },
 *   ['event_name', 'traffic_source', 'device', 'geo']
 * );
 * // → { device: 'device', geo: 'geo' }
 */
const buildPassThroughs = (explicitColumns, sourceColumns) => {
    // Keys and values cover a column in exactly the same way, so a single Set holds both.
    // Building it once keeps the loop linear instead of rescanning two arrays per column;
    // Set#has and Array#includes both compare with SameValueZero, so matches are unchanged.
    const covered = new Set([
        ...Object.keys(explicitColumns),
        ...Object.values(explicitColumns),
    ]);

    const passThroughs = {};
    for (const column of sourceColumns) {
        if (!covered.has(column)) {
            passThroughs[column] = column;
        }
    }
    return passThroughs;
};
|
|
491
514
|
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
515
|
+
|
|
516
|
+
/**
 * Builds the per-enrichment CTE definitions, JOIN clauses, and column-name mappings for the
 * declarative `enrichments` feature. Routes event-level and item-level entries through
 * separate output channels so the caller can attach them to different downstream CTEs.
 *
 * Pure config-to-data mapping. No knowledge of downstream CTEs or specific table modules —
 * intended to be called by any table module that exposes an `enrichments` config field.
 *
 * Generation-time throws:
 *   - Same-level enrichment-vs-enrichment column collisions (two event-level enrichments or
 *     two item-level enrichments targeting the same column). Cross-level same-name is allowed —
 *     the two columns target structurally distinct slots (`enhanced_events.<col>` vs
 *     `items[].<col>`).
 *   - An entry whose `level` is neither 'event' nor 'item'.
 *
 * @param {Array<Object>} enrichments - Validated enrichment entries. Each entry has fields:
 *   { name, level, source, joinKey, columns, dedupe? }. `level` is 'event' (default) or 'item'.
 *   `null`/`undefined` is treated as an empty list.
 * @returns {Object} A struct with four fields:
 *   - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per
 *     entry, regardless of level — all source CTEs go to the top of the pipeline).
 *   - `event` — { joins, columns, columnNames } for event-level enrichments. Caller attaches
 *     `joins` to the event-grained downstream CTE (e.g. `enhanced_events`) and spreads `columns`
 *     into that CTE's `select.columns`.
 *   - `item` — { joins, columns, columnNames } for item-level enrichments. Caller attaches
 *     `joins` to the item-grained downstream CTE (e.g. `items_rebuilt`) and folds `columns`
 *     into that CTE's struct construction.
 *   - `columnOwner` — map of `{ <column>: { i, name, level } }` recording which enrichment
 *     owns each column. When the same name is used at both levels the later entry wins; the
 *     `level` field distinguishes cross-level same-name entries.
 *
 * @throws {Error} If two same-level enrichments target the same column name (with both
 *   enrichment names and the conflicting column in the error message), or if an entry
 *   declares an unknown `level`.
 *
 * @example
 * const { steps, event, item } = buildEnrichments(config.enrichments);
 * // event.joins → attach to enhanced_events; event.columns → spread into enhanced_events
 * // item.joins  → attach to items_rebuilt;   item.columns  → fold into items struct
 */
const buildEnrichments = (enrichments) => {
    const steps = [];
    const channels = {
        event: { joins: [], columns: {}, columnNames: new Set() },
        item: { joins: [], columns: {}, columnNames: new Set() },
    };
    const columnOwner = {};

    // Per-level ownership, used only to build the collision diagnostic. `columnOwner` is
    // keyed by column name alone and is overwritten when the same name appears at the other
    // level, so it cannot be trusted to name the same-level enrichment that collided.
    const ownersByLevel = { event: new Map(), item: new Map() };

    for (const [i, e] of (enrichments ?? []).entries()) {
        const level = e.level ?? 'event';
        // Own-property check so names like 'constructor' do not resolve via the prototype.
        if (!Object.prototype.hasOwnProperty.call(channels, level)) {
            throw new Error(
                `config.enrichments[${i}] (name: '${e.name}') has unknown level '${level}'. ` +
                `Expected 'event' or 'item'.`
            );
        }
        const channel = channels[level];
        const levelOwners = ownersByLevel[level];
        const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
        const joinKeyList = joinKeys.join(', ');
        const cteName = `enrich_${e.name}`;

        // Source CTE selects joinKey columns plus the requested columns. key === value
        // shape skips the alias clause in queryBuilder's columnsToSQL.
        const cteCols = {};
        for (const k of joinKeys) cteCols[k] = k;
        for (const c of e.columns) cteCols[c] = c;
        const sourceStep = { name: cteName, select: { columns: cteCols }, from: e.source };
        // Opt-in dedupe: which row wins is non-deterministic — users with strict needs
        // pre-aggregate in their source SQL.
        if (e.dedupe) {
            sourceStep.qualify = `row_number() over (partition by ${joinKeyList}) = 1`;
        }
        steps.push(sourceStep);

        channel.joins.push({ type: 'left', table: cteName, on: `using(${joinKeyList})` });

        for (const c of e.columns) {
            // Same-level collision throw. Cross-level same-name is allowed because the two
            // columns target structurally distinct output slots (event_data vs items[]).
            if (channel.columnNames.has(c)) {
                const owner = levelOwners.get(c);
                throw new Error(
                    `config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
                    `(name: '${owner.name}') both target column '${c}' at level '${level}'. ` +
                    `Two enrichments cannot write the same column at the same level; rename one in source SQL or pick a different name.`
                );
            }
            channel.columns[c] = `${cteName}.${c}`;
            channel.columnNames.add(c);
            levelOwners.set(c, { i, name: e.name });
            // columnOwner is keyed by column name; if the same name appears at different
            // levels, the second-writer entry wins, but we record level so diagnostics
            // distinguish them. Same-level collisions throw above before reaching here.
            columnOwner[c] = { i, name: e.name, level };
        }
    }

    return { steps, event: channels.event, item: channels.item, columnOwner };
};
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
/**
 * Builds a qualified pass-through fragment for spreading into a downstream SELECT's
 * `select.columns`. For each column in `step.select.columns` not already in `alreadyCovered`,
 * emits an entry of the form `{ <col>: '<step.name>.<col>' }`.
 *
 * Columns whose values in `step.select.columns` are `undefined` (the user-exclusion sentinel
 * shape from getExcludedColumns) are skipped. Names in `alreadyCovered` that don't exist in
 * `step.select.columns` are silently ignored — the loop only iterates `step.select.columns`,
 * so unknown names cause no harm. This is the safety property that lets callers pass
 * "everything that might collide" without pre-filtering.
 *
 * @param {Object} step - A queryBuilder step with a `name` and `select.columns` object.
 * @param {Iterable<string>} alreadyCovered - Column names already mapped elsewhere in the
 *   downstream SELECT, plus any internal-only columns the downstream SELECT shouldn't re-emit.
 * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
 *
 * @example
 * buildQualifiedPassThroughs(eventDataStep, ['event_date', 'session_id', 'entrances']);
 * // → { event_name: 'event_data.event_name', user_pseudo_id: 'event_data.user_pseudo_id', ... }
 */
const buildQualifiedPassThroughs = (step, alreadyCovered) => {
    // Copy into a Set so any iterable is accepted and lookups are constant-time.
    const covered = new Set(alreadyCovered);
    const passThroughs = {};
    for (const [col, expr] of Object.entries(step.select.columns)) {
        // Skip exclusion sentinels (undefined value) and anything the caller already maps.
        if (expr === undefined || covered.has(col)) continue;
        passThroughs[col] = `${step.name}.${col}`;
    }
    return passThroughs;
};
|
|
500
635
|
|
|
501
636
|
|
|
@@ -573,7 +708,9 @@ module.exports = {
|
|
|
573
708
|
queryBuilder,
|
|
574
709
|
isDataformTableReferenceObject,
|
|
575
710
|
setDataformContext,
|
|
576
|
-
|
|
711
|
+
buildPassThroughs,
|
|
712
|
+
buildEnrichments,
|
|
713
|
+
buildQualifiedPassThroughs,
|
|
577
714
|
processDate,
|
|
578
715
|
getDatasetName
|
|
579
716
|
};
|