ga4-export-fixer 0.8.0 → 0.9.0-dev.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/utils.js CHANGED
@@ -389,6 +389,16 @@ const setDataformContext = (ctx, config) => {
389
389
  }
390
390
  }
391
391
 
392
+ // resolve Dataform refs in enrichments[].source the same way as sourceTable
393
+ if (Array.isArray(config.enrichments)) {
394
+ config.enrichments = config.enrichments.map(e => {
395
+ if (isDataformTableReferenceObject(e.source)) {
396
+ return { ...e, source: ctx.ref(e.source) };
397
+ }
398
+ return e;
399
+ });
400
+ }
401
+
392
402
  config.self = ctx.self();
393
403
  config.incremental = ctx.incremental();
394
404
 
@@ -465,37 +475,162 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
465
475
  };
466
476
 
467
477
  /**
468
- * Generates a SQL selection string for a given query step, excluding columns already defined elsewhere
469
- * or columns that should be excluded.
478
+ * Builds a queryBuilder `select.columns` fragment that passes through every source column
479
+ * not already covered by an explicit columns object.
470
480
  *
471
- * This utility is helpful when joining tables/CTEs to avoid selecting duplicate or already-present columns.
472
- *
473
- * @param {Object} step - A queryBuilder structured step containing a `name` (CTE/table alias) and a `select.columns` object.
474
- * @param {string[]} [alreadyDefinedColumns=[]] - Columns that have already been defined and should be excluded from selection.
475
- * @param {string[]} [excludedColumns=[]] - Additional columns to explicitly exclude from selection.
476
- * @returns {string|undefined} A SQL select string (e.g. 'stepName.*' or 'stepName.* except (col1, col2)'), or undefined if all columns are excluded.
481
+ * A source column is considered "covered" and skipped from pass-throughs when it appears as:
482
+ * - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
483
+ * sentinel like `{ event_dimensions: undefined }`), OR
484
+ * - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
485
+ * rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
486
+ *
487
+ * Values that are SQL expressions, function calls, or non-strings never count as coverage —
488
+ * they reference the source column internally but the column itself is still available as a
489
+ * pass-through. (Coverage is checked by exact equality, so 'extract(datetime from ...)'
490
+ * never matches a bare column name.)
491
+ *
492
+ * @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
493
+ * @param {Iterable<string>} sourceColumns - Column names available on the source schema.
494
+ * @returns {Object} A map of `{ column: column }` entries for every source column not covered.
495
+ *
496
+ * @example
497
+ * buildPassThroughs(
498
+ * { event_name: 'event_name', user_traffic_source: 'traffic_source' },
499
+ * ['event_name', 'traffic_source', 'device', 'geo']
500
+ * );
501
+ * // → { device: 'device', geo: 'geo' }
477
502
  */
478
- const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns = []) => {
479
- const stepName = step.name;
480
- const stepColumns = Object.keys(step.select.columns);
481
-
482
- // Determine which columns to exclude: those already defined or explicitly excluded
483
- const exceptColumns = stepColumns.filter(
484
- column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
485
- );
486
-
487
- // If none of the columns have been defined or excluded, select them all
488
- if (exceptColumns.length === 0) {
489
- return `${stepName}.*`;
503
/**
 * Builds a `{ column: column }` pass-through map for every source column that is not
 * already covered by `explicitColumns`.
 *
 * A source column counts as covered when it is exactly equal to one of the keys or one of
 * the values of `explicitColumns`. Matching is by exact equality, so a value that merely
 * mentions the column inside a longer SQL expression does not cover it.
 *
 * @param {Object} explicitColumns - Explicit `select.columns` entries. Nullish is treated
 *   as an empty object.
 * @param {Iterable<string>} sourceColumns - Column names available on the source. Nullish
 *   is treated as an empty list.
 * @returns {Object} A map of `{ column: column }` entries for every uncovered source column.
 */
const buildPassThroughs = (explicitColumns, sourceColumns) => {
  const explicit = explicitColumns ?? {};
  // A single Set gives one constant-time lookup per source column instead of two linear
  // array scans. Set membership uses the same equality rule as Array.prototype.includes.
  const covered = new Set([...Object.keys(explicit), ...Object.values(explicit)]);
  const passThroughs = {};
  for (const column of sourceColumns ?? []) {
    if (!covered.has(column)) {
      passThroughs[column] = column;
    }
  }
  return passThroughs;
};
491
514
 
492
- // If all columns have been defined or excluded, do not select any
493
- if (exceptColumns.length === stepColumns.length) {
494
- return;
515
+
516
+ /**
517
+ * Builds the per-enrichment CTE definitions, JOIN clauses, and column-name mappings for the
518
+ * declarative `enrichments` feature. Routes event-level and item-level entries through
519
+ * separate output channels so the caller can attach them to different downstream CTEs.
520
+ *
521
+ * Pure config-to-data mapping. No knowledge of downstream CTEs or specific table modules —
522
+ * intended to be called by any table module that exposes an `enrichments` config field.
523
+ *
524
+ * Encapsulates one generation-time throw:
525
+ * - Same-level enrichment-vs-enrichment column collisions (two event-level enrichments or
526
+ * two item-level enrichments targeting the same column). Cross-level same-name is allowed —
527
+ * the two columns target structurally distinct slots (`enhanced_events.<col>` vs
528
+ * `items[].<col>`).
529
+ *
530
+ * @param {Array<Object>} enrichments - Validated enrichment entries. Each entry has fields:
531
+ * { name, level, source, joinKey, columns, dedupe? }. `level` is 'event' (default) or 'item'.
532
+ * @returns {Object} A struct with four fields:
533
+ * - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per
534
+ * entry, regardless of level — all source CTEs go to the top of the pipeline).
535
+ * - `event` — { joins, columns, columnNames } for event-level enrichments. Caller attaches
536
+ * `joins` to the event-grained downstream CTE (e.g. `enhanced_events`) and spreads `columns`
537
+ * into that CTE's `select.columns`.
538
+ * - `item` — { joins, columns, columnNames } for item-level enrichments. Caller attaches
539
+ * `joins` to the item-grained downstream CTE (e.g. `items_rebuilt`) and folds `columns`
540
+ * into that CTE's struct construction.
541
+ * - `columnOwner` — map of `{ <column>: { i, name, level } }` recording which enrichment
542
+ * owns each column. The `level` field distinguishes cross-level same-name entries.
543
+ *
544
+ * @throws {Error} If two same-level enrichments target the same column name (with both
545
+ * enrichment names and the conflicting column in the error message).
546
+ *
547
+ * @example
548
+ * const { steps, event, item } = buildEnrichments(config.enrichments);
549
+ * // event.joins → attach to enhanced_events; event.columns → spread into enhanced_events
550
+ * // item.joins → attach to items_rebuilt; item.columns → fold into items struct
551
+ */
552
/**
 * Turns a list of enrichment entries into source-CTE steps, left-join clauses, and
 * qualified column mappings, split into an `event` channel and an `item` channel.
 *
 * For every entry one `enrich_<name>` step is produced that selects the join key column(s)
 * plus the requested columns from `entry.source`. The matching channel receives a
 * `left` join `using(<joinKeys>)` and a `{ <col>: 'enrich_<name>.<col>' }` mapping per column.
 *
 * @param {Array<Object>} enrichments - Entries of shape
 *   `{ name, level, source, joinKey, columns, dedupe? }`. `level` defaults to 'event';
 *   `joinKey` may be a single name or an array of names. Nullish is treated as empty.
 * @returns {Object} `{ steps, event, item, columnOwner }` where `event` and `item` are
 *   `{ joins, columns, columnNames }` and `columnOwner` maps a column name to
 *   `{ i, name, level }` of the last enrichment that registered it (any level).
 * @throws {TypeError} If an entry's `level` is neither 'event' nor 'item'.
 * @throws {Error} If two enrichments at the same level target the same column name.
 */
const buildEnrichments = (enrichments) => {
  const steps = [];
  const channels = {
    event: { joins: [], columns: {}, columnNames: new Set() },
    item: { joins: [], columns: {}, columnNames: new Set() },
  };
  // Ownership tracked per level, used only for collision diagnostics. The returned
  // `columnOwner` is keyed by column name alone, so a cross-level same-name entry
  // overwrites it; looking the owner up there could name an enrichment from the other level.
  const ownersByLevel = { event: new Map(), item: new Map() };
  const columnOwner = {};

  for (const [i, e] of (enrichments ?? []).entries()) {
    const level = e.level ?? 'event';
    if (!Object.prototype.hasOwnProperty.call(channels, level)) {
      // Fail with a readable message rather than a property access on undefined.
      throw new TypeError(
        `config.enrichments[${i}] (name: '${e.name}') has unsupported level '${level}'. ` +
        `Expected 'event' or 'item'.`
      );
    }
    const channel = channels[level];
    const levelOwners = ownersByLevel[level];
    const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
    const joinKeyList = joinKeys.join(', ');
    const cteName = `enrich_${e.name}`;

    // Source CTE selects joinKey columns plus the requested columns, each mapped to itself
    // (key === value).
    const cteCols = {};
    for (const k of joinKeys) cteCols[k] = k;
    for (const c of e.columns) cteCols[c] = c;
    const sourceStep = { name: cteName, select: { columns: cteCols }, from: e.source };
    // Opt-in dedupe: the window has no ORDER BY, so which row wins is non-deterministic.
    if (e.dedupe) {
      sourceStep.qualify = `row_number() over (partition by ${joinKeyList}) = 1`;
    }
    steps.push(sourceStep);

    channel.joins.push({ type: 'left', table: cteName, on: `using(${joinKeyList})` });

    for (const c of e.columns) {
      // Same-level collision throw. Cross-level same-name is allowed: the two levels keep
      // separate `columns` maps.
      if (channel.columnNames.has(c)) {
        const owner = levelOwners.get(c);
        throw new Error(
          `config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
          `(name: '${owner.name}') both target column '${c}' at level '${level}'. ` +
          `Two enrichments cannot write the same column at the same level; rename one in source SQL or pick a different name.`
        );
      }
      channel.columns[c] = `${cteName}.${c}`;
      channel.columnNames.add(c);
      const ownerRecord = { i, name: e.name, level };
      levelOwners.set(c, ownerRecord);
      // Keyed by column name only: when the same name appears at both levels the later
      // entry wins, and `level` tells the two apart.
      columnOwner[c] = ownerRecord;
    }
  }

  return { steps, event: channels.event, item: channels.item, columnOwner };
};
603
+
604
+
605
+ /**
606
+ * Builds a qualified pass-through fragment for spreading into a downstream SELECT's
607
+ * `select.columns`. For each column in `step.select.columns` not already in `alreadyCovered`,
608
+ * emits an entry of the form `{ <col>: '<step.name>.<col>' }`.
609
+ *
610
+ * Columns whose values in `step.select.columns` are `undefined` (the user-exclusion sentinel
611
+ * shape from getExcludedColumns) are skipped. Names in `alreadyCovered` that don't exist in
612
+ * `step.select.columns` are silently ignored — the loop only iterates `step.select.columns`,
613
+ * so unknown names cause no harm. This is the safety property that lets callers pass
614
+ * "everything that might collide" without pre-filtering.
615
+ *
616
+ * @param {Object} step - A queryBuilder step with a `name` and `select.columns` object.
617
+ * @param {Iterable<string>} alreadyCovered - Column names already mapped elsewhere in the
618
+ * downstream SELECT, plus any internal-only columns the downstream SELECT shouldn't re-emit.
619
+ * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
620
+ *
621
+ * @example
622
+ * buildQualifiedPassThroughs(eventDataStep, ['event_date', 'session_id', 'entrances']);
623
+ * // → { event_name: 'event_data.event_name', user_pseudo_id: 'event_data.user_pseudo_id', ... }
624
+ */
625
/**
 * Produces `{ <col>: '<step.name>.<col>' }` entries for the columns of a step that a
 * downstream SELECT still needs to pass through.
 *
 * A column of `step.select.columns` is left out when its value is `undefined` or when its
 * name is listed in `alreadyCovered`. Only the step's own columns are visited, so names in
 * `alreadyCovered` that the step does not define have no effect.
 *
 * @param {Object} step - Object with a `name` and a `select.columns` map.
 * @param {Iterable<string>} alreadyCovered - Column names to leave out of the result.
 * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
 */
const buildQualifiedPassThroughs = (step, alreadyCovered) => {
  const skipped = new Set(alreadyCovered);
  const stepColumns = step.select.columns;
  return Object.keys(stepColumns).reduce((qualified, name) => {
    const isExcluded = stepColumns[name] === undefined;
    if (!isExcluded && !skipped.has(name)) {
      qualified[name] = `${step.name}.${name}`;
    }
    return qualified;
  }, {});
};
500
635
 
501
636
 
@@ -573,7 +708,9 @@ module.exports = {
573
708
  queryBuilder,
574
709
  isDataformTableReferenceObject,
575
710
  setDataformContext,
576
- selectOtherColumns,
711
+ buildPassThroughs,
712
+ buildEnrichments,
713
+ buildQualifiedPassThroughs,
577
714
  processDate,
578
715
  getDatasetName
579
716
  };