ga4-export-fixer 0.9.0-dev.6 → 0.9.0-dev.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/index.js +20 -88
- package/utils.js +115 -48
package/package.json
CHANGED
|
@@ -320,74 +320,23 @@ ${excludedEventsSQL}`,
|
|
|
320
320
|
} : {};
|
|
321
321
|
const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
|
|
322
322
|
|
|
323
|
-
// Build enrichment-source CTEs and gather event-level join/column data.
|
|
324
|
-
//
|
|
325
|
-
const
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
const
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
const cteName = `enrich_${e.name}`;
|
|
341
|
-
// Source CTE selects joinKey columns plus the requested columns. key === value
|
|
342
|
-
// shape skips the alias clause in queryBuilder's columnsToSQL.
|
|
343
|
-
const cteCols = {};
|
|
344
|
-
for (const k of joinKeys) cteCols[k] = k;
|
|
345
|
-
for (const c of e.columns) cteCols[c] = c;
|
|
346
|
-
const sourceStep = {
|
|
347
|
-
name: cteName,
|
|
348
|
-
select: { columns: cteCols },
|
|
349
|
-
from: e.source,
|
|
350
|
-
};
|
|
351
|
-
// Opt-in dedupe: which row wins is non-deterministic — users with strict needs
|
|
352
|
-
// pre-aggregate in their source SQL.
|
|
353
|
-
if (e.dedupe) {
|
|
354
|
-
sourceStep.qualify = `row_number() over (partition by ${joinKeys.join(', ')}) = 1`;
|
|
355
|
-
}
|
|
356
|
-
enrichmentSteps.push(sourceStep);
|
|
357
|
-
|
|
358
|
-
enrichmentJoins.push({
|
|
359
|
-
type: 'left',
|
|
360
|
-
table: cteName,
|
|
361
|
-
on: `using(${joinKeys.join(', ')})`,
|
|
362
|
-
});
|
|
363
|
-
|
|
364
|
-
// Replace-or-add: each enrichment column overrides explicit select columns via JS object
|
|
365
|
-
// spread, AND joins the excludedColumns set so it suppresses overlap with the wildcard
|
|
366
|
-
// event_data.* / session_data.* expansions below.
|
|
367
|
-
for (const c of e.columns) {
|
|
368
|
-
if (enrichmentColumnNames.has(c)) {
|
|
369
|
-
const owner = enrichmentColumnOwner[c];
|
|
370
|
-
throw new Error(
|
|
371
|
-
`config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
|
|
372
|
-
`(name: '${owner.name}') both target column '${c}'. ` +
|
|
373
|
-
`Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
|
|
374
|
-
);
|
|
375
|
-
}
|
|
376
|
-
enrichmentColumns[c] = `${cteName}.${c}`;
|
|
377
|
-
enrichmentColumnNames.add(c);
|
|
378
|
-
enrichmentColumnOwner[c] = { i, name: e.name };
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
382
|
-
|
|
383
|
-
// Only forward enrichment columns to each wildcard's EXCEPT input if they actually exist
|
|
384
|
-
// in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in SELECT *
|
|
385
|
-
// EXCEPT list does not exist". After M1, Object.keys(step.select.columns) is the complete
|
|
386
|
-
// column set of both event_data and session_data — so the same predicate works for both.
|
|
387
|
-
const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
|
|
388
|
-
const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
|
|
389
|
-
const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => eventDataExplicit.has(c));
|
|
390
|
-
const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => sessionDataExplicit.has(c));
|
|
323
|
+
// Build enrichment-source CTEs and gather event-level join/column data. Item-level
|
|
324
|
+
// enrichments throw "not yet supported" inside the utility — they will arrive in a later release.
|
|
325
|
+
const { steps: enrichmentSteps, joins: enrichmentJoins, columns: enrichmentColumns,
|
|
326
|
+
columnNames: enrichmentColumnNames } = utils.buildEnrichments(mergedConfig.enrichments);
|
|
327
|
+
|
|
328
|
+
// Build the set of columns the outer SELECT already maps explicitly (so wildcards skip them)
|
|
329
|
+
// plus internal-only columns that should never reach enhanced_events.
|
|
330
|
+
const alreadyMapped = [
|
|
331
|
+
...Object.keys(finalColumnOrder),
|
|
332
|
+
...Object.keys(itemListOverrides),
|
|
333
|
+
...enrichmentColumnNames,
|
|
334
|
+
'entrances',
|
|
335
|
+
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
336
|
+
'data_is_final',
|
|
337
|
+
'export_type',
|
|
338
|
+
...itemListExcludedColumns,
|
|
339
|
+
];
|
|
391
340
|
|
|
392
341
|
// Join event_data and session_data, include additional logic
|
|
393
342
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
@@ -399,27 +348,10 @@ ${excludedEventsSQL}`,
|
|
|
399
348
|
...finalColumnOrder,
|
|
400
349
|
...itemListOverrides,
|
|
401
350
|
// event-level enrichment columns: override matching explicit columns; new columns added.
|
|
402
|
-
// Wildcard-column overlap is handled below via excludedColumns.
|
|
403
351
|
...enrichmentColumns,
|
|
404
|
-
//
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
Object.keys(finalColumnOrder),
|
|
408
|
-
[
|
|
409
|
-
'entrances',
|
|
410
|
-
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
411
|
-
'data_is_final',
|
|
412
|
-
'export_type',
|
|
413
|
-
...itemListExcludedColumns,
|
|
414
|
-
...eventDataEnrichmentExcept,
|
|
415
|
-
]
|
|
416
|
-
),
|
|
417
|
-
// get the rest of the session_data columns
|
|
418
|
-
'[sql]session_data': utils.selectOtherColumns(
|
|
419
|
-
sessionDataStep,
|
|
420
|
-
Object.keys(finalColumnOrder),
|
|
421
|
-
sessionDataEnrichmentExcept,
|
|
422
|
-
),
|
|
352
|
+
// explicit pass-throughs for the rest of event_data and session_data
|
|
353
|
+
...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
|
|
354
|
+
...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
|
|
423
355
|
// include additional columns
|
|
424
356
|
row_inserted_timestamp: 'current_timestamp()',
|
|
425
357
|
data_is_final: 'data_is_final',
|
package/utils.js
CHANGED
|
@@ -474,53 +474,6 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
474
474
|
return deepMerge(defaultConfig, inputConfig);
|
|
475
475
|
};
|
|
476
476
|
|
|
477
|
-
/**
|
|
478
|
-
* Generates a SQL selection string for a given query step, excluding columns already defined elsewhere
|
|
479
|
-
* or columns that should be excluded.
|
|
480
|
-
*
|
|
481
|
-
* This utility is helpful when joining tables/CTEs to avoid selecting duplicate or already-present columns.
|
|
482
|
-
*
|
|
483
|
-
* @param {Object} step - A queryBuilder structured step containing a `name` (CTE/table alias) and a `select.columns` object.
|
|
484
|
-
* @param {string[]} [alreadyDefinedColumns=[]] - Columns that have already been defined and should be excluded from selection.
|
|
485
|
-
* @param {string[]} [excludedColumns=[]] - Additional columns to explicitly exclude from selection.
|
|
486
|
-
* @returns {string|undefined} A SQL select string (e.g. 'stepName.*' or 'stepName.* except (col1, col2)'), or undefined if all columns are excluded.
|
|
487
|
-
*/
|
|
488
|
-
const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns = []) => {
|
|
489
|
-
const stepName = step.name;
|
|
490
|
-
const stepColumns = Object.keys(step.select.columns);
|
|
491
|
-
|
|
492
|
-
// Columns in step.select.columns that should be excluded (already-defined or explicitly listed)
|
|
493
|
-
const internalExcept = stepColumns.filter(
|
|
494
|
-
column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
|
|
495
|
-
);
|
|
496
|
-
|
|
497
|
-
// Columns in excludedColumns that aren't enumerated in step.select.columns. These are
|
|
498
|
-
// wildcard-sourced columns (e.g. default GA4 export columns coming through `event_data.*`
|
|
499
|
-
// inside event_data's own select). The caller knows what to exclude; trust them.
|
|
500
|
-
// BigQuery throws at dry-run if the column doesn't exist in the source — surfaces typos.
|
|
501
|
-
// Filter out undefined/null entries (callers can pass conditional values like
|
|
502
|
-
// `cond ? 'col' : undefined` for ergonomics).
|
|
503
|
-
const externalExcept = excludedColumns.filter(
|
|
504
|
-
c => typeof c === 'string' && c.length > 0 && !stepColumns.includes(c)
|
|
505
|
-
);
|
|
506
|
-
|
|
507
|
-
const allExcept = [...internalExcept, ...externalExcept];
|
|
508
|
-
|
|
509
|
-
// If nothing is excluded, select everything
|
|
510
|
-
if (allExcept.length === 0) {
|
|
511
|
-
return `${stepName}.*`;
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
// If every enumerated column is excluded and there are no external excepts to apply,
|
|
515
|
-
// there's nothing to select via the wildcard
|
|
516
|
-
if (internalExcept.length === stepColumns.length && externalExcept.length === 0) {
|
|
517
|
-
return;
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
return `${stepName}.* except (${allExcept.join(', ')})`;
|
|
521
|
-
};
|
|
522
|
-
|
|
523
|
-
|
|
524
477
|
/**
|
|
525
478
|
* Builds a queryBuilder `select.columns` fragment that passes through every source column
|
|
526
479
|
* not already covered by an explicit columns object.
|
|
@@ -560,6 +513,119 @@ const buildPassThroughs = (explicitColumns, sourceColumns) => {
|
|
|
560
513
|
};
|
|
561
514
|
|
|
562
515
|
|
|
516
|
+
/**
 * Builds the per-enrichment CTE definitions, JOIN clauses, and column-name mappings for the
 * declarative `enrichments` feature.
 *
 * Pure config-to-data mapping. No knowledge of downstream CTEs or specific table modules —
 * intended to be called by any table module that exposes an `enrichments` config field.
 *
 * Every entry produces one source CTE named `enrich_<name>` that selects the join key
 * column(s) plus the requested columns from `source`, and one LEFT JOIN onto that CTE
 * with a `using(<joinKeys>)` condition.
 *
 * Generation-time throws:
 *   - level: 'item' (not yet supported; see design_docs/planned/data-enrichments.md).
 *   - Two entries sharing the same `name` (both would generate the same `enrich_<name>` CTE).
 *   - One entry listing the same column more than once.
 *   - Enrichment-vs-enrichment column collisions (two enrichments targeting the same column).
 *
 * @param {Array<Object>} [enrichments] - Enrichment entries. Each entry has the fields
 *   { name, level, source, joinKey, columns, dedupe? }. `level` defaults to 'event';
 *   `joinKey` is a single column name or an array of column names. `null`/`undefined`
 *   is treated as an empty list.
 * @returns {Object} A struct with five fields:
 *   - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per entry).
 *   - `joins` — array of LEFT JOIN clauses to attach downstream (one per entry).
 *   - `columns` — map of `{ <enrichmentColumn>: 'enrich_<name>.<col>' }` for spreading into a
 *     downstream SELECT's `select.columns`.
 *   - `columnNames` — Set of all enrichment column names.
 *   - `columnOwner` — map of `{ <column>: { i, name } }` recording which enrichment owns each
 *     column.
 *
 * @throws {Error} If any entry has `level: 'item'`.
 * @throws {Error} If two entries share the same `name`.
 * @throws {Error} If one entry lists the same column twice.
 * @throws {Error} If two enrichments target the same column name (message names both entries).
 *
 * @example
 * const { steps, joins, columns, columnNames } = buildEnrichments(config.enrichments);
 */
const buildEnrichments = (enrichments) => {
    const steps = [];
    const joins = [];
    const columns = {};
    const columnNames = new Set();
    const columnOwner = {};
    // CTE name -> index of the entry that generated it, used to reject duplicate names.
    const cteOwner = new Map();

    for (const [i, e] of (enrichments ?? []).entries()) {
        const level = e.level ?? 'event';
        if (level === 'item') {
            throw new Error(
                `config.enrichments[${i}] uses level: 'item', which is not yet supported in this version. ` +
                `Item-level enrichments will ship in a future release; see design_docs/planned/data-enrichments.md.`
            );
        }
        const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
        const cteName = `enrich_${e.name}`;

        // The CTE alias is derived from the entry name, so duplicate names would emit two
        // CTEs with the same alias. Fail here instead of producing ambiguous SQL.
        if (cteOwner.has(cteName)) {
            throw new Error(
                `config.enrichments[${i}] and config.enrichments[${cteOwner.get(cteName)}] share the name '${e.name}', ` +
                `so both would generate the CTE '${cteName}'. Enrichment names must be unique.`
            );
        }
        cteOwner.set(cteName, i);

        // Source CTE selects joinKey columns plus the requested columns. key === value
        // shape skips the alias clause in queryBuilder's columnsToSQL.
        const cteCols = {};
        for (const k of joinKeys) cteCols[k] = k;
        for (const c of e.columns) cteCols[c] = c;
        const sourceStep = { name: cteName, select: { columns: cteCols }, from: e.source };
        // Opt-in dedupe: which row wins is non-deterministic — users with strict needs
        // pre-aggregate in their source SQL.
        if (e.dedupe) {
            sourceStep.qualify = `row_number() over (partition by ${joinKeys.join(', ')}) = 1`;
        }
        steps.push(sourceStep);

        joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });

        for (const c of e.columns) {
            if (columnNames.has(c)) {
                const owner = columnOwner[c];
                if (owner.i === i) {
                    // Same entry listed the column twice: report that, rather than a
                    // collision between the entry and itself.
                    throw new Error(
                        `config.enrichments[${i}] (name: '${e.name}') lists column '${c}' more than once in columns. ` +
                        `Remove the duplicate entry.`
                    );
                }
                throw new Error(
                    `config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
                    `(name: '${owner.name}') both target column '${c}'. ` +
                    `Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
                );
            }
            columns[c] = `${cteName}.${c}`;
            columnNames.add(c);
            columnOwner[c] = { i, name: e.name };
        }
    }

    return { steps, joins, columns, columnNames, columnOwner };
};
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
/**
 * Produces the qualified pass-through entries for a step, ready to be spread into the
 * `select.columns` of a downstream SELECT.
 *
 * Walks `step.select.columns` in order and, for each column that survives the two filters
 * below, adds `{ <col>: '<step.name>.<col>' }` to the result:
 *   - the column's value in `step.select.columns` is not `undefined`;
 *   - the column name is not a member of `alreadyCovered`.
 *
 * Only the keys of `step.select.columns` are ever visited, so names in `alreadyCovered`
 * that the step does not define (including non-string members such as `undefined`) have
 * no effect on the output.
 *
 * @param {Object} step - A queryBuilder step with a `name` and a `select.columns` object.
 * @param {Iterable<string>} alreadyCovered - Column names the downstream SELECT must not
 *   receive from this step.
 * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
 *
 * @example
 * buildQualifiedPassThroughs(eventDataStep, ['event_date', 'session_id', 'entrances']);
 * // → { event_name: 'event_data.event_name', user_pseudo_id: 'event_data.user_pseudo_id', ... }
 */
const buildQualifiedPassThroughs = (step, alreadyCovered) => {
    const skipped = new Set(alreadyCovered);

    return Object.entries(step.select.columns).reduce((result, [columnName, expression]) => {
        const isEmitted = expression !== undefined && !skipped.has(columnName);
        if (isEmitted) {
            result[columnName] = `${step.name}.${columnName}`;
        }
        return result;
    }, {});
};
|
|
627
|
+
|
|
628
|
+
|
|
563
629
|
/**
|
|
564
630
|
* Processes a date input string and returns a corresponding SQL date casting expression,
|
|
565
631
|
* or passes through BigQuery SQL statements as-is.
|
|
@@ -634,8 +700,9 @@ module.exports = {
|
|
|
634
700
|
queryBuilder,
|
|
635
701
|
isDataformTableReferenceObject,
|
|
636
702
|
setDataformContext,
|
|
637
|
-
selectOtherColumns,
|
|
638
703
|
buildPassThroughs,
|
|
704
|
+
buildEnrichments,
|
|
705
|
+
buildQualifiedPassThroughs,
|
|
639
706
|
processDate,
|
|
640
707
|
getDatasetName
|
|
641
708
|
};
|