ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -4
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +91 -39
- package/package.json +5 -2
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +204 -91
- package/tables/ga4EventsEnhanced/validation.js +99 -4
- package/utils.js +163 -26
|
@@ -201,11 +201,11 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
// customSteps - optional array of queryBuilder step objects appended to the pipeline
|
|
205
|
-
//
|
|
204
|
+
// customSteps - optional array of queryBuilder step objects appended to the pipeline.
|
|
205
|
+
// Config-shape checks only: array, objects with non-empty name, no duplicates within customSteps.
|
|
206
206
|
// Step-shape validation (clause keys, etc.) deferred to queryBuilder.
|
|
207
|
-
// Collision-with-package-names check deferred to _generateEnhancedEventsSQL
|
|
208
|
-
//
|
|
207
|
+
// Collision-with-package-names check deferred to _generateEnhancedEventsSQL, since the
|
|
208
|
+
// reserved set is config-dependent (e.g. item_list_* only exist when itemListAttribution is on).
|
|
209
209
|
if (config.customSteps !== undefined) {
|
|
210
210
|
if (!Array.isArray(config.customSteps)) {
|
|
211
211
|
throw new Error(`config.customSteps must be an array. Received: ${JSON.stringify(config.customSteps)}`);
|
|
@@ -225,6 +225,101 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
225
225
|
seenNames.add(step.name);
|
|
226
226
|
}
|
|
227
227
|
}
|
|
228
|
+
|
|
229
|
+
// enrichments - optional array of declarative external-data enrichment specs.
|
|
230
|
+
// Config-shape checks only. Reserved-name collision and item-level joinKey resolution
|
|
231
|
+
// happen in _generateEnhancedEventsSQL, where the reserved set and item-level join targets
|
|
232
|
+
// are derived from the resolved config.
|
|
233
|
+
if (config.enrichments !== undefined) {
|
|
234
|
+
if (!Array.isArray(config.enrichments)) {
|
|
235
|
+
throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
|
|
236
|
+
}
|
|
237
|
+
const validLevels = ['event', 'item'];
|
|
238
|
+
const seenNames = new Set();
|
|
239
|
+
for (let i = 0; i < config.enrichments.length; i++) {
|
|
240
|
+
const entry = config.enrichments[i];
|
|
241
|
+
if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
|
|
242
|
+
throw new Error(`config.enrichments[${i}] must be a non-null object. Received: ${JSON.stringify(entry)}`);
|
|
243
|
+
}
|
|
244
|
+
if (typeof entry.name !== 'string' || !entry.name.trim()) {
|
|
245
|
+
throw new Error(`config.enrichments[${i}].name must be a non-empty string. Received: ${JSON.stringify(entry.name)}`);
|
|
246
|
+
}
|
|
247
|
+
if (seenNames.has(entry.name)) {
|
|
248
|
+
throw new Error(`config.enrichments contains duplicate name '${entry.name}'. Each enrichments entry must have a unique name.`);
|
|
249
|
+
}
|
|
250
|
+
seenNames.add(entry.name);
|
|
251
|
+
if (entry.level !== undefined && !validLevels.includes(entry.level)) {
|
|
252
|
+
throw new Error(`config.enrichments[${i}].level must be one of: ${validLevels.join(', ')}. Received: ${JSON.stringify(entry.level)}`);
|
|
253
|
+
}
|
|
254
|
+
// source: Dataform table reference object or backtick-quoted string
|
|
255
|
+
if (entry.source === undefined || entry.source === null) {
|
|
256
|
+
throw new Error(`config.enrichments[${i}].source is required.`);
|
|
257
|
+
}
|
|
258
|
+
if (isDataformTableReferenceObject(entry.source)) {
|
|
259
|
+
// Valid Dataform reference
|
|
260
|
+
} else if (typeof entry.source === 'string') {
|
|
261
|
+
if (!entry.source.trim()) {
|
|
262
|
+
throw new Error(`config.enrichments[${i}].source must be a non-empty string. Received empty string.`);
|
|
263
|
+
}
|
|
264
|
+
if (!/^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(entry.source.trim())) {
|
|
265
|
+
throw new Error(`config.enrichments[${i}].source must be in the format '\`project.dataset.table\`' (with backticks) or a Dataform table reference. Received: ${JSON.stringify(entry.source)}`);
|
|
266
|
+
}
|
|
267
|
+
} else {
|
|
268
|
+
throw new Error(`config.enrichments[${i}].source must be a Dataform table reference object or a string in format '\`project.dataset.table\`'. Received: ${JSON.stringify(entry.source)}`);
|
|
269
|
+
}
|
|
270
|
+
// joinKey: required, plain SQL identifier OR non-empty array of plain SQL identifiers.
|
|
271
|
+
// Plain identifier = ^[a-zA-Z_][a-zA-Z0-9_]*$ — no aliases (`id as user_id`), no backticks,
|
|
272
|
+
// no dotted paths. Users with mismatched dim-column names alias in an upstream Dataform view.
|
|
273
|
+
const sqlIdentifier = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
274
|
+
const aliasingHint = ' Aliases like \'id as user_id\' are not supported here; alias in an upstream Dataform view if your dim has a different column name.';
|
|
275
|
+
if (entry.joinKey === undefined || entry.joinKey === null) {
|
|
276
|
+
throw new Error(`config.enrichments[${i}].joinKey is required.`);
|
|
277
|
+
}
|
|
278
|
+
if (typeof entry.joinKey === 'string') {
|
|
279
|
+
if (!entry.joinKey.trim()) {
|
|
280
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty string. Received empty string.`);
|
|
281
|
+
}
|
|
282
|
+
if (!sqlIdentifier.test(entry.joinKey)) {
|
|
283
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a plain SQL identifier. Received: ${JSON.stringify(entry.joinKey)}.${aliasingHint}`);
|
|
284
|
+
}
|
|
285
|
+
} else if (Array.isArray(entry.joinKey)) {
|
|
286
|
+
if (entry.joinKey.length === 0) {
|
|
287
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty array when provided as an array.`);
|
|
288
|
+
}
|
|
289
|
+
for (let j = 0; j < entry.joinKey.length; j++) {
|
|
290
|
+
const k = entry.joinKey[j];
|
|
291
|
+
if (typeof k !== 'string' || !k.trim()) {
|
|
292
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a non-empty string. Received: ${JSON.stringify(k)}`);
|
|
293
|
+
}
|
|
294
|
+
if (!sqlIdentifier.test(k)) {
|
|
295
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(k)}.${aliasingHint}`);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
} else {
|
|
299
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a string or a non-empty array of strings. Received: ${JSON.stringify(entry.joinKey)}`);
|
|
300
|
+
}
|
|
301
|
+
// columns: required, non-empty array of plain SQL identifiers (no aliasing).
|
|
302
|
+
if (!Array.isArray(entry.columns)) {
|
|
303
|
+
throw new Error(`config.enrichments[${i}].columns must be an array. Received: ${JSON.stringify(entry.columns)}`);
|
|
304
|
+
}
|
|
305
|
+
if (entry.columns.length === 0) {
|
|
306
|
+
throw new Error(`config.enrichments[${i}].columns must be non-empty. List the source columns to add to the output (excluding joinKey).`);
|
|
307
|
+
}
|
|
308
|
+
for (let j = 0; j < entry.columns.length; j++) {
|
|
309
|
+
const c = entry.columns[j];
|
|
310
|
+
if (typeof c !== 'string' || !c.trim()) {
|
|
311
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a non-empty string. Received: ${JSON.stringify(c)}`);
|
|
312
|
+
}
|
|
313
|
+
if (!sqlIdentifier.test(c)) {
|
|
314
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(c)}.${aliasingHint}`);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// dedupe: optional boolean
|
|
318
|
+
if (entry.dedupe !== undefined && typeof entry.dedupe !== 'boolean') {
|
|
319
|
+
throw new Error(`config.enrichments[${i}].dedupe must be a boolean when provided. Received: ${JSON.stringify(entry.dedupe)}`);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
228
323
|
} catch (e) {
|
|
229
324
|
e.message = `Config validation: ${e.message}`;
|
|
230
325
|
throw e;
|
package/utils.js
CHANGED
|
@@ -389,6 +389,16 @@ const setDataformContext = (ctx, config) => {
|
|
|
389
389
|
}
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
// resolve Dataform refs in enrichments[].source the same way as sourceTable
|
|
393
|
+
if (Array.isArray(config.enrichments)) {
|
|
394
|
+
config.enrichments = config.enrichments.map(e => {
|
|
395
|
+
if (isDataformTableReferenceObject(e.source)) {
|
|
396
|
+
return { ...e, source: ctx.ref(e.source) };
|
|
397
|
+
}
|
|
398
|
+
return e;
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
|
|
392
402
|
config.self = ctx.self();
|
|
393
403
|
config.incremental = ctx.incremental();
|
|
394
404
|
|
|
@@ -465,37 +475,162 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
465
475
|
};
|
|
466
476
|
|
|
467
477
|
/**
|
|
468
|
-
*
|
|
469
|
-
*
|
|
478
|
+
* Builds a queryBuilder `select.columns` fragment that passes through every source column
|
|
479
|
+
* not already covered by an explicit columns object.
|
|
470
480
|
*
|
|
471
|
-
*
|
|
472
|
-
*
|
|
473
|
-
*
|
|
474
|
-
*
|
|
475
|
-
*
|
|
476
|
-
*
|
|
481
|
+
* A source column is considered "covered" — and skipped from pass-throughs — when it appears as:
|
|
482
|
+
* - a KEY in `explicitColumns` (a transform, package promotion, or undefined-valued exclusion
|
|
483
|
+
* sentinel like `{ event_dimensions: undefined }`), OR
|
|
484
|
+
* - a VALUE in `explicitColumns` (a bare source-column identifier referenced by a value-side
|
|
485
|
+
* rename, e.g. `{ user_traffic_source: 'traffic_source' }` covers 'traffic_source').
|
|
486
|
+
*
|
|
487
|
+
* Values that are SQL expressions, function calls, or non-strings never count as coverage —
|
|
488
|
+
* they reference the source column internally but the column itself is still available as a
|
|
489
|
+
* pass-through. (`.includes()` compares by strict equality, so 'extract(datetime from ...)'
|
|
490
|
+
* never matches a bare column name.)
|
|
491
|
+
*
|
|
492
|
+
* @param {Object} explicitColumns - A queryBuilder step's explicit `select.columns` entries.
|
|
493
|
+
* @param {Iterable<string>} sourceColumns - Column names available on the source schema.
|
|
494
|
+
* @returns {Object} A map of `{ column: column }` entries for every source column not covered.
|
|
495
|
+
*
|
|
496
|
+
* @example
|
|
497
|
+
* buildPassThroughs(
|
|
498
|
+
* { event_name: 'event_name', user_traffic_source: 'traffic_source' },
|
|
499
|
+
* ['event_name', 'traffic_source', 'device', 'geo']
|
|
500
|
+
* );
|
|
501
|
+
* // → { device: 'device', geo: 'geo' }
|
|
477
502
|
*/
|
|
478
|
-
const
|
|
479
|
-
const
|
|
480
|
-
const
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
// If none of the columns have been defined or excluded, select them all
|
|
488
|
-
if (exceptColumns.length === 0) {
|
|
489
|
-
return `${stepName}.*`;
|
|
503
|
+
const buildPassThroughs = (explicitColumns, sourceColumns) => {
    // A source column counts as covered when it matches either a key or a value of
    // `explicitColumns`. Set membership uses the same SameValueZero comparison as
    // Array.prototype.includes, so SQL-expression values never match a bare column name.
    const covered = new Set([
        ...Object.keys(explicitColumns),
        ...Object.values(explicitColumns),
    ]);

    const uncovered = {};
    for (const name of sourceColumns) {
        if (covered.has(name)) continue;
        // key === value: the column is passed through under its own name.
        uncovered[name] = name;
    }
    return uncovered;
};
|
|
491
514
|
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
515
|
+
|
|
516
|
+
/**
 * Builds the per-enrichment source-CTE steps, JOIN clauses, and column mappings for the
 * declarative `enrichments` feature. Event-level and item-level entries are routed into
 * separate output channels so the caller can attach them to different downstream CTEs.
 *
 * Pure config-to-data mapping: the function only reads the entries it is given and returns
 * plain data structures; it does not reference any downstream CTE itself.
 *
 * Generation-time throws:
 *   - Two enrichments at the SAME level target the same column name. The error names the
 *     enrichment that actually owns the column at that level. Same-name columns at
 *     DIFFERENT levels are allowed.
 *   - An entry's `level` is neither 'event' nor 'item'. Upstream config validation is
 *     expected to reject this already; the guard only makes direct calls fail clearly.
 *
 * @param {Array<Object>} [enrichments] - Enrichment entries with fields
 *   `{ name, level?, source, joinKey, columns, dedupe? }`. `level` defaults to 'event';
 *   `joinKey` is a string or an array of strings. `null`/`undefined` is treated as empty.
 * @returns {Object} A struct with four fields:
 *   - `steps` — one `{ name: 'enrich_<name>', select: { columns }, from, qualify? }` step
 *     per entry, in input order, regardless of level.
 *   - `event` — `{ joins, columns, columnNames }` for event-level entries.
 *   - `item`  — `{ joins, columns, columnNames }` for item-level entries.
 *     In both channels `joins` holds `{ type: 'left', table, on: 'using(...)' }` objects,
 *     `columns` maps `<col>` to `'enrich_<name>.<col>'`, and `columnNames` is a Set.
 *   - `columnOwner` — `{ <column>: { i, name, level } }`. Keyed by column name only, so when
 *     the same name is used at both levels the later entry overwrites the earlier one.
 * @throws {Error} On a same-level column collision or an unknown `level`.
 *
 * @example
 * const { steps, event, item } = buildEnrichments(config.enrichments);
 * // event.joins / event.columns → event-grained CTE
 * // item.joins  / item.columns  → item-grained CTE
 */
const buildEnrichments = (enrichments) => {
    const steps = [];
    const channels = {
        event: { joins: [], columns: {}, columnNames: new Set() },
        item: { joins: [], columns: {}, columnNames: new Set() },
    };
    // Ownership tracked per level, used only for collision diagnostics. The returned
    // `columnOwner` is keyed by column name alone, so a cross-level same-name column would
    // overwrite the entry needed to report the true same-level owner.
    const ownersByLevel = { event: new Map(), item: new Map() };
    const columnOwner = {};

    for (const [i, e] of (enrichments ?? []).entries()) {
        const level = e.level ?? 'event';
        if (!Object.prototype.hasOwnProperty.call(channels, level)) {
            throw new Error(
                `config.enrichments[${i}].level must be one of: ${Object.keys(channels).join(', ')}. Received: ${JSON.stringify(level)}`
            );
        }
        const channel = channels[level];
        const levelOwners = ownersByLevel[level];
        const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
        const cteName = `enrich_${e.name}`;

        // Source CTE selects joinKey columns plus the requested columns. key === value
        // shape skips the alias clause in queryBuilder's columnsToSQL.
        const cteCols = {};
        for (const k of joinKeys) cteCols[k] = k;
        for (const c of e.columns) cteCols[c] = c;
        const sourceStep = { name: cteName, select: { columns: cteCols }, from: e.source };
        // Opt-in dedupe: which row wins is non-deterministic — users with strict needs
        // pre-aggregate in their source SQL.
        if (e.dedupe) {
            sourceStep.qualify = `row_number() over (partition by ${joinKeys.join(', ')}) = 1`;
        }
        steps.push(sourceStep);

        channel.joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });

        for (const c of e.columns) {
            // Same-level collision throw. Cross-level same-name is allowed, so the owner
            // must be looked up in this level's map rather than in `columnOwner`.
            if (channel.columnNames.has(c)) {
                const owner = levelOwners.get(c);
                throw new Error(
                    `config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
                    `(name: '${owner.name}') both target column '${c}' at level '${level}'. ` +
                    `Two enrichments cannot write the same column at the same level; rename one in source SQL or pick a different name.`
                );
            }
            channel.columns[c] = `${cteName}.${c}`;
            channel.columnNames.add(c);
            const ownerRecord = { i, name: e.name, level };
            levelOwners.set(c, ownerRecord);
            // Returned map keeps its original shape: keyed by column name, later writer wins
            // when the same name appears at both levels; `level` disambiguates the record.
            columnOwner[c] = ownerRecord;
        }
    }

    return { steps, event: channels.event, item: channels.item, columnOwner };
};
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
/**
 * Produces step-qualified pass-through entries to spread into a downstream SELECT's
 * `select.columns`.
 *
 * Walks the keys of `step.select.columns` and, for each one, emits
 * `{ <col>: '<step.name>.<col>' }` unless:
 *   - its value in `step.select.columns` is `undefined` (the entry is skipped), or
 *   - its name is listed in `alreadyCovered`.
 *
 * Only keys of `step.select.columns` are visited, so names in `alreadyCovered` that the step
 * does not define have no effect; callers can pass a superset without pre-filtering.
 *
 * @param {Object} step - A queryBuilder step with a `name` and a `select.columns` object.
 * @param {Iterable<string>} alreadyCovered - Column names the downstream SELECT should not
 *   receive from this helper.
 * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
 *
 * @example
 * buildQualifiedPassThroughs(
 *   { name: 'event_data', select: { columns: { event_date: 'event_date', event_name: 'event_name' } } },
 *   ['event_date']
 * );
 * // → { event_name: 'event_data.event_name' }
 */
const buildQualifiedPassThroughs = (step, alreadyCovered) => {
    const skip = new Set(alreadyCovered);
    const stepColumns = step.select.columns;
    const qualified = {};

    Object.keys(stepColumns).forEach((name) => {
        const isExcluded = stepColumns[name] === undefined;
        if (!isExcluded && !skip.has(name)) {
            qualified[name] = `${step.name}.${name}`;
        }
    });

    return qualified;
};
|
|
500
635
|
|
|
501
636
|
|
|
@@ -573,7 +708,9 @@ module.exports = {
|
|
|
573
708
|
queryBuilder,
|
|
574
709
|
isDataformTableReferenceObject,
|
|
575
710
|
setDataformContext,
|
|
576
|
-
|
|
711
|
+
buildPassThroughs,
|
|
712
|
+
buildEnrichments,
|
|
713
|
+
buildQualifiedPassThroughs,
|
|
577
714
|
processDate,
|
|
578
715
|
getDatasetName
|
|
579
716
|
};
|