ga4-export-fixer 0.8.0 → 0.9.0-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -5
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +263 -262
- package/package.json +6 -5
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +91 -21
- package/tables/ga4EventsEnhanced/validation.js +95 -0
- package/utils.js +30 -8
|
@@ -230,7 +230,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
230
230
|
// ecommerce
|
|
231
231
|
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
232
232
|
items: 'items',
|
|
233
|
-
|
|
233
|
+
_item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
234
234
|
// flag if the data is "final" and is not expected to change anymore
|
|
235
235
|
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
236
236
|
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
@@ -268,9 +268,9 @@ ${excludedEventsSQL}`,
|
|
|
268
268
|
'group by': 'session_id',
|
|
269
269
|
};
|
|
270
270
|
|
|
271
|
-
// item
|
|
272
|
-
// 1.
|
|
273
|
-
// 2.
|
|
271
|
+
// Shared item-array CTEs:
|
|
272
|
+
// 1. items_unnested: unnest items from ecommerce events, compute attribution via window function
|
|
273
|
+
// 2. items_rebuilt: re-aggregate items with attributed list fields
|
|
274
274
|
const itemListSteps = itemListAttribution ? (() => {
|
|
275
275
|
const attrExpr = helpers.itemListAttributionExpr(
|
|
276
276
|
itemListAttribution.lookbackType,
|
|
@@ -279,12 +279,14 @@ ${excludedEventsSQL}`,
|
|
|
279
279
|
);
|
|
280
280
|
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
281
281
|
|
|
282
|
-
const
|
|
283
|
-
name: '
|
|
282
|
+
const unnestedStep = {
|
|
283
|
+
name: 'items_unnested',
|
|
284
284
|
select: {
|
|
285
285
|
columns: {
|
|
286
|
-
'
|
|
286
|
+
'_item_row_id': '_item_row_id',
|
|
287
287
|
'event_name': 'event_name',
|
|
288
|
+
// event_date is carried forward for ability to use it in data enrichment joins
|
|
289
|
+
'event_date': 'event_date',
|
|
288
290
|
'item': 'item',
|
|
289
291
|
'_item_list_attr': attrExpr,
|
|
290
292
|
},
|
|
@@ -293,11 +295,11 @@ ${excludedEventsSQL}`,
|
|
|
293
295
|
where: `event_name in (${ecommerceEventsFilter})`,
|
|
294
296
|
};
|
|
295
297
|
|
|
296
|
-
const
|
|
297
|
-
name: '
|
|
298
|
+
const rebuiltStep = {
|
|
299
|
+
name: 'items_rebuilt',
|
|
298
300
|
select: {
|
|
299
301
|
columns: {
|
|
300
|
-
'
|
|
302
|
+
'_item_row_id': '_item_row_id',
|
|
301
303
|
'items': `array_agg(
|
|
302
304
|
(select as struct item.* replace(
|
|
303
305
|
coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
|
|
@@ -307,21 +309,81 @@ ${excludedEventsSQL}`,
|
|
|
307
309
|
)`,
|
|
308
310
|
},
|
|
309
311
|
},
|
|
310
|
-
from: '
|
|
311
|
-
'group by': '
|
|
312
|
+
from: 'items_unnested',
|
|
313
|
+
'group by': '_item_row_id',
|
|
312
314
|
};
|
|
313
315
|
|
|
314
|
-
return [
|
|
316
|
+
return [unnestedStep, rebuiltStep];
|
|
315
317
|
})() : null;
|
|
316
318
|
|
|
317
319
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
318
320
|
|
|
319
|
-
// When item list attribution is enabled, override the items column and exclude
|
|
321
|
+
// When item list attribution is enabled, override the items column and exclude _item_row_id
|
|
320
322
|
// COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
|
|
321
323
|
const itemListOverrides = itemListSteps ? {
|
|
322
|
-
items: 'coalesce(
|
|
324
|
+
items: 'coalesce(items_rebuilt.items, event_data.items)',
|
|
323
325
|
} : {};
|
|
324
|
-
const itemListExcludedColumns = itemListSteps ? ['
|
|
326
|
+
const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
|
|
327
|
+
|
|
328
|
+
// Build enrichment-source CTEs and gather event-level join/column data.
|
|
329
|
+
// Item-level enrichments throw "not yet supported" — they will arrive in a later release.
|
|
330
|
+
const enrichments = mergedConfig.enrichments ?? [];
|
|
331
|
+
const enrichmentSteps = [];
|
|
332
|
+
const enrichmentJoins = [];
|
|
333
|
+
const enrichmentColumns = {}; // column name → SQL expression for select.columns
|
|
334
|
+
const enrichmentColumnNames = new Set(); // column names for excludedColumns of wildcards
|
|
335
|
+
const enrichmentColumnOwner = {}; // column name → { i, name } for collision errors
|
|
336
|
+
for (const [i, e] of enrichments.entries()) {
|
|
337
|
+
const level = e.level ?? 'event';
|
|
338
|
+
if (level === 'item') {
|
|
339
|
+
throw new Error(
|
|
340
|
+
`config.enrichments[${i}] uses level: 'item', which is not yet supported in this version. ` +
|
|
341
|
+
`Item-level enrichments will ship in a future release; see design_docs/planned/data-enrichments.md.`
|
|
342
|
+
);
|
|
343
|
+
}
|
|
344
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
345
|
+
const cteName = `enrich_${e.name}`;
|
|
346
|
+
// Source CTE selects joinKey columns plus the requested columns. key === value
|
|
347
|
+
// shape skips the alias clause in queryBuilder's columnsToSQL.
|
|
348
|
+
const cteCols = {};
|
|
349
|
+
for (const k of joinKeys) cteCols[k] = k;
|
|
350
|
+
for (const c of e.columns) cteCols[c] = c;
|
|
351
|
+
const sourceStep = {
|
|
352
|
+
name: cteName,
|
|
353
|
+
select: { columns: cteCols },
|
|
354
|
+
from: e.source,
|
|
355
|
+
};
|
|
356
|
+
// Opt-in dedupe: which row wins is non-deterministic — users with strict needs
|
|
357
|
+
// pre-aggregate in their source SQL.
|
|
358
|
+
if (e.dedupe) {
|
|
359
|
+
sourceStep.qualify = `row_number() over (partition by ${joinKeys.join(', ')}) = 1`;
|
|
360
|
+
}
|
|
361
|
+
enrichmentSteps.push(sourceStep);
|
|
362
|
+
|
|
363
|
+
enrichmentJoins.push({
|
|
364
|
+
type: 'left',
|
|
365
|
+
table: cteName,
|
|
366
|
+
on: `using(${joinKeys.join(', ')})`,
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
// Replace-or-add: each enrichment column overrides explicit select columns via JS object
|
|
370
|
+
// spread, AND joins the excludedColumns set so it suppresses overlap with the wildcard
|
|
371
|
+
// event_data.* / session_data.* expansions below.
|
|
372
|
+
for (const c of e.columns) {
|
|
373
|
+
if (enrichmentColumnNames.has(c)) {
|
|
374
|
+
const owner = enrichmentColumnOwner[c];
|
|
375
|
+
throw new Error(
|
|
376
|
+
`config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
|
|
377
|
+
`(name: '${owner.name}') both target column '${c}'. ` +
|
|
378
|
+
`Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
|
|
379
|
+
);
|
|
380
|
+
}
|
|
381
|
+
enrichmentColumns[c] = `${cteName}.${c}`;
|
|
382
|
+
enrichmentColumnNames.add(c);
|
|
383
|
+
enrichmentColumnOwner[c] = { i, name: e.name };
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
325
387
|
|
|
326
388
|
// Join event_data and session_data, include additional logic
|
|
327
389
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
@@ -332,6 +394,9 @@ ${excludedEventsSQL}`,
|
|
|
332
394
|
// get the most important columns in the correct order
|
|
333
395
|
...finalColumnOrder,
|
|
334
396
|
...itemListOverrides,
|
|
397
|
+
// event-level enrichment columns: override matching explicit columns; new columns added.
|
|
398
|
+
// Wildcard-column overlap is handled below via excludedColumns.
|
|
399
|
+
...enrichmentColumns,
|
|
335
400
|
// get the rest of the event_data columns
|
|
336
401
|
'[sql]event_data': utils.selectOtherColumns(
|
|
337
402
|
eventDataStep,
|
|
@@ -342,13 +407,14 @@ ${excludedEventsSQL}`,
|
|
|
342
407
|
'data_is_final',
|
|
343
408
|
'export_type',
|
|
344
409
|
...itemListExcludedColumns,
|
|
410
|
+
...enrichmentExcludedColumns,
|
|
345
411
|
]
|
|
346
412
|
),
|
|
347
413
|
// get the rest of the session_data columns
|
|
348
414
|
'[sql]session_data': utils.selectOtherColumns(
|
|
349
415
|
sessionDataStep,
|
|
350
416
|
Object.keys(finalColumnOrder),
|
|
351
|
-
[]
|
|
417
|
+
[...enrichmentExcludedColumns],
|
|
352
418
|
),
|
|
353
419
|
// include additional columns
|
|
354
420
|
row_inserted_timestamp: 'current_timestamp()',
|
|
@@ -360,19 +426,22 @@ ${excludedEventsSQL}`,
|
|
|
360
426
|
joins: [
|
|
361
427
|
...(itemListSteps ? [{
|
|
362
428
|
type: 'left',
|
|
363
|
-
table: '
|
|
364
|
-
on: 'using(
|
|
429
|
+
table: 'items_rebuilt',
|
|
430
|
+
on: 'using(_item_row_id)'
|
|
365
431
|
}] : []),
|
|
366
432
|
{
|
|
367
433
|
type: 'left',
|
|
368
434
|
table: 'session_data',
|
|
369
435
|
on: 'using(session_id)'
|
|
370
|
-
}
|
|
436
|
+
},
|
|
437
|
+
// Event-level enrichment joins go last so they apply on top of the package's own joins.
|
|
438
|
+
...enrichmentJoins,
|
|
371
439
|
],
|
|
372
440
|
where: helpers.incrementalDateFilter(mergedConfig)
|
|
373
441
|
};
|
|
374
442
|
|
|
375
443
|
const packageSteps = [
|
|
444
|
+
...enrichmentSteps,
|
|
376
445
|
eventDataStep,
|
|
377
446
|
...(itemListSteps ?? []),
|
|
378
447
|
sessionDataStep,
|
|
@@ -381,7 +450,8 @@ ${excludedEventsSQL}`,
|
|
|
381
450
|
|
|
382
451
|
// Layer 2 validation: customSteps name must not collide with package step names.
|
|
383
452
|
// Reserved set is derived from packageSteps at runtime (single source of truth) — what
|
|
384
|
-
// is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on
|
|
453
|
+
// is reserved depends on config (e.g. items_unnested / items_rebuilt exist only when itemListAttribution is on,
|
|
454
|
+
// and enrich_* names exist only when enrichments are configured).
|
|
385
455
|
const customSteps = mergedConfig.customSteps ?? [];
|
|
386
456
|
if (customSteps.length > 0) {
|
|
387
457
|
const reservedNames = new Set(packageSteps.map(s => s.name));
|
|
@@ -225,6 +225,101 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
225
225
|
seenNames.add(step.name);
|
|
226
226
|
}
|
|
227
227
|
}
|
|
228
|
+
|
|
229
|
+
// enrichments - optional array of declarative external-data enrichment specs.
|
|
230
|
+
// This block performs Layer 1 (config-shape) checks. Layer 2 checks (reserved-name collision
|
|
231
|
+
// + item-level deferral throw) live in _generateEnhancedEventsSQL — the reserved set is
|
|
232
|
+
// config-dependent, and the item-level deferral error is thrown there while the SQL is being generated.
|
|
233
|
+
if (config.enrichments !== undefined) {
|
|
234
|
+
if (!Array.isArray(config.enrichments)) {
|
|
235
|
+
throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
|
|
236
|
+
}
|
|
237
|
+
const validLevels = ['event', 'item'];
|
|
238
|
+
const seenNames = new Set();
|
|
239
|
+
for (let i = 0; i < config.enrichments.length; i++) {
|
|
240
|
+
const entry = config.enrichments[i];
|
|
241
|
+
if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
|
|
242
|
+
throw new Error(`config.enrichments[${i}] must be a non-null object. Received: ${JSON.stringify(entry)}`);
|
|
243
|
+
}
|
|
244
|
+
if (typeof entry.name !== 'string' || !entry.name.trim()) {
|
|
245
|
+
throw new Error(`config.enrichments[${i}].name must be a non-empty string. Received: ${JSON.stringify(entry.name)}`);
|
|
246
|
+
}
|
|
247
|
+
if (seenNames.has(entry.name)) {
|
|
248
|
+
throw new Error(`config.enrichments contains duplicate name '${entry.name}'. Each enrichments entry must have a unique name.`);
|
|
249
|
+
}
|
|
250
|
+
seenNames.add(entry.name);
|
|
251
|
+
if (entry.level !== undefined && !validLevels.includes(entry.level)) {
|
|
252
|
+
throw new Error(`config.enrichments[${i}].level must be one of: ${validLevels.join(', ')}. Received: ${JSON.stringify(entry.level)}`);
|
|
253
|
+
}
|
|
254
|
+
// source: Dataform table reference object or backtick-quoted string
|
|
255
|
+
if (entry.source === undefined || entry.source === null) {
|
|
256
|
+
throw new Error(`config.enrichments[${i}].source is required.`);
|
|
257
|
+
}
|
|
258
|
+
if (isDataformTableReferenceObject(entry.source)) {
|
|
259
|
+
// Valid Dataform reference
|
|
260
|
+
} else if (typeof entry.source === 'string') {
|
|
261
|
+
if (!entry.source.trim()) {
|
|
262
|
+
throw new Error(`config.enrichments[${i}].source must be a non-empty string. Received empty string.`);
|
|
263
|
+
}
|
|
264
|
+
if (!/^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(entry.source.trim())) {
|
|
265
|
+
throw new Error(`config.enrichments[${i}].source must be in the format '\`project.dataset.table\`' (with backticks) or a Dataform table reference. Received: ${JSON.stringify(entry.source)}`);
|
|
266
|
+
}
|
|
267
|
+
} else {
|
|
268
|
+
throw new Error(`config.enrichments[${i}].source must be a Dataform table reference object or a string in format '\`project.dataset.table\`'. Received: ${JSON.stringify(entry.source)}`);
|
|
269
|
+
}
|
|
270
|
+
// joinKey: required, plain SQL identifier OR non-empty array of plain SQL identifiers.
|
|
271
|
+
// Plain identifier = ^[a-zA-Z_][a-zA-Z0-9_]*$ — no aliases (`id as user_id`), no backticks,
|
|
272
|
+
// no dotted paths. Users with mismatched dim-column names alias in an upstream Dataform view.
|
|
273
|
+
const sqlIdentifier = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
274
|
+
const aliasingHint = ' Aliases like \'id as user_id\' are not supported here; alias in an upstream Dataform view if your dim has a different column name.';
|
|
275
|
+
if (entry.joinKey === undefined || entry.joinKey === null) {
|
|
276
|
+
throw new Error(`config.enrichments[${i}].joinKey is required.`);
|
|
277
|
+
}
|
|
278
|
+
if (typeof entry.joinKey === 'string') {
|
|
279
|
+
if (!entry.joinKey.trim()) {
|
|
280
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty string. Received empty string.`);
|
|
281
|
+
}
|
|
282
|
+
if (!sqlIdentifier.test(entry.joinKey)) {
|
|
283
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a plain SQL identifier. Received: ${JSON.stringify(entry.joinKey)}.${aliasingHint}`);
|
|
284
|
+
}
|
|
285
|
+
} else if (Array.isArray(entry.joinKey)) {
|
|
286
|
+
if (entry.joinKey.length === 0) {
|
|
287
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty array when provided as an array.`);
|
|
288
|
+
}
|
|
289
|
+
for (let j = 0; j < entry.joinKey.length; j++) {
|
|
290
|
+
const k = entry.joinKey[j];
|
|
291
|
+
if (typeof k !== 'string' || !k.trim()) {
|
|
292
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a non-empty string. Received: ${JSON.stringify(k)}`);
|
|
293
|
+
}
|
|
294
|
+
if (!sqlIdentifier.test(k)) {
|
|
295
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(k)}.${aliasingHint}`);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
} else {
|
|
299
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a string or a non-empty array of strings. Received: ${JSON.stringify(entry.joinKey)}`);
|
|
300
|
+
}
|
|
301
|
+
// columns: required, non-empty array of plain SQL identifiers (no aliasing).
|
|
302
|
+
if (!Array.isArray(entry.columns)) {
|
|
303
|
+
throw new Error(`config.enrichments[${i}].columns must be an array. Received: ${JSON.stringify(entry.columns)}`);
|
|
304
|
+
}
|
|
305
|
+
if (entry.columns.length === 0) {
|
|
306
|
+
throw new Error(`config.enrichments[${i}].columns must be non-empty. List the source columns to add to the output (excluding joinKey).`);
|
|
307
|
+
}
|
|
308
|
+
for (let j = 0; j < entry.columns.length; j++) {
|
|
309
|
+
const c = entry.columns[j];
|
|
310
|
+
if (typeof c !== 'string' || !c.trim()) {
|
|
311
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a non-empty string. Received: ${JSON.stringify(c)}`);
|
|
312
|
+
}
|
|
313
|
+
if (!sqlIdentifier.test(c)) {
|
|
314
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(c)}.${aliasingHint}`);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// dedupe: optional boolean
|
|
318
|
+
if (entry.dedupe !== undefined && typeof entry.dedupe !== 'boolean') {
|
|
319
|
+
throw new Error(`config.enrichments[${i}].dedupe must be a boolean when provided. Received: ${JSON.stringify(entry.dedupe)}`);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
228
323
|
} catch (e) {
|
|
229
324
|
e.message = `Config validation: ${e.message}`;
|
|
230
325
|
throw e;
|
package/utils.js
CHANGED
|
@@ -389,6 +389,16 @@ const setDataformContext = (ctx, config) => {
|
|
|
389
389
|
}
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
// resolve Dataform refs in enrichments[].source the same way as sourceTable
|
|
393
|
+
if (Array.isArray(config.enrichments)) {
|
|
394
|
+
config.enrichments = config.enrichments.map(e => {
|
|
395
|
+
if (isDataformTableReferenceObject(e.source)) {
|
|
396
|
+
return { ...e, source: ctx.ref(e.source) };
|
|
397
|
+
}
|
|
398
|
+
return e;
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
|
|
392
402
|
config.self = ctx.self();
|
|
393
403
|
config.incremental = ctx.incremental();
|
|
394
404
|
|
|
@@ -479,23 +489,35 @@ const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns =
|
|
|
479
489
|
const stepName = step.name;
|
|
480
490
|
const stepColumns = Object.keys(step.select.columns);
|
|
481
491
|
|
|
482
|
-
//
|
|
483
|
-
const
|
|
492
|
+
// Columns in step.select.columns that should be excluded (already-defined or explicitly listed)
|
|
493
|
+
const internalExcept = stepColumns.filter(
|
|
484
494
|
column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
|
|
485
495
|
);
|
|
486
496
|
|
|
487
|
-
//
|
|
488
|
-
|
|
497
|
+
// Columns in excludedColumns that aren't enumerated in step.select.columns. These are
|
|
498
|
+
// wildcard-sourced columns (e.g. default GA4 export columns coming through `event_data.*`
|
|
499
|
+
// inside event_data's own select). The caller knows what to exclude; trust them.
|
|
500
|
+
// BigQuery throws at dry-run if the column doesn't exist in the source — surfaces typos.
|
|
501
|
+
// Filter out undefined/null entries (callers can pass conditional values like
|
|
502
|
+
// `cond ? 'col' : undefined` for ergonomics).
|
|
503
|
+
const externalExcept = excludedColumns.filter(
|
|
504
|
+
c => typeof c === 'string' && c.length > 0 && !stepColumns.includes(c)
|
|
505
|
+
);
|
|
506
|
+
|
|
507
|
+
const allExcept = [...internalExcept, ...externalExcept];
|
|
508
|
+
|
|
509
|
+
// If nothing is excluded, select everything
|
|
510
|
+
if (allExcept.length === 0) {
|
|
489
511
|
return `${stepName}.*`;
|
|
490
512
|
}
|
|
491
513
|
|
|
492
|
-
// If
|
|
493
|
-
|
|
514
|
+
// If every enumerated column is excluded and there are no external excepts to apply,
|
|
515
|
+
// there's nothing to select via the wildcard
|
|
516
|
+
if (internalExcept.length === stepColumns.length && externalExcept.length === 0) {
|
|
494
517
|
return;
|
|
495
518
|
}
|
|
496
519
|
|
|
497
|
-
|
|
498
|
-
return `${stepName}.* except (${exceptColumns.join(', ')})`;
|
|
520
|
+
return `${stepName}.* except (${allExcept.join(', ')})`;
|
|
499
521
|
};
|
|
500
522
|
|
|
501
523
|
|