ga4-export-fixer 0.8.0 → 0.9.0-dev.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -197,51 +197,46 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
197
197
  return excludedColumns;
198
198
  };
199
199
 
200
- // initial step: extract data from the export tables
200
+ // initial step: extract data from the export tables.
201
+ // Explicit columns first (transforms + package-promoted + user-excluded sentinels);
202
+ // then pass-through entries for every GA4 export column not already accounted for.
203
+ // After this, Object.keys(eventDataStep.select.columns) is the complete column set of event_data.
204
+ const eventDataExplicitColumns = {
205
+ // exclude default export columns that are not needed
206
+ // do this first so that the columns defined later are not excluded
207
+ ...getExcludedColumns(),
208
+ event_date: helpers.eventDate,
209
+ event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
210
+ event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
211
+ session_id: helpers.sessionId,
212
+ // page details
213
+ page_location: helpers.unnestEventParam('page_location', 'string'),
214
+ page: helpers.extractPageDetails(),
215
+ // promote event params to columns
216
+ ...promotedEventParameters(),
217
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
218
+ // rename traffic_source for clarity
219
+ user_traffic_source: 'traffic_source',
220
+ // ecommerce
221
+ ecommerce: helpers.fixEcommerceStruct('ecommerce'),
222
+ // assign a unique row id, used for handling item-level attribution and enrichment
223
+ _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
224
+ // flag if the data is "final" and is not expected to change anymore
225
+ data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
226
+ export_type: helpers.getGa4ExportType('_table_suffix'),
227
+ // prep columns for later steps
228
+ entrances: helpers.unnestEventParam('entrances', 'int'),
229
+ session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
230
+ };
231
+ // Pass through every GA4 export column not already covered by an explicit transform,
232
+ // promotion, exclusion sentinel, or value-side rename in eventDataExplicitColumns.
233
+ const eventDataPassThroughs = utils.buildPassThroughs(eventDataExplicitColumns, helpers.ga4ExportColumns);
201
234
  const eventDataStep = {
202
235
  name: 'event_data',
203
236
  select: {
204
237
  columns: {
205
- // exclude default export columns that are not needed
206
- // do this first so that the columns defined later are not excluded
207
- ...getExcludedColumns(),
208
- // date and time
209
- event_date: helpers.eventDate,
210
- event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
- event_timestamp: 'event_timestamp',
212
- event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
- // event name
214
- event_name: 'event_name',
215
- // identifiers
216
- session_id: helpers.sessionId,
217
- user_pseudo_id: 'user_pseudo_id',
218
- user_id: 'user_id',
219
- // page
220
- page_location: helpers.unnestEventParam('page_location', 'string'),
221
- page: helpers.extractPageDetails(),
222
- // event parameters and user properties
223
- ...promotedEventParameters(),
224
- event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
- user_properties: 'user_properties',
226
- // traffic source
227
- collected_traffic_source: 'collected_traffic_source',
228
- session_traffic_source_last_click: 'session_traffic_source_last_click',
229
- user_traffic_source: 'traffic_source',
230
- // ecommerce
231
- ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
- items: 'items',
233
- _item_list_attribution_row_id: itemListAttribution ? helpers.itemListAttributionRowId(ecommerceEventsFilter) : undefined,
234
- // flag if the data is "final" and is not expected to change anymore
235
- data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
- export_type: helpers.getGa4ExportType('_table_suffix'),
237
- // prep columns for later steps
238
- entrances: helpers.unnestEventParam('entrances', 'int'),
239
- session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
- // include all other columns from the export data
241
- get '[sql]other_columns'() {
242
- const definedColumns = Object.keys(this);
243
- return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
244
- },
238
+ ...eventDataExplicitColumns,
239
+ ...eventDataPassThroughs,
245
240
  },
246
241
  },
247
242
  from: mergedConfig.sourceTable,
@@ -268,60 +263,192 @@ ${excludedEventsSQL}`,
268
263
  'group by': 'session_id',
269
264
  };
270
265
 
271
- // item list attribution CTEs:
272
- // 1. item_list_unnest: unnest items from ecommerce events, compute attribution via window function
273
- // 2. item_list_data: re-aggregate items with attributed list fields
274
- const itemListSteps = itemListAttribution ? (() => {
275
- const attrExpr = helpers.itemListAttributionExpr(
276
- itemListAttribution.lookbackType,
277
- timestampColumn,
278
- itemListAttribution.lookbackTimeMs
279
- );
266
+ // Build enrichment-source CTEs and gather per-level join/column data. The utility routes
267
+ // event-level and item-level entries through separate output channels.
268
+ const { steps: enrichmentSteps, event: eventEnrichments, item: itemEnrichments }
269
+ = utils.buildEnrichments(mergedConfig.enrichments);
270
+
271
+ // Validate item-level joinKey columns and collect any event_data columns that need to
272
+ // be carried up to items_unnested as top-level columns (so the LEFT JOIN inside
273
+ // items_rebuilt can USING(...) on them). Item-struct fields are already top-level on
274
+ // items_unnested and need no extension.
275
+ const itemJoinKeysFromEventData = new Set();
276
+ for (const [i, e] of (mergedConfig.enrichments ?? []).entries()) {
277
+ const level = e.level ?? 'event';
278
+ if (level !== 'item') continue;
279
+ const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
280
+ for (const c of joinKeys) {
281
+ if (helpers.ga4ItemStructFields.includes(c)) {
282
+ // Already a top-level column on items_unnested.
283
+ } else if (c in eventDataStep.select.columns && eventDataStep.select.columns[c] !== undefined) {
284
+ itemJoinKeysFromEventData.add(c);
285
+ } else {
286
+ throw new Error(
287
+ `config.enrichments[${i}] (name: '${e.name}') uses item-level joinKey '${c}', ` +
288
+ `which is neither a field on the GA4 items struct (helpers.ga4ItemStructFields) ` +
289
+ `nor a column on event_data. Valid item-level joinKeys are item-struct fields ` +
290
+ `(e.g. item_id, item_category) or any event_data column (e.g. user_pseudo_id, event_date).`
291
+ );
292
+ }
293
+ }
294
+ }
295
+
296
+ // Shared item-array CTEs:
297
+ // 1. items_unnested: unnest items from ecommerce events; LAST_VALUE attribution window
298
+ // is emitted only when itemListAttribution is configured.
299
+ // 2. items_rebuilt: re-aggregate items via explicit struct(...) construction;
300
+ // LEFT JOIN enrich_<name> for each item-level enrichment.
301
+ // Activation: emitted when EITHER itemListAttribution is configured OR at least one
302
+ // item-level enrichment is present.
303
+ const itemEnrichmentsActive = itemEnrichments.joins.length > 0;
304
+ const itemsScaffoldActive = !!itemListAttribution || itemEnrichmentsActive;
305
+ const itemListSteps = itemsScaffoldActive ? (() => {
280
306
  const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
281
307
 
282
- const attributionStep = {
283
- name: 'item_list_attribution',
284
- select: {
285
- columns: {
286
- '_item_list_attribution_row_id': '_item_list_attribution_row_id',
287
- 'event_name': 'event_name',
288
- 'item': 'item',
289
- '_item_list_attr': attrExpr,
290
- },
291
- },
308
+ // Flatten the item struct: every standard items-struct field is selected as a
309
+ // top-level column of items_unnested. This makes downstream joins simpler
310
+ // (LEFT JOIN ... USING(item_id) works without aliasing tricks) and lets items_rebuilt
311
+ // reference fields as bare column names instead of `item.<col>`.
312
+ const itemFieldColumns = {};
313
+ for (const f of helpers.ga4ItemStructFields) {
314
+ itemFieldColumns[f] = `item.${f}`;
315
+ }
316
+
317
+ // Carry up any event_data joinKey columns used by item-level enrichments so the
318
+ // USING(...) clause in items_rebuilt can bind against top-level identifiers.
319
+ // Skip ones already in the base columns above (e.g. event_date is always carried).
320
+ const baseColumnNames = new Set(['_item_row_id', 'event_name', 'event_date', ...Object.keys(itemFieldColumns)]);
321
+ const extraJoinKeyColumns = {};
322
+ for (const c of itemJoinKeysFromEventData) {
323
+ if (!baseColumnNames.has(c)) {
324
+ extraJoinKeyColumns[c] = c;
325
+ }
326
+ }
327
+
328
+ // items_unnested base columns. The _item_list_attr struct (LAST_VALUE window) is
329
+ // added only when itemListAttribution is configured — when only item enrichments
330
+ // are active, the window function is omitted entirely for cleaner SQL.
331
+ const unnestedSelectColumns = {
332
+ '_item_row_id': '_item_row_id',
333
+ 'event_name': 'event_name',
334
+ // event_date is carried forward for ability to use it in data enrichment joins
335
+ 'event_date': 'event_date',
336
+ ...itemFieldColumns,
337
+ ...extraJoinKeyColumns,
338
+ };
339
+ if (itemListAttribution) {
340
+ unnestedSelectColumns._item_list_attr = helpers.itemListAttributionExpr(
341
+ itemListAttribution.lookbackType,
342
+ timestampColumn,
343
+ itemListAttribution.lookbackTimeMs
344
+ );
345
+ }
346
+
347
+ const unnestedStep = {
348
+ name: 'items_unnested',
349
+ select: { columns: unnestedSelectColumns },
292
350
  from: 'event_data, unnest(items) as item',
293
351
  where: `event_name in (${ecommerceEventsFilter})`,
294
352
  };
295
353
 
296
- const dataStep = {
297
- name: 'item_list_data',
354
+ // Build the per-field expression map for the items struct. Seed with the canonical
355
+ // GA4 items-struct fields — each references the matching top-level column on
356
+ // items_unnested. When itemListAttribution is configured, override the three
357
+ // attribution entries with their package-generated coalesce-with-passthrough
358
+ // expressions. Item-level enrichment columns layer on top via the spread below.
359
+ const preItemExpressions = {};
360
+ for (const f of helpers.ga4ItemStructFields) {
361
+ preItemExpressions[f] = f;
362
+ }
363
+ if (itemListAttribution) {
364
+ preItemExpressions.item_list_name = `coalesce(if(${passthroughEvents}, item_list_name, _item_list_attr.item_list_name), '(not set)')`;
365
+ preItemExpressions.item_list_id = `coalesce(if(${passthroughEvents}, item_list_id, _item_list_attr.item_list_id), '(not set)')`;
366
+ preItemExpressions.item_list_index = `coalesce(if(${passthroughEvents}, item_list_index, _item_list_attr.item_list_index))`;
367
+ }
368
+
369
+ // Wrap overlapping item-level enrichment columns in coalesce(<enrichExpr>, <originalExpr>)
370
+ // so a missed JOIN falls back to the existing item field value. Purely additive
371
+ // columns (no overlap) pass through unchanged.
372
+ const wrappedItemEnrichmentColumns = {};
373
+ for (const [col, enrichExpr] of Object.entries(itemEnrichments.columns)) {
374
+ const originalExpr = preItemExpressions[col];
375
+ wrappedItemEnrichmentColumns[col] = originalExpr
376
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
377
+ : enrichExpr;
378
+ }
379
+
380
+ // Final struct: standard fields first, then enrichment overrides spread on top
381
+ // (overlapping keys replace preItemExpressions entries; additive keys are appended).
382
+ const finalItemStructFields = { ...preItemExpressions, ...wrappedItemEnrichmentColumns };
383
+
384
+ const itemStructClauses = Object.entries(finalItemStructFields)
385
+ .map(([col, expr]) => `${expr} as ${col}`)
386
+ .join(',\n ');
387
+
388
+ const rebuiltStep = {
389
+ name: 'items_rebuilt',
298
390
  select: {
299
391
  columns: {
300
- '_item_list_attribution_row_id': '_item_list_attribution_row_id',
301
- 'items': `array_agg(
302
- (select as struct item.* replace(
303
- coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
304
- coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
305
- coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
306
- ))
307
- )`,
392
+ '_item_row_id': '_item_row_id',
393
+ 'items': `array_agg(struct(
394
+ ${itemStructClauses}
395
+ ))`,
308
396
  },
309
397
  },
310
- from: 'item_list_attribution',
311
- 'group by': '_item_list_attribution_row_id',
398
+ from: 'items_unnested',
399
+ 'group by': '_item_row_id',
312
400
  };
401
+ // Item-level enrichment joins (only attach when present). Each enrichment's LEFT JOIN
402
+ // binds against top-level columns on items_unnested (item-struct fields, or event_data
403
+ // joinKey columns carried up via extraJoinKeyColumns above).
404
+ if (itemEnrichmentsActive) {
405
+ rebuiltStep.joins = itemEnrichments.joins;
406
+ }
313
407
 
314
- return [attributionStep, dataStep];
408
+ return [unnestedStep, rebuiltStep];
315
409
  })() : null;
316
410
 
317
411
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
318
412
 
319
- // When item list attribution is enabled, override the items column and exclude _item_list_attribution_row_id
413
+ // When the items scaffold is active, override the items column and exclude _item_row_id
320
414
  // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
321
415
  const itemListOverrides = itemListSteps ? {
322
- items: 'coalesce(item_list_data.items, event_data.items)',
416
+ items: 'coalesce(items_rebuilt.items, event_data.items)',
323
417
  } : {};
324
- const itemListExcludedColumns = itemListSteps ? ['_item_list_attribution_row_id'] : [];
418
+ const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
419
+
420
+ // Wrap overlapping event-level enrichment columns in coalesce(enrich_<name>.<col>, <original>)
421
+ // so a missed JOIN falls back to the existing value. Purely additive columns (no overlap)
422
+ // pass through unchanged. Source-of-original precedence matches the final SELECT's spread
423
+ // order: itemListOverrides first (overrides finalColumnOrder for `items`), then
424
+ // session_data (wins over event_data in getFinalColumnOrder when both have the column).
425
+ const wrappedEventEnrichmentColumns = {};
426
+ for (const [col, enrichExpr] of Object.entries(eventEnrichments.columns)) {
427
+ let originalExpr;
428
+ if (col in itemListOverrides) {
429
+ originalExpr = itemListOverrides[col];
430
+ } else if (col in sessionDataStep.select.columns) {
431
+ originalExpr = `session_data.${col}`;
432
+ } else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
433
+ originalExpr = `event_data.${col}`;
434
+ }
435
+ wrappedEventEnrichmentColumns[col] = originalExpr
436
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
437
+ : enrichExpr;
438
+ }
439
+
440
+ // List all column names that have already been defined or should be left out
441
+ // Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
442
+ const alreadyMapped = [
443
+ ...Object.keys(finalColumnOrder),
444
+ ...Object.keys(itemListOverrides),
445
+ ...eventEnrichments.columnNames,
446
+ 'entrances',
447
+ mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
448
+ 'data_is_final',
449
+ 'export_type',
450
+ ...itemListExcludedColumns,
451
+ ];
325
452
 
326
453
  // Join event_data and session_data, include additional logic
327
454
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
@@ -332,24 +459,11 @@ ${excludedEventsSQL}`,
332
459
  // get the most important columns in the correct order
333
460
  ...finalColumnOrder,
334
461
  ...itemListOverrides,
335
- // get the rest of the event_data columns
336
- '[sql]event_data': utils.selectOtherColumns(
337
- eventDataStep,
338
- Object.keys(finalColumnOrder),
339
- [
340
- 'entrances',
341
- mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
342
- 'data_is_final',
343
- 'export_type',
344
- ...itemListExcludedColumns,
345
- ]
346
- ),
347
- // get the rest of the session_data columns
348
- '[sql]session_data': utils.selectOtherColumns(
349
- sessionDataStep,
350
- Object.keys(finalColumnOrder),
351
- []
352
- ),
462
+ // event-level enrichment columns: coalesce with the original when overlapping; otherwise add.
463
+ ...wrappedEventEnrichmentColumns,
464
+ // explicit pass-throughs for the rest of event_data and session_data
465
+ ...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
466
+ ...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
353
467
  // include additional columns
354
468
  row_inserted_timestamp: 'current_timestamp()',
355
469
  data_is_final: 'data_is_final',
@@ -360,28 +474,29 @@ ${excludedEventsSQL}`,
360
474
  joins: [
361
475
  ...(itemListSteps ? [{
362
476
  type: 'left',
363
- table: 'item_list_data',
364
- on: 'using(_item_list_attribution_row_id)'
477
+ table: 'items_rebuilt',
478
+ on: 'using(_item_row_id)'
365
479
  }] : []),
366
480
  {
367
481
  type: 'left',
368
482
  table: 'session_data',
369
483
  on: 'using(session_id)'
370
- }
484
+ },
485
+ // The left joins for the event-level enrichment CTEs
486
+ ...eventEnrichments.joins,
371
487
  ],
372
488
  where: helpers.incrementalDateFilter(mergedConfig)
373
489
  };
374
490
 
375
491
  const packageSteps = [
492
+ ...enrichmentSteps,
376
493
  eventDataStep,
377
494
  ...(itemListSteps ?? []),
378
495
  sessionDataStep,
379
496
  enhancedEventsStep,
380
497
  ];
381
498
 
382
- // Layer 2 validation: customSteps name must not collide with package step names.
383
- // Reserved set is derived from packageSteps at runtime (single source of truth) — what
384
- // is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on).
499
+ // Ensure that the custom step names don't collide with the default or data enrichment step names
385
500
  const customSteps = mergedConfig.customSteps ?? [];
386
501
  if (customSteps.length > 0) {
387
502
  const reservedNames = new Set(packageSteps.map(s => s.name));
@@ -395,6 +510,7 @@ ${excludedEventsSQL}`,
395
510
  }
396
511
  }
397
512
 
513
+ // Include custom steps last in the list
398
514
  const steps = [...packageSteps, ...customSteps];
399
515
 
400
516
  return utils.queryBuilder(steps);
@@ -201,11 +201,11 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
201
201
  }
202
202
  }
203
203
 
204
- // customSteps - optional array of queryBuilder step objects appended to the pipeline
205
- // Layer 1 (config shape): array, objects with non-empty name, no duplicates within customSteps.
204
+ // customSteps - optional array of queryBuilder step objects appended to the pipeline.
205
+ // Config-shape checks only: array, objects with non-empty name, no duplicates within customSteps.
206
206
  // Step-shape validation (clause keys, etc.) deferred to queryBuilder.
207
- // Collision-with-package-names check deferred to _generateEnhancedEventsSQL (Layer 2),
208
- // since the reserved set is config-dependent (e.g. item_list_* only exist when itemListAttribution is on).
207
+ // Collision-with-package-names check deferred to _generateEnhancedEventsSQL, since the
208
+ // reserved set is config-dependent (e.g. items_unnested / items_rebuilt only exist when itemListAttribution or an item-level enrichment is configured).
209
209
  if (config.customSteps !== undefined) {
210
210
  if (!Array.isArray(config.customSteps)) {
211
211
  throw new Error(`config.customSteps must be an array. Received: ${JSON.stringify(config.customSteps)}`);
@@ -225,6 +225,101 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
225
225
  seenNames.add(step.name);
226
226
  }
227
227
  }
228
+
229
+ // enrichments - optional array of declarative external-data enrichment specs.
230
+ // Config-shape checks only. Reserved-name collision and item-level joinKey resolution
231
+ // happen in _generateEnhancedEventsSQL, where the reserved set and item-level join targets
232
+ // are derived from the resolved config.
233
+ if (config.enrichments !== undefined) {
234
+ if (!Array.isArray(config.enrichments)) {
235
+ throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
236
+ }
237
+ const validLevels = ['event', 'item'];
238
+ const seenNames = new Set();
239
+ for (let i = 0; i < config.enrichments.length; i++) {
240
+ const entry = config.enrichments[i];
241
+ if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
242
+ throw new Error(`config.enrichments[${i}] must be a non-null object. Received: ${JSON.stringify(entry)}`);
243
+ }
244
+ if (typeof entry.name !== 'string' || !entry.name.trim()) {
245
+ throw new Error(`config.enrichments[${i}].name must be a non-empty string. Received: ${JSON.stringify(entry.name)}`);
246
+ }
247
+ if (seenNames.has(entry.name)) {
248
+ throw new Error(`config.enrichments contains duplicate name '${entry.name}'. Each enrichments entry must have a unique name.`);
249
+ }
250
+ seenNames.add(entry.name);
251
+ if (entry.level !== undefined && !validLevels.includes(entry.level)) {
252
+ throw new Error(`config.enrichments[${i}].level must be one of: ${validLevels.join(', ')}. Received: ${JSON.stringify(entry.level)}`);
253
+ }
254
+ // source: Dataform table reference object or backtick-quoted string
255
+ if (entry.source === undefined || entry.source === null) {
256
+ throw new Error(`config.enrichments[${i}].source is required.`);
257
+ }
258
+ if (isDataformTableReferenceObject(entry.source)) {
259
+ // Valid Dataform reference
260
+ } else if (typeof entry.source === 'string') {
261
+ if (!entry.source.trim()) {
262
+ throw new Error(`config.enrichments[${i}].source must be a non-empty string. Received empty string.`);
263
+ }
264
+ if (!/^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(entry.source.trim())) {
265
+ throw new Error(`config.enrichments[${i}].source must be in the format '\`project.dataset.table\`' (with backticks) or a Dataform table reference. Received: ${JSON.stringify(entry.source)}`);
266
+ }
267
+ } else {
268
+ throw new Error(`config.enrichments[${i}].source must be a Dataform table reference object or a string in format '\`project.dataset.table\`'. Received: ${JSON.stringify(entry.source)}`);
269
+ }
270
+ // joinKey: required, plain SQL identifier OR non-empty array of plain SQL identifiers.
271
+ // Plain identifier = ^[a-zA-Z_][a-zA-Z0-9_]*$ — no aliases (`id as user_id`), no backticks,
272
+ // no dotted paths. Users with mismatched dim-column names alias in an upstream Dataform view.
273
+ const sqlIdentifier = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
274
+ const aliasingHint = ' Aliases like \'id as user_id\' are not supported here; alias in an upstream Dataform view if your dim has a different column name.';
275
+ if (entry.joinKey === undefined || entry.joinKey === null) {
276
+ throw new Error(`config.enrichments[${i}].joinKey is required.`);
277
+ }
278
+ if (typeof entry.joinKey === 'string') {
279
+ if (!entry.joinKey.trim()) {
280
+ throw new Error(`config.enrichments[${i}].joinKey must be a non-empty string. Received empty string.`);
281
+ }
282
+ if (!sqlIdentifier.test(entry.joinKey)) {
283
+ throw new Error(`config.enrichments[${i}].joinKey must be a plain SQL identifier. Received: ${JSON.stringify(entry.joinKey)}.${aliasingHint}`);
284
+ }
285
+ } else if (Array.isArray(entry.joinKey)) {
286
+ if (entry.joinKey.length === 0) {
287
+ throw new Error(`config.enrichments[${i}].joinKey must be a non-empty array when provided as an array.`);
288
+ }
289
+ for (let j = 0; j < entry.joinKey.length; j++) {
290
+ const k = entry.joinKey[j];
291
+ if (typeof k !== 'string' || !k.trim()) {
292
+ throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a non-empty string. Received: ${JSON.stringify(k)}`);
293
+ }
294
+ if (!sqlIdentifier.test(k)) {
295
+ throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(k)}.${aliasingHint}`);
296
+ }
297
+ }
298
+ } else {
299
+ throw new Error(`config.enrichments[${i}].joinKey must be a string or a non-empty array of strings. Received: ${JSON.stringify(entry.joinKey)}`);
300
+ }
301
+ // columns: required, non-empty array of plain SQL identifiers (no aliasing).
302
+ if (!Array.isArray(entry.columns)) {
303
+ throw new Error(`config.enrichments[${i}].columns must be an array. Received: ${JSON.stringify(entry.columns)}`);
304
+ }
305
+ if (entry.columns.length === 0) {
306
+ throw new Error(`config.enrichments[${i}].columns must be non-empty. List the source columns to add to the output (excluding joinKey).`);
307
+ }
308
+ for (let j = 0; j < entry.columns.length; j++) {
309
+ const c = entry.columns[j];
310
+ if (typeof c !== 'string' || !c.trim()) {
311
+ throw new Error(`config.enrichments[${i}].columns[${j}] must be a non-empty string. Received: ${JSON.stringify(c)}`);
312
+ }
313
+ if (!sqlIdentifier.test(c)) {
314
+ throw new Error(`config.enrichments[${i}].columns[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(c)}.${aliasingHint}`);
315
+ }
316
+ }
317
+ // dedupe: optional boolean
318
+ if (entry.dedupe !== undefined && typeof entry.dedupe !== 'boolean') {
319
+ throw new Error(`config.enrichments[${i}].dedupe must be a boolean when provided. Received: ${JSON.stringify(entry.dedupe)}`);
320
+ }
321
+ }
322
+ }
228
323
  } catch (e) {
229
324
  e.message = `Config validation: ${e.message}`;
230
325
  throw e;