ga4-export-fixer 0.9.0-dev.7 → 0.9.0-dev.8

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.7",
3
+ "version": "0.9.0-dev.8",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -324,16 +324,19 @@ ${excludedEventsSQL}`,
324
324
  // enrichments throw "not yet supported" inside the utility — they will arrive in a later release.
325
325
  const { steps: enrichmentSteps, joins: enrichmentJoins, columns: enrichmentColumns,
326
326
  columnNames: enrichmentColumnNames } = utils.buildEnrichments(mergedConfig.enrichments);
327
- const enrichmentExcludedColumns = [...enrichmentColumnNames];
328
327
 
329
- // Only forward enrichment columns to each wildcard's EXCEPT input if they actually exist
330
- // in that wildcard's source CTE. Otherwise BigQuery rejects with "Column X in SELECT *
331
- // EXCEPT list does not exist". After M1, Object.keys(step.select.columns) is the complete
332
- // column set of both event_data and session_data — so the same predicate works for both.
333
- const eventDataExplicit = new Set(Object.keys(eventDataStep.select.columns));
334
- const sessionDataExplicit = new Set(Object.keys(sessionDataStep.select.columns));
335
- const eventDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => eventDataExplicit.has(c));
336
- const sessionDataEnrichmentExcept = enrichmentExcludedColumns.filter(c => sessionDataExplicit.has(c));
328
+ // Build the set of columns the outer SELECT already maps explicitly (so wildcards skip them)
329
+ // plus internal-only columns that should never reach enhanced_events.
330
+ const alreadyMapped = [
331
+ ...Object.keys(finalColumnOrder),
332
+ ...Object.keys(itemListOverrides),
333
+ ...enrichmentColumnNames,
334
+ 'entrances',
335
+ mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
336
+ 'data_is_final',
337
+ 'export_type',
338
+ ...itemListExcludedColumns,
339
+ ];
337
340
 
338
341
  // Join event_data and session_data, include additional logic
339
342
  // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
@@ -345,27 +348,10 @@ ${excludedEventsSQL}`,
345
348
  ...finalColumnOrder,
346
349
  ...itemListOverrides,
347
350
  // event-level enrichment columns: override matching explicit columns; new columns added.
348
- // Wildcard-column overlap is handled below via excludedColumns.
349
351
  ...enrichmentColumns,
350
- // get the rest of the event_data columns
351
- '[sql]event_data': utils.selectOtherColumns(
352
- eventDataStep,
353
- Object.keys(finalColumnOrder),
354
- [
355
- 'entrances',
356
- mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
357
- 'data_is_final',
358
- 'export_type',
359
- ...itemListExcludedColumns,
360
- ...eventDataEnrichmentExcept,
361
- ]
362
- ),
363
- // get the rest of the session_data columns
364
- '[sql]session_data': utils.selectOtherColumns(
365
- sessionDataStep,
366
- Object.keys(finalColumnOrder),
367
- sessionDataEnrichmentExcept,
368
- ),
352
+ // explicit pass-throughs for the rest of event_data and session_data
353
+ ...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
354
+ ...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
369
355
  // include additional columns
370
356
  row_inserted_timestamp: 'current_timestamp()',
371
357
  data_is_final: 'data_is_final',
package/utils.js CHANGED
@@ -474,53 +474,6 @@ const mergeDataformTableConfigurations = (defaultConfig, inputConfig = {}) => {
474
474
  return deepMerge(defaultConfig, inputConfig);
475
475
  };
476
476
 
477
- /**
478
- * Generates a SQL selection string for a given query step, excluding columns already defined elsewhere
479
- * or columns that should be excluded.
480
- *
481
- * This utility is helpful when joining tables/CTEs to avoid selecting duplicate or already-present columns.
482
- *
483
- * @param {Object} step - A queryBuilder structured step containing a `name` (CTE/table alias) and a `select.columns` object.
484
- * @param {string[]} [alreadyDefinedColumns=[]] - Columns that have already been defined and should be excluded from selection.
485
- * @param {string[]} [excludedColumns=[]] - Additional columns to explicitly exclude from selection.
486
- * @returns {string|undefined} A SQL select string (e.g. 'stepName.*' or 'stepName.* except (col1, col2)'), or undefined if all columns are excluded.
487
- */
488
- const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns = []) => {
489
- const stepName = step.name;
490
- const stepColumns = Object.keys(step.select.columns);
491
-
492
- // Columns in step.select.columns that should be excluded (already-defined or explicitly listed)
493
- const internalExcept = stepColumns.filter(
494
- column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
495
- );
496
-
497
- // Columns in excludedColumns that aren't enumerated in step.select.columns. These are
498
- // wildcard-sourced columns (e.g. default GA4 export columns coming through `event_data.*`
499
- // inside event_data's own select). The caller knows what to exclude; trust them.
500
- // BigQuery throws at dry-run if the column doesn't exist in the source — surfaces typos.
501
- // Filter out undefined/null entries (callers can pass conditional values like
502
- // `cond ? 'col' : undefined` for ergonomics).
503
- const externalExcept = excludedColumns.filter(
504
- c => typeof c === 'string' && c.length > 0 && !stepColumns.includes(c)
505
- );
506
-
507
- const allExcept = [...internalExcept, ...externalExcept];
508
-
509
- // If nothing is excluded, select everything
510
- if (allExcept.length === 0) {
511
- return `${stepName}.*`;
512
- }
513
-
514
- // If every enumerated column is excluded and there are no external excepts to apply,
515
- // there's nothing to select via the wildcard
516
- if (internalExcept.length === stepColumns.length && externalExcept.length === 0) {
517
- return;
518
- }
519
-
520
- return `${stepName}.* except (${allExcept.join(', ')})`;
521
- };
522
-
523
-
524
477
  /**
525
478
  * Builds a queryBuilder `select.columns` fragment that passes through every source column
526
479
  * not already covered by an explicit columns object.
@@ -641,6 +594,38 @@ const buildEnrichments = (enrichments) => {
641
594
  };
642
595
 
643
596
 
597
+ /**
598
+ * Builds a qualified pass-through fragment for spreading into a downstream SELECT's
599
+ * `select.columns`. For each column in `step.select.columns` not already in `alreadyCovered`,
600
+ * emits an entry of the form `{ <col>: '<step.name>.<col>' }`.
601
+ *
602
+ * Columns whose values in `step.select.columns` are `undefined` (the user-exclusion sentinel
603
+ * shape from getExcludedColumns) are skipped. Names in `alreadyCovered` that don't exist in
604
+ * `step.select.columns` are silently ignored — the loop only iterates `step.select.columns`,
605
+ * so unknown names cause no harm. This is the safety property that lets callers pass
606
+ * "everything that might collide" without pre-filtering.
607
+ *
608
+ * @param {Object} step - A queryBuilder step with a `name` and `select.columns` object.
609
+ * @param {Iterable<string>} alreadyCovered - Column names already mapped elsewhere in the
610
+ * downstream SELECT, plus any internal-only columns the downstream SELECT shouldn't re-emit.
611
+ * @returns {Object} A map of `{ <col>: '<step.name>.<col>' }` entries.
612
+ *
613
+ * @example
614
+ * buildQualifiedPassThroughs(eventDataStep, ['event_date', 'session_id', 'entrances']);
615
+ * // → { event_name: 'event_data.event_name', user_pseudo_id: 'event_data.user_pseudo_id', ... }
616
+ */
617
+ const buildQualifiedPassThroughs = (step, alreadyCovered) => {
618
+ const covered = new Set(alreadyCovered);
619
+ const passThroughs = {};
620
+ for (const [col, expr] of Object.entries(step.select.columns)) {
621
+ if (expr === undefined) continue;
622
+ if (covered.has(col)) continue;
623
+ passThroughs[col] = `${step.name}.${col}`;
624
+ }
625
+ return passThroughs;
626
+ };
627
+
628
+
644
629
  /**
645
630
  * Processes a date input string and returns a corresponding SQL date casting expression,
646
631
  * or passes through BigQuery SQL statements as-is.
@@ -715,9 +700,9 @@ module.exports = {
715
700
  queryBuilder,
716
701
  isDataformTableReferenceObject,
717
702
  setDataformContext,
718
- selectOtherColumns,
719
703
  buildPassThroughs,
720
704
  buildEnrichments,
705
+ buildQualifiedPassThroughs,
721
706
  processDate,
722
707
  getDatasetName
723
708
  };