ga4-export-fixer 0.9.0-dev.8 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -44,96 +44,100 @@ The goal of the package is to **speed up development** when building data models
44
44
  </td>
45
45
  </tr>
46
46
  <tr>
47
+ <td valign="top">
48
+ <b>🧬 Data Enrichments</b><br>
49
+ Join external lookup data (cohorts, product master, etc.) at row level or ecommerce item level via <code>enrichments</code>
50
+ </td>
47
51
  <td valign="top">
48
52
  <b>📐 Flexible Schema</b><br>
49
53
  Keeps the flexible structure of the original export with key fields promoted to columns for better query performance; partitioning &amp; clustering enabled
50
54
  </td>
55
+ </tr>
56
+ <tr>
51
57
  <td valign="top">
52
58
  <b>🤖 AI Agent Ready</b><br>
53
59
  Extensive table &amp; column descriptions for AI agents and humans
54
60
  </td>
55
- </tr>
56
- <tr>
57
61
  <td valign="top">
58
62
  <b>🔑 Session Identity Resolution</b><br>
59
63
  <code>user_id</code> resolved per session; <code>merged_user_id</code> coalesces with <code>user_pseudo_id</code>
60
64
  </td>
65
+ </tr>
66
+ <tr>
61
67
  <td valign="top">
62
68
  <b>📡 Session Traffic Sources</b><br>
63
69
  <code>session_first_traffic_source</code> and <code>session_traffic_source_last_click</code> computed automatically, adjusting for sessions that span midnight
64
70
  </td>
65
- </tr>
66
- <tr>
67
71
  <td valign="top">
68
72
  <b>📍 Landing Page Detection</b><br>
69
73
  Derived per session from the first page where <code>entrances > 0</code>
70
74
  </td>
75
+ </tr>
76
+ <tr>
71
77
  <td valign="top">
72
78
  <b>🔗 Page URL Parsing</b><br>
73
79
  Parsed <code>hostname</code>, <code>path</code>, <code>query</code>, and <code>query_params</code> from <code>page_location</code>
74
80
  </td>
75
- </tr>
76
- <tr>
77
81
  <td valign="top">
78
82
  <b>🛒 Ecommerce Data Fixes</b><br>
79
83
  Nullifies placeholder <code>transaction_id</code>; corrects <code>purchase_revenue</code> bugs
80
84
  </td>
85
+ </tr>
86
+ <tr>
81
87
  <td valign="top">
82
88
  <b>🏷️ Item List Attribution</b><br>
83
89
  Attributes <code>item_list_name</code>, <code>item_list_id</code>, and <code>item_list_index</code> from item selection events to downstream ecommerce events
84
90
  </td>
85
- </tr>
86
- <tr>
87
91
  <td valign="top">
88
92
  <b>⚙️ Event Parameter Handling</b><br>
89
93
  Promote event params to columns; include or exclude by name
90
94
  </td>
95
+ </tr>
96
+ <tr>
91
97
  <td valign="top">
92
98
  <b>📊 Session Parameters</b><br>
93
99
  Promote selected event parameters as <code>session_params</code>
94
100
  </td>
95
- </tr>
96
- <tr>
97
101
  <td valign="top">
98
102
  <b>⏱️ Custom Timestamp</b><br>
99
103
  Use a custom event parameter as primary timestamp with automatic fallback
100
104
  </td>
105
+ </tr>
106
+ <tr>
101
107
  <td valign="top">
102
108
  <b>🔒 Schema Lock</b><br>
103
109
  Lock table schema to a specific GA4 export date to prevent schema drift
104
110
  </td>
105
- </tr>
106
- <tr>
107
111
  <td valign="top">
108
112
  <b>✅ Data Freshness Tracking</b><br>
109
113
  <code>data_is_final</code> flag and <code>export_type</code> label on every row
110
114
  </td>
115
+ </tr>
116
+ <tr>
111
117
  <td valign="top">
112
118
  <b>🔍 Data Quality Assertions</b><br>
113
119
  Built-in daily assertion reconciles sessions, events, and revenue between the enhanced table and raw export
114
120
  </td>
115
- </tr>
116
- <tr>
117
121
  <td valign="top">
118
122
  <b>🔃 Selective Re-processing</b><br>
119
123
  Re-process a date range without full table rebuild using <code>incrementalStartOverride</code> and <code>incrementalEndOverride</code>
120
124
  </td>
125
+ </tr>
126
+ <tr>
121
127
  <td valign="top">
122
128
  <b>📑 Batch Processing</b><br>
123
129
  Process large exports in smaller batches via <code>numberOfDaysToProcess</code>
124
130
  </td>
125
- </tr>
126
- <tr>
127
131
  <td valign="top">
128
132
  <b>🕐 Timezone-Aware Datetime</b><br>
129
133
  <code>event_datetime</code> converted to a configurable IANA timezone
130
134
  </td>
135
+ </tr>
136
+ <tr>
131
137
  <td valign="top">
132
138
  <b>🧩 Custom Processing Steps</b><br>
133
139
  Append user-defined CTEs via <code>customSteps</code> to derive new columns or join external tables
134
140
  </td>
135
- </tr>
136
- <tr>
137
141
  <td valign="top">
138
142
  <b>🛡️ Zero Dependencies</b><br>
139
143
  No additional external dependencies added to your Dataform repository
@@ -145,7 +149,6 @@ The goal of the package is to **speed up development** when building data models
145
149
 
146
150
  Features under consideration for future releases:
147
151
 
148
- - Data enrichment (item-level, session-level, event-level)
149
152
  - Aggregated tables (ga4_session, ga4_ecommerce...)
150
153
  - Web and app specific default configurations
151
154
  - Custom channel grouping
@@ -169,7 +172,7 @@ Include the package in the package.json file in your Dataform repository.
169
172
  {
170
173
  "dependencies": {
171
174
  "@dataform/core": "3.0.42",
172
- "ga4-export-fixer": "0.8.0"
175
+ "ga4-export-fixer": "0.9.0"
173
176
  }
174
177
  }
175
178
  ```
@@ -198,7 +201,8 @@ Create a new **ga4_events_enhanced** table using a **.js** file in your reposito
198
201
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
199
202
 
200
203
  const config = {
201
- sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT
204
+ // using hard-coded GA4 export path
205
+ sourceTable: '`project.analytics_12345.events_*`'
202
206
  };
203
207
 
204
208
  ga4EventsEnhanced.createTable(publish, config);
@@ -212,6 +216,7 @@ ga4EventsEnhanced.createTable(publish, config);
212
216
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
213
217
 
214
218
  const config = {
219
+ // using a declared GA4 export table, referenced via the table reference object
215
220
  sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
216
221
  // use dataformTableConfig to make changes to the default Dataform table configuration
217
222
  dataformTableConfig: {
@@ -290,7 +295,8 @@ js {
290
295
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
291
296
 
292
297
  const config = {
293
- sourceTable: ref(constants.GA4_TABLES.MY_GA4_EXPORT),
298
+ // using hard-coded GA4 export path
299
+ sourceTable: '`project.analytics_12345.events_*`',
294
300
  self: self(),
295
301
  incremental: incremental()
296
302
  };
@@ -534,13 +540,13 @@ For typical use cases this is the right tool; reach for `customSteps` only when
534
540
  | Field | Type | Required | Description |
535
541
  | --- | --- | --- | --- |
536
542
  | `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
537
- | `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
543
+ | `level` | `'row'` / `'item'` | No, defaults to `'row'` | Join grain. `'row'` joins external dim data onto each row of `enhanced_events` (any column on `enhanced_events` as the key). `'item'` joins external dim data onto each item inside the `items` array (any field on the items struct or any event_data column as the key). |
538
544
  | `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
539
- | `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
540
- | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
545
+ | `joinKey` | string / string[] | Yes | For `level: 'row'`: column name(s) on `enhanced_events`. For `level: 'item'`: field name(s) on the items struct (e.g. `'item_id'`) or column name(s) on `event_data` (e.g. `'user_pseudo_id'`). Composite keys (array) compile to `USING(col1, col2, ...)`. |
546
+ | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
541
547
  | `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
542
548
 
543
- **Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
549
+ **Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`.
544
550
 
545
551
  **Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
546
552
 
@@ -548,7 +554,7 @@ For typical use cases this is the right tool; reach for `customSteps` only when
548
554
  enrichments: [
549
555
  {
550
556
  name: 'cohorts',
551
- level: 'event',
557
+ // level omitted → defaults to 'row'
552
558
  source: { schema: 'analytics', name: 'user_cohorts' },
553
559
  joinKey: 'user_pseudo_id',
554
560
  columns: ['cohort_label', 'lifecycle_stage'],
@@ -562,7 +568,7 @@ enrichments: [
562
568
  enrichments: [
563
569
  {
564
570
  name: 'segments',
565
- level: 'event',
571
+ level: 'row',
566
572
  source: '`my-project.analytics.daily_user_segments`',
567
573
  joinKey: ['event_date', 'user_pseudo_id'],
568
574
  columns: ['segment'],
@@ -571,7 +577,7 @@ enrichments: [
571
577
  ],
572
578
  ```
573
579
 
574
- **Example** — fix a promoted event parameter via enrichment (replacement case):
580
+ **Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
575
581
 
576
582
  ```javascript
577
583
  {
@@ -579,18 +585,34 @@ enrichments: [
579
585
  enrichments: [
580
586
  {
581
587
  name: 'titles',
582
- level: 'event',
588
+ level: 'row',
583
589
  source: { schema: 'analytics', name: 'page_title_overrides' },
584
590
  joinKey: 'page_location',
585
- columns: ['page_title'], // overlaps the promoted column → replaces it
591
+ columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
586
592
  },
587
593
  ],
588
594
  }
589
595
  ```
590
596
 
597
+ **Example** — item-level enrichment: attach product master data to each item via `item_id`. The enrichment flows into the `items` array struct; `margin_bucket` is added as a new item-struct field, and `item_category` overlap-coalesces against the original. Item-level enrichment columns do NOT appear at the row grain — they live inside `items[].<col>`:
598
+
599
+ ```javascript
600
+ enrichments: [
601
+ {
602
+ name: 'products',
603
+ level: 'item',
604
+ source: { schema: 'analytics', name: 'product_master' },
605
+ joinKey: 'item_id', // joins on item.item_id
606
+ columns: ['margin_bucket', 'item_category'], // margin_bucket is additive; item_category overlap-coalesces
607
+ },
608
+ ],
609
+ ```
610
+
611
+ For `level: 'item'`, valid `joinKey` values are any field on the GA4 items struct (`item_id`, `item_category`, etc.) or any column on `event_data` (`user_pseudo_id`, `event_date`, etc.). A row-level and an item-level enrichment may share the same column name (e.g. both writing `cohort`) — the two columns target structurally distinct slots (`enhanced_events.cohort` at row grain vs `items[].cohort` inside the items array) and are not in collision.
612
+
591
613
  > **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
592
614
 
593
- > **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Replaced by enrichment '<name>' (...). Original: <description>` for replacements). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
615
+ > **Note:** Row-level enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default. Item-level enrichment columns do not receive auto-generated descriptions (BigQuery does not surface per-field descriptions on STRUCT-array fields cleanly through Dataform's column-description mechanism).
594
616
 
595
617
  > **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
596
618
 
package/documentation.js CHANGED
@@ -173,8 +173,8 @@ const getColumnDescriptions = (config, columnMetadata) => {
173
173
  // Only row-level enrichments get auto-generated column descriptions — item-level enrichments are skipped here.
174
174
  if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
175
175
  config.enrichments.forEach(e => {
176
- const level = e.level ?? 'event';
177
- if (level !== 'event') return;
176
+ const level = e.level ?? 'row';
177
+ if (level !== 'row') return;
178
178
  const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
179
179
  const joinKeyText = joinKeys.join(', ');
180
180
  const sourceText = renderEnrichmentSource(e.source);
@@ -186,7 +186,7 @@ const getColumnDescriptions = (config, columnMetadata) => {
186
186
  ? existing.description
187
187
  : null;
188
188
  const newDesc = existingText
189
- ? `Replaced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}). Original: ${existingText}`
189
+ ? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
190
190
  : `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
191
191
  // If the original was a struct-shaped entry, preserve the structure but replace the description.
192
192
  // Otherwise, set as a plain string.
@@ -140,6 +140,55 @@ const ga4ExportColumns = [
140
140
  */
141
141
  const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
142
142
 
143
+ /**
144
+ * The standard GA4 BigQuery export items-struct field names, based on the official schema.
145
+ * Listed in GA4's source order — `items_rebuilt`'s explicit struct construction emits fields
146
+ * in this order, and consumers may reasonably depend on the items-struct schema field order
147
+ * matching GA4's own.
148
+ *
149
+ * `item_params` is a nested REPEATED RECORD and projects through as a single struct entry
150
+ * (no per-key handling).
151
+ *
152
+ * list updated 2026-05-12
153
+ */
154
+ const ga4ItemStructFields = [
155
+ "item_id",
156
+ "item_name",
157
+ "item_brand",
158
+ "item_variant",
159
+ "item_category",
160
+ "item_category2",
161
+ "item_category3",
162
+ "item_category4",
163
+ "item_category5",
164
+ "price_in_usd",
165
+ "price",
166
+ "quantity",
167
+ "item_revenue_in_usd",
168
+ "item_revenue",
169
+ "item_refund_in_usd",
170
+ "item_refund",
171
+ "coupon",
172
+ "affiliation",
173
+ "location_id",
174
+ "item_list_id",
175
+ "item_list_name",
176
+ "item_list_index",
177
+ "promotion_id",
178
+ "promotion_name",
179
+ "creative_name",
180
+ "creative_slot",
181
+ "item_params"
182
+ ];
183
+
184
+ /**
185
+ * Checks whether a given field name is part of the standard GA4 BigQuery export items struct.
186
+ *
187
+ * @param {string} fieldName - The name of the field to check.
188
+ * @returns {boolean} True if the field name is a standard items-struct field, otherwise false.
189
+ */
190
+ const isGa4ItemStructField = (fieldName) => ga4ItemStructFields.includes(fieldName);
191
+
143
192
  /**
144
193
  * Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
145
194
  *
@@ -186,13 +235,17 @@ const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs)
186
235
  frameBounds = `range between ${lookbackMicros} preceding and current row`;
187
236
  }
188
237
 
189
- return `last_value(
238
+ // Suppress attribution for:
239
+ // - refund events (outside the selection-driven journey window)
240
+ // - unconsented events (user_pseudo_id is NULL) — attribution requires a visitor
241
+ // identity to stitch select_* events to later receivers within the same visitor.
242
+ return `if(event_name = 'refund' or user_pseudo_id is null, null, last_value(
190
243
  if(${selectEvents}, ${structExpr}, null) ignore nulls
191
244
  ) over(
192
245
  partition by ${partitionBy}
193
246
  order by ${timestampColumn} asc
194
247
  ${frameBounds}
195
- )`;
248
+ ))`;
196
249
  };
197
250
 
198
251
  /**
@@ -209,6 +262,10 @@ const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs)
209
262
  * the rows are interchangeable, so arbitrary row number assignment between them
210
263
  * produces the same result.
211
264
  *
265
+ * Unconsented events (user_pseudo_id is NULL) use an empty-string sentinel inside
266
+ * concat — without it, CONCAT NULL-propagates and the row_id becomes NULL, which
267
+ * would prevent enrichments from applying to such events.
268
+ *
212
269
  * @param {string} ecommerceEventsFilter - Comma-separated, quoted list of event names
213
270
  * (e.g., "'purchase', 'add_to_cart'").
214
271
  * @returns {string} SQL expression that evaluates to the row id or NULL.
@@ -217,7 +274,7 @@ const itemRowId = (ecommerceEventsFilter) => {
217
274
  return `if(
218
275
  event_name in (${ecommerceEventsFilter}),
219
276
  farm_fingerprint(concat(
220
- user_pseudo_id,
277
+ ifnull(user_pseudo_id, ''),
221
278
  cast(event_timestamp as string),
222
279
  event_name,
223
280
  to_json_string(items),
@@ -257,6 +314,8 @@ module.exports = {
257
314
  isFinalData,
258
315
  ga4ExportColumns,
259
316
  isGa4ExportColumn,
317
+ ga4ItemStructFields,
318
+ isGa4ItemStructField,
260
319
  getGa4ExportType,
261
320
  itemListAttributionExpr,
262
321
  itemRowId,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.9.0-dev.8",
3
+ "version": "0.9.0",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -69,8 +69,9 @@ const ga4EventsEnhancedConfig = {
69
69
  // each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
70
70
  customSteps: [],
71
71
  // declarative external-data enrichments joined into the pipeline
72
- // each entry: { name, level: 'event' | 'item', source, joinKey, columns, dedupe? }
73
- // 'item' level is accepted at config time but throws at SQL gen not yet implemented
72
+ // each entry: { name, source, joinKey, columns, level?, dedupe? }
73
+ // `level` is optional and defaults to 'row' (one row of the enclosing table per join match).
74
+ // 'item' targets the items[] array (GA4-specific, ecommerce events only).
74
75
  enrichments: [],
75
76
  };
76
77
 
@@ -162,8 +162,18 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
162
162
 
163
163
  // item list attribution config
164
164
  const itemListAttribution = mergedConfig.itemListAttribution;
165
- const ecommerceEventsFilter = itemListAttribution
166
- ? helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ')
165
+
166
+ // Build enrichment-source CTEs and gather per-level join/column data. The utility routes
167
+ // row-level and item-level entries through separate output channels. Done up here so the
168
+ // items-scaffold activation state is known before building event_data (which needs
169
+ // _item_row_id when the scaffold is active for any reason).
170
+ const { steps: enrichmentSteps, row: rowEnrichments, item: itemEnrichments }
171
+ = utils.buildEnrichments(mergedConfig.enrichments);
172
+ const itemEnrichmentsActive = itemEnrichments.joins.length > 0;
173
+ const itemsScaffoldActive = !!itemListAttribution || itemEnrichmentsActive;
174
+
175
+ const ecommerceEventsFilter = itemsScaffoldActive
176
+ ? helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ')
167
177
  : null;
168
178
 
169
179
  // auto-adjust bufferDays for time-based item list attribution lookback
@@ -220,7 +230,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
220
230
  // ecommerce
221
231
  ecommerce: helpers.fixEcommerceStruct('ecommerce'),
222
232
  // assign a unique row id, used for handling item-level attribution and enrichment
223
- _item_row_id: itemListAttribution ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
233
+ _item_row_id: itemsScaffoldActive ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
224
234
  // flag if the data is "final" and is not expected to change anymore
225
235
  data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
226
236
  export_type: helpers.getGa4ExportType('_table_suffix'),
@@ -263,74 +273,183 @@ ${excludedEventsSQL}`,
263
273
  'group by': 'session_id',
264
274
  };
265
275
 
276
+ // Validate item-level joinKey columns and collect any event_data columns that need to
277
+ // be carried up to items_unnested as top-level columns (so the LEFT JOIN inside
278
+ // items_rebuilt can USING(...) on them). Item-struct fields are already top-level on
279
+ // items_unnested and need no extension.
280
+ const itemJoinKeysFromEventData = new Set();
281
+ for (const [i, e] of (mergedConfig.enrichments ?? []).entries()) {
282
+ const level = e.level ?? 'row';
283
+ if (level !== 'item') continue;
284
+ const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
285
+ for (const c of joinKeys) {
286
+ if (helpers.ga4ItemStructFields.includes(c)) {
287
+ // Already a top-level column on items_unnested.
288
+ } else if (c in eventDataStep.select.columns && eventDataStep.select.columns[c] !== undefined) {
289
+ itemJoinKeysFromEventData.add(c);
290
+ } else {
291
+ throw new Error(
292
+ `config.enrichments[${i}] (name: '${e.name}') uses item-level joinKey '${c}', ` +
293
+ `which is neither a field on the GA4 items struct (helpers.ga4ItemStructFields) ` +
294
+ `nor a column on event_data. Valid item-level joinKeys are item-struct fields ` +
295
+ `(e.g. item_id, item_category) or any event_data column (e.g. user_pseudo_id, event_date).`
296
+ );
297
+ }
298
+ }
299
+ }
300
+
266
301
  // Shared item-array CTEs:
267
- // 1. items_unnested: unnest items from ecommerce events, compute attribution via window function
268
- // 2. items_rebuilt: re-aggregate items with attributed list fields
269
- const itemListSteps = itemListAttribution ? (() => {
270
- const attrExpr = helpers.itemListAttributionExpr(
271
- itemListAttribution.lookbackType,
272
- timestampColumn,
273
- itemListAttribution.lookbackTimeMs
274
- );
302
+ // 1. items_unnested: unnest items from ecommerce events; LAST_VALUE attribution window
303
+ // is emitted only when itemListAttribution is configured.
304
+ // 2. items_rebuilt: re-aggregate items via explicit struct(...) construction;
305
+ // LEFT JOIN enrich_<name> for each item-level enrichment.
306
+ // Activation: emitted when EITHER itemListAttribution is configured OR at least one
307
+ // item-level enrichment is present.
308
+ const itemListSteps = itemsScaffoldActive ? (() => {
275
309
  const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
276
310
 
311
+ // Flatten the item struct: every standard items-struct field is selected as a
312
+ // top-level column of items_unnested. This makes downstream joins simpler
313
+ // (LEFT JOIN ... USING(item_id) works without aliasing tricks) and lets items_rebuilt
314
+ // reference fields as bare column names instead of `item.<col>`.
315
+ const itemFieldColumns = {};
316
+ for (const f of helpers.ga4ItemStructFields) {
317
+ itemFieldColumns[f] = `item.${f}`;
318
+ }
319
+
320
+ // Carry up any event_data joinKey columns used by item-level enrichments so the
321
+ // USING(...) clause in items_rebuilt can bind against top-level identifiers.
322
+ // Skip any that are already among the base columns (_item_row_id, event_name, item-struct fields).
323
+ const baseColumnNames = new Set(['_item_row_id', 'event_name', ...Object.keys(itemFieldColumns)]);
324
+ const extraJoinKeyColumns = {};
325
+ for (const c of itemJoinKeysFromEventData) {
326
+ if (!baseColumnNames.has(c)) {
327
+ extraJoinKeyColumns[c] = c;
328
+ }
329
+ }
330
+
331
+ // items_unnested base columns. The _item_list_attr struct (LAST_VALUE window) is
332
+ // added only when itemListAttribution is configured — when only item enrichments
333
+ // are active, the window function is omitted entirely for cleaner SQL.
334
+ const unnestedSelectColumns = {
335
+ '_item_row_id': '_item_row_id',
336
+ 'event_name': 'event_name',
337
+ ...itemFieldColumns,
338
+ ...extraJoinKeyColumns,
339
+ };
340
+ if (itemListAttribution) {
341
+ unnestedSelectColumns._item_list_attr = helpers.itemListAttributionExpr(
342
+ itemListAttribution.lookbackType,
343
+ timestampColumn,
344
+ itemListAttribution.lookbackTimeMs
345
+ );
346
+ }
347
+
277
348
  const unnestedStep = {
278
349
  name: 'items_unnested',
279
- select: {
280
- columns: {
281
- '_item_row_id': '_item_row_id',
282
- 'event_name': 'event_name',
283
- // event_date is carried forward for ability to use it in data enrichment joins
284
- 'event_date': 'event_date',
285
- 'item': 'item',
286
- '_item_list_attr': attrExpr,
287
- },
288
- },
350
+ select: { columns: unnestedSelectColumns },
289
351
  from: 'event_data, unnest(items) as item',
290
352
  where: `event_name in (${ecommerceEventsFilter})`,
291
353
  };
292
354
 
355
+ // Build the per-field expression map for the items struct. Seed with the canonical
356
+ // GA4 items-struct fields — each references the matching top-level column on
357
+ // items_unnested. When itemListAttribution is configured, override the three
358
+ // attribution entries with their package-generated coalesce-with-passthrough
359
+ // expressions. Item-level enrichment columns layer on top via the spread below.
360
+ // References are qualified with `items_unnested.` so that overlapping item-level
361
+ // enrichments (which JOIN against enrich_<name> CTEs that may share column names)
362
+ // do not produce ambiguous bare-column references.
363
+ const preItemExpressions = {};
364
+ for (const f of helpers.ga4ItemStructFields) {
365
+ preItemExpressions[f] = `items_unnested.${f}`;
366
+ }
367
+ if (itemListAttribution) {
368
+ preItemExpressions.item_list_name = `coalesce(if(${passthroughEvents}, items_unnested.item_list_name, _item_list_attr.item_list_name), '(not set)')`;
369
+ preItemExpressions.item_list_id = `coalesce(if(${passthroughEvents}, items_unnested.item_list_id, _item_list_attr.item_list_id), '(not set)')`;
370
+ preItemExpressions.item_list_index = `coalesce(if(${passthroughEvents}, items_unnested.item_list_index, _item_list_attr.item_list_index))`;
371
+ }
372
+
373
+ // Wrap overlapping item-level enrichment columns in coalesce(<enrichExpr>, <originalExpr>)
374
+ // so a missed JOIN falls back to the existing item field value. Purely additive
375
+ // columns (no overlap) pass through unchanged.
376
+ const wrappedItemEnrichmentColumns = {};
377
+ for (const [col, enrichExpr] of Object.entries(itemEnrichments.columns)) {
378
+ const originalExpr = preItemExpressions[col];
379
+ wrappedItemEnrichmentColumns[col] = originalExpr
380
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
381
+ : enrichExpr;
382
+ }
383
+
384
+ // Final struct: standard fields first, then enrichment overrides spread on top
385
+ // (overlapping keys replace preItemExpressions entries; additive keys are appended).
386
+ const finalItemStructFields = { ...preItemExpressions, ...wrappedItemEnrichmentColumns };
387
+
388
+ const itemStructClauses = Object.entries(finalItemStructFields)
389
+ .map(([col, expr]) => `${expr} as ${col}`)
390
+ .join(',\n ');
391
+
293
392
  const rebuiltStep = {
294
393
  name: 'items_rebuilt',
295
394
  select: {
296
395
  columns: {
297
396
  '_item_row_id': '_item_row_id',
298
- 'items': `array_agg(
299
- (select as struct item.* replace(
300
- coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
301
- coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
302
- coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
303
- ))
304
- )`,
397
+ 'items': `array_agg(struct(
398
+ ${itemStructClauses}
399
+ ))`,
305
400
  },
306
401
  },
307
402
  from: 'items_unnested',
308
403
  'group by': '_item_row_id',
309
404
  };
405
+ // Item-level enrichment joins (only attach when present). Each enrichment's LEFT JOIN
406
+ // binds against top-level columns on items_unnested (item-struct fields, or event_data
407
+ // joinKey columns carried up via extraJoinKeyColumns above).
408
+ if (itemEnrichmentsActive) {
409
+ rebuiltStep.joins = itemEnrichments.joins;
410
+ }
310
411
 
311
412
  return [unnestedStep, rebuiltStep];
312
413
  })() : null;
313
414
 
314
415
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
315
416
 
316
- // When item list attribution is enabled, override the items column and exclude _item_row_id
317
- // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
417
+ // When the items scaffold is active, override the items column and exclude _item_row_id.
418
+ // ifnull(..., []) preserves the empty-array shape for events that have no items_rebuilt
419
+ // match (non-ecommerce events, or ecommerce events with empty items arrays). The empty
420
+ // array literal is type-inferred from items_rebuilt.items, which includes any item-level
421
+ // enrichment columns — so additive enrichments don't cause a struct-schema mismatch.
318
422
  const itemListOverrides = itemListSteps ? {
319
- items: 'coalesce(items_rebuilt.items, event_data.items)',
423
+ items: 'ifnull(items_rebuilt.items, [])',
320
424
  } : {};
321
425
  const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
322
426
 
323
- // Build enrichment-source CTEs and gather event-level join/column data. Item-level
324
- // enrichments throw "not yet supported" inside the utility they will arrive in a later release.
325
- const { steps: enrichmentSteps, joins: enrichmentJoins, columns: enrichmentColumns,
326
- columnNames: enrichmentColumnNames } = utils.buildEnrichments(mergedConfig.enrichments);
427
+ // Wrap overlapping row-level enrichment columns in coalesce(enrich_<name>.<col>, <original>)
428
+ // so a missed JOIN falls back to the existing value. Purely additive columns (no overlap)
429
+ // pass through unchanged. Source-of-original precedence matches the final SELECT's spread
430
+ // order: itemListOverrides first (overrides finalColumnOrder for `items`), then
431
+ // session_data (wins over event_data in getFinalColumnOrder when both have the column).
432
+ const wrappedRowEnrichmentColumns = {};
433
+ for (const [col, enrichExpr] of Object.entries(rowEnrichments.columns)) {
434
+ let originalExpr;
435
+ if (col in itemListOverrides) {
436
+ originalExpr = itemListOverrides[col];
437
+ } else if (col in sessionDataStep.select.columns) {
438
+ originalExpr = `session_data.${col}`;
439
+ } else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
440
+ originalExpr = `event_data.${col}`;
441
+ }
442
+ wrappedRowEnrichmentColumns[col] = originalExpr
443
+ ? `coalesce(${enrichExpr}, ${originalExpr})`
444
+ : enrichExpr;
445
+ }
327
446
 
328
- // Build the set of columns the outer SELECT already maps explicitly (so wildcards skip them)
329
- // plus internal-only columns that should never reach enhanced_events.
447
+ // List all column names that have already been defined or should be left out
448
+ // Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
330
449
  const alreadyMapped = [
331
450
  ...Object.keys(finalColumnOrder),
332
451
  ...Object.keys(itemListOverrides),
333
- ...enrichmentColumnNames,
452
+ ...rowEnrichments.columnNames,
334
453
  'entrances',
335
454
  mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
336
455
  'data_is_final',
@@ -347,8 +466,8 @@ ${excludedEventsSQL}`,
347
466
  // get the most important columns in the correct order
348
467
  ...finalColumnOrder,
349
468
  ...itemListOverrides,
350
- // event-level enrichment columns: override matching explicit columns; new columns added.
351
- ...enrichmentColumns,
469
+ // row-level enrichment columns: coalesce with the original when overlapping; otherwise add.
470
+ ...wrappedRowEnrichmentColumns,
352
471
  // explicit pass-throughs for the rest of event_data and session_data
353
472
  ...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
354
473
  ...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
@@ -370,8 +489,8 @@ ${excludedEventsSQL}`,
370
489
  table: 'session_data',
371
490
  on: 'using(session_id)'
372
491
  },
373
- // Event-level enrichment joins go last so they apply on top of the package's own joins.
374
- ...enrichmentJoins,
492
+ // The left joins for the row-level enrichment ctes
493
+ ...rowEnrichments.joins,
375
494
  ],
376
495
  where: helpers.incrementalDateFilter(mergedConfig)
377
496
  };
@@ -384,10 +503,7 @@ ${excludedEventsSQL}`,
384
503
  enhancedEventsStep,
385
504
  ];
386
505
 
387
- // Layer 2 validation: customSteps name must not collide with package step names.
388
- // Reserved set is derived from packageSteps at runtime (single source of truth) — what
389
- // is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on,
390
- // and enrich_* names exist only when enrichments are configured).
506
+ // Ensure that the custom step names don't collide with the default or data enrichment step names
391
507
  const customSteps = mergedConfig.customSteps ?? [];
392
508
  if (customSteps.length > 0) {
393
509
  const reservedNames = new Set(packageSteps.map(s => s.name));
@@ -401,6 +517,7 @@ ${excludedEventsSQL}`,
401
517
  }
402
518
  }
403
519
 
520
+ // Include custom steps last in the list
404
521
  const steps = [...packageSteps, ...customSteps];
405
522
 
406
523
  return utils.queryBuilder(steps);
@@ -201,11 +201,11 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
201
201
  }
202
202
  }
203
203
 
204
- // customSteps - optional array of queryBuilder step objects appended to the pipeline
205
- // Layer 1 (config shape): array, objects with non-empty name, no duplicates within customSteps.
204
+ // customSteps - optional array of queryBuilder step objects appended to the pipeline.
205
+ // Config-shape checks only: array, objects with non-empty name, no duplicates within customSteps.
206
206
  // Step-shape validation (clause keys, etc.) deferred to queryBuilder.
207
- // Collision-with-package-names check deferred to _generateEnhancedEventsSQL (Layer 2),
208
- // since the reserved set is config-dependent (e.g. item_list_* only exist when itemListAttribution is on).
207
+ // Collision-with-package-names check deferred to _generateEnhancedEventsSQL, since the
208
+ // reserved set is config-dependent (e.g. item_list_* only exist when itemListAttribution is on).
209
209
  if (config.customSteps !== undefined) {
210
210
  if (!Array.isArray(config.customSteps)) {
211
211
  throw new Error(`config.customSteps must be an array. Received: ${JSON.stringify(config.customSteps)}`);
@@ -227,14 +227,14 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
227
227
  }
228
228
 
229
229
  // enrichments - optional array of declarative external-data enrichment specs.
230
- // This block performs Layer 1 (config-shape) checks. Layer 2 checks (reserved-name collision
231
- // + item-level deferral throw) live in _generateEnhancedEventsSQL — the reserved set is
232
- // config-dependent and the item-level deferral throws there once the SQL is built.
230
+ // Config-shape checks only. Reserved-name collision and item-level joinKey resolution
231
+ // happen in _generateEnhancedEventsSQL, where the reserved set and item-level join targets
232
+ // are derived from the resolved config.
233
233
  if (config.enrichments !== undefined) {
234
234
  if (!Array.isArray(config.enrichments)) {
235
235
  throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
236
236
  }
237
- const validLevels = ['event', 'item'];
237
+ const validLevels = ['row', 'item'];
238
238
  const seenNames = new Set();
239
239
  for (let i = 0; i < config.enrichments.length; i++) {
240
240
  const entry = config.enrichments[i];
package/utils.js CHANGED
@@ -515,48 +515,53 @@ const buildPassThroughs = (explicitColumns, sourceColumns) => {
515
515
 
516
516
  /**
517
517
  * Builds the per-enrichment CTE definitions, JOIN clauses, and column-name mappings for the
518
- * declarative `enrichments` feature.
518
+ * declarative `enrichments` feature. Routes row-level and item-level entries through
519
+ * separate output channels so the caller can attach them to different downstream CTEs.
519
520
  *
520
521
  * Pure config-to-data mapping. No knowledge of downstream CTEs or specific table modules —
521
522
  * intended to be called by any table module that exposes an `enrichments` config field.
522
523
  *
523
- * Encapsulates two generation-time throws:
524
- * - level: 'item' (not yet supported; deferred per design_docs/planned/data-enrichments.md Q15).
525
- * - Enrichment-vs-enrichment column collisions (two enrichments targeting the same column).
524
+ * Encapsulates one generation-time throw:
525
+ * - Same-level enrichment-vs-enrichment column collisions (two row-level enrichments or
526
+ * two item-level enrichments targeting the same column). Cross-level same-name is allowed —
527
+ * the two columns target structurally distinct slots (e.g. `enhanced_events.<col>` vs
528
+ * `items[].<col>`).
526
529
  *
527
530
  * @param {Array<Object>} enrichments - Validated enrichment entries. Each entry has fields:
528
- * { name, level, source, joinKey, columns, dedupe? } per data-enrichments.md Q8.
529
- * @returns {Object} A struct with five fields:
530
- * - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per entry).
531
- * - `joins` — array of LEFT JOIN clauses to attach downstream (one per entry).
532
- * - `columns` — map of `{ <enrichmentColumn>: 'enrich_<name>.<col>' }` for spreading into a
533
- * downstream SELECT's `select.columns`.
534
- * - `columnNames` — Set of all enrichment column names (used by callers for overlap detection
535
- * against downstream CTEs).
536
- * - `columnOwner` — map of `{ <column>: { i, name } }` recording which enrichment owns each
537
- * column; preserved for diagnostics.
531
+ * { name, source, joinKey, columns, level?, dedupe? }. `level` is 'row' (default) or 'item'.
532
+ * 'row' means one row of the enclosing table per join match; 'item' targets a nested array
533
+ * (currently only the GA4 items[] array).
534
+ * @returns {Object} A struct with four fields:
535
+ * - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per
536
+ * entry, regardless of level — all source CTEs go to the top of the pipeline).
537
+ * - `row` — { joins, columns, columnNames } for row-level enrichments. Caller attaches
538
+ * `joins` to the row-grained downstream CTE (e.g. `enhanced_events`) and spreads `columns`
539
+ * into that CTE's `select.columns`.
540
+ * - `item` — { joins, columns, columnNames } for item-level enrichments. Caller attaches
541
+ * `joins` to the item-grained downstream CTE (e.g. `items_rebuilt`) and folds `columns`
542
+ * into that CTE's struct construction.
543
+ * - `columnOwner` — map of `{ <column>: { i, name, level } }` recording which enrichment
544
+ * owns each column. The `level` field distinguishes cross-level same-name entries.
538
545
  *
539
- * @throws {Error} If any entry has `level: 'item'` (with a pointer to data-enrichments.md).
540
- * @throws {Error} If two enrichments target the same column name (with both enrichment names).
546
+ * @throws {Error} If two same-level enrichments target the same column name (with both
547
+ * enrichment names and the conflicting column in the error message).
541
548
  *
542
549
  * @example
543
- * const { steps, joins, columns, columnNames } = buildEnrichments(config.enrichments);
550
+ * const { steps, row, item } = buildEnrichments(config.enrichments);
551
+ * // row.joins → attach to enhanced_events; row.columns → spread into enhanced_events
552
+ * // item.joins → attach to items_rebuilt; item.columns → fold into items struct
544
553
  */
545
554
  const buildEnrichments = (enrichments) => {
546
555
  const steps = [];
547
- const joins = [];
548
- const columns = {};
549
- const columnNames = new Set();
556
+ const channels = {
557
+ row: { joins: [], columns: {}, columnNames: new Set() },
558
+ item: { joins: [], columns: {}, columnNames: new Set() },
559
+ };
550
560
  const columnOwner = {};
551
561
 
552
562
  for (const [i, e] of (enrichments ?? []).entries()) {
553
- const level = e.level ?? 'event';
554
- if (level === 'item') {
555
- throw new Error(
556
- `config.enrichments[${i}] uses level: 'item', which is not yet supported in this version. ` +
557
- `Item-level enrichments will ship in a future release; see design_docs/planned/data-enrichments.md.`
558
- );
559
- }
563
+ const level = e.level ?? 'row';
564
+ const channel = channels[level];
560
565
  const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
561
566
  const cteName = `enrich_${e.name}`;
562
567
 
@@ -573,24 +578,29 @@ const buildEnrichments = (enrichments) => {
573
578
  }
574
579
  steps.push(sourceStep);
575
580
 
576
- joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });
581
+ channel.joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });
577
582
 
578
583
  for (const c of e.columns) {
579
- if (columnNames.has(c)) {
584
+ // Same-level collision throw. Cross-level same-name is allowed because the two
585
+ // columns target structurally distinct output slots (event_data vs items[]).
586
+ if (channel.columnNames.has(c)) {
580
587
  const owner = columnOwner[c];
581
588
  throw new Error(
582
589
  `config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
583
- `(name: '${owner.name}') both target column '${c}'. ` +
584
- `Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
590
+ `(name: '${owner.name}') both target column '${c}' at level '${level}'. ` +
591
+ `Two enrichments cannot write the same column at the same level; rename one in source SQL or pick a different name.`
585
592
  );
586
593
  }
587
- columns[c] = `${cteName}.${c}`;
588
- columnNames.add(c);
589
- columnOwner[c] = { i, name: e.name };
594
+ channel.columns[c] = `${cteName}.${c}`;
595
+ channel.columnNames.add(c);
596
+ // columnOwner is keyed by column name; if the same name appears at different
597
+ // levels, the second-writer entry wins, but we record level so diagnostics
598
+ // distinguish them. Same-level collisions throw above before reaching here.
599
+ columnOwner[c] = { i, name: e.name, level };
590
600
  }
591
601
  }
592
602
 
593
- return { steps, joins, columns, columnNames, columnOwner };
603
+ return { steps, row: channels.row, item: channels.item, columnOwner };
594
604
  };
595
605
 
596
606