ga4-export-fixer 0.9.0-dev.8 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -32
- package/documentation.js +3 -3
- package/helpers/ga4Transforms.js +62 -3
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/config.js +3 -2
- package/tables/ga4EventsEnhanced/index.js +163 -46
- package/tables/ga4EventsEnhanced/validation.js +8 -8
- package/utils.js +45 -35
package/README.md
CHANGED
|
@@ -44,96 +44,100 @@ The goal of the package is to **speed up development** when building data models
|
|
|
44
44
|
</td>
|
|
45
45
|
</tr>
|
|
46
46
|
<tr>
|
|
47
|
+
<td valign="top">
|
|
48
|
+
<b>🧬 Data Enrichments</b><br>
|
|
49
|
+
Join external lookup data (cohorts, product master, etc.) at row level or ecommerce item level via <code>enrichments</code>
|
|
50
|
+
</td>
|
|
47
51
|
<td valign="top">
|
|
48
52
|
<b>📐 Flexible Schema</b><br>
|
|
49
53
|
Keeps the flexible structure of the original export with key fields promoted to columns for better query performance; partitioning & clustering enabled
|
|
50
54
|
</td>
|
|
55
|
+
</tr>
|
|
56
|
+
<tr>
|
|
51
57
|
<td valign="top">
|
|
52
58
|
<b>🤖 AI Agent Ready</b><br>
|
|
53
59
|
Extensive table & column descriptions for AI agents and humans
|
|
54
60
|
</td>
|
|
55
|
-
</tr>
|
|
56
|
-
<tr>
|
|
57
61
|
<td valign="top">
|
|
58
62
|
<b>🔑 Session Identity Resolution</b><br>
|
|
59
63
|
<code>user_id</code> resolved per session; <code>merged_user_id</code> coalesces with <code>user_pseudo_id</code>
|
|
60
64
|
</td>
|
|
65
|
+
</tr>
|
|
66
|
+
<tr>
|
|
61
67
|
<td valign="top">
|
|
62
68
|
<b>📡 Session Traffic Sources</b><br>
|
|
63
69
|
<code>session_first_traffic_source</code> and <code>session_traffic_source_last_click</code> computed automatically, adjusting for sessions that span midnight
|
|
64
70
|
</td>
|
|
65
|
-
</tr>
|
|
66
|
-
<tr>
|
|
67
71
|
<td valign="top">
|
|
68
72
|
<b>📍 Landing Page Detection</b><br>
|
|
69
73
|
Derived per session from the first page where <code>entrances > 0</code>
|
|
70
74
|
</td>
|
|
75
|
+
</tr>
|
|
76
|
+
<tr>
|
|
71
77
|
<td valign="top">
|
|
72
78
|
<b>🔗 Page URL Parsing</b><br>
|
|
73
79
|
Parsed <code>hostname</code>, <code>path</code>, <code>query</code>, and <code>query_params</code> from <code>page_location</code>
|
|
74
80
|
</td>
|
|
75
|
-
</tr>
|
|
76
|
-
<tr>
|
|
77
81
|
<td valign="top">
|
|
78
82
|
<b>🛒 Ecommerce Data Fixes</b><br>
|
|
79
83
|
Nullifies placeholder <code>transaction_id</code>; corrects <code>purchase_revenue</code> bugs
|
|
80
84
|
</td>
|
|
85
|
+
</tr>
|
|
86
|
+
<tr>
|
|
81
87
|
<td valign="top">
|
|
82
88
|
<b>🏷️ Item List Attribution</b><br>
|
|
83
89
|
Attributes <code>item_list_name</code>, <code>item_list_id</code>, and <code>item_list_index</code> from item selection events to downstream ecommerce events
|
|
84
90
|
</td>
|
|
85
|
-
</tr>
|
|
86
|
-
<tr>
|
|
87
91
|
<td valign="top">
|
|
88
92
|
<b>⚙️ Event Parameter Handling</b><br>
|
|
89
93
|
Promote event params to columns; include or exclude by name
|
|
90
94
|
</td>
|
|
95
|
+
</tr>
|
|
96
|
+
<tr>
|
|
91
97
|
<td valign="top">
|
|
92
98
|
<b>📊 Session Parameters</b><br>
|
|
93
99
|
Promote selected event parameters as <code>session_params</code>
|
|
94
100
|
</td>
|
|
95
|
-
</tr>
|
|
96
|
-
<tr>
|
|
97
101
|
<td valign="top">
|
|
98
102
|
<b>⏱️ Custom Timestamp</b><br>
|
|
99
103
|
Use a custom event parameter as primary timestamp with automatic fallback
|
|
100
104
|
</td>
|
|
105
|
+
</tr>
|
|
106
|
+
<tr>
|
|
101
107
|
<td valign="top">
|
|
102
108
|
<b>🔒 Schema Lock</b><br>
|
|
103
109
|
Lock table schema to a specific GA4 export date to prevent schema drift
|
|
104
110
|
</td>
|
|
105
|
-
</tr>
|
|
106
|
-
<tr>
|
|
107
111
|
<td valign="top">
|
|
108
112
|
<b>✅ Data Freshness Tracking</b><br>
|
|
109
113
|
<code>data_is_final</code> flag and <code>export_type</code> label on every row
|
|
110
114
|
</td>
|
|
115
|
+
</tr>
|
|
116
|
+
<tr>
|
|
111
117
|
<td valign="top">
|
|
112
118
|
<b>🔍 Data Quality Assertions</b><br>
|
|
113
119
|
Built-in daily assertion reconciles sessions, events, and revenue between the enhanced table and raw export
|
|
114
120
|
</td>
|
|
115
|
-
</tr>
|
|
116
|
-
<tr>
|
|
117
121
|
<td valign="top">
|
|
118
122
|
<b>🔃 Selective Re-processing</b><br>
|
|
119
123
|
Re-process a date range without full table rebuild using <code>incrementalStartOverride</code> and <code>incrementalEndOverride</code>
|
|
120
124
|
</td>
|
|
125
|
+
</tr>
|
|
126
|
+
<tr>
|
|
121
127
|
<td valign="top">
|
|
122
128
|
<b>📑 Batch Processing</b><br>
|
|
123
129
|
Process large exports in smaller batches via <code>numberOfDaysToProcess</code>
|
|
124
130
|
</td>
|
|
125
|
-
</tr>
|
|
126
|
-
<tr>
|
|
127
131
|
<td valign="top">
|
|
128
132
|
<b>🕐 Timezone-Aware Datetime</b><br>
|
|
129
133
|
<code>event_datetime</code> converted to a configurable IANA timezone
|
|
130
134
|
</td>
|
|
135
|
+
</tr>
|
|
136
|
+
<tr>
|
|
131
137
|
<td valign="top">
|
|
132
138
|
<b>🧩 Custom Processing Steps</b><br>
|
|
133
139
|
Append user-defined CTEs via <code>customSteps</code> to derive new columns or join external tables
|
|
134
140
|
</td>
|
|
135
|
-
</tr>
|
|
136
|
-
<tr>
|
|
137
141
|
<td valign="top">
|
|
138
142
|
<b>🛡️ Zero Dependencies</b><br>
|
|
139
143
|
No additional external dependencies added to your Dataform repository
|
|
@@ -145,7 +149,6 @@ The goal of the package is to **speed up development** when building data models
|
|
|
145
149
|
|
|
146
150
|
Features under consideration for future releases:
|
|
147
151
|
|
|
148
|
-
- Data enrichment (item-level, session-level, event-level)
|
|
149
152
|
- Aggregated tables (ga4_session, ga4_ecommerce...)
|
|
150
153
|
- Web and app specific default configurations
|
|
151
154
|
- Custom channel grouping
|
|
@@ -169,7 +172,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
169
172
|
{
|
|
170
173
|
"dependencies": {
|
|
171
174
|
"@dataform/core": "3.0.42",
|
|
172
|
-
"ga4-export-fixer": "0.
|
|
175
|
+
"ga4-export-fixer": "0.9.0"
|
|
173
176
|
}
|
|
174
177
|
}
|
|
175
178
|
```
|
|
@@ -198,7 +201,8 @@ Create a new **ga4_events_enhanced** table using a **.js** file in your reposito
|
|
|
198
201
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
199
202
|
|
|
200
203
|
const config = {
|
|
201
|
-
|
|
204
|
+
// using hard-coded GA4 export path
|
|
205
|
+
sourceTable: '`project.analytics_12345.events_*`'
|
|
202
206
|
};
|
|
203
207
|
|
|
204
208
|
ga4EventsEnhanced.createTable(publish, config);
|
|
@@ -212,6 +216,7 @@ ga4EventsEnhanced.createTable(publish, config);
|
|
|
212
216
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
213
217
|
|
|
214
218
|
const config = {
|
|
219
|
+
// GA4 export path declared, using the table reference object
|
|
215
220
|
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
|
|
216
221
|
// use dataformTableConfig to make changes to the default Dataform table configuration
|
|
217
222
|
dataformTableConfig: {
|
|
@@ -290,7 +295,8 @@ js {
|
|
|
290
295
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
291
296
|
|
|
292
297
|
const config = {
|
|
293
|
-
|
|
298
|
+
// using hard-coded GA4 export path
|
|
299
|
+
sourceTable: '`project.analytics_12345.events_*`',
|
|
294
300
|
self: self(),
|
|
295
301
|
incremental: incremental()
|
|
296
302
|
};
|
|
@@ -534,13 +540,13 @@ For typical use cases this is the right tool; reach for `customSteps` only when
|
|
|
534
540
|
| Field | Type | Required | Description |
|
|
535
541
|
| --- | --- | --- | --- |
|
|
536
542
|
| `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
|
|
537
|
-
| `level` | `'
|
|
543
|
+
| `level` | `'row'` / `'item'` | No, defaults to `'row'` | Join grain. `'row'` joins external dim data onto each row of `enhanced_events` (any column on `enhanced_events` as the key). `'item'` joins external dim data onto each item inside the `items` array (any field on the items struct or any event_data column as the key). |
|
|
538
544
|
| `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
|
|
539
|
-
| `joinKey` | string / string[] | Yes |
|
|
540
|
-
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns
|
|
545
|
+
| `joinKey` | string / string[] | Yes | For `level: 'row'`: column name(s) on `enhanced_events`. For `level: 'item'`: field name(s) on the items struct (e.g. `'item_id'`) or column name(s) on `event_data` (e.g. `'user_pseudo_id'`). Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
546
|
+
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
|
|
541
547
|
| `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
|
|
542
548
|
|
|
543
|
-
**
|
|
549
|
+
**Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`.
|
|
544
550
|
|
|
545
551
|
**Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
|
|
546
552
|
|
|
@@ -548,7 +554,7 @@ For typical use cases this is the right tool; reach for `customSteps` only when
|
|
|
548
554
|
enrichments: [
|
|
549
555
|
{
|
|
550
556
|
name: 'cohorts',
|
|
551
|
-
level
|
|
557
|
+
// level omitted → defaults to 'row'
|
|
552
558
|
source: { schema: 'analytics', name: 'user_cohorts' },
|
|
553
559
|
joinKey: 'user_pseudo_id',
|
|
554
560
|
columns: ['cohort_label', 'lifecycle_stage'],
|
|
@@ -562,7 +568,7 @@ enrichments: [
|
|
|
562
568
|
enrichments: [
|
|
563
569
|
{
|
|
564
570
|
name: 'segments',
|
|
565
|
-
level: '
|
|
571
|
+
level: 'row',
|
|
566
572
|
source: '`my-project.analytics.daily_user_segments`',
|
|
567
573
|
joinKey: ['event_date', 'user_pseudo_id'],
|
|
568
574
|
columns: ['segment'],
|
|
@@ -571,7 +577,7 @@ enrichments: [
|
|
|
571
577
|
],
|
|
572
578
|
```
|
|
573
579
|
|
|
574
|
-
**Example** — fix a promoted event parameter via enrichment (
|
|
580
|
+
**Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
|
|
575
581
|
|
|
576
582
|
```javascript
|
|
577
583
|
{
|
|
@@ -579,18 +585,34 @@ enrichments: [
|
|
|
579
585
|
enrichments: [
|
|
580
586
|
{
|
|
581
587
|
name: 'titles',
|
|
582
|
-
level: '
|
|
588
|
+
level: 'row',
|
|
583
589
|
source: { schema: 'analytics', name: 'page_title_overrides' },
|
|
584
590
|
joinKey: 'page_location',
|
|
585
|
-
columns: ['page_title'], // overlaps the promoted column →
|
|
591
|
+
columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
|
|
586
592
|
},
|
|
587
593
|
],
|
|
588
594
|
}
|
|
589
595
|
```
|
|
590
596
|
|
|
597
|
+
**Example** — item-level enrichment: attach product master data to each item via `item_id`. The enrichment flows into the `items` array struct; `margin_bucket` is added as a new item-struct field, and `item_category` overlap-coalesces against the original. Item-level enrichment columns do NOT appear at the row grain — they live inside `items[].<col>`:
|
|
598
|
+
|
|
599
|
+
```javascript
|
|
600
|
+
enrichments: [
|
|
601
|
+
{
|
|
602
|
+
name: 'products',
|
|
603
|
+
level: 'item',
|
|
604
|
+
source: { schema: 'analytics', name: 'product_master' },
|
|
605
|
+
joinKey: 'item_id', // joins on item.item_id
|
|
606
|
+
columns: ['margin_bucket', 'item_category'], // margin_bucket is additive; item_category overlap-coalesces
|
|
607
|
+
},
|
|
608
|
+
],
|
|
609
|
+
```
|
|
610
|
+
|
|
611
|
+
For `level: 'item'`, valid `joinKey` values are any field on the GA4 items struct (`item_id`, `item_category`, etc.) or any column on `event_data` (`user_pseudo_id`, `event_date`, etc.). A row-level and an item-level enrichment may share the same column name (e.g. both writing `cohort`) — the two columns target structurally distinct slots (`enhanced_events.cohort` at row grain vs `items[].cohort` inside the items array) and are not in collision.
|
|
612
|
+
|
|
591
613
|
> **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
|
|
592
614
|
|
|
593
|
-
> **Note:**
|
|
615
|
+
> **Note:** Row-level enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default. Item-level enrichment columns do not receive auto-generated descriptions (BigQuery does not surface per-field descriptions on STRUCT-array fields cleanly through Dataform's column-description mechanism).
|
|
594
616
|
|
|
595
617
|
> **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
|
|
596
618
|
|
package/documentation.js
CHANGED
|
@@ -173,8 +173,8 @@ const getColumnDescriptions = (config, columnMetadata) => {
|
|
|
173
173
|
// Only row-level enrichments get auto-generated column descriptions; item-level enrichment columns live inside items[] and are skipped here.
|
|
174
174
|
if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
|
|
175
175
|
config.enrichments.forEach(e => {
|
|
176
|
-
const level = e.level ?? '
|
|
177
|
-
if (level !== '
|
|
176
|
+
const level = e.level ?? 'row';
|
|
177
|
+
if (level !== 'row') return;
|
|
178
178
|
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
179
179
|
const joinKeyText = joinKeys.join(', ');
|
|
180
180
|
const sourceText = renderEnrichmentSource(e.source);
|
|
@@ -186,7 +186,7 @@ const getColumnDescriptions = (config, columnMetadata) => {
|
|
|
186
186
|
? existing.description
|
|
187
187
|
: null;
|
|
188
188
|
const newDesc = existingText
|
|
189
|
-
? `
|
|
189
|
+
? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
|
|
190
190
|
: `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
|
|
191
191
|
// If the original was a struct-shaped entry, preserve the structure but replace the description.
|
|
192
192
|
// Otherwise, set as a plain string.
|
package/helpers/ga4Transforms.js
CHANGED
|
@@ -140,6 +140,55 @@ const ga4ExportColumns = [
|
|
|
140
140
|
*/
|
|
141
141
|
const isGa4ExportColumn = (columnName) => ga4ExportColumns.includes(columnName);
|
|
142
142
|
|
|
143
|
+
/**
|
|
144
|
+
* The standard GA4 BigQuery export items-struct field names, based on the official schema.
|
|
145
|
+
* Listed in GA4's source order — `items_rebuilt`'s explicit struct construction emits fields
|
|
146
|
+
* in this order, and consumers may reasonably depend on the items-struct schema field order
|
|
147
|
+
* matching GA4's own.
|
|
148
|
+
*
|
|
149
|
+
* `item_params` is a nested REPEATED RECORD and projects through as a single struct entry
|
|
150
|
+
* (no per-key handling).
|
|
151
|
+
*
|
|
152
|
+
* list updated 2026-05-12
|
|
153
|
+
*/
|
|
154
|
+
const ga4ItemStructFields = [
|
|
155
|
+
"item_id",
|
|
156
|
+
"item_name",
|
|
157
|
+
"item_brand",
|
|
158
|
+
"item_variant",
|
|
159
|
+
"item_category",
|
|
160
|
+
"item_category2",
|
|
161
|
+
"item_category3",
|
|
162
|
+
"item_category4",
|
|
163
|
+
"item_category5",
|
|
164
|
+
"price_in_usd",
|
|
165
|
+
"price",
|
|
166
|
+
"quantity",
|
|
167
|
+
"item_revenue_in_usd",
|
|
168
|
+
"item_revenue",
|
|
169
|
+
"item_refund_in_usd",
|
|
170
|
+
"item_refund",
|
|
171
|
+
"coupon",
|
|
172
|
+
"affiliation",
|
|
173
|
+
"location_id",
|
|
174
|
+
"item_list_id",
|
|
175
|
+
"item_list_name",
|
|
176
|
+
"item_list_index",
|
|
177
|
+
"promotion_id",
|
|
178
|
+
"promotion_name",
|
|
179
|
+
"creative_name",
|
|
180
|
+
"creative_slot",
|
|
181
|
+
"item_params"
|
|
182
|
+
];
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Checks whether a given field name is part of the standard GA4 BigQuery export items struct.
|
|
186
|
+
*
|
|
187
|
+
* @param {string} fieldName - The name of the field to check.
|
|
188
|
+
* @returns {boolean} True if the field name is a standard items-struct field, otherwise false.
|
|
189
|
+
*/
|
|
190
|
+
const isGa4ItemStructField = (fieldName) => ga4ItemStructFields.includes(fieldName);
|
|
191
|
+
|
|
143
192
|
/**
|
|
144
193
|
* Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
|
|
145
194
|
*
|
|
@@ -186,13 +235,17 @@ const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs)
|
|
|
186
235
|
frameBounds = `range between ${lookbackMicros} preceding and current row`;
|
|
187
236
|
}
|
|
188
237
|
|
|
189
|
-
|
|
238
|
+
// Suppress attribution for:
|
|
239
|
+
// - refund events (outside the selection-driven journey window)
|
|
240
|
+
// - unconsented events (user_pseudo_id is NULL) — attribution requires a visitor
|
|
241
|
+
// identity to stitch select_* events to later receivers within the same visitor.
|
|
242
|
+
return `if(event_name = 'refund' or user_pseudo_id is null, null, last_value(
|
|
190
243
|
if(${selectEvents}, ${structExpr}, null) ignore nulls
|
|
191
244
|
) over(
|
|
192
245
|
partition by ${partitionBy}
|
|
193
246
|
order by ${timestampColumn} asc
|
|
194
247
|
${frameBounds}
|
|
195
|
-
)`;
|
|
248
|
+
))`;
|
|
196
249
|
};
|
|
197
250
|
|
|
198
251
|
/**
|
|
@@ -209,6 +262,10 @@ const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs)
|
|
|
209
262
|
* the rows are interchangeable, so arbitrary row number assignment between them
|
|
210
263
|
* produces the same result.
|
|
211
264
|
*
|
|
265
|
+
* Unconsented events (user_pseudo_id is NULL) use an empty-string sentinel inside
|
|
266
|
+
* concat — without it, CONCAT NULL-propagates and the row_id becomes NULL, which
|
|
267
|
+
* would prevent enrichments from applying to such events.
|
|
268
|
+
*
|
|
212
269
|
* @param {string} ecommerceEventsFilter - Comma-separated, quoted list of event names
|
|
213
270
|
* (e.g., "'purchase', 'add_to_cart'").
|
|
214
271
|
* @returns {string} SQL expression that evaluates to the row id or NULL.
|
|
@@ -217,7 +274,7 @@ const itemRowId = (ecommerceEventsFilter) => {
|
|
|
217
274
|
return `if(
|
|
218
275
|
event_name in (${ecommerceEventsFilter}),
|
|
219
276
|
farm_fingerprint(concat(
|
|
220
|
-
user_pseudo_id,
|
|
277
|
+
ifnull(user_pseudo_id, ''),
|
|
221
278
|
cast(event_timestamp as string),
|
|
222
279
|
event_name,
|
|
223
280
|
to_json_string(items),
|
|
@@ -257,6 +314,8 @@ module.exports = {
|
|
|
257
314
|
isFinalData,
|
|
258
315
|
ga4ExportColumns,
|
|
259
316
|
isGa4ExportColumn,
|
|
317
|
+
ga4ItemStructFields,
|
|
318
|
+
isGa4ItemStructField,
|
|
260
319
|
getGa4ExportType,
|
|
261
320
|
itemListAttributionExpr,
|
|
262
321
|
itemRowId,
|
package/package.json
CHANGED
|
@@ -69,8 +69,9 @@ const ga4EventsEnhancedConfig = {
|
|
|
69
69
|
// each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
|
|
70
70
|
customSteps: [],
|
|
71
71
|
// declarative external-data enrichments joined into the pipeline
|
|
72
|
-
// each entry: { name,
|
|
73
|
-
//
|
|
72
|
+
// each entry: { name, source, joinKey, columns, level?, dedupe? }
|
|
73
|
+
// `level` is optional — defaults to 'row' (one row of the enclosing table per join match).
|
|
74
|
+
// 'item' targets the items[] array (GA4-specific, ecommerce events only).
|
|
74
75
|
enrichments: [],
|
|
75
76
|
};
|
|
76
77
|
|
|
@@ -162,8 +162,18 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
162
162
|
|
|
163
163
|
// item list attribution config
|
|
164
164
|
const itemListAttribution = mergedConfig.itemListAttribution;
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
|
|
166
|
+
// Build enrichment-source CTEs and gather per-level join/column data. The utility routes
|
|
167
|
+
// row-level and item-level entries through separate output channels. Done up here so the
|
|
168
|
+
// items-scaffold activation state is known before building event_data (which needs
|
|
169
|
+
// _item_row_id when the scaffold is active for any reason).
|
|
170
|
+
const { steps: enrichmentSteps, row: rowEnrichments, item: itemEnrichments }
|
|
171
|
+
= utils.buildEnrichments(mergedConfig.enrichments);
|
|
172
|
+
const itemEnrichmentsActive = itemEnrichments.joins.length > 0;
|
|
173
|
+
const itemsScaffoldActive = !!itemListAttribution || itemEnrichmentsActive;
|
|
174
|
+
|
|
175
|
+
const ecommerceEventsFilter = itemsScaffoldActive
|
|
176
|
+
? helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ')
|
|
167
177
|
: null;
|
|
168
178
|
|
|
169
179
|
// auto-adjust bufferDays for time-based item list attribution lookback
|
|
@@ -220,7 +230,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
220
230
|
// ecommerce
|
|
221
231
|
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
222
232
|
// assign a unique row id, used for handling item-level attribution and enrichment
|
|
223
|
-
_item_row_id:
|
|
233
|
+
_item_row_id: itemsScaffoldActive ? helpers.itemRowId(ecommerceEventsFilter) : undefined,
|
|
224
234
|
// flag if the data is "final" and is not expected to change anymore
|
|
225
235
|
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
226
236
|
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
@@ -263,74 +273,183 @@ ${excludedEventsSQL}`,
|
|
|
263
273
|
'group by': 'session_id',
|
|
264
274
|
};
|
|
265
275
|
|
|
276
|
+
// Validate item-level joinKey columns and collect any event_data columns that need to
|
|
277
|
+
// be carried up to items_unnested as top-level columns (so the LEFT JOIN inside
|
|
278
|
+
// items_rebuilt can USING(...) on them). Item-struct fields are already top-level on
|
|
279
|
+
// items_unnested and need no extension.
|
|
280
|
+
const itemJoinKeysFromEventData = new Set();
|
|
281
|
+
for (const [i, e] of (mergedConfig.enrichments ?? []).entries()) {
|
|
282
|
+
const level = e.level ?? 'row';
|
|
283
|
+
if (level !== 'item') continue;
|
|
284
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
285
|
+
for (const c of joinKeys) {
|
|
286
|
+
if (helpers.ga4ItemStructFields.includes(c)) {
|
|
287
|
+
// Already a top-level column on items_unnested.
|
|
288
|
+
} else if (c in eventDataStep.select.columns && eventDataStep.select.columns[c] !== undefined) {
|
|
289
|
+
itemJoinKeysFromEventData.add(c);
|
|
290
|
+
} else {
|
|
291
|
+
throw new Error(
|
|
292
|
+
`config.enrichments[${i}] (name: '${e.name}') uses item-level joinKey '${c}', ` +
|
|
293
|
+
`which is neither a field on the GA4 items struct (helpers.ga4ItemStructFields) ` +
|
|
294
|
+
`nor a column on event_data. Valid item-level joinKeys are item-struct fields ` +
|
|
295
|
+
`(e.g. item_id, item_category) or any event_data column (e.g. user_pseudo_id, event_date).`
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
266
301
|
// Shared item-array CTEs:
|
|
267
|
-
// 1. items_unnested: unnest items from ecommerce events
|
|
268
|
-
//
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
);
|
|
302
|
+
// 1. items_unnested: unnest items from ecommerce events; LAST_VALUE attribution window
|
|
303
|
+
// is emitted only when itemListAttribution is configured.
|
|
304
|
+
// 2. items_rebuilt: re-aggregate items via explicit struct(...) construction;
|
|
305
|
+
// LEFT JOIN enrich_<name> for each item-level enrichment.
|
|
306
|
+
// Activation: emitted when EITHER itemListAttribution is configured OR at least one
|
|
307
|
+
// item-level enrichment is present.
|
|
308
|
+
const itemListSteps = itemsScaffoldActive ? (() => {
|
|
275
309
|
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
276
310
|
|
|
311
|
+
// Flatten the item struct: every standard items-struct field is selected as a
|
|
312
|
+
// top-level column of items_unnested. This makes downstream joins simpler
|
|
313
|
+
// (LEFT JOIN ... USING(item_id) works without aliasing tricks) and lets items_rebuilt
|
|
314
|
+
// reference fields as bare column names instead of `item.<col>`.
|
|
315
|
+
const itemFieldColumns = {};
|
|
316
|
+
for (const f of helpers.ga4ItemStructFields) {
|
|
317
|
+
itemFieldColumns[f] = `item.${f}`;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Carry up any event_data joinKey columns used by item-level enrichments so the
|
|
321
|
+
// USING(...) clause in items_rebuilt can bind against top-level identifiers.
|
|
322
|
+
// Skip keys that are already base columns of items_unnested (_item_row_id, event_name, or an item-struct field).
|
|
323
|
+
const baseColumnNames = new Set(['_item_row_id', 'event_name', ...Object.keys(itemFieldColumns)]);
|
|
324
|
+
const extraJoinKeyColumns = {};
|
|
325
|
+
for (const c of itemJoinKeysFromEventData) {
|
|
326
|
+
if (!baseColumnNames.has(c)) {
|
|
327
|
+
extraJoinKeyColumns[c] = c;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// items_unnested base columns. The _item_list_attr struct (LAST_VALUE window) is
|
|
332
|
+
// added only when itemListAttribution is configured — when only item enrichments
|
|
333
|
+
// are active, the window function is omitted entirely for cleaner SQL.
|
|
334
|
+
const unnestedSelectColumns = {
|
|
335
|
+
'_item_row_id': '_item_row_id',
|
|
336
|
+
'event_name': 'event_name',
|
|
337
|
+
...itemFieldColumns,
|
|
338
|
+
...extraJoinKeyColumns,
|
|
339
|
+
};
|
|
340
|
+
if (itemListAttribution) {
|
|
341
|
+
unnestedSelectColumns._item_list_attr = helpers.itemListAttributionExpr(
|
|
342
|
+
itemListAttribution.lookbackType,
|
|
343
|
+
timestampColumn,
|
|
344
|
+
itemListAttribution.lookbackTimeMs
|
|
345
|
+
);
|
|
346
|
+
}
|
|
347
|
+
|
|
277
348
|
const unnestedStep = {
|
|
278
349
|
name: 'items_unnested',
|
|
279
|
-
select: {
|
|
280
|
-
columns: {
|
|
281
|
-
'_item_row_id': '_item_row_id',
|
|
282
|
-
'event_name': 'event_name',
|
|
283
|
-
// event_date is carried forward for ability to use it in data enrichment joins
|
|
284
|
-
'event_date': 'event_date',
|
|
285
|
-
'item': 'item',
|
|
286
|
-
'_item_list_attr': attrExpr,
|
|
287
|
-
},
|
|
288
|
-
},
|
|
350
|
+
select: { columns: unnestedSelectColumns },
|
|
289
351
|
from: 'event_data, unnest(items) as item',
|
|
290
352
|
where: `event_name in (${ecommerceEventsFilter})`,
|
|
291
353
|
};
|
|
292
354
|
|
|
355
|
+
// Build the per-field expression map for the items struct. Seed with the canonical
|
|
356
|
+
// GA4 items-struct fields — each references the matching top-level column on
|
|
357
|
+
// items_unnested. When itemListAttribution is configured, override the three
|
|
358
|
+
// attribution entries with their package-generated coalesce-with-passthrough
|
|
359
|
+
// expressions. Item-level enrichment columns layer on top via the spread below.
|
|
360
|
+
// References are qualified with `items_unnested.` so that overlapping item-level
|
|
361
|
+
// enrichments (which JOIN against enrich_<name> CTEs that may share column names)
|
|
362
|
+
// do not produce ambiguous bare-column references.
|
|
363
|
+
const preItemExpressions = {};
|
|
364
|
+
for (const f of helpers.ga4ItemStructFields) {
|
|
365
|
+
preItemExpressions[f] = `items_unnested.${f}`;
|
|
366
|
+
}
|
|
367
|
+
if (itemListAttribution) {
|
|
368
|
+
preItemExpressions.item_list_name = `coalesce(if(${passthroughEvents}, items_unnested.item_list_name, _item_list_attr.item_list_name), '(not set)')`;
|
|
369
|
+
preItemExpressions.item_list_id = `coalesce(if(${passthroughEvents}, items_unnested.item_list_id, _item_list_attr.item_list_id), '(not set)')`;
|
|
370
|
+
preItemExpressions.item_list_index = `coalesce(if(${passthroughEvents}, items_unnested.item_list_index, _item_list_attr.item_list_index))`;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
// Wrap overlapping item-level enrichment columns in coalesce(<enrichExpr>, <originalExpr>)
|
|
374
|
+
// so a missed JOIN falls back to the existing item field value. Purely additive
|
|
375
|
+
// columns (no overlap) pass through unchanged.
|
|
376
|
+
const wrappedItemEnrichmentColumns = {};
|
|
377
|
+
for (const [col, enrichExpr] of Object.entries(itemEnrichments.columns)) {
|
|
378
|
+
const originalExpr = preItemExpressions[col];
|
|
379
|
+
wrappedItemEnrichmentColumns[col] = originalExpr
|
|
380
|
+
? `coalesce(${enrichExpr}, ${originalExpr})`
|
|
381
|
+
: enrichExpr;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
// Final struct: standard fields first, then enrichment overrides spread on top
|
|
385
|
+
// (overlapping keys replace preItemExpressions entries; additive keys are appended).
|
|
386
|
+
const finalItemStructFields = { ...preItemExpressions, ...wrappedItemEnrichmentColumns };
|
|
387
|
+
|
|
388
|
+
const itemStructClauses = Object.entries(finalItemStructFields)
|
|
389
|
+
.map(([col, expr]) => `${expr} as ${col}`)
|
|
390
|
+
.join(',\n ');
|
|
391
|
+
|
|
293
392
|
const rebuiltStep = {
|
|
294
393
|
name: 'items_rebuilt',
|
|
295
394
|
select: {
|
|
296
395
|
columns: {
|
|
297
396
|
'_item_row_id': '_item_row_id',
|
|
298
|
-
'items': `array_agg(
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
|
|
302
|
-
coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
|
|
303
|
-
))
|
|
304
|
-
)`,
|
|
397
|
+
'items': `array_agg(struct(
|
|
398
|
+
${itemStructClauses}
|
|
399
|
+
))`,
|
|
305
400
|
},
|
|
306
401
|
},
|
|
307
402
|
from: 'items_unnested',
|
|
308
403
|
'group by': '_item_row_id',
|
|
309
404
|
};
|
|
405
|
+
// Item-level enrichment joins (only attach when present). Each enrichment's LEFT JOIN
|
|
406
|
+
// binds against top-level columns on items_unnested (item-struct fields, or event_data
|
|
407
|
+
// joinKey columns carried up via extraJoinKeyColumns above).
|
|
408
|
+
if (itemEnrichmentsActive) {
|
|
409
|
+
rebuiltStep.joins = itemEnrichments.joins;
|
|
410
|
+
}
|
|
310
411
|
|
|
311
412
|
return [unnestedStep, rebuiltStep];
|
|
312
413
|
})() : null;
|
|
313
414
|
|
|
314
415
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
315
416
|
|
|
316
|
-
// When
|
|
317
|
-
//
|
|
417
|
+
// When the items scaffold is active, override the items column and exclude _item_row_id.
|
|
418
|
+
// ifnull(..., []) preserves the empty-array shape for events that have no items_rebuilt
|
|
419
|
+
// match (non-ecommerce events, or ecommerce events with empty items arrays). The empty
|
|
420
|
+
// array literal is type-inferred from items_rebuilt.items, which includes any item-level
|
|
421
|
+
// enrichment columns — so additive enrichments don't cause a struct-schema mismatch.
|
|
318
422
|
const itemListOverrides = itemListSteps ? {
|
|
319
|
-
items: '
|
|
423
|
+
items: 'ifnull(items_rebuilt.items, [])',
|
|
320
424
|
} : {};
|
|
321
425
|
const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
|
|
322
426
|
|
|
323
|
-
//
|
|
324
|
-
//
|
|
325
|
-
|
|
326
|
-
|
|
427
|
+
// Wrap overlapping row-level enrichment columns in coalesce(enrich_<name>.<col>, <original>)
|
|
428
|
+
// so a missed JOIN falls back to the existing value. Purely additive columns (no overlap)
|
|
429
|
+
// pass through unchanged. Source-of-original precedence matches the final SELECT's spread
|
|
430
|
+
// order: itemListOverrides first (overrides finalColumnOrder for `items`), then
|
|
431
|
+
// session_data (wins over event_data in getFinalColumnOrder when both have the column).
|
|
432
|
+
const wrappedRowEnrichmentColumns = {};
|
|
433
|
+
for (const [col, enrichExpr] of Object.entries(rowEnrichments.columns)) {
|
|
434
|
+
let originalExpr;
|
|
435
|
+
if (col in itemListOverrides) {
|
|
436
|
+
originalExpr = itemListOverrides[col];
|
|
437
|
+
} else if (col in sessionDataStep.select.columns) {
|
|
438
|
+
originalExpr = `session_data.${col}`;
|
|
439
|
+
} else if (col in eventDataStep.select.columns && eventDataStep.select.columns[col] !== undefined) {
|
|
440
|
+
originalExpr = `event_data.${col}`;
|
|
441
|
+
}
|
|
442
|
+
wrappedRowEnrichmentColumns[col] = originalExpr
|
|
443
|
+
? `coalesce(${enrichExpr}, ${originalExpr})`
|
|
444
|
+
: enrichExpr;
|
|
445
|
+
}
|
|
327
446
|
|
|
328
|
-
//
|
|
329
|
-
//
|
|
447
|
+
// List all column names that have already been defined or should be left out
|
|
448
|
+
// Used for the final pass-through: include the rest of the columns that haven't been explicitly listed yet
|
|
330
449
|
const alreadyMapped = [
|
|
331
450
|
...Object.keys(finalColumnOrder),
|
|
332
451
|
...Object.keys(itemListOverrides),
|
|
333
|
-
...
|
|
452
|
+
...rowEnrichments.columnNames,
|
|
334
453
|
'entrances',
|
|
335
454
|
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
336
455
|
'data_is_final',
|
|
@@ -347,8 +466,8 @@ ${excludedEventsSQL}`,
|
|
|
347
466
|
// get the most important columns in the correct order
|
|
348
467
|
...finalColumnOrder,
|
|
349
468
|
...itemListOverrides,
|
|
350
|
-
//
|
|
351
|
-
...
|
|
469
|
+
// row-level enrichment columns: coalesce with the original when overlapping; otherwise add.
|
|
470
|
+
...wrappedRowEnrichmentColumns,
|
|
352
471
|
// explicit pass-throughs for the rest of event_data and session_data
|
|
353
472
|
...utils.buildQualifiedPassThroughs(eventDataStep, alreadyMapped),
|
|
354
473
|
...utils.buildQualifiedPassThroughs(sessionDataStep, alreadyMapped),
|
|
@@ -370,8 +489,8 @@ ${excludedEventsSQL}`,
|
|
|
370
489
|
table: 'session_data',
|
|
371
490
|
on: 'using(session_id)'
|
|
372
491
|
},
|
|
373
|
-
//
|
|
374
|
-
...
|
|
492
|
+
// The left joins for the row-level enrichment ctes
|
|
493
|
+
...rowEnrichments.joins,
|
|
375
494
|
],
|
|
376
495
|
where: helpers.incrementalDateFilter(mergedConfig)
|
|
377
496
|
};
|
|
@@ -384,10 +503,7 @@ ${excludedEventsSQL}`,
|
|
|
384
503
|
enhancedEventsStep,
|
|
385
504
|
];
|
|
386
505
|
|
|
387
|
-
//
|
|
388
|
-
// Reserved set is derived from packageSteps at runtime (single source of truth) — what
|
|
389
|
-
// is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on,
|
|
390
|
-
// and enrich_* names exist only when enrichments are configured).
|
|
506
|
+
// Ensure that the custom step names don't collide with the default or data enrichment step names
|
|
391
507
|
const customSteps = mergedConfig.customSteps ?? [];
|
|
392
508
|
if (customSteps.length > 0) {
|
|
393
509
|
const reservedNames = new Set(packageSteps.map(s => s.name));
|
|
@@ -401,6 +517,7 @@ ${excludedEventsSQL}`,
|
|
|
401
517
|
}
|
|
402
518
|
}
|
|
403
519
|
|
|
520
|
+
// Include custom steps last in the list
|
|
404
521
|
const steps = [...packageSteps, ...customSteps];
|
|
405
522
|
|
|
406
523
|
return utils.queryBuilder(steps);
|
|
@@ -201,11 +201,11 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
// customSteps - optional array of queryBuilder step objects appended to the pipeline
|
|
205
|
-
//
|
|
204
|
+
// customSteps - optional array of queryBuilder step objects appended to the pipeline.
|
|
205
|
+
// Config-shape checks only: array, objects with non-empty name, no duplicates within customSteps.
|
|
206
206
|
// Step-shape validation (clause keys, etc.) deferred to queryBuilder.
|
|
207
|
-
// Collision-with-package-names check deferred to _generateEnhancedEventsSQL
|
|
208
|
-
//
|
|
207
|
+
// Collision-with-package-names check deferred to _generateEnhancedEventsSQL, since the
|
|
208
|
+
// reserved set is config-dependent (e.g. item_list_* only exist when itemListAttribution is on).
|
|
209
209
|
if (config.customSteps !== undefined) {
|
|
210
210
|
if (!Array.isArray(config.customSteps)) {
|
|
211
211
|
throw new Error(`config.customSteps must be an array. Received: ${JSON.stringify(config.customSteps)}`);
|
|
@@ -227,14 +227,14 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
227
227
|
}
|
|
228
228
|
|
|
229
229
|
// enrichments - optional array of declarative external-data enrichment specs.
|
|
230
|
-
//
|
|
231
|
-
//
|
|
232
|
-
//
|
|
230
|
+
// Config-shape checks only. Reserved-name collision and item-level joinKey resolution
|
|
231
|
+
// happen in _generateEnhancedEventsSQL, where the reserved set and item-level join targets
|
|
232
|
+
// are derived from the resolved config.
|
|
233
233
|
if (config.enrichments !== undefined) {
|
|
234
234
|
if (!Array.isArray(config.enrichments)) {
|
|
235
235
|
throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
|
|
236
236
|
}
|
|
237
|
-
const validLevels = ['
|
|
237
|
+
const validLevels = ['row', 'item'];
|
|
238
238
|
const seenNames = new Set();
|
|
239
239
|
for (let i = 0; i < config.enrichments.length; i++) {
|
|
240
240
|
const entry = config.enrichments[i];
|
package/utils.js
CHANGED
|
@@ -515,48 +515,53 @@ const buildPassThroughs = (explicitColumns, sourceColumns) => {
|
|
|
515
515
|
|
|
516
516
|
/**
|
|
517
517
|
* Builds the per-enrichment CTE definitions, JOIN clauses, and column-name mappings for the
|
|
518
|
-
* declarative `enrichments` feature.
|
|
518
|
+
* declarative `enrichments` feature. Routes row-level and item-level entries through
|
|
519
|
+
* separate output channels so the caller can attach them to different downstream CTEs.
|
|
519
520
|
*
|
|
520
521
|
* Pure config-to-data mapping. No knowledge of downstream CTEs or specific table modules —
|
|
521
522
|
* intended to be called by any table module that exposes an `enrichments` config field.
|
|
522
523
|
*
|
|
523
|
-
* Encapsulates
|
|
524
|
-
* - level
|
|
525
|
-
*
|
|
524
|
+
* Encapsulates one generation-time throw:
|
|
525
|
+
* - Same-level enrichment-vs-enrichment column collisions (two row-level enrichments or
|
|
526
|
+
* two item-level enrichments targeting the same column). Cross-level same-name is allowed —
|
|
527
|
+
* the two columns target structurally distinct slots (e.g. `enhanced_events.<col>` vs
|
|
528
|
+
* `items[].<col>`).
|
|
526
529
|
*
|
|
527
530
|
* @param {Array<Object>} enrichments - Validated enrichment entries. Each entry has fields:
|
|
528
|
-
* { name,
|
|
529
|
-
*
|
|
530
|
-
*
|
|
531
|
-
*
|
|
532
|
-
* - `
|
|
533
|
-
*
|
|
534
|
-
* - `
|
|
535
|
-
*
|
|
536
|
-
*
|
|
537
|
-
*
|
|
531
|
+
* { name, source, joinKey, columns, level?, dedupe? }. `level` is 'row' (default) or 'item'.
|
|
532
|
+
* 'row' means one row of the enclosing table per join match; 'item' targets a nested array
|
|
533
|
+
* (currently only the GA4 items[] array).
|
|
534
|
+
* @returns {Object} A struct with four fields:
|
|
535
|
+
* - `steps` — array of queryBuilder source-CTE step definitions (one `enrich_<name>` per
|
|
536
|
+
* entry, regardless of level — all source CTEs go to the top of the pipeline).
|
|
537
|
+
* - `row` — { joins, columns, columnNames } for row-level enrichments. Caller attaches
|
|
538
|
+
* `joins` to the row-grained downstream CTE (e.g. `enhanced_events`) and spreads `columns`
|
|
539
|
+
* into that CTE's `select.columns`.
|
|
540
|
+
* - `item` — { joins, columns, columnNames } for item-level enrichments. Caller attaches
|
|
541
|
+
* `joins` to the item-grained downstream CTE (e.g. `items_rebuilt`) and folds `columns`
|
|
542
|
+
* into that CTE's struct construction.
|
|
543
|
+
* - `columnOwner` — map of `{ <column>: { i, name, level } }` recording which enrichment
|
|
544
|
+
* owns each column. The `level` field distinguishes cross-level same-name entries.
|
|
538
545
|
*
|
|
539
|
-
* @throws {Error} If
|
|
540
|
-
*
|
|
546
|
+
* @throws {Error} If two same-level enrichments target the same column name (with both
|
|
547
|
+
* enrichment names and the conflicting column in the error message).
|
|
541
548
|
*
|
|
542
549
|
* @example
|
|
543
|
-
* const { steps,
|
|
550
|
+
* const { steps, row, item } = buildEnrichments(config.enrichments);
|
|
551
|
+
* // row.joins → attach to enhanced_events; row.columns → spread into enhanced_events
|
|
552
|
+
* // item.joins → attach to items_rebuilt; item.columns → fold into items struct
|
|
544
553
|
*/
|
|
545
554
|
const buildEnrichments = (enrichments) => {
|
|
546
555
|
const steps = [];
|
|
547
|
-
const
|
|
548
|
-
|
|
549
|
-
|
|
556
|
+
const channels = {
|
|
557
|
+
row: { joins: [], columns: {}, columnNames: new Set() },
|
|
558
|
+
item: { joins: [], columns: {}, columnNames: new Set() },
|
|
559
|
+
};
|
|
550
560
|
const columnOwner = {};
|
|
551
561
|
|
|
552
562
|
for (const [i, e] of (enrichments ?? []).entries()) {
|
|
553
|
-
const level = e.level ?? '
|
|
554
|
-
|
|
555
|
-
throw new Error(
|
|
556
|
-
`config.enrichments[${i}] uses level: 'item', which is not yet supported in this version. ` +
|
|
557
|
-
`Item-level enrichments will ship in a future release; see design_docs/planned/data-enrichments.md.`
|
|
558
|
-
);
|
|
559
|
-
}
|
|
563
|
+
const level = e.level ?? 'row';
|
|
564
|
+
const channel = channels[level];
|
|
560
565
|
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
561
566
|
const cteName = `enrich_${e.name}`;
|
|
562
567
|
|
|
@@ -573,24 +578,29 @@ const buildEnrichments = (enrichments) => {
|
|
|
573
578
|
}
|
|
574
579
|
steps.push(sourceStep);
|
|
575
580
|
|
|
576
|
-
joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });
|
|
581
|
+
channel.joins.push({ type: 'left', table: cteName, on: `using(${joinKeys.join(', ')})` });
|
|
577
582
|
|
|
578
583
|
for (const c of e.columns) {
|
|
579
|
-
|
|
584
|
+
// Same-level collision throw. Cross-level same-name is allowed because the two
|
|
585
|
+
// columns target structurally distinct output slots (event_data vs items[]).
|
|
586
|
+
if (channel.columnNames.has(c)) {
|
|
580
587
|
const owner = columnOwner[c];
|
|
581
588
|
throw new Error(
|
|
582
589
|
`config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
|
|
583
|
-
`(name: '${owner.name}') both target column '${c}'. ` +
|
|
584
|
-
`Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
|
|
590
|
+
`(name: '${owner.name}') both target column '${c}' at level '${level}'. ` +
|
|
591
|
+
`Two enrichments cannot write the same column at the same level; rename one in source SQL or pick a different name.`
|
|
585
592
|
);
|
|
586
593
|
}
|
|
587
|
-
columns[c] = `${cteName}.${c}`;
|
|
588
|
-
columnNames.add(c);
|
|
589
|
-
columnOwner
|
|
594
|
+
channel.columns[c] = `${cteName}.${c}`;
|
|
595
|
+
channel.columnNames.add(c);
|
|
596
|
+
// columnOwner is keyed by column name; if the same name appears at different
|
|
597
|
+
// levels, the second-writer entry wins, but we record level so diagnostics
|
|
598
|
+
// distinguish them. Same-level collisions throw above before reaching here.
|
|
599
|
+
columnOwner[c] = { i, name: e.name, level };
|
|
590
600
|
}
|
|
591
601
|
}
|
|
592
602
|
|
|
593
|
-
return { steps,
|
|
603
|
+
return { steps, row: channels.row, item: channels.item, columnOwner };
|
|
594
604
|
};
|
|
595
605
|
|
|
596
606
|
|