ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/documentation.js +272 -223
- package/package.json +3 -2
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +72 -5
- package/tables/ga4EventsEnhanced/validation.js +95 -0
- package/utils.js +30 -8
package/README.md
CHANGED
|
@@ -334,6 +334,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
334
334
|
| `preOperations` | object | [See details](#preOperations) | Date range and incremental refresh configuration |
|
|
335
335
|
| `eventParamsToColumns` | object[] | `[]` | Event parameters to promote to columns. [See item schema](#eventParamsToColumns) |
|
|
336
336
|
| `customSteps` | object[] | `[]` | User-defined CTEs appended to the pipeline after `enhanced_events`. [See Custom CTEs](#custom-ctes) |
|
|
337
|
+
| `enrichments` | object[] | `[]` | Declarative external-data enrichments joined into `enhanced_events`. [See Data Enrichments](#data-enrichments) |
|
|
337
338
|
|
|
338
339
|
<a id="default-dataformtableconfig"></a>
|
|
339
340
|
<details>
|
|
@@ -473,7 +474,8 @@ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
|
|
|
473
474
|
| `session_data` | yes | Session-level aggregations (grouped by `session_id`). |
|
|
474
475
|
| `items_unnested` | only when `itemListAttribution` is on | Per-event item rows (one row per item per ecommerce event), with attribution window function applied. |
|
|
475
476
|
| `items_rebuilt` | only when `itemListAttribution` is on | Re-aggregated items with attributed list fields, joined back to events via `_item_row_id`. |
|
|
476
|
-
| `
|
|
477
|
+
| `enrich_<name>` | only when configured via `enrichments` | One CTE per [enrichment](#data-enrichments) entry, providing dim data for joining into `enhanced_events`. |
|
|
478
|
+
| `enhanced_events` | yes | The package's standard output shape (joined event_data + session_data + items_rebuilt + enrich_*, columns ordered, incremental date filter applied). The natural starting point for most custom CTEs. |
|
|
477
479
|
|
|
478
480
|
Example custom step using the raw SQL format:
|
|
479
481
|
|
|
@@ -521,6 +523,77 @@ end`,
|
|
|
521
523
|
|
|
522
524
|
> **Note:** Built-in assertions assume the package's standard schema. If your custom CTEs rename, drop, or filter rows in ways that break those assumptions, disable the affected assertions explicitly via the `assertions` config option.
|
|
523
525
|
|
|
526
|
+
<a id="data-enrichments"></a>
|
|
527
|
+
|
|
528
|
+
**`enrichments`** — declaratively join external dimension data into `enhanced_events` (cohort labels, page metadata, marketing attribution, etc.). Each entry describes one dim source plus the join — the package generates the source CTE, the `LEFT JOIN`, and column descriptions automatically.
|
|
529
|
+
|
|
530
|
+
For typical use cases this is the right tool; reach for `customSteps` only when you need a transformation that doesn't fit a flat dim join.
|
|
531
|
+
|
|
532
|
+
**Per-enrichment shape:**
|
|
533
|
+
|
|
534
|
+
| Field | Type | Required | Description |
|
|
535
|
+
| --- | --- | --- | --- |
|
|
536
|
+
| `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
|
|
537
|
+
| `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
|
|
538
|
+
| `source` | Dataform `ref()` / string | Yes | Source dim table. Use `ref()` in Dataform or a backtick-quoted ``` `project.dataset.table` ``` string. |
|
|
539
|
+
| `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
540
|
+
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
|
|
541
|
+
| `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
|
|
542
|
+
|
|
543
|
+
**Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
|
|
544
|
+
|
|
545
|
+
**Example** — attach user cohort labels by `user_pseudo_id`:
|
|
546
|
+
|
|
547
|
+
```javascript
|
|
548
|
+
enrichments: [
|
|
549
|
+
{
|
|
550
|
+
name: 'cohorts',
|
|
551
|
+
level: 'event',
|
|
552
|
+
source: ctx.ref('user_cohorts'),
|
|
553
|
+
joinKey: 'user_pseudo_id',
|
|
554
|
+
columns: ['cohort_label', 'lifecycle_stage'],
|
|
555
|
+
},
|
|
556
|
+
],
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
**Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net:
|
|
560
|
+
|
|
561
|
+
```javascript
|
|
562
|
+
enrichments: [
|
|
563
|
+
{
|
|
564
|
+
name: 'segments',
|
|
565
|
+
level: 'event',
|
|
566
|
+
source: ctx.ref('daily_user_segments'),
|
|
567
|
+
joinKey: ['event_date', 'user_pseudo_id'],
|
|
568
|
+
columns: ['segment'],
|
|
569
|
+
dedupe: true,
|
|
570
|
+
},
|
|
571
|
+
],
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
**Example** — fix a promoted event parameter via enrichment (replacement case):
|
|
575
|
+
|
|
576
|
+
```javascript
|
|
577
|
+
{
|
|
578
|
+
eventParamsToColumns: [{ name: 'page_title', type: 'string' }],
|
|
579
|
+
enrichments: [
|
|
580
|
+
{
|
|
581
|
+
name: 'titles',
|
|
582
|
+
level: 'event',
|
|
583
|
+
source: ctx.ref('page_title_overrides'),
|
|
584
|
+
joinKey: 'page_location',
|
|
585
|
+
columns: ['page_title'], // overlaps the promoted column → replaces it
|
|
586
|
+
},
|
|
587
|
+
],
|
|
588
|
+
}
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
> **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
|
|
592
|
+
|
|
593
|
+
> **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Replaced by enrichment '<name>' (...). Original: <description>` for replacements). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
|
|
594
|
+
|
|
595
|
+
> **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
|
|
596
|
+
|
|
524
597
|
<br>
|
|
525
598
|
|
|
526
599
|
---
|
package/documentation.js
CHANGED
|
@@ -1,223 +1,272 @@
|
|
|
1
|
-
const constants = require('./constants');
|
|
2
|
-
const { version } = require('./package.json');
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Composes a multi-section column description string from individual sections.
|
|
6
|
-
* Sections with null/undefined/empty values are omitted.
|
|
7
|
-
* Sections are separated by line breaks for readability.
|
|
8
|
-
*
|
|
9
|
-
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
-
* @returns {string} Composed description
|
|
11
|
-
*/
|
|
12
|
-
const composeDescription = (sections) => {
|
|
13
|
-
const parts = [];
|
|
14
|
-
|
|
15
|
-
if (sections.base) {
|
|
16
|
-
parts.push(sections.base);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
if (sections.lineage) {
|
|
20
|
-
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
if (sections.typicalUse) {
|
|
24
|
-
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (sections.config) {
|
|
28
|
-
parts.push(`Config: ${sections.config}`);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
return parts.join('\n\n');
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
-
*
|
|
37
|
-
* @param {string} columnName - The column name to look up.
|
|
38
|
-
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
-
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
-
*/
|
|
41
|
-
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
-
const entry = columnLineage[columnName];
|
|
43
|
-
if (!entry) return null;
|
|
44
|
-
|
|
45
|
-
const sourceLabels = {
|
|
46
|
-
'ga4_export': 'Standard GA4 export field',
|
|
47
|
-
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
-
'derived': 'Derived',
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
-
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
-
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
-
*
|
|
59
|
-
* @param {Object} config - The merged configuration object.
|
|
60
|
-
* @returns {Object} Map of column names to config note strings.
|
|
61
|
-
*/
|
|
62
|
-
const buildConfigNotes = (config) => {
|
|
63
|
-
const notes = {};
|
|
64
|
-
|
|
65
|
-
if (!config) return notes;
|
|
66
|
-
|
|
67
|
-
const append = (key, text) => {
|
|
68
|
-
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
// timezone
|
|
72
|
-
if (config.timezone) {
|
|
73
|
-
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// customTimestampParam
|
|
77
|
-
if (config.customTimestampParam) {
|
|
78
|
-
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
-
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
// data_is_final
|
|
83
|
-
if (config.dataIsFinal) {
|
|
84
|
-
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
-
if (method === 'DAY_THRESHOLD') {
|
|
86
|
-
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
-
} else {
|
|
88
|
-
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// excludedEvents
|
|
93
|
-
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
-
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
// excludedEventParams
|
|
98
|
-
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
-
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
// sessionParams
|
|
103
|
-
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
-
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// includedExportTypes
|
|
108
|
-
if (config.includedExportTypes) {
|
|
109
|
-
const types = Object.entries(config.includedExportTypes)
|
|
110
|
-
.filter(([, enabled]) => enabled)
|
|
111
|
-
.map(([type]) => type);
|
|
112
|
-
if (types.length > 0) {
|
|
113
|
-
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
return notes;
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
/**
|
|
121
|
-
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
-
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
-
* multi-section descriptions.
|
|
124
|
-
*
|
|
125
|
-
* @param {Object} config - The merged configuration object.
|
|
126
|
-
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
-
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
-
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
-
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
-
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
-
*/
|
|
132
|
-
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
-
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
-
|
|
135
|
-
const configNotes = buildConfigNotes(config);
|
|
136
|
-
|
|
137
|
-
// Compose multi-section descriptions for each top-level column
|
|
138
|
-
for (const key of Object.keys(descriptions)) {
|
|
139
|
-
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
-
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
-
|
|
142
|
-
if (!baseDesc) continue;
|
|
143
|
-
|
|
144
|
-
const composed = composeDescription({
|
|
145
|
-
base: baseDesc,
|
|
146
|
-
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
-
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
-
config: configNotes[key] || null,
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
if (isStruct) {
|
|
152
|
-
descriptions[key].description = composed;
|
|
153
|
-
} else {
|
|
154
|
-
descriptions[key] = composed;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
-
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
-
config.eventParamsToColumns.forEach(p => {
|
|
161
|
-
const columnName = p.columnName || p.name;
|
|
162
|
-
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
-
descriptions[columnName] = composeDescription({
|
|
164
|
-
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
-
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
-
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
-
config: null,
|
|
168
|
-
});
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
const isExcluded = (dependsOn, excludedColumns) => {
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
1
|
+
const constants = require('./constants');
|
|
2
|
+
const { version } = require('./package.json');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Composes a multi-section column description string from individual sections.
|
|
6
|
+
* Sections with null/undefined/empty values are omitted.
|
|
7
|
+
* Sections are separated by line breaks for readability.
|
|
8
|
+
*
|
|
9
|
+
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
+
* @returns {string} Composed description
|
|
11
|
+
*/
|
|
12
|
+
const composeDescription = (sections) => {
|
|
13
|
+
const parts = [];
|
|
14
|
+
|
|
15
|
+
if (sections.base) {
|
|
16
|
+
parts.push(sections.base);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
if (sections.lineage) {
|
|
20
|
+
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (sections.typicalUse) {
|
|
24
|
+
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if (sections.config) {
|
|
28
|
+
parts.push(`Config: ${sections.config}`);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return parts.join('\n\n');
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
+
*
|
|
37
|
+
* @param {string} columnName - The column name to look up.
|
|
38
|
+
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
+
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
+
*/
|
|
41
|
+
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
+
const entry = columnLineage[columnName];
|
|
43
|
+
if (!entry) return null;
|
|
44
|
+
|
|
45
|
+
const sourceLabels = {
|
|
46
|
+
'ga4_export': 'Standard GA4 export field',
|
|
47
|
+
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
+
'derived': 'Derived',
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
+
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
+
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
+
*
|
|
59
|
+
* @param {Object} config - The merged configuration object.
|
|
60
|
+
* @returns {Object} Map of column names to config note strings.
|
|
61
|
+
*/
|
|
62
|
+
const buildConfigNotes = (config) => {
|
|
63
|
+
const notes = {};
|
|
64
|
+
|
|
65
|
+
if (!config) return notes;
|
|
66
|
+
|
|
67
|
+
const append = (key, text) => {
|
|
68
|
+
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// timezone
|
|
72
|
+
if (config.timezone) {
|
|
73
|
+
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// customTimestampParam
|
|
77
|
+
if (config.customTimestampParam) {
|
|
78
|
+
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
+
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// data_is_final
|
|
83
|
+
if (config.dataIsFinal) {
|
|
84
|
+
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
+
if (method === 'DAY_THRESHOLD') {
|
|
86
|
+
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
+
} else {
|
|
88
|
+
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// excludedEvents
|
|
93
|
+
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
+
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// excludedEventParams
|
|
98
|
+
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
+
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// sessionParams
|
|
103
|
+
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
+
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// includedExportTypes
|
|
108
|
+
if (config.includedExportTypes) {
|
|
109
|
+
const types = Object.entries(config.includedExportTypes)
|
|
110
|
+
.filter(([, enabled]) => enabled)
|
|
111
|
+
.map(([type]) => type);
|
|
112
|
+
if (types.length > 0) {
|
|
113
|
+
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return notes;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
+
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
+
* multi-section descriptions.
|
|
124
|
+
*
|
|
125
|
+
* @param {Object} config - The merged configuration object.
|
|
126
|
+
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
+
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
+
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
+
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
+
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
+
*/
|
|
132
|
+
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
+
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
+
|
|
135
|
+
const configNotes = buildConfigNotes(config);
|
|
136
|
+
|
|
137
|
+
// Compose multi-section descriptions for each top-level column
|
|
138
|
+
for (const key of Object.keys(descriptions)) {
|
|
139
|
+
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
+
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
+
|
|
142
|
+
if (!baseDesc) continue;
|
|
143
|
+
|
|
144
|
+
const composed = composeDescription({
|
|
145
|
+
base: baseDesc,
|
|
146
|
+
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
+
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
+
config: configNotes[key] || null,
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
if (isStruct) {
|
|
152
|
+
descriptions[key].description = composed;
|
|
153
|
+
} else {
|
|
154
|
+
descriptions[key] = composed;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
+
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
+
config.eventParamsToColumns.forEach(p => {
|
|
161
|
+
const columnName = p.columnName || p.name;
|
|
162
|
+
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
+
descriptions[columnName] = composeDescription({
|
|
164
|
+
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
+
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
+
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
+
config: null,
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Add descriptions for columns added or replaced by data enrichments.
|
|
173
|
+
// Item-level enrichments are not yet supported and throw at SQL gen time — skip here.
|
|
174
|
+
if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
|
|
175
|
+
config.enrichments.forEach(e => {
|
|
176
|
+
const level = e.level ?? 'event';
|
|
177
|
+
if (level !== 'event') return;
|
|
178
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
179
|
+
const joinKeyText = joinKeys.join(', ');
|
|
180
|
+
const sourceText = renderEnrichmentSource(e.source);
|
|
181
|
+
for (const c of e.columns) {
|
|
182
|
+
const existing = descriptions[c];
|
|
183
|
+
const existingText = typeof existing === 'string'
|
|
184
|
+
? existing
|
|
185
|
+
: (existing && typeof existing === 'object' && existing.description)
|
|
186
|
+
? existing.description
|
|
187
|
+
: null;
|
|
188
|
+
const newDesc = existingText
|
|
189
|
+
? `Replaced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}). Original: ${existingText}`
|
|
190
|
+
: `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
|
|
191
|
+
// If the original was a struct-shaped entry, preserve the structure but replace the description.
|
|
192
|
+
// Otherwise, set as a plain string.
|
|
193
|
+
if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
|
|
194
|
+
descriptions[c] = { ...existing, description: newDesc };
|
|
195
|
+
} else {
|
|
196
|
+
descriptions[c] = newDesc;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return descriptions;
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Renders an enrichment source for inclusion in column descriptions.
|
|
207
|
+
*
|
|
208
|
+
* - Backtick-quoted string: passed through as-is.
|
|
209
|
+
* - Dataform table reference object: rendered as `<dataset>.<name>` (project not available
|
|
210
|
+
* at description-generation time; resolved later via ctx.ref()).
|
|
211
|
+
*
|
|
212
|
+
* @param {string|Object} source - The enrichment's source field.
|
|
213
|
+
* @returns {string} Backtick-quoted source identifier for display.
|
|
214
|
+
*/
|
|
215
|
+
const renderEnrichmentSource = (source) => {
|
|
216
|
+
if (typeof source === 'string') return source;
|
|
217
|
+
if (source && typeof source === 'object') {
|
|
218
|
+
const dataset = source.dataset || source.schema;
|
|
219
|
+
if (dataset && source.name) return '`' + dataset + '.' + source.name + '`';
|
|
220
|
+
}
|
|
221
|
+
return String(source);
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Checks whether a column (or its parent struct) is excluded by the config.
|
|
226
|
+
*
|
|
227
|
+
* @param {string[]} dependsOn - Column names this entry depends on.
|
|
228
|
+
* @param {string[]} excludedColumns - Combined excluded columns from config.
|
|
229
|
+
* @returns {boolean} True if ALL dependsOn columns are excluded.
|
|
230
|
+
*/
|
|
231
|
+
const isExcluded = (dependsOn, excludedColumns) => {
|
|
232
|
+
if (!dependsOn || dependsOn.length === 0) return false;
|
|
233
|
+
return dependsOn.every(col => excludedColumns.includes(col));
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Builds the full table description by combining table-specific sections
|
|
238
|
+
* with shared sections (package attribution, config JSON dump).
|
|
239
|
+
*
|
|
240
|
+
* @param {Object} config - The merged configuration object.
|
|
241
|
+
* @param {string[]} tableSections - Table-specific description sections (provided by the table module).
|
|
242
|
+
* @returns {string} The composed table description.
|
|
243
|
+
*/
|
|
244
|
+
const buildTableDescription = (config, tableSections) => {
|
|
245
|
+
const sections = [...tableSections];
|
|
246
|
+
|
|
247
|
+
// Package Attribution
|
|
248
|
+
sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX} Version: ${version}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
|
|
249
|
+
|
|
250
|
+
// Config JSON dump
|
|
251
|
+
const configForDump = Object.fromEntries(
|
|
252
|
+
Object.entries(config).filter(([key]) => !key.startsWith('default'))
|
|
253
|
+
);
|
|
254
|
+
// Strip description and columns from dataformTableConfig to avoid circular reference and bloat
|
|
255
|
+
if (configForDump.dataformTableConfig) {
|
|
256
|
+
const { description, columns, ...rest } = configForDump.dataformTableConfig;
|
|
257
|
+
configForDump.dataformTableConfig = rest;
|
|
258
|
+
}
|
|
259
|
+
const configJson = JSON.stringify(configForDump, null, 2);
|
|
260
|
+
sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);
|
|
261
|
+
|
|
262
|
+
return sections.join('\n\n');
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
module.exports = {
|
|
266
|
+
getColumnDescriptions,
|
|
267
|
+
buildTableDescription,
|
|
268
|
+
composeDescription,
|
|
269
|
+
getLineageText,
|
|
270
|
+
buildConfigNotes,
|
|
271
|
+
isExcluded,
|
|
272
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.9.0-dev.1",
|
|
3
|
+
"version": "0.9.0-dev.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js",
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js",
|
|
21
21
|
"test:summary": "node tests/testRunner.js",
|
|
22
22
|
"test:docs": "node tests/documentation.test.js",
|
|
23
23
|
"test:preops": "node tests/preOperations.test.js",
|
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
"test:createTable": "node tests/createTable.test.js",
|
|
29
29
|
"test:queryBuilder": "node tests/queryBuilder.test.js",
|
|
30
30
|
"test:customSteps": "node tests/customSteps.test.js",
|
|
31
|
+
"test:enrichments": "node tests/enrichments.test.js",
|
|
31
32
|
"test:integration": "node tests/integration/integration.test.js",
|
|
32
33
|
"release:dev": "./scripts/release-dev.sh",
|
|
33
34
|
"readme": "node scripts/updateReadme.js",
|
package/tables/ga4EventsEnhanced/config.js
CHANGED
|
@@ -68,6 +68,10 @@ const ga4EventsEnhancedConfig = {
|
|
|
68
68
|
// user-defined CTEs appended to the pipeline after enhanced_events
|
|
69
69
|
// each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
|
|
70
70
|
customSteps: [],
|
|
71
|
+
// declarative external-data enrichments joined into the pipeline
|
|
72
|
+
// each entry: { name, level: 'event' | 'item', source, joinKey, columns, dedupe? }
|
|
73
|
+
// 'item' level is accepted at config time but throws at SQL gen — not yet implemented
|
|
74
|
+
enrichments: [],
|
|
71
75
|
};
|
|
72
76
|
|
|
73
77
|
module.exports = { ga4EventsEnhancedConfig };
|
package/tables/ga4EventsEnhanced/index.js
CHANGED
|
@@ -268,8 +268,7 @@ ${excludedEventsSQL}`,
|
|
|
268
268
|
'group by': 'session_id',
|
|
269
269
|
};
|
|
270
270
|
|
|
271
|
-
// Shared item-array CTEs
|
|
272
|
-
// item-level data enrichments — see design_docs/planned/data-enrichments.md, Q16):
|
|
271
|
+
// Shared item-array CTEs:
|
|
273
272
|
// 1. items_unnested: unnest items from ecommerce events, compute attribution via window function
|
|
274
273
|
// 2. items_rebuilt: re-aggregate items with attributed list fields
|
|
275
274
|
const itemListSteps = itemListAttribution ? (() => {
|
|
@@ -326,6 +325,66 @@ ${excludedEventsSQL}`,
|
|
|
326
325
|
} : {};
|
|
327
326
|
const itemListExcludedColumns = itemListSteps ? ['_item_row_id'] : [];
|
|
328
327
|
|
|
328
|
+
// Build enrichment-source CTEs and gather event-level join/column data.
|
|
329
|
+
// Item-level enrichments throw "not yet supported" — they will arrive in a later release.
|
|
330
|
+
const enrichments = mergedConfig.enrichments ?? [];
|
|
331
|
+
const enrichmentSteps = [];
|
|
332
|
+
const enrichmentJoins = [];
|
|
333
|
+
const enrichmentColumns = {}; // column name → SQL expression for select.columns
|
|
334
|
+
const enrichmentColumnNames = new Set(); // column names for excludedColumns of wildcards
|
|
335
|
+
const enrichmentColumnOwner = {}; // column name → { i, name } for collision errors
|
|
336
|
+
for (const [i, e] of enrichments.entries()) {
|
|
337
|
+
const level = e.level ?? 'event';
|
|
338
|
+
if (level === 'item') {
|
|
339
|
+
throw new Error(
|
|
340
|
+
`config.enrichments[${i}] uses level: 'item', which is not yet supported in this version. ` +
|
|
341
|
+
`Item-level enrichments will ship in a future release; see design_docs/planned/data-enrichments.md.`
|
|
342
|
+
);
|
|
343
|
+
}
|
|
344
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
345
|
+
const cteName = `enrich_${e.name}`;
|
|
346
|
+
// Source CTE selects joinKey columns plus the requested columns. key === value
|
|
347
|
+
// shape skips the alias clause in queryBuilder's columnsToSQL.
|
|
348
|
+
const cteCols = {};
|
|
349
|
+
for (const k of joinKeys) cteCols[k] = k;
|
|
350
|
+
for (const c of e.columns) cteCols[c] = c;
|
|
351
|
+
const sourceStep = {
|
|
352
|
+
name: cteName,
|
|
353
|
+
select: { columns: cteCols },
|
|
354
|
+
from: e.source,
|
|
355
|
+
};
|
|
356
|
+
// Opt-in dedupe: which row wins is non-deterministic — users with strict needs
|
|
357
|
+
// should pre-aggregate in their source SQL.
|
|
358
|
+
if (e.dedupe) {
|
|
359
|
+
sourceStep.qualify = `row_number() over (partition by ${joinKeys.join(', ')}) = 1`;
|
|
360
|
+
}
|
|
361
|
+
enrichmentSteps.push(sourceStep);
|
|
362
|
+
|
|
363
|
+
enrichmentJoins.push({
|
|
364
|
+
type: 'left',
|
|
365
|
+
table: cteName,
|
|
366
|
+
on: `using(${joinKeys.join(', ')})`,
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
// Replace-or-add: each enrichment column overrides explicit select columns via JS object
|
|
370
|
+
// spread, AND joins the excludedColumns set so it suppresses overlap with the wildcard
|
|
371
|
+
// event_data.* / session_data.* expansions below.
|
|
372
|
+
for (const c of e.columns) {
|
|
373
|
+
if (enrichmentColumnNames.has(c)) {
|
|
374
|
+
const owner = enrichmentColumnOwner[c];
|
|
375
|
+
throw new Error(
|
|
376
|
+
`config.enrichments[${i}] (name: '${e.name}') and config.enrichments[${owner.i}] ` +
|
|
377
|
+
`(name: '${owner.name}') both target column '${c}'. ` +
|
|
378
|
+
`Two enrichments cannot write the same column; rename one in source SQL or pick a different name.`
|
|
379
|
+
);
|
|
380
|
+
}
|
|
381
|
+
enrichmentColumns[c] = `${cteName}.${c}`;
|
|
382
|
+
enrichmentColumnNames.add(c);
|
|
383
|
+
enrichmentColumnOwner[c] = { i, name: e.name };
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
const enrichmentExcludedColumns = [...enrichmentColumnNames];
|
|
387
|
+
|
|
329
388
|
// Join event_data and session_data, include additional logic
|
|
330
389
|
// Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
|
|
331
390
|
const enhancedEventsStep = {
|
|
@@ -335,6 +394,9 @@ ${excludedEventsSQL}`,
|
|
|
335
394
|
// get the most important columns in the correct order
|
|
336
395
|
...finalColumnOrder,
|
|
337
396
|
...itemListOverrides,
|
|
397
|
+
// event-level enrichment columns: override matching explicit columns; new columns added.
|
|
398
|
+
// Wildcard-column overlap is handled below via excludedColumns.
|
|
399
|
+
...enrichmentColumns,
|
|
338
400
|
// get the rest of the event_data columns
|
|
339
401
|
'[sql]event_data': utils.selectOtherColumns(
|
|
340
402
|
eventDataStep,
|
|
@@ -345,13 +407,14 @@ ${excludedEventsSQL}`,
|
|
|
345
407
|
'data_is_final',
|
|
346
408
|
'export_type',
|
|
347
409
|
...itemListExcludedColumns,
|
|
410
|
+
...enrichmentExcludedColumns,
|
|
348
411
|
]
|
|
349
412
|
),
|
|
350
413
|
// get the rest of the session_data columns
|
|
351
414
|
'[sql]session_data': utils.selectOtherColumns(
|
|
352
415
|
sessionDataStep,
|
|
353
416
|
Object.keys(finalColumnOrder),
|
|
354
|
-
[]
|
|
417
|
+
[...enrichmentExcludedColumns],
|
|
355
418
|
),
|
|
356
419
|
// include additional columns
|
|
357
420
|
row_inserted_timestamp: 'current_timestamp()',
|
|
@@ -370,12 +433,15 @@ ${excludedEventsSQL}`,
|
|
|
370
433
|
type: 'left',
|
|
371
434
|
table: 'session_data',
|
|
372
435
|
on: 'using(session_id)'
|
|
373
|
-
}
|
|
436
|
+
},
|
|
437
|
+
// Event-level enrichment joins go last so they apply on top of the package's own joins.
|
|
438
|
+
...enrichmentJoins,
|
|
374
439
|
],
|
|
375
440
|
where: helpers.incrementalDateFilter(mergedConfig)
|
|
376
441
|
};
|
|
377
442
|
|
|
378
443
|
const packageSteps = [
|
|
444
|
+
...enrichmentSteps,
|
|
379
445
|
eventDataStep,
|
|
380
446
|
...(itemListSteps ?? []),
|
|
381
447
|
sessionDataStep,
|
|
@@ -384,7 +450,8 @@ ${excludedEventsSQL}`,
|
|
|
384
450
|
|
|
385
451
|
// Layer 2 validation: customSteps name must not collide with package step names.
|
|
386
452
|
// Reserved set is derived from packageSteps at runtime (single source of truth) — what
|
|
387
|
-
// is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on
|
|
453
|
+
// is reserved depends on config (e.g. items_unnested / items_rebuilt exist only when itemListAttribution is on,
|
|
454
|
+
// and enrich_* names exist only when enrichments are configured).
|
|
388
455
|
const customSteps = mergedConfig.customSteps ?? [];
|
|
389
456
|
if (customSteps.length > 0) {
|
|
390
457
|
const reservedNames = new Set(packageSteps.map(s => s.name));
|
|
package/tables/ga4EventsEnhanced/validation.js
CHANGED
|
@@ -225,6 +225,101 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
225
225
|
seenNames.add(step.name);
|
|
226
226
|
}
|
|
227
227
|
}
|
|
228
|
+
|
|
229
|
+
// enrichments - optional array of declarative external-data enrichment specs.
|
|
230
|
+
// This block performs Layer 1 (config-shape) checks. Layer 2 checks (reserved-name collision
|
|
231
|
+
// + item-level deferral throw) live in _generateEnhancedEventsSQL — the reserved set is
|
|
232
|
+
// config-dependent, and item-level enrichments are rejected there during SQL generation.
|
|
233
|
+
if (config.enrichments !== undefined) {
|
|
234
|
+
if (!Array.isArray(config.enrichments)) {
|
|
235
|
+
throw new Error(`config.enrichments must be an array. Received: ${JSON.stringify(config.enrichments)}`);
|
|
236
|
+
}
|
|
237
|
+
const validLevels = ['event', 'item'];
|
|
238
|
+
const seenNames = new Set();
|
|
239
|
+
for (let i = 0; i < config.enrichments.length; i++) {
|
|
240
|
+
const entry = config.enrichments[i];
|
|
241
|
+
if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
|
|
242
|
+
throw new Error(`config.enrichments[${i}] must be a non-null object. Received: ${JSON.stringify(entry)}`);
|
|
243
|
+
}
|
|
244
|
+
if (typeof entry.name !== 'string' || !entry.name.trim()) {
|
|
245
|
+
throw new Error(`config.enrichments[${i}].name must be a non-empty string. Received: ${JSON.stringify(entry.name)}`);
|
|
246
|
+
}
|
|
247
|
+
if (seenNames.has(entry.name)) {
|
|
248
|
+
throw new Error(`config.enrichments contains duplicate name '${entry.name}'. Each enrichments entry must have a unique name.`);
|
|
249
|
+
}
|
|
250
|
+
seenNames.add(entry.name);
|
|
251
|
+
if (entry.level !== undefined && !validLevels.includes(entry.level)) {
|
|
252
|
+
throw new Error(`config.enrichments[${i}].level must be one of: ${validLevels.join(', ')}. Received: ${JSON.stringify(entry.level)}`);
|
|
253
|
+
}
|
|
254
|
+
// source: Dataform table reference object or backtick-quoted string
|
|
255
|
+
if (entry.source === undefined || entry.source === null) {
|
|
256
|
+
throw new Error(`config.enrichments[${i}].source is required.`);
|
|
257
|
+
}
|
|
258
|
+
if (isDataformTableReferenceObject(entry.source)) {
|
|
259
|
+
// Valid Dataform reference
|
|
260
|
+
} else if (typeof entry.source === 'string') {
|
|
261
|
+
if (!entry.source.trim()) {
|
|
262
|
+
throw new Error(`config.enrichments[${i}].source must be a non-empty string. Received empty string.`);
|
|
263
|
+
}
|
|
264
|
+
if (!/^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(entry.source.trim())) {
|
|
265
|
+
throw new Error(`config.enrichments[${i}].source must be in the format '\`project.dataset.table\`' (with backticks) or a Dataform table reference. Received: ${JSON.stringify(entry.source)}`);
|
|
266
|
+
}
|
|
267
|
+
} else {
|
|
268
|
+
throw new Error(`config.enrichments[${i}].source must be a Dataform table reference object or a string in format '\`project.dataset.table\`'. Received: ${JSON.stringify(entry.source)}`);
|
|
269
|
+
}
|
|
270
|
+
// joinKey: required, plain SQL identifier OR non-empty array of plain SQL identifiers.
|
|
271
|
+
// Plain identifier = ^[a-zA-Z_][a-zA-Z0-9_]*$ — no aliases (`id as user_id`), no backticks,
|
|
272
|
+
// no dotted paths. Users with mismatched dim-column names alias in an upstream Dataform view.
|
|
273
|
+
const sqlIdentifier = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
274
|
+
const aliasingHint = ' Aliases like \'id as user_id\' are not supported here; alias in an upstream Dataform view if your dim has a different column name.';
|
|
275
|
+
if (entry.joinKey === undefined || entry.joinKey === null) {
|
|
276
|
+
throw new Error(`config.enrichments[${i}].joinKey is required.`);
|
|
277
|
+
}
|
|
278
|
+
if (typeof entry.joinKey === 'string') {
|
|
279
|
+
if (!entry.joinKey.trim()) {
|
|
280
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty string. Received empty string.`);
|
|
281
|
+
}
|
|
282
|
+
if (!sqlIdentifier.test(entry.joinKey)) {
|
|
283
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a plain SQL identifier. Received: ${JSON.stringify(entry.joinKey)}.${aliasingHint}`);
|
|
284
|
+
}
|
|
285
|
+
} else if (Array.isArray(entry.joinKey)) {
|
|
286
|
+
if (entry.joinKey.length === 0) {
|
|
287
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a non-empty array when provided as an array.`);
|
|
288
|
+
}
|
|
289
|
+
for (let j = 0; j < entry.joinKey.length; j++) {
|
|
290
|
+
const k = entry.joinKey[j];
|
|
291
|
+
if (typeof k !== 'string' || !k.trim()) {
|
|
292
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a non-empty string. Received: ${JSON.stringify(k)}`);
|
|
293
|
+
}
|
|
294
|
+
if (!sqlIdentifier.test(k)) {
|
|
295
|
+
throw new Error(`config.enrichments[${i}].joinKey[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(k)}.${aliasingHint}`);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
} else {
|
|
299
|
+
throw new Error(`config.enrichments[${i}].joinKey must be a string or a non-empty array of strings. Received: ${JSON.stringify(entry.joinKey)}`);
|
|
300
|
+
}
|
|
301
|
+
// columns: required, non-empty array of plain SQL identifiers (no aliasing).
|
|
302
|
+
if (!Array.isArray(entry.columns)) {
|
|
303
|
+
throw new Error(`config.enrichments[${i}].columns must be an array. Received: ${JSON.stringify(entry.columns)}`);
|
|
304
|
+
}
|
|
305
|
+
if (entry.columns.length === 0) {
|
|
306
|
+
throw new Error(`config.enrichments[${i}].columns must be non-empty. List the source columns to add to the output (excluding joinKey).`);
|
|
307
|
+
}
|
|
308
|
+
for (let j = 0; j < entry.columns.length; j++) {
|
|
309
|
+
const c = entry.columns[j];
|
|
310
|
+
if (typeof c !== 'string' || !c.trim()) {
|
|
311
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a non-empty string. Received: ${JSON.stringify(c)}`);
|
|
312
|
+
}
|
|
313
|
+
if (!sqlIdentifier.test(c)) {
|
|
314
|
+
throw new Error(`config.enrichments[${i}].columns[${j}] must be a plain SQL identifier. Received: ${JSON.stringify(c)}.${aliasingHint}`);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// dedupe: optional boolean
|
|
318
|
+
if (entry.dedupe !== undefined && typeof entry.dedupe !== 'boolean') {
|
|
319
|
+
throw new Error(`config.enrichments[${i}].dedupe must be a boolean when provided. Received: ${JSON.stringify(entry.dedupe)}`);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
228
323
|
} catch (e) {
|
|
229
324
|
e.message = `Config validation: ${e.message}`;
|
|
230
325
|
throw e;
|
package/utils.js
CHANGED
|
@@ -389,6 +389,16 @@ const setDataformContext = (ctx, config) => {
|
|
|
389
389
|
}
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
// resolve Dataform refs in enrichments[].source the same way as sourceTable
|
|
393
|
+
if (Array.isArray(config.enrichments)) {
|
|
394
|
+
config.enrichments = config.enrichments.map(e => {
|
|
395
|
+
if (isDataformTableReferenceObject(e.source)) {
|
|
396
|
+
return { ...e, source: ctx.ref(e.source) };
|
|
397
|
+
}
|
|
398
|
+
return e;
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
|
|
392
402
|
config.self = ctx.self();
|
|
393
403
|
config.incremental = ctx.incremental();
|
|
394
404
|
|
|
@@ -479,23 +489,35 @@ const selectOtherColumns = (step, alreadyDefinedColumns = [], excludedColumns =
|
|
|
479
489
|
const stepName = step.name;
|
|
480
490
|
const stepColumns = Object.keys(step.select.columns);
|
|
481
491
|
|
|
482
|
-
//
|
|
483
|
-
const
|
|
492
|
+
// Columns in step.select.columns that should be excluded (already-defined or explicitly listed)
|
|
493
|
+
const internalExcept = stepColumns.filter(
|
|
484
494
|
column => alreadyDefinedColumns.includes(column) || excludedColumns.includes(column)
|
|
485
495
|
);
|
|
486
496
|
|
|
487
|
-
//
|
|
488
|
-
|
|
497
|
+
// Columns in excludedColumns that aren't enumerated in step.select.columns. These are
|
|
498
|
+
// wildcard-sourced columns (e.g. default GA4 export columns coming through `event_data.*`
|
|
499
|
+
// inside event_data's own select). The caller knows what to exclude; trust them.
|
|
500
|
+
// BigQuery throws at dry-run if the column doesn't exist in the source — surfaces typos.
|
|
501
|
+
// Filter out non-string and empty entries, e.g. undefined/null (callers can pass conditional values like
|
|
502
|
+
// `cond ? 'col' : undefined` for ergonomics).
|
|
503
|
+
const externalExcept = excludedColumns.filter(
|
|
504
|
+
c => typeof c === 'string' && c.length > 0 && !stepColumns.includes(c)
|
|
505
|
+
);
|
|
506
|
+
|
|
507
|
+
const allExcept = [...internalExcept, ...externalExcept];
|
|
508
|
+
|
|
509
|
+
// If nothing is excluded, select everything
|
|
510
|
+
if (allExcept.length === 0) {
|
|
489
511
|
return `${stepName}.*`;
|
|
490
512
|
}
|
|
491
513
|
|
|
492
|
-
// If
|
|
493
|
-
|
|
514
|
+
// If every enumerated column is excluded and there are no external excepts to apply,
|
|
515
|
+
// there's nothing to select via the wildcard
|
|
516
|
+
if (internalExcept.length === stepColumns.length && externalExcept.length === 0) {
|
|
494
517
|
return;
|
|
495
518
|
}
|
|
496
519
|
|
|
497
|
-
|
|
498
|
-
return `${stepName}.* except (${exceptColumns.join(', ')})`;
|
|
520
|
+
return `${stepName}.* except (${allExcept.join(', ')})`;
|
|
499
521
|
};
|
|
500
522
|
|
|
501
523
|
|