ga4-export-fixer 0.8.0 → 0.9.0-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -5
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +263 -262
- package/package.json +6 -5
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +91 -21
- package/tables/ga4EventsEnhanced/validation.js +95 -0
- package/utils.js +30 -8
package/README.md
CHANGED
|
@@ -128,6 +128,12 @@ The goal of the package is to **speed up development** when building data models
|
|
|
128
128
|
<b>🕐 Timezone-Aware Datetime</b><br>
|
|
129
129
|
<code>event_datetime</code> converted to a configurable IANA timezone
|
|
130
130
|
</td>
|
|
131
|
+
<td valign="top">
|
|
132
|
+
<b>🧩 Custom Processing Steps</b><br>
|
|
133
|
+
Append user-defined CTEs via <code>customSteps</code> to derive new columns or join external tables
|
|
134
|
+
</td>
|
|
135
|
+
</tr>
|
|
136
|
+
<tr>
|
|
131
137
|
<td valign="top">
|
|
132
138
|
<b>🛡️ Zero Dependencies</b><br>
|
|
133
139
|
No additional external dependencies added to your Dataform repository
|
|
@@ -139,10 +145,10 @@ The goal of the package is to **speed up development** when building data models
|
|
|
139
145
|
|
|
140
146
|
Features under consideration for future releases:
|
|
141
147
|
|
|
148
|
+
- Data enrichment (item-level, session-level, event-level)
|
|
149
|
+
- Aggregated tables (ga4_session, ga4_ecommerce...)
|
|
142
150
|
- Web and app specific default configurations
|
|
143
151
|
- Custom channel grouping
|
|
144
|
-
- Data enrichment (item-level, session-level, event-level)
|
|
145
|
-
- Custom processing steps (additional CTEs)
|
|
146
152
|
- Custom traffic source attribution
|
|
147
153
|
|
|
148
154
|
## Installation
|
|
@@ -328,6 +334,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
328
334
|
| `preOperations` | object | [See details](#preOperations) | Date range and incremental refresh configuration |
|
|
329
335
|
| `eventParamsToColumns` | object[] | `[]` | Event parameters to promote to columns. [See item schema](#eventParamsToColumns) |
|
|
330
336
|
| `customSteps` | object[] | `[]` | User-defined CTEs appended to the pipeline after `enhanced_events`. [See Custom CTEs](#custom-ctes) |
|
|
337
|
+
| `enrichments` | object[] | `[]` | Declarative external-data enrichments joined into `enhanced_events`. [See Data Enrichments](#data-enrichments) |
|
|
331
338
|
|
|
332
339
|
<a id="default-dataformtableconfig"></a>
|
|
333
340
|
<details>
|
|
@@ -465,10 +472,12 @@ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
|
|
|
465
472
|
| ------------------------ | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
466
473
|
| `event_data` | yes | Extracted and shaped events from `sourceTable`, with date filtering and column promotions applied. *Unfiltered for the buffer-days range.* |
|
|
467
474
|
| `session_data` | yes | Session-level aggregations (grouped by `session_id`). |
|
|
468
|
-
| `
|
|
469
|
-
| `
|
|
470
|
-
| `
|
|
475
|
+
| `items_unnested` | only when `itemListAttribution` is on | Per-event item rows (one row per item per ecommerce event), with attribution window function applied. |
|
|
476
|
+
| `items_rebuilt` | only when `itemListAttribution` is on | Re-aggregated items with attributed list fields, joined back to events via `_item_row_id`. |
|
|
477
|
+
| `enrich_<name>` | only when configured via `enrichments` | One CTE per [enrichment](#data-enrichments) entry, providing dim data for joining into `enhanced_events`. |
|
|
478
|
+
| `enhanced_events` | yes | The package's standard output shape (joined event_data + session_data + items_rebuilt + enrich_*, columns ordered, incremental date filter applied). The natural starting point for most custom CTEs. |
|
|
471
479
|
|
|
480
|
+
Example custom step using the raw SQL format:
|
|
472
481
|
|
|
473
482
|
```javascript
|
|
474
483
|
// Add a content_group column derived from page.path
|
|
@@ -488,10 +497,103 @@ from enhanced_events`,
|
|
|
488
497
|
],
|
|
489
498
|
```
|
|
490
499
|
|
|
500
|
+
The same example in the structured shape:
|
|
501
|
+
|
|
502
|
+
```javascript
|
|
503
|
+
customSteps: [
|
|
504
|
+
{
|
|
505
|
+
name: 'final',
|
|
506
|
+
select: {
|
|
507
|
+
columns: {
|
|
508
|
+
'[sql]passthrough': 'enhanced_events.*',
|
|
509
|
+
content_group: `case
|
|
510
|
+
when page.path like '/blog/%' then 'blog'
|
|
511
|
+
when page.path like '/products/%' then 'product'
|
|
512
|
+
when page.path = '/' then 'home'
|
|
513
|
+
else 'other'
|
|
514
|
+
end`,
|
|
515
|
+
},
|
|
516
|
+
},
|
|
517
|
+
from: 'enhanced_events',
|
|
518
|
+
},
|
|
519
|
+
],
|
|
520
|
+
```
|
|
521
|
+
|
|
491
522
|
> **Note:** Custom columns aren't auto-documented. Use `dataformTableConfig.columns` to add descriptions — it's deep-merged with the package's defaults, so your keys are added or override matching defaults, and untouched defaults stay.
|
|
492
523
|
|
|
493
524
|
> **Note:** Built-in assertions assume the package's standard schema. If your custom CTEs rename, drop, or filter rows in ways that break those assumptions, disable the affected assertions explicitly via the `assertions` config option.
|
|
494
525
|
|
|
526
|
+
<a id="data-enrichments"></a>
|
|
527
|
+
|
|
528
|
+
**`enrichments`** — declaratively join external dimension data into `enhanced_events` (cohort labels, page metadata, marketing attribution, etc.). Each entry describes one dim source plus the join — the package generates the source CTE, the `LEFT JOIN`, and column descriptions automatically.
|
|
529
|
+
|
|
530
|
+
For typical use cases this is the right tool; reach for `customSteps` only when you need a transformation that doesn't fit a flat dim join.
|
|
531
|
+
|
|
532
|
+
**Per-enrichment shape:**
|
|
533
|
+
|
|
534
|
+
| Field | Type | Required | Description |
|
|
535
|
+
| --- | --- | --- | --- |
|
|
536
|
+
| `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
|
|
537
|
+
| `level` | `'event'` | No, defaults to `'event'` | Join grain. Currently only `'event'` is supported (item-level enrichments will arrive in a later release). |
|
|
538
|
+
| `source` | Dataform `ref()` / string | Yes | Source dim table. Use `ref()` in Dataform or a backtick-quoted ``` `project.dataset.table` ``` string. |
|
|
539
|
+
| `joinKey` | string / string[] | Yes | Column name(s) on `enhanced_events` to join on. Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
540
|
+
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns REPLACE them. |
|
|
541
|
+
| `dedupe` | boolean | No, defaults to `false` | When `true`, applies `qualify row_number() over (partition by <joinKey>) = 1` to the source CTE so that a dim source with non-unique keys yields one row per key. Which row is kept is non-deterministic; if you need a specific row, pre-aggregate in the source SQL. |
|
|
542
|
+
|
|
543
|
+
**Replace-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value REPLACES it. If there is no overlap, the column is added.
|
|
544
|
+
|
|
545
|
+
**Example** — attach user cohort labels by `user_pseudo_id`:
|
|
546
|
+
|
|
547
|
+
```javascript
|
|
548
|
+
enrichments: [
|
|
549
|
+
{
|
|
550
|
+
name: 'cohorts',
|
|
551
|
+
level: 'event',
|
|
552
|
+
source: ctx.ref('user_cohorts'),
|
|
553
|
+
joinKey: 'user_pseudo_id',
|
|
554
|
+
columns: ['cohort_label', 'lifecycle_stage'],
|
|
555
|
+
},
|
|
556
|
+
],
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
**Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net:
|
|
560
|
+
|
|
561
|
+
```javascript
|
|
562
|
+
enrichments: [
|
|
563
|
+
{
|
|
564
|
+
name: 'segments',
|
|
565
|
+
level: 'event',
|
|
566
|
+
source: ctx.ref('daily_user_segments'),
|
|
567
|
+
joinKey: ['event_date', 'user_pseudo_id'],
|
|
568
|
+
columns: ['segment'],
|
|
569
|
+
dedupe: true,
|
|
570
|
+
},
|
|
571
|
+
],
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
**Example** — fix a promoted event parameter via enrichment (replacement case):
|
|
575
|
+
|
|
576
|
+
```javascript
|
|
577
|
+
{
|
|
578
|
+
eventParamsToColumns: [{ name: 'page_title', type: 'string' }],
|
|
579
|
+
enrichments: [
|
|
580
|
+
{
|
|
581
|
+
name: 'titles',
|
|
582
|
+
level: 'event',
|
|
583
|
+
source: ctx.ref('page_title_overrides'),
|
|
584
|
+
joinKey: 'page_location',
|
|
585
|
+
columns: ['page_title'], // overlaps the promoted column → replaces it
|
|
586
|
+
},
|
|
587
|
+
],
|
|
588
|
+
}
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
> **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
|
|
592
|
+
|
|
593
|
+
> **Note:** Enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Replaced by enrichment '<name>' (...). Original: <description>` for replacements). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default.
|
|
594
|
+
|
|
595
|
+
> **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
|
|
596
|
+
|
|
495
597
|
<br>
|
|
496
598
|
|
|
497
599
|
---
|
package/documentation.js
CHANGED
|
@@ -1,223 +1,272 @@
|
|
|
1
|
-
const constants = require('./constants');
|
|
2
|
-
const { version } = require('./package.json');
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Composes a multi-section column description string from individual sections.
|
|
6
|
-
* Sections with null/undefined/empty values are omitted.
|
|
7
|
-
* Sections are separated by line breaks for readability.
|
|
8
|
-
*
|
|
9
|
-
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
-
* @returns {string} Composed description
|
|
11
|
-
*/
|
|
12
|
-
const composeDescription = (sections) => {
|
|
13
|
-
const parts = [];
|
|
14
|
-
|
|
15
|
-
if (sections.base) {
|
|
16
|
-
parts.push(sections.base);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
if (sections.lineage) {
|
|
20
|
-
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
if (sections.typicalUse) {
|
|
24
|
-
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (sections.config) {
|
|
28
|
-
parts.push(`Config: ${sections.config}`);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
return parts.join('\n\n');
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
-
*
|
|
37
|
-
* @param {string} columnName - The column name to look up.
|
|
38
|
-
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
-
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
-
*/
|
|
41
|
-
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
-
const entry = columnLineage[columnName];
|
|
43
|
-
if (!entry) return null;
|
|
44
|
-
|
|
45
|
-
const sourceLabels = {
|
|
46
|
-
'ga4_export': 'Standard GA4 export field',
|
|
47
|
-
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
-
'derived': 'Derived',
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
-
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
-
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
-
*
|
|
59
|
-
* @param {Object} config - The merged configuration object.
|
|
60
|
-
* @returns {Object} Map of column names to config note strings.
|
|
61
|
-
*/
|
|
62
|
-
const buildConfigNotes = (config) => {
|
|
63
|
-
const notes = {};
|
|
64
|
-
|
|
65
|
-
if (!config) return notes;
|
|
66
|
-
|
|
67
|
-
const append = (key, text) => {
|
|
68
|
-
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
// timezone
|
|
72
|
-
if (config.timezone) {
|
|
73
|
-
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// customTimestampParam
|
|
77
|
-
if (config.customTimestampParam) {
|
|
78
|
-
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
-
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
// data_is_final
|
|
83
|
-
if (config.dataIsFinal) {
|
|
84
|
-
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
-
if (method === 'DAY_THRESHOLD') {
|
|
86
|
-
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
-
} else {
|
|
88
|
-
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// excludedEvents
|
|
93
|
-
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
-
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
// excludedEventParams
|
|
98
|
-
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
-
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
// sessionParams
|
|
103
|
-
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
-
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// includedExportTypes
|
|
108
|
-
if (config.includedExportTypes) {
|
|
109
|
-
const types = Object.entries(config.includedExportTypes)
|
|
110
|
-
.filter(([, enabled]) => enabled)
|
|
111
|
-
.map(([type]) => type);
|
|
112
|
-
if (types.length > 0) {
|
|
113
|
-
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
return notes;
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
/**
|
|
121
|
-
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
-
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
-
* multi-section descriptions.
|
|
124
|
-
*
|
|
125
|
-
* @param {Object} config - The merged configuration object.
|
|
126
|
-
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
-
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
-
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
-
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
-
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
-
*/
|
|
132
|
-
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
-
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
-
|
|
135
|
-
const configNotes = buildConfigNotes(config);
|
|
136
|
-
|
|
137
|
-
// Compose multi-section descriptions for each top-level column
|
|
138
|
-
for (const key of Object.keys(descriptions)) {
|
|
139
|
-
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
-
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
-
|
|
142
|
-
if (!baseDesc) continue;
|
|
143
|
-
|
|
144
|
-
const composed = composeDescription({
|
|
145
|
-
base: baseDesc,
|
|
146
|
-
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
-
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
-
config: configNotes[key] || null,
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
if (isStruct) {
|
|
152
|
-
descriptions[key].description = composed;
|
|
153
|
-
} else {
|
|
154
|
-
descriptions[key] = composed;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
-
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
-
config.eventParamsToColumns.forEach(p => {
|
|
161
|
-
const columnName = p.columnName || p.name;
|
|
162
|
-
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
-
descriptions[columnName] = composeDescription({
|
|
164
|
-
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
-
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
-
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
-
config: null,
|
|
168
|
-
});
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
const
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
1
|
+
const constants = require('./constants');
|
|
2
|
+
const { version } = require('./package.json');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Composes a multi-section column description string from individual sections.
|
|
6
|
+
* Sections with null/undefined/empty values are omitted.
|
|
7
|
+
* Sections are separated by a blank line (`\n\n`) for readability.
|
|
8
|
+
*
|
|
9
|
+
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
+
* @returns {string} Composed description
|
|
11
|
+
*/
|
|
12
|
+
const composeDescription = (sections) => {
|
|
13
|
+
const parts = [];
|
|
14
|
+
|
|
15
|
+
if (sections.base) {
|
|
16
|
+
parts.push(sections.base);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
if (sections.lineage) {
|
|
20
|
+
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (sections.typicalUse) {
|
|
24
|
+
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if (sections.config) {
|
|
28
|
+
parts.push(`Config: ${sections.config}`);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return parts.join('\n\n');
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
+
*
|
|
37
|
+
* @param {string} columnName - The column name to look up.
|
|
38
|
+
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
+
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
+
*/
|
|
41
|
+
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
+
const entry = columnLineage[columnName];
|
|
43
|
+
if (!entry) return null;
|
|
44
|
+
|
|
45
|
+
const sourceLabels = {
|
|
46
|
+
'ga4_export': 'Standard GA4 export field',
|
|
47
|
+
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
+
'derived': 'Derived',
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
+
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
+
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
+
*
|
|
59
|
+
* @param {Object} config - The merged configuration object.
|
|
60
|
+
* @returns {Object} Map of column names to config note strings.
|
|
61
|
+
*/
|
|
62
|
+
const buildConfigNotes = (config) => {
|
|
63
|
+
const notes = {};
|
|
64
|
+
|
|
65
|
+
if (!config) return notes;
|
|
66
|
+
|
|
67
|
+
const append = (key, text) => {
|
|
68
|
+
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// timezone
|
|
72
|
+
if (config.timezone) {
|
|
73
|
+
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// customTimestampParam
|
|
77
|
+
if (config.customTimestampParam) {
|
|
78
|
+
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
+
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// data_is_final
|
|
83
|
+
if (config.dataIsFinal) {
|
|
84
|
+
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
+
if (method === 'DAY_THRESHOLD') {
|
|
86
|
+
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
+
} else {
|
|
88
|
+
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// excludedEvents
|
|
93
|
+
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
+
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// excludedEventParams
|
|
98
|
+
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
+
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// sessionParams
|
|
103
|
+
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
+
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// includedExportTypes
|
|
108
|
+
if (config.includedExportTypes) {
|
|
109
|
+
const types = Object.entries(config.includedExportTypes)
|
|
110
|
+
.filter(([, enabled]) => enabled)
|
|
111
|
+
.map(([type]) => type);
|
|
112
|
+
if (types.length > 0) {
|
|
113
|
+
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return notes;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
+
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
+
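* multi-section descriptions. Entries are also added for promoted event parameters (`eventParamsToColumns`) and for columns added or replaced by event-level `enrichments`.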
|
|
124
|
+
*
|
|
125
|
+
* @param {Object} config - The merged configuration object.
|
|
126
|
+
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
+
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
+
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
+
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
+
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
+
*/
|
|
132
|
+
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
+
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
+
|
|
135
|
+
const configNotes = buildConfigNotes(config);
|
|
136
|
+
|
|
137
|
+
// Compose multi-section descriptions for each top-level column
|
|
138
|
+
for (const key of Object.keys(descriptions)) {
|
|
139
|
+
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
+
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
+
|
|
142
|
+
if (!baseDesc) continue;
|
|
143
|
+
|
|
144
|
+
const composed = composeDescription({
|
|
145
|
+
base: baseDesc,
|
|
146
|
+
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
+
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
+
config: configNotes[key] || null,
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
if (isStruct) {
|
|
152
|
+
descriptions[key].description = composed;
|
|
153
|
+
} else {
|
|
154
|
+
descriptions[key] = composed;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
+
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
+
config.eventParamsToColumns.forEach(p => {
|
|
161
|
+
const columnName = p.columnName || p.name;
|
|
162
|
+
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
+
descriptions[columnName] = composeDescription({
|
|
164
|
+
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
+
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
+
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
+
config: null,
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Add descriptions for columns added or replaced by data enrichments.
|
|
173
|
+
// Item-level enrichments are not yet supported and throw at SQL gen time — skip here.
|
|
174
|
+
if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
|
|
175
|
+
config.enrichments.forEach(e => {
|
|
176
|
+
const level = e.level ?? 'event';
|
|
177
|
+
if (level !== 'event') return;
|
|
178
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
179
|
+
const joinKeyText = joinKeys.join(', ');
|
|
180
|
+
const sourceText = renderEnrichmentSource(e.source);
|
|
181
|
+
for (const c of e.columns) {
|
|
182
|
+
const existing = descriptions[c];
|
|
183
|
+
const existingText = typeof existing === 'string'
|
|
184
|
+
? existing
|
|
185
|
+
: (existing && typeof existing === 'object' && existing.description)
|
|
186
|
+
? existing.description
|
|
187
|
+
: null;
|
|
188
|
+
const newDesc = existingText
|
|
189
|
+
? `Replaced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}). Original: ${existingText}`
|
|
190
|
+
: `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
|
|
191
|
+
// If the original was a struct-shaped entry, preserve the structure but replace the description.
|
|
192
|
+
// Otherwise, set as a plain string.
|
|
193
|
+
if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
|
|
194
|
+
descriptions[c] = { ...existing, description: newDesc };
|
|
195
|
+
} else {
|
|
196
|
+
descriptions[c] = newDesc;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return descriptions;
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Renders an enrichment source for inclusion in column descriptions.
|
|
207
|
+
*
|
|
208
|
+
* - String (expected to be a backtick-quoted table path): passed through as-is.
|
|
209
|
+
* - Dataform table reference object: rendered as `<dataset>.<name>` (project not available
|
|
210
|
+
* at description-generation time; resolved later via ctx.ref()).
|
|
211
|
+
*
|
|
212
|
+
* @param {string|Object} source - The enrichment's source field.
|
|
213
|
+
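* @returns {string} Source identifier for display: the string as given, a backtick-quoted `<dataset>.<name>`, or `String(source)` as a fallback when neither form applies.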
|
|
214
|
+
*/
|
|
215
|
+
const renderEnrichmentSource = (source) => {
|
|
216
|
+
if (typeof source === 'string') return source;
|
|
217
|
+
if (source && typeof source === 'object') {
|
|
218
|
+
const dataset = source.dataset || source.schema;
|
|
219
|
+
if (dataset && source.name) return '`' + dataset + '.' + source.name + '`';
|
|
220
|
+
}
|
|
221
|
+
return String(source);
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Checks whether a column (or its parent struct) is excluded by the config.
|
|
226
|
+
*
|
|
227
|
+
* @param {string[]} dependsOn - Column names this entry depends on.
|
|
228
|
+
* @param {string[]} excludedColumns - Combined excluded columns from config.
|
|
229
|
+
* @returns {boolean} True only if dependsOn is non-empty and ALL of its columns are excluded; false when dependsOn is missing or empty.
|
|
230
|
+
*/
|
|
231
|
+
const isExcluded = (dependsOn, excludedColumns) => {
|
|
232
|
+
if (!dependsOn || dependsOn.length === 0) return false;
|
|
233
|
+
return dependsOn.every(col => excludedColumns.includes(col));
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Builds the full table description by combining table-specific sections
|
|
238
|
+
* with shared sections (package attribution, config JSON dump).
|
|
239
|
+
*
|
|
240
|
+
* @param {Object} config - The merged configuration object.
|
|
241
|
+
* @param {string[]} tableSections - Table-specific description sections (provided by the table module).
|
|
242
|
+
* @returns {string} The composed table description.
|
|
243
|
+
*/
|
|
244
|
+
const buildTableDescription = (config, tableSections) => {
|
|
245
|
+
const sections = [...tableSections];
|
|
246
|
+
|
|
247
|
+
// Package Attribution
|
|
248
|
+
sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX} Version: ${version}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
|
|
249
|
+
|
|
250
|
+
// Config JSON dump
|
|
251
|
+
const configForDump = Object.fromEntries(
|
|
252
|
+
Object.entries(config).filter(([key]) => !key.startsWith('default'))
|
|
253
|
+
);
|
|
254
|
+
// Strip description and columns from dataformTableConfig to avoid circular reference and bloat
|
|
255
|
+
if (configForDump.dataformTableConfig) {
|
|
256
|
+
const { description, columns, ...rest } = configForDump.dataformTableConfig;
|
|
257
|
+
configForDump.dataformTableConfig = rest;
|
|
258
|
+
}
|
|
259
|
+
const configJson = JSON.stringify(configForDump, null, 2);
|
|
260
|
+
sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);
|
|
261
|
+
|
|
262
|
+
return sections.join('\n\n');
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
module.exports = {
|
|
266
|
+
getColumnDescriptions,
|
|
267
|
+
buildTableDescription,
|
|
268
|
+
composeDescription,
|
|
269
|
+
getLineageText,
|
|
270
|
+
buildConfigNotes,
|
|
271
|
+
isExcluded,
|
|
272
|
+
};
|