ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -4
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +91 -39
- package/package.json +5 -2
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +204 -91
- package/tables/ga4EventsEnhanced/validation.js +99 -4
- package/utils.js +163 -26
package/README.md
CHANGED
|
@@ -198,7 +198,8 @@ Create a new **ga4_events_enhanced** table using a **.js** file in your reposito
|
|
|
198
198
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
199
199
|
|
|
200
200
|
const config = {
|
|
201
|
-
|
|
201
|
+
// using hard-coded GA4 export path
|
|
202
|
+
sourceTable: '`project.analytics_12345.events_*`'
|
|
202
203
|
};
|
|
203
204
|
|
|
204
205
|
ga4EventsEnhanced.createTable(publish, config);
|
|
@@ -212,6 +213,7 @@ ga4EventsEnhanced.createTable(publish, config);
|
|
|
212
213
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
213
214
|
|
|
214
215
|
const config = {
|
|
216
|
+
// GA4 export path declared, using the table reference object
|
|
215
217
|
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
|
|
216
218
|
// use dataformTableConfig to make changes to the default Dataform table configuration
|
|
217
219
|
dataformTableConfig: {
|
|
@@ -290,7 +292,8 @@ js {
|
|
|
290
292
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
291
293
|
|
|
292
294
|
const config = {
|
|
293
|
-
|
|
295
|
+
// using hard-coded GA4 export path
|
|
296
|
+
sourceTable: '`project.analytics_12345.events_*`',
|
|
294
297
|
self: self(),
|
|
295
298
|
incremental: incremental()
|
|
296
299
|
};
|
|
@@ -314,7 +317,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
314
317
|
|
|
315
318
|
| Field | Type | Default/Required | Description |
|
|
316
319
|
| ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
317
|
-
| `sourceTable` | Dataform ref
|
|
320
|
+
| `sourceTable` | Dataform ref / object / string | **required** | Source GA4 export table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.events_*` ``` string for an external table. |
|
|
318
321
|
| `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
|
|
319
322
|
| `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
|
|
320
323
|
| `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
|
|
@@ -334,6 +337,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
334
337
|
| `preOperations` | object | [See details](#preOperations) | Date range and incremental refresh configuration |
|
|
335
338
|
| `eventParamsToColumns` | object[] | `[]` | Event parameters to promote to columns. [See item schema](#eventParamsToColumns) |
|
|
336
339
|
| `customSteps` | object[] | `[]` | User-defined CTEs appended to the pipeline after `enhanced_events`. [See Custom CTEs](#custom-ctes) |
|
|
340
|
+
| `enrichments` | object[] | `[]` | Declarative external-data enrichments joined into `enhanced_events`. [See Data Enrichments](#data-enrichments) |
|
|
337
341
|
|
|
338
342
|
<a id="default-dataformtableconfig"></a>
|
|
339
343
|
<details>
|
|
@@ -473,7 +477,8 @@ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
|
|
|
473
477
|
| `session_data` | yes | Session-level aggregations (grouped by `session_id`). |
|
|
474
478
|
| `items_unnested` | only when `itemListAttribution` is on | Per-event item rows (one row per item per ecommerce event), with attribution window function applied. |
|
|
475
479
|
| `items_rebuilt` | only when `itemListAttribution` is on | Re-aggregated items with attributed list fields, joined back to events via `_item_row_id`. |
|
|
476
|
-
| `
|
|
480
|
+
| `enrich_<name>` | only when configured via `enrichments` | One CTE per [enrichment](#data-enrichments) entry, providing dim data for joining into `enhanced_events`. |
|
|
481
|
+
| `enhanced_events` | yes | The package's standard output shape (joined event_data + session_data + items_rebuilt + enrich_*, columns ordered, incremental date filter applied). The natural starting point for most custom CTEs. |
|
|
477
482
|
|
|
478
483
|
Example custom step using the raw SQL format:
|
|
479
484
|
|
|
@@ -521,6 +526,93 @@ end`,
|
|
|
521
526
|
|
|
522
527
|
> **Note:** Built-in assertions assume the package's standard schema. If your custom CTEs rename, drop, or filter rows in ways that break those assumptions, disable the affected assertions explicitly via the `assertions` config option.
|
|
523
528
|
|
|
529
|
+
<a id="data-enrichments"></a>
|
|
530
|
+
|
|
531
|
+
**`enrichments`** — declaratively join external dimension data into `enhanced_events` (cohort labels, page metadata, marketing attribution, etc.). Each entry describes one dim source plus the join — the package generates the source CTE, the `LEFT JOIN`, and column descriptions automatically.
|
|
532
|
+
|
|
533
|
+
For typical use cases this is the right tool; reach for `customSteps` only when you need a transformation that doesn't fit a flat dim join.
|
|
534
|
+
|
|
535
|
+
**Per-enrichment shape:**
|
|
536
|
+
|
|
537
|
+
| Field | Type | Required | Description |
|
|
538
|
+
| --- | --- | --- | --- |
|
|
539
|
+
| `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
|
|
540
|
+
| `level` | `'event'` / `'item'` | No, defaults to `'event'` | Join grain. `'event'` joins external dim data onto each event row (any column on `enhanced_events` as the key). `'item'` joins external dim data onto each item inside the `items` array (any field on the items struct or any event_data column as the key). |
|
|
541
|
+
| `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
|
|
542
|
+
| `joinKey` | string / string[] | Yes | For `level: 'event'`: column name(s) on `enhanced_events`. For `level: 'item'`: field name(s) on the items struct (e.g. `'item_id'`) or column name(s) on `event_data` (e.g. `'user_pseudo_id'`). Composite keys (array) compile to `USING(col1, col2, ...)`. |
|
|
543
|
+
| `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
|
|
544
|
+
| `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` for non-unique-key dim sources. Non-deterministic which row wins; for strict needs, pre-aggregate in source SQL. |
|
|
545
|
+
|
|
546
|
+
**Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`.
|
|
547
|
+
|
|
548
|
+
**Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
|
|
549
|
+
|
|
550
|
+
```javascript
|
|
551
|
+
enrichments: [
|
|
552
|
+
{
|
|
553
|
+
name: 'cohorts',
|
|
554
|
+
level: 'event',
|
|
555
|
+
source: { schema: 'analytics', name: 'user_cohorts' },
|
|
556
|
+
joinKey: 'user_pseudo_id',
|
|
557
|
+
columns: ['cohort_label', 'lifecycle_stage'],
|
|
558
|
+
},
|
|
559
|
+
],
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
**Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net (external table referenced by backtick-FQN):
|
|
563
|
+
|
|
564
|
+
```javascript
|
|
565
|
+
enrichments: [
|
|
566
|
+
{
|
|
567
|
+
name: 'segments',
|
|
568
|
+
level: 'event',
|
|
569
|
+
source: '`my-project.analytics.daily_user_segments`',
|
|
570
|
+
joinKey: ['event_date', 'user_pseudo_id'],
|
|
571
|
+
columns: ['segment'],
|
|
572
|
+
dedupe: true,
|
|
573
|
+
},
|
|
574
|
+
],
|
|
575
|
+
```
|
|
576
|
+
|
|
577
|
+
**Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
|
|
578
|
+
|
|
579
|
+
```javascript
|
|
580
|
+
{
|
|
581
|
+
eventParamsToColumns: [{ name: 'page_title', type: 'string' }],
|
|
582
|
+
enrichments: [
|
|
583
|
+
{
|
|
584
|
+
name: 'titles',
|
|
585
|
+
level: 'event',
|
|
586
|
+
source: { schema: 'analytics', name: 'page_title_overrides' },
|
|
587
|
+
joinKey: 'page_location',
|
|
588
|
+
columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
|
|
589
|
+
},
|
|
590
|
+
],
|
|
591
|
+
}
|
|
592
|
+
```
|
|
593
|
+
|
|
594
|
+
**Example** — item-level enrichment: attach product master data to each item via `item_id`. The enrichment flows into the `items` array struct; `margin_bucket` is added as a new item-struct field, and `item_category` overlap-coalesces against the original. Item-level enrichment columns do NOT appear at the event grain — they live inside `items[].<col>`:
|
|
595
|
+
|
|
596
|
+
```javascript
|
|
597
|
+
enrichments: [
|
|
598
|
+
{
|
|
599
|
+
name: 'products',
|
|
600
|
+
level: 'item',
|
|
601
|
+
source: { schema: 'analytics', name: 'product_master' },
|
|
602
|
+
joinKey: 'item_id', // joins on item.item_id
|
|
603
|
+
columns: ['margin_bucket', 'item_category'], // margin_bucket is additive; item_category overlap-coalesces
|
|
604
|
+
},
|
|
605
|
+
],
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
For `level: 'item'`, valid `joinKey` values are any field on the GA4 items struct (`item_id`, `item_category`, etc.) or any column on `event_data` (`user_pseudo_id`, `event_date`, etc.). An event-level and an item-level enrichment may share the same column name (e.g. both writing `cohort`) — the two columns target structurally distinct slots (`enhanced_events.cohort` at event grain vs `items[].cohort` inside the items array) and are not in collision.
|
|
609
|
+
|
|
610
|
+
> **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
|
|
611
|
+
|
|
612
|
+
> **Note:** Event-level enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default. Item-level enrichment columns do not receive auto-generated descriptions (BigQuery does not surface per-field descriptions on STRUCT-array fields cleanly through Dataform's column-description mechanism).
|
|
613
|
+
|
|
614
|
+
> **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
|
|
615
|
+
|
|
524
616
|
<br>
|
|
525
617
|
|
|
526
618
|
---
|
package/documentation.js
CHANGED
|
@@ -1,223 +1,272 @@
|
|
|
1
|
-
const constants = require('./constants');
|
|
2
|
-
const { version } = require('./package.json');
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Composes a multi-section column description string from individual sections.
|
|
6
|
-
* Sections with null/undefined/empty values are omitted.
|
|
7
|
-
* Sections are separated by line breaks for readability.
|
|
8
|
-
*
|
|
9
|
-
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
-
* @returns {string} Composed description
|
|
11
|
-
*/
|
|
12
|
-
const composeDescription = (sections) => {
|
|
13
|
-
const parts = [];
|
|
14
|
-
|
|
15
|
-
if (sections.base) {
|
|
16
|
-
parts.push(sections.base);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
if (sections.lineage) {
|
|
20
|
-
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
if (sections.typicalUse) {
|
|
24
|
-
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (sections.config) {
|
|
28
|
-
parts.push(`Config: ${sections.config}`);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
return parts.join('\n\n');
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
-
*
|
|
37
|
-
* @param {string} columnName - The column name to look up.
|
|
38
|
-
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
-
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
-
*/
|
|
41
|
-
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
-
const entry = columnLineage[columnName];
|
|
43
|
-
if (!entry) return null;
|
|
44
|
-
|
|
45
|
-
const sourceLabels = {
|
|
46
|
-
'ga4_export': 'Standard GA4 export field',
|
|
47
|
-
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
-
'derived': 'Derived',
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
-
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
-
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
-
*
|
|
59
|
-
* @param {Object} config - The merged configuration object.
|
|
60
|
-
* @returns {Object} Map of column names to config note strings.
|
|
61
|
-
*/
|
|
62
|
-
const buildConfigNotes = (config) => {
|
|
63
|
-
const notes = {};
|
|
64
|
-
|
|
65
|
-
if (!config) return notes;
|
|
66
|
-
|
|
67
|
-
const append = (key, text) => {
|
|
68
|
-
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
// timezone
|
|
72
|
-
if (config.timezone) {
|
|
73
|
-
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// customTimestampParam
|
|
77
|
-
if (config.customTimestampParam) {
|
|
78
|
-
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
-
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
// data_is_final
|
|
83
|
-
if (config.dataIsFinal) {
|
|
84
|
-
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
-
if (method === 'DAY_THRESHOLD') {
|
|
86
|
-
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
-
} else {
|
|
88
|
-
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// excludedEvents
|
|
93
|
-
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
-
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
// excludedEventParams
|
|
98
|
-
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
-
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
// sessionParams
|
|
103
|
-
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
-
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// includedExportTypes
|
|
108
|
-
if (config.includedExportTypes) {
|
|
109
|
-
const types = Object.entries(config.includedExportTypes)
|
|
110
|
-
.filter(([, enabled]) => enabled)
|
|
111
|
-
.map(([type]) => type);
|
|
112
|
-
if (types.length > 0) {
|
|
113
|
-
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
return notes;
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
/**
|
|
121
|
-
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
-
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
-
* multi-section descriptions.
|
|
124
|
-
*
|
|
125
|
-
* @param {Object} config - The merged configuration object.
|
|
126
|
-
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
-
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
-
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
-
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
-
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
-
*/
|
|
132
|
-
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
-
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
-
|
|
135
|
-
const configNotes = buildConfigNotes(config);
|
|
136
|
-
|
|
137
|
-
// Compose multi-section descriptions for each top-level column
|
|
138
|
-
for (const key of Object.keys(descriptions)) {
|
|
139
|
-
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
-
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
-
|
|
142
|
-
if (!baseDesc) continue;
|
|
143
|
-
|
|
144
|
-
const composed = composeDescription({
|
|
145
|
-
base: baseDesc,
|
|
146
|
-
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
-
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
-
config: configNotes[key] || null,
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
if (isStruct) {
|
|
152
|
-
descriptions[key].description = composed;
|
|
153
|
-
} else {
|
|
154
|
-
descriptions[key] = composed;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
-
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
-
config.eventParamsToColumns.forEach(p => {
|
|
161
|
-
const columnName = p.columnName || p.name;
|
|
162
|
-
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
-
descriptions[columnName] = composeDescription({
|
|
164
|
-
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
-
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
-
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
-
config: null,
|
|
168
|
-
});
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
const
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
1
|
+
const constants = require('./constants');
|
|
2
|
+
const { version } = require('./package.json');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Composes a multi-section column description string from individual sections.
|
|
6
|
+
* Sections with null/undefined/empty values are omitted.
|
|
7
|
+
* Sections are separated by blank lines (two line breaks) for readability.
|
|
8
|
+
*
|
|
9
|
+
* @param {Object} sections - { base, lineage, typicalUse, config }
|
|
10
|
+
* @returns {string} Composed description
|
|
11
|
+
*/
|
|
12
|
+
const composeDescription = (sections) => {
|
|
13
|
+
const parts = [];
|
|
14
|
+
|
|
15
|
+
if (sections.base) {
|
|
16
|
+
parts.push(sections.base);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
if (sections.lineage) {
|
|
20
|
+
parts.push(`Lineage: ${sections.lineage}`);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (sections.typicalUse) {
|
|
24
|
+
parts.push(`Typical use: ${sections.typicalUse}`);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if (sections.config) {
|
|
28
|
+
parts.push(`Config: ${sections.config}`);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return parts.join('\n\n');
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Returns a formatted lineage text string for a column, or null if no lineage data exists.
|
|
36
|
+
*
|
|
37
|
+
* @param {string} columnName - The column name to look up.
|
|
38
|
+
* @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
|
|
39
|
+
* @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
|
|
40
|
+
*/
|
|
41
|
+
const getLineageText = (columnName, columnLineage) => {
|
|
42
|
+
const entry = columnLineage[columnName];
|
|
43
|
+
if (!entry) return null;
|
|
44
|
+
|
|
45
|
+
const sourceLabels = {
|
|
46
|
+
'ga4_export': 'Standard GA4 export field',
|
|
47
|
+
'ga4_export_modified': 'GA4 export field (modified)',
|
|
48
|
+
'derived': 'Derived',
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
const label = sourceLabels[entry.source] || entry.source;
|
|
52
|
+
return entry.note ? `${label} -- ${entry.note}` : label;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Builds a map of config-specific notes for columns based on the provided configuration.
|
|
57
|
+
* Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
|
|
58
|
+
*
|
|
59
|
+
* @param {Object} config - The merged configuration object.
|
|
60
|
+
* @returns {Object} Map of column names to config note strings.
|
|
61
|
+
*/
|
|
62
|
+
const buildConfigNotes = (config) => {
|
|
63
|
+
const notes = {};
|
|
64
|
+
|
|
65
|
+
if (!config) return notes;
|
|
66
|
+
|
|
67
|
+
const append = (key, text) => {
|
|
68
|
+
notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// timezone
|
|
72
|
+
if (config.timezone) {
|
|
73
|
+
append('event_datetime', `Timezone: ${config.timezone}`);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// customTimestampParam
|
|
77
|
+
if (config.customTimestampParam) {
|
|
78
|
+
append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
|
|
79
|
+
append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// data_is_final
|
|
83
|
+
if (config.dataIsFinal) {
|
|
84
|
+
const method = config.dataIsFinal.detectionMethod;
|
|
85
|
+
if (method === 'DAY_THRESHOLD') {
|
|
86
|
+
append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
|
|
87
|
+
} else {
|
|
88
|
+
append('data_is_final', `Detection method: EXPORT_TYPE`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// excludedEvents
|
|
93
|
+
if (config.excludedEvents && config.excludedEvents.length > 0) {
|
|
94
|
+
append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// excludedEventParams
|
|
98
|
+
if (config.excludedEventParams && config.excludedEventParams.length > 0) {
|
|
99
|
+
append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// sessionParams
|
|
103
|
+
if (config.sessionParams && config.sessionParams.length > 0) {
|
|
104
|
+
append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// includedExportTypes
|
|
108
|
+
if (config.includedExportTypes) {
|
|
109
|
+
const types = Object.entries(config.includedExportTypes)
|
|
110
|
+
.filter(([, enabled]) => enabled)
|
|
111
|
+
.map(([type]) => type);
|
|
112
|
+
if (types.length > 0) {
|
|
113
|
+
append('export_type', `Included export types: ${types.join(', ')}`);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return notes;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Returns a deep copy of the column descriptions, enriched with
|
|
122
|
+
* lineage, typical use, and configuration-specific sections composed into
|
|
123
|
+
* multi-section descriptions.
|
|
124
|
+
*
|
|
125
|
+
* @param {Object} config - The merged configuration object.
|
|
126
|
+
* @param {Object} columnMetadata - Column metadata provided by the table module.
|
|
127
|
+
* @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
|
|
128
|
+
* @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
|
|
129
|
+
* @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
|
|
130
|
+
* @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
|
|
131
|
+
*/
|
|
132
|
+
const getColumnDescriptions = (config, columnMetadata) => {
|
|
133
|
+
const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
|
|
134
|
+
|
|
135
|
+
const configNotes = buildConfigNotes(config);
|
|
136
|
+
|
|
137
|
+
// Compose multi-section descriptions for each top-level column
|
|
138
|
+
for (const key of Object.keys(descriptions)) {
|
|
139
|
+
const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
|
|
140
|
+
const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
|
|
141
|
+
|
|
142
|
+
if (!baseDesc) continue;
|
|
143
|
+
|
|
144
|
+
const composed = composeDescription({
|
|
145
|
+
base: baseDesc,
|
|
146
|
+
lineage: getLineageText(key, columnMetadata.lineage),
|
|
147
|
+
typicalUse: columnMetadata.typicalUse[key] || null,
|
|
148
|
+
config: configNotes[key] || null,
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
if (isStruct) {
|
|
152
|
+
descriptions[key].description = composed;
|
|
153
|
+
} else {
|
|
154
|
+
descriptions[key] = composed;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Add descriptions for dynamically promoted event parameter columns
|
|
159
|
+
if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
160
|
+
config.eventParamsToColumns.forEach(p => {
|
|
161
|
+
const columnName = p.columnName || p.name;
|
|
162
|
+
const type = p.type ? ` (${p.type})` : ' (any data type)';
|
|
163
|
+
descriptions[columnName] = composeDescription({
|
|
164
|
+
base: `Promoted from event parameter '${p.name}'${type}`,
|
|
165
|
+
lineage: `Derived -- Promoted from the event_params array`,
|
|
166
|
+
typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
|
|
167
|
+
config: null,
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Add descriptions for columns added or replaced by data enrichments.
|
|
173
|
+
// Item-level enrichment columns live inside the items array struct and do not receive auto-generated descriptions — skip them here.
|
|
174
|
+
if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
|
|
175
|
+
config.enrichments.forEach(e => {
|
|
176
|
+
const level = e.level ?? 'event';
|
|
177
|
+
if (level !== 'event') return;
|
|
178
|
+
const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
|
|
179
|
+
const joinKeyText = joinKeys.join(', ');
|
|
180
|
+
const sourceText = renderEnrichmentSource(e.source);
|
|
181
|
+
for (const c of e.columns) {
|
|
182
|
+
const existing = descriptions[c];
|
|
183
|
+
const existingText = typeof existing === 'string'
|
|
184
|
+
? existing
|
|
185
|
+
: (existing && typeof existing === 'object' && existing.description)
|
|
186
|
+
? existing.description
|
|
187
|
+
: null;
|
|
188
|
+
const newDesc = existingText
|
|
189
|
+
? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
|
|
190
|
+
: `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
|
|
191
|
+
// If the original was a struct-shaped entry, preserve the structure but replace the description.
|
|
192
|
+
// Otherwise, set as a plain string.
|
|
193
|
+
if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
|
|
194
|
+
descriptions[c] = { ...existing, description: newDesc };
|
|
195
|
+
} else {
|
|
196
|
+
descriptions[c] = newDesc;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return descriptions;
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Renders an enrichment source for inclusion in column descriptions.
|
|
207
|
+
*
|
|
208
|
+
* - Backtick-quoted string: passed through as-is.
|
|
209
|
+
* - Dataform table reference object: rendered as `<dataset>.<name>` (project not available
|
|
210
|
+
* at description-generation time; resolved later via ctx.ref()).
|
|
211
|
+
*
|
|
212
|
+
* @param {string|Object} source - The enrichment's source field.
|
|
213
|
+
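* @returns {string} Source identifier for display: a string source unchanged, a backtick-quoted `<dataset>.<name>` for a reference object, or String(source) as a fallback.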
|
|
214
|
+
*/
|
|
215
|
+
const renderEnrichmentSource = (source) => {
|
|
216
|
+
if (typeof source === 'string') return source;
|
|
217
|
+
if (source && typeof source === 'object') {
|
|
218
|
+
const dataset = source.dataset || source.schema;
|
|
219
|
+
if (dataset && source.name) return '`' + dataset + '.' + source.name + '`';
|
|
220
|
+
}
|
|
221
|
+
return String(source);
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Checks whether a column (or its parent struct) is excluded by the config.
|
|
226
|
+
*
|
|
227
|
+
* @param {string[]} dependsOn - Column names this entry depends on.
|
|
228
|
+
* @param {string[]} excludedColumns - Combined excluded columns from config.
|
|
229
|
+
* @returns {boolean} True if ALL dependsOn columns are excluded.
|
|
230
|
+
*/
|
|
231
|
+
const isExcluded = (dependsOn, excludedColumns) => {
|
|
232
|
+
if (!dependsOn || dependsOn.length === 0) return false;
|
|
233
|
+
return dependsOn.every(col => excludedColumns.includes(col));
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Builds the full table description by combining table-specific sections
|
|
238
|
+
* with shared sections (package attribution, config JSON dump).
|
|
239
|
+
*
|
|
240
|
+
* @param {Object} config - The merged configuration object.
|
|
241
|
+
* @param {string[]} tableSections - Table-specific description sections (provided by the table module).
|
|
242
|
+
* @returns {string} The composed table description.
|
|
243
|
+
*/
|
|
244
|
+
const buildTableDescription = (config, tableSections) => {
|
|
245
|
+
const sections = [...tableSections];
|
|
246
|
+
|
|
247
|
+
// Package Attribution
|
|
248
|
+
sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX} Version: ${version}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
|
|
249
|
+
|
|
250
|
+
// Config JSON dump
|
|
251
|
+
const configForDump = Object.fromEntries(
|
|
252
|
+
Object.entries(config).filter(([key]) => !key.startsWith('default'))
|
|
253
|
+
);
|
|
254
|
+
// Strip description and columns from dataformTableConfig to avoid circular reference and bloat
|
|
255
|
+
if (configForDump.dataformTableConfig) {
|
|
256
|
+
const { description, columns, ...rest } = configForDump.dataformTableConfig;
|
|
257
|
+
configForDump.dataformTableConfig = rest;
|
|
258
|
+
}
|
|
259
|
+
const configJson = JSON.stringify(configForDump, null, 2);
|
|
260
|
+
sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);
|
|
261
|
+
|
|
262
|
+
return sections.join('\n\n');
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
module.exports = {
|
|
266
|
+
getColumnDescriptions,
|
|
267
|
+
buildTableDescription,
|
|
268
|
+
composeDescription,
|
|
269
|
+
getLineageText,
|
|
270
|
+
buildConfigNotes,
|
|
271
|
+
isExcluded,
|
|
272
|
+
};
|