ga4-export-fixer 0.9.0-dev.1 → 0.9.0-dev.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -198,7 +198,8 @@ Create a new **ga4_events_enhanced** table using a **.js** file in your reposito
198
198
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
199
199
 
200
200
  const config = {
201
- sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT
201
+ // using hard-coded GA4 export path
202
+ sourceTable: '`project.analytics_12345.events_*`'
202
203
  };
203
204
 
204
205
  ga4EventsEnhanced.createTable(publish, config);
@@ -212,6 +213,7 @@ ga4EventsEnhanced.createTable(publish, config);
212
213
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
213
214
 
214
215
  const config = {
216
+ // GA4 export path declared, using the table reference object
215
217
  sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
216
218
  // use dataformTableConfig to make changes to the default Dataform table configuration
217
219
  dataformTableConfig: {
@@ -290,7 +292,8 @@ js {
290
292
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
291
293
 
292
294
  const config = {
293
- sourceTable: ref(constants.GA4_TABLES.MY_GA4_EXPORT),
295
+ // using hard-coded GA4 export path
296
+ sourceTable: '`project.analytics_12345.events_*`',
294
297
  self: self(),
295
298
  incremental: incremental()
296
299
  };
@@ -314,7 +317,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
314
317
 
315
318
  | Field | Type | Default/Required | Description |
316
319
  | ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
317
- | `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in format ``project.dataset.table`` |
320
+ | `sourceTable` | Dataform ref / object / string | **required** | Source GA4 export table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.events_*` ``` string for an external table. |
318
321
  | `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
319
322
  | `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
320
323
  | `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
@@ -334,6 +337,7 @@ All fields are optional except `sourceTable`. Default values are applied automat
334
337
  | `preOperations` | object | [See details](#preOperations) | Date range and incremental refresh configuration |
335
338
  | `eventParamsToColumns` | object[] | `[]` | Event parameters to promote to columns. [See item schema](#eventParamsToColumns) |
336
339
  | `customSteps` | object[] | `[]` | User-defined CTEs appended to the pipeline after `enhanced_events`. [See Custom CTEs](#custom-ctes) |
340
+ | `enrichments` | object[] | `[]` | Declarative external-data enrichments joined into `enhanced_events`. [See Data Enrichments](#data-enrichments) |
337
341
 
338
342
  <a id="default-dataformtableconfig"></a>
339
343
  <details>
@@ -473,7 +477,8 @@ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
473
477
  | `session_data` | yes | Session-level aggregations (grouped by `session_id`). |
474
478
  | `items_unnested` | only when `itemListAttribution` is on | Per-event item rows (one row per item per ecommerce event), with attribution window function applied. |
475
479
  | `items_rebuilt` | only when `itemListAttribution` is on | Re-aggregated items with attributed list fields, joined back to events via `_item_row_id`. |
476
- | `enhanced_events` | yes | The package's standard output shape (joined event_data + session_data + items_rebuilt, columns ordered, incremental date filter applied). The natural starting point for most custom CTEs. |
480
+ | `enrich_<name>` | only when configured via `enrichments` | One CTE per [enrichment](#data-enrichments) entry, providing dim data for joining into `enhanced_events`. |
481
+ | `enhanced_events` | yes | The package's standard output shape (joined event_data + session_data + items_rebuilt + enrich_*, columns ordered, incremental date filter applied). The natural starting point for most custom CTEs. |
477
482
 
478
483
  Example custom step using the raw SQL format:
479
484
 
@@ -521,6 +526,93 @@ end`,
521
526
 
522
527
  > **Note:** Built-in assertions assume the package's standard schema. If your custom CTEs rename, drop, or filter rows in ways that break those assumptions, disable the affected assertions explicitly via the `assertions` config option.
523
528
 
529
+ <a id="data-enrichments"></a>
530
+
531
+ **`enrichments`** — declaratively join external dimension data into `enhanced_events` (cohort labels, page metadata, marketing attribution, etc.). Each entry describes one dim source plus the join — the package generates the source CTE, the `LEFT JOIN`, and column descriptions automatically.
532
+
533
+ For typical use cases this is the right tool; reach for `customSteps` only when you need a transformation that doesn't fit a flat dim join.
534
+
535
+ **Per-enrichment shape:**
536
+
537
+ | Field | Type | Required | Description |
538
+ | --- | --- | --- | --- |
539
+ | `name` | string | Yes | Used in the generated `enrich_<name>` CTE name. Unique within `enrichments`. |
540
+ | `level` | `'event'` / `'item'` | No, defaults to `'event'` | Join grain. `'event'` joins external dim data onto each event row (any column on `enhanced_events` as the key). `'item'` joins external dim data onto each item inside the `items` array (any field on the items struct or any event_data column as the key). |
541
+ | `source` | Dataform ref / object / string | Yes | Source dim table. Inside an SQLX `js { }` block use `ref(...)`. From a `.js` definition file use a `{ schema, name }` ref object (resolved later via `ctx.ref()`) or a backtick-quoted ``` `project.dataset.table` ``` string for an external table. |
542
+ | `joinKey` | string / string[] | Yes | For `level: 'event'`: column name(s) on `enhanced_events`. For `level: 'item'`: field name(s) on the items struct (e.g. `'item_id'`) or column name(s) on `event_data` (e.g. `'user_pseudo_id'`). Composite keys (array) compile to `USING(col1, col2, ...)`. |
543
+ | `columns` | string[] | Yes | Source columns to add to the output (excluding `joinKey`). Names matching existing columns are coalesced with the original (`coalesce(enrich.col, original)`) so missed JOINs fall back to the existing value. |
544
+ | `dedupe` | boolean | No, defaults to `false` | When `true`, wraps the source CTE in `qualify row_number() over (partition by <joinKey>) = 1` so that dim sources with non-unique keys yield at most one row per key. Which row wins is non-deterministic; if you need a specific row, pre-aggregate in the source SQL. |
545
+
546
+ **Coalesce-or-add semantics.** If an enrichment column name matches an existing column on `enhanced_events` (a column promoted via `eventParamsToColumns`, a package-generated column, or a default GA4 column from the export), the enrichment value is coalesced with the original: `coalesce(enrich_<name>.<col>, <original>) as <col>`. Rows where the JOIN matches get the enrichment value; rows where it misses fall back to the existing value rather than going NULL. If there is no overlap, the column is added as a plain `enrich_<name>.<col>`.
547
+
548
+ **Example** — attach user cohort labels by `user_pseudo_id` (Dataform-declared table referenced by `{ schema, name }`):
549
+
550
+ ```javascript
551
+ enrichments: [
552
+ {
553
+ name: 'cohorts',
554
+ level: 'event',
555
+ source: { schema: 'analytics', name: 'user_cohorts' },
556
+ joinKey: 'user_pseudo_id',
557
+ columns: ['cohort_label', 'lifecycle_stage'],
558
+ },
559
+ ],
560
+ ```
561
+
562
+ **Example** — composite key (date + user) for daily-varying dim data, with dedupe safety net (external table referenced by backtick-FQN):
563
+
564
+ ```javascript
565
+ enrichments: [
566
+ {
567
+ name: 'segments',
568
+ level: 'event',
569
+ source: '`my-project.analytics.daily_user_segments`',
570
+ joinKey: ['event_date', 'user_pseudo_id'],
571
+ columns: ['segment'],
572
+ dedupe: true,
573
+ },
574
+ ],
575
+ ```
576
+
577
+ **Example** — fix a promoted event parameter via enrichment (coalesce case: enrichment value wins where the JOIN matches, original kept where it doesn't):
578
+
579
+ ```javascript
580
+ {
581
+ eventParamsToColumns: [{ name: 'page_title', type: 'string' }],
582
+ enrichments: [
583
+ {
584
+ name: 'titles',
585
+ level: 'event',
586
+ source: { schema: 'analytics', name: 'page_title_overrides' },
587
+ joinKey: 'page_location',
588
+ columns: ['page_title'], // overlaps the promoted column → coalesce(enrich.page_title, event_data.page_title)
589
+ },
590
+ ],
591
+ }
592
+ ```
593
+
594
+ **Example** — item-level enrichment: attach product master data to each item via `item_id`. The enrichment flows into the `items` array struct; `margin_bucket` is added as a new item-struct field, and `item_category` overlap-coalesces against the original. Item-level enrichment columns do NOT appear at the event grain — they live inside `items[].<col>`:
595
+
596
+ ```javascript
597
+ enrichments: [
598
+ {
599
+ name: 'products',
600
+ level: 'item',
601
+ source: { schema: 'analytics', name: 'product_master' },
602
+ joinKey: 'item_id', // joins on item.item_id
603
+ columns: ['margin_bucket', 'item_category'], // margin_bucket is additive; item_category overlap-coalesces
604
+ },
605
+ ],
606
+ ```
607
+
608
+ For `level: 'item'`, valid `joinKey` values are any field on the GA4 items struct (`item_id`, `item_category`, etc.) or any column on `event_data` (`user_pseudo_id`, `event_date`, etc.). An event-level and an item-level enrichment may share the same column name (e.g. both writing `cohort`) — the two columns target structurally distinct slots (`enhanced_events.cohort` at event grain vs `items[].cohort` inside the items array) and are not in collision.
609
+
610
+ > **Note:** Each enrichment generates a CTE named `enrich_<name>` at the top of the pipeline. The `enrich_*` namespace is part of the reserved-names contract — `customSteps` cannot use these names. The active reserved set includes only the names of enrichments actually configured.
611
+
612
+ > **Note:** Event-level enrichment columns get auto-generated descriptions (`Added by enrichment '<name>' (joined on <joinKey> from <source>).` for new columns; `Coalesced by enrichment '<name>' (...; falls back to original on missed JOIN). Original: <description>` for overlapping columns). User-supplied `dataformTableConfig.columns` overrides win — the auto-generated description is the default. Item-level enrichment columns do not receive auto-generated descriptions (BigQuery does not surface per-field descriptions on STRUCT-array fields cleanly through Dataform's column-description mechanism).
613
+
614
+ > **Note:** `joinKey` and `columns` entries must be plain SQL identifiers — inline aliases like `'id as user_id'` are rejected at validation time. If your dim source uses a different column name, alias it in an upstream Dataform view and point `source` at that view.
615
+
524
616
  <br>
525
617
 
526
618
  ---
package/documentation.js CHANGED
@@ -1,223 +1,272 @@
1
- const constants = require('./constants');
2
- const { version } = require('./package.json');
3
-
4
- /**
5
- * Composes a multi-section column description string from individual sections.
6
- * Sections with null/undefined/empty values are omitted.
7
- * Sections are separated by line breaks for readability.
8
- *
9
- * @param {Object} sections - { base, lineage, typicalUse, config }
10
- * @returns {string} Composed description
11
- */
12
- const composeDescription = (sections) => {
13
- const parts = [];
14
-
15
- if (sections.base) {
16
- parts.push(sections.base);
17
- }
18
-
19
- if (sections.lineage) {
20
- parts.push(`Lineage: ${sections.lineage}`);
21
- }
22
-
23
- if (sections.typicalUse) {
24
- parts.push(`Typical use: ${sections.typicalUse}`);
25
- }
26
-
27
- if (sections.config) {
28
- parts.push(`Config: ${sections.config}`);
29
- }
30
-
31
- return parts.join('\n\n');
32
- };
33
-
34
- /**
35
- * Returns a formatted lineage text string for a column, or null if no lineage data exists.
36
- *
37
- * @param {string} columnName - The column name to look up.
38
- * @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
39
- * @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
40
- */
41
- const getLineageText = (columnName, columnLineage) => {
42
- const entry = columnLineage[columnName];
43
- if (!entry) return null;
44
-
45
- const sourceLabels = {
46
- 'ga4_export': 'Standard GA4 export field',
47
- 'ga4_export_modified': 'GA4 export field (modified)',
48
- 'derived': 'Derived',
49
- };
50
-
51
- const label = sourceLabels[entry.source] || entry.source;
52
- return entry.note ? `${label} -- ${entry.note}` : label;
53
- };
54
-
55
- /**
56
- * Builds a map of config-specific notes for columns based on the provided configuration.
57
- * Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
58
- *
59
- * @param {Object} config - The merged configuration object.
60
- * @returns {Object} Map of column names to config note strings.
61
- */
62
- const buildConfigNotes = (config) => {
63
- const notes = {};
64
-
65
- if (!config) return notes;
66
-
67
- const append = (key, text) => {
68
- notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
69
- };
70
-
71
- // timezone
72
- if (config.timezone) {
73
- append('event_datetime', `Timezone: ${config.timezone}`);
74
- }
75
-
76
- // customTimestampParam
77
- if (config.customTimestampParam) {
78
- append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
79
- append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
80
- }
81
-
82
- // data_is_final
83
- if (config.dataIsFinal) {
84
- const method = config.dataIsFinal.detectionMethod;
85
- if (method === 'DAY_THRESHOLD') {
86
- append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
87
- } else {
88
- append('data_is_final', `Detection method: EXPORT_TYPE`);
89
- }
90
- }
91
-
92
- // excludedEvents
93
- if (config.excludedEvents && config.excludedEvents.length > 0) {
94
- append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
95
- }
96
-
97
- // excludedEventParams
98
- if (config.excludedEventParams && config.excludedEventParams.length > 0) {
99
- append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
100
- }
101
-
102
- // sessionParams
103
- if (config.sessionParams && config.sessionParams.length > 0) {
104
- append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
105
- }
106
-
107
- // includedExportTypes
108
- if (config.includedExportTypes) {
109
- const types = Object.entries(config.includedExportTypes)
110
- .filter(([, enabled]) => enabled)
111
- .map(([type]) => type);
112
- if (types.length > 0) {
113
- append('export_type', `Included export types: ${types.join(', ')}`);
114
- }
115
- }
116
-
117
- return notes;
118
- };
119
-
120
- /**
121
- * Returns a deep copy of the column descriptions, enriched with
122
- * lineage, typical use, and configuration-specific sections composed into
123
- * multi-section descriptions.
124
- *
125
- * @param {Object} config - The merged configuration object.
126
- * @param {Object} columnMetadata - Column metadata provided by the table module.
127
- * @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
128
- * @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
129
- * @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
130
- * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
131
- */
132
- const getColumnDescriptions = (config, columnMetadata) => {
133
- const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
134
-
135
- const configNotes = buildConfigNotes(config);
136
-
137
- // Compose multi-section descriptions for each top-level column
138
- for (const key of Object.keys(descriptions)) {
139
- const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
140
- const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
141
-
142
- if (!baseDesc) continue;
143
-
144
- const composed = composeDescription({
145
- base: baseDesc,
146
- lineage: getLineageText(key, columnMetadata.lineage),
147
- typicalUse: columnMetadata.typicalUse[key] || null,
148
- config: configNotes[key] || null,
149
- });
150
-
151
- if (isStruct) {
152
- descriptions[key].description = composed;
153
- } else {
154
- descriptions[key] = composed;
155
- }
156
- }
157
-
158
- // Add descriptions for dynamically promoted event parameter columns
159
- if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
160
- config.eventParamsToColumns.forEach(p => {
161
- const columnName = p.columnName || p.name;
162
- const type = p.type ? ` (${p.type})` : ' (any data type)';
163
- descriptions[columnName] = composeDescription({
164
- base: `Promoted from event parameter '${p.name}'${type}`,
165
- lineage: `Derived -- Promoted from the event_params array`,
166
- typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
167
- config: null,
168
- });
169
- });
170
- }
171
-
172
- return descriptions;
173
- };
174
-
175
- /**
176
- * Checks whether a column (or its parent struct) is excluded by the config.
177
- *
178
- * @param {string[]} dependsOn - Column names this entry depends on.
179
- * @param {string[]} excludedColumns - Combined excluded columns from config.
180
- * @returns {boolean} True if ALL dependsOn columns are excluded.
181
- */
182
- const isExcluded = (dependsOn, excludedColumns) => {
183
- if (!dependsOn || dependsOn.length === 0) return false;
184
- return dependsOn.every(col => excludedColumns.includes(col));
185
- };
186
-
187
- /**
188
- * Builds the full table description by combining table-specific sections
189
- * with shared sections (package attribution, config JSON dump).
190
- *
191
- * @param {Object} config - The merged configuration object.
192
- * @param {string[]} tableSections - Table-specific description sections (provided by the table module).
193
- * @returns {string} The composed table description.
194
- */
195
- const buildTableDescription = (config, tableSections) => {
196
- const sections = [...tableSections];
197
-
198
- // Package Attribution
199
- sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX} Version: ${version}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
200
-
201
- // Config JSON dump
202
- const configForDump = Object.fromEntries(
203
- Object.entries(config).filter(([key]) => !key.startsWith('default'))
204
- );
205
- // Strip description and columns from dataformTableConfig to avoid circular reference and bloat
206
- if (configForDump.dataformTableConfig) {
207
- const { description, columns, ...rest } = configForDump.dataformTableConfig;
208
- configForDump.dataformTableConfig = rest;
209
- }
210
- const configJson = JSON.stringify(configForDump, null, 2);
211
- sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);
212
-
213
- return sections.join('\n\n');
214
- };
215
-
216
- module.exports = {
217
- getColumnDescriptions,
218
- buildTableDescription,
219
- composeDescription,
220
- getLineageText,
221
- buildConfigNotes,
222
- isExcluded,
223
- };
1
+ const constants = require('./constants');
2
+ const { version } = require('./package.json');
3
+
4
+ /**
5
+ * Composes a multi-section column description string from individual sections.
6
+ * Sections with null/undefined/empty values are omitted.
7
+ * Sections are separated by line breaks for readability.
8
+ *
9
+ * @param {Object} sections - { base, lineage, typicalUse, config }
10
+ * @returns {string} Composed description
11
+ */
12
+ const composeDescription = (sections) => {
13
+ const parts = [];
14
+
15
+ if (sections.base) {
16
+ parts.push(sections.base);
17
+ }
18
+
19
+ if (sections.lineage) {
20
+ parts.push(`Lineage: ${sections.lineage}`);
21
+ }
22
+
23
+ if (sections.typicalUse) {
24
+ parts.push(`Typical use: ${sections.typicalUse}`);
25
+ }
26
+
27
+ if (sections.config) {
28
+ parts.push(`Config: ${sections.config}`);
29
+ }
30
+
31
+ return parts.join('\n\n');
32
+ };
33
+
34
+ /**
35
+ * Returns a formatted lineage text string for a column, or null if no lineage data exists.
36
+ *
37
+ * @param {string} columnName - The column name to look up.
38
+ * @param {Object} columnLineage - The lineage data object mapping column names to { source, note }.
39
+ * @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
40
+ */
41
+ const getLineageText = (columnName, columnLineage) => {
42
+ const entry = columnLineage[columnName];
43
+ if (!entry) return null;
44
+
45
+ const sourceLabels = {
46
+ 'ga4_export': 'Standard GA4 export field',
47
+ 'ga4_export_modified': 'GA4 export field (modified)',
48
+ 'derived': 'Derived',
49
+ };
50
+
51
+ const label = sourceLabels[entry.source] || entry.source;
52
+ return entry.note ? `${label} -- ${entry.note}` : label;
53
+ };
54
+
55
+ /**
56
+ * Builds a map of config-specific notes for columns based on the provided configuration.
57
+ * Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
58
+ *
59
+ * @param {Object} config - The merged configuration object.
60
+ * @returns {Object} Map of column names to config note strings.
61
+ */
62
+ const buildConfigNotes = (config) => {
63
+ const notes = {};
64
+
65
+ if (!config) return notes;
66
+
67
+ const append = (key, text) => {
68
+ notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
69
+ };
70
+
71
+ // timezone
72
+ if (config.timezone) {
73
+ append('event_datetime', `Timezone: ${config.timezone}`);
74
+ }
75
+
76
+ // customTimestampParam
77
+ if (config.customTimestampParam) {
78
+ append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
79
+ append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
80
+ }
81
+
82
+ // data_is_final
83
+ if (config.dataIsFinal) {
84
+ const method = config.dataIsFinal.detectionMethod;
85
+ if (method === 'DAY_THRESHOLD') {
86
+ append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
87
+ } else {
88
+ append('data_is_final', `Detection method: EXPORT_TYPE`);
89
+ }
90
+ }
91
+
92
+ // excludedEvents
93
+ if (config.excludedEvents && config.excludedEvents.length > 0) {
94
+ append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
95
+ }
96
+
97
+ // excludedEventParams
98
+ if (config.excludedEventParams && config.excludedEventParams.length > 0) {
99
+ append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
100
+ }
101
+
102
+ // sessionParams
103
+ if (config.sessionParams && config.sessionParams.length > 0) {
104
+ append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
105
+ }
106
+
107
+ // includedExportTypes
108
+ if (config.includedExportTypes) {
109
+ const types = Object.entries(config.includedExportTypes)
110
+ .filter(([, enabled]) => enabled)
111
+ .map(([type]) => type);
112
+ if (types.length > 0) {
113
+ append('export_type', `Included export types: ${types.join(', ')}`);
114
+ }
115
+ }
116
+
117
+ return notes;
118
+ };
119
+
120
+ /**
121
+ * Returns a deep copy of the column descriptions, enriched with
122
+ * lineage, typical use, and configuration-specific sections composed into
123
+ * multi-section descriptions.
124
+ *
125
+ * @param {Object} config - The merged configuration object.
126
+ * @param {Object} columnMetadata - Column metadata provided by the table module.
127
+ * @param {Object} columnMetadata.descriptions - Column descriptions (Dataform ITableConfig columns format).
128
+ * @param {Object} columnMetadata.lineage - Column lineage data mapping column names to { source, note }.
129
+ * @param {Object} columnMetadata.typicalUse - Column typical use mapping column names to description strings.
130
+ * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
131
+ */
132
+ const getColumnDescriptions = (config, columnMetadata) => {
133
+ const descriptions = JSON.parse(JSON.stringify(columnMetadata.descriptions));
134
+
135
+ const configNotes = buildConfigNotes(config);
136
+
137
+ // Compose multi-section descriptions for each top-level column
138
+ for (const key of Object.keys(descriptions)) {
139
+ const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
140
+ const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
141
+
142
+ if (!baseDesc) continue;
143
+
144
+ const composed = composeDescription({
145
+ base: baseDesc,
146
+ lineage: getLineageText(key, columnMetadata.lineage),
147
+ typicalUse: columnMetadata.typicalUse[key] || null,
148
+ config: configNotes[key] || null,
149
+ });
150
+
151
+ if (isStruct) {
152
+ descriptions[key].description = composed;
153
+ } else {
154
+ descriptions[key] = composed;
155
+ }
156
+ }
157
+
158
+ // Add descriptions for dynamically promoted event parameter columns
159
+ if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
160
+ config.eventParamsToColumns.forEach(p => {
161
+ const columnName = p.columnName || p.name;
162
+ const type = p.type ? ` (${p.type})` : ' (any data type)';
163
+ descriptions[columnName] = composeDescription({
164
+ base: `Promoted from event parameter '${p.name}'${type}`,
165
+ lineage: `Derived -- Promoted from the event_params array`,
166
+ typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
167
+ config: null,
168
+ });
169
+ });
170
+ }
171
+
172
+ // Add descriptions for columns added or replaced by data enrichments.
173
+ // Item-level enrichment columns live inside the `items` array struct and get no auto-generated descriptions — skip them here.
174
+ if (config && Array.isArray(config.enrichments) && config.enrichments.length > 0) {
175
+ config.enrichments.forEach(e => {
176
+ const level = e.level ?? 'event';
177
+ if (level !== 'event') return;
178
+ const joinKeys = Array.isArray(e.joinKey) ? e.joinKey : [e.joinKey];
179
+ const joinKeyText = joinKeys.join(', ');
180
+ const sourceText = renderEnrichmentSource(e.source);
181
+ for (const c of e.columns) {
182
+ const existing = descriptions[c];
183
+ const existingText = typeof existing === 'string'
184
+ ? existing
185
+ : (existing && typeof existing === 'object' && existing.description)
186
+ ? existing.description
187
+ : null;
188
+ const newDesc = existingText
189
+ ? `Coalesced by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}; falls back to original on missed JOIN). Original: ${existingText}`
190
+ : `Added by enrichment '${e.name}' (joined on ${joinKeyText} from ${sourceText}).`;
191
+ // If the original was a struct-shaped entry, preserve the structure but replace the description.
192
+ // Otherwise, set as a plain string.
193
+ if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
194
+ descriptions[c] = { ...existing, description: newDesc };
195
+ } else {
196
+ descriptions[c] = newDesc;
197
+ }
198
+ }
199
+ });
200
+ }
201
+
202
+ return descriptions;
203
+ };
204
+
205
+ /**
206
+ * Renders an enrichment source for inclusion in column descriptions.
207
+ *
208
+ * - Backtick-quoted string: passed through as-is.
209
+ * - Dataform table reference object: rendered as `<dataset>.<name>`, taking the dataset from `source.dataset` or, failing that, `source.schema` (project not available
210
+ * at description-generation time; resolved later via ctx.ref()).
211
+ *
212
+ * @param {string|Object} source - The enrichment's source field.
213
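+ * @returns {string} Source identifier for display: strings are returned unchanged, reference objects are rendered backtick-quoted, anything else falls back to `String(source)`.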
214
+ */
215
+ const renderEnrichmentSource = (source) => {
216
+ if (typeof source === 'string') return source;
217
+ if (source && typeof source === 'object') {
218
+ const dataset = source.dataset || source.schema;
219
+ if (dataset && source.name) return '`' + dataset + '.' + source.name + '`';
220
+ }
221
+ return String(source);
222
+ };
223
+
224
+ /**
225
+ * Checks whether a column (or its parent struct) is excluded by the config.
226
+ *
227
+ * @param {string[]} dependsOn - Column names this entry depends on.
228
+ * @param {string[]} excludedColumns - Combined excluded columns from config.
229
+ * @returns {boolean} True if ALL dependsOn columns are excluded.
230
+ */
231
+ const isExcluded = (dependsOn, excludedColumns) => {
232
+ if (!dependsOn || dependsOn.length === 0) return false;
233
+ return dependsOn.every(col => excludedColumns.includes(col));
234
+ };
235
+
236
+ /**
237
+ * Builds the full table description by combining table-specific sections
238
+ * with shared sections (package attribution, config JSON dump).
239
+ *
240
+ * @param {Object} config - The merged configuration object.
241
+ * @param {string[]} tableSections - Table-specific description sections (provided by the table module).
242
+ * @returns {string} The composed table description.
243
+ */
244
+ const buildTableDescription = (config, tableSections) => {
245
+ const sections = [...tableSections];
246
+
247
+ // Package Attribution
248
+ sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX} Version: ${version}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
249
+
250
+ // Config JSON dump
251
+ const configForDump = Object.fromEntries(
252
+ Object.entries(config).filter(([key]) => !key.startsWith('default'))
253
+ );
254
+ // Strip description and columns from dataformTableConfig to avoid circular reference and bloat
255
+ if (configForDump.dataformTableConfig) {
256
+ const { description, columns, ...rest } = configForDump.dataformTableConfig;
257
+ configForDump.dataformTableConfig = rest;
258
+ }
259
+ const configJson = JSON.stringify(configForDump, null, 2);
260
+ sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);
261
+
262
+ return sections.join('\n\n');
263
+ };
264
+
265
+ module.exports = {
266
+ getColumnDescriptions,
267
+ buildTableDescription,
268
+ composeDescription,
269
+ getLineageText,
270
+ buildConfigNotes,
271
+ isExcluded,
272
+ };