ga4-export-fixer 0.4.3-dev.1 → 0.4.3-dev.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/documentation.js +304 -306
- package/package.json +42 -42
- package/tables/ga4EventsEnhanced.js +14 -26
package/documentation.js
CHANGED
|
@@ -1,306 +1,304 @@
|
|
|
1
|
-
const columnDescriptions = require('./columns/columnDescriptions.json');
|
|
2
|
-
const columnLineage = require('./columns/columnLineage.json');
|
|
3
|
-
const columnTypicalUse = require('./columns/columnTypicalUse.json');
|
|
4
|
-
const tableAgentInstructions = require('./columns/tableAgentInstructions.json');
|
|
5
|
-
const constants = require('./constants');
|
|
6
|
-
|
|
7
|
-
/**
 * Composes a multi-section column description from its individual sections.
 *
 * Sections are emitted in a fixed order (base, lineage, typical use, config).
 * Falsy sections (null, undefined, empty string) are omitted and the remaining
 * ones are separated by a blank line.
 *
 * @param {Object} [sections] - { base, lineage, typicalUse, config }. A missing
 *   or null argument is treated as an object without any sections.
 * @returns {string} Composed description; an empty string when no section is set.
 */
const composeDescription = (sections) => {
    // Guard against a missing argument so callers get '' instead of a TypeError.
    const { base, lineage, typicalUse, config } = sections || {};

    // Each entry pairs the prefix a section is rendered with and its value.
    const labelled = [
        ['', base],
        ['Lineage: ', lineage],
        ['Typical use: ', typicalUse],
        ['Config: ', config],
    ];

    return labelled
        .filter(([, value]) => value)
        .map(([prefix, value]) => `${prefix}${value}`)
        .join('\n\n');
};
|
|
36
|
-
|
|
37
|
-
/**
 * Returns the formatted lineage text for a column, or null when the lineage
 * data has no entry for it.
 *
 * The entry's `source` is translated to a human-readable label (unknown
 * sources are shown verbatim). When the entry has a `note`, it is appended
 * after " -- ".
 *
 * @param {string} columnName - The column name to look up.
 * @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ...", or null.
 */
const getLineageText = (columnName) => {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    // Only own keys count: names such as "constructor" or "toString" would
    // otherwise resolve to members inherited from Object.prototype.
    if (!hasOwn(columnLineage, columnName)) return null;

    const entry = columnLineage[columnName];
    if (!entry) return null;

    const sourceLabels = {
        'ga4_export': 'Standard GA4 export field',
        'ga4_export_modified': 'GA4 export field (modified)',
        'derived': 'Derived',
    };

    // Unknown sources fall back to the raw source value.
    const label = hasOwn(sourceLabels, entry.source) ? sourceLabels[entry.source] : entry.source;
    return entry.note ? `${label} -- ${entry.note}` : label;
};
|
|
56
|
-
|
|
57
|
-
/**
 * Builds a map of configuration-specific notes for columns.
 *
 * Every recognised setting contributes a short note to the column it affects;
 * when several settings touch the same column their notes are joined with ". ".
 *
 * @param {Object} config - The merged configuration object. A missing config yields an empty map.
 * @returns {Object} Map of column names to config note strings.
 */
const buildConfigNotes = (config) => {
    const notes = {};

    if (!config) return notes;

    const append = (key, text) => {
        notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
    };

    const {
        timezone,
        customTimestampParam,
        dataIsFinal,
        excludedEvents,
        excludedEventParams,
        sessionParams,
        includedExportTypes,
    } = config;

    // timezone
    if (timezone) {
        append('event_datetime', `Timezone: ${timezone}`);
    }

    // customTimestampParam
    if (customTimestampParam) {
        append('event_datetime', `Custom timestamp parameter: '${customTimestampParam}'`);
        append('event_custom_timestamp', `Source parameter: '${customTimestampParam}'`);
    }

    // data_is_final
    if (dataIsFinal) {
        if (dataIsFinal.detectionMethod === 'DAY_THRESHOLD') {
            const { dayThreshold } = dataIsFinal;
            // Leave the day count out when no threshold is set rather than printing "undefined days".
            const hasThreshold = dayThreshold !== undefined && dayThreshold !== null;
            const suffix = hasThreshold ? ` (${dayThreshold} days)` : '';
            append('data_is_final', `Detection method: DAY_THRESHOLD${suffix}`);
        } else {
            append('data_is_final', 'Detection method: EXPORT_TYPE');
        }
    }

    // excludedEvents, excludedEventParams, sessionParams: [column, label, values]
    const listNotes = [
        ['event_name', 'Excluded events', excludedEvents],
        ['event_params', 'Excluded parameters', excludedEventParams],
        ['session_params', 'Configured parameters', sessionParams],
    ];
    listNotes.forEach(([column, label, values]) => {
        if (values && values.length > 0) {
            append(column, `${label}: ${values.join(', ')}`);
        }
    });

    // includedExportTypes: list only the export types whose flag is truthy
    if (includedExportTypes) {
        const types = Object.entries(includedExportTypes)
            .filter(([, enabled]) => enabled)
            .map(([type]) => type);
        if (types.length > 0) {
            append('export_type', `Included export types: ${types.join(', ')}`);
        }
    }

    return notes;
};
|
|
121
|
-
|
|
122
|
-
/**
 * Returns a deep copy of the default column descriptions in which every
 * top-level description is recomposed from its base text plus lineage,
 * typical-use and configuration-specific sections.
 *
 * Entries are either plain strings or objects carrying a `description`
 * property; for the latter only `description` is rewritten. Entries without a
 * usable base description (null, empty string, object without description)
 * are left untouched. Columns promoted via `config.eventParamsToColumns` get
 * a generated description.
 *
 * @param {Object} config - The merged configuration object from ga4EventsEnhanced.
 * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
 */
const getColumnDescriptions = (config) => {
    // JSON round-trip: the source is JSON data, so this is a faithful deep copy
    // and the shared module-level object is never mutated.
    const descriptions = JSON.parse(JSON.stringify(columnDescriptions));

    const configNotes = buildConfigNotes(config);

    // Compose multi-section descriptions for each top-level column
    for (const [key, value] of Object.entries(descriptions)) {
        // typeof null is 'object', so null must be ruled out before reading .description.
        const isStruct = value !== null && typeof value === 'object' && Boolean(value.description);

        let baseDesc = null;
        if (isStruct) {
            baseDesc = value.description;
        } else if (typeof value === 'string') {
            baseDesc = value;
        }

        if (!baseDesc) continue;

        const composed = composeDescription({
            base: baseDesc,
            lineage: getLineageText(key),
            typicalUse: columnTypicalUse[key] || null,
            config: configNotes[key] || null,
        });

        if (isStruct) {
            value.description = composed;
        } else {
            descriptions[key] = composed;
        }
    }

    // Add descriptions for dynamically promoted event parameter columns
    if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
        config.eventParamsToColumns.forEach((p) => {
            const columnName = p.columnName || p.name;
            const type = p.type ? ` (${p.type})` : ' (any data type)';
            descriptions[columnName] = composeDescription({
                base: `Promoted from event parameter '${p.name}'${type}`,
                lineage: 'Derived -- Promoted from the event_params array',
                typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
                config: null,
            });
        });
    }

    return descriptions;
};
|
|
172
|
-
|
|
173
|
-
/**
 * Checks whether an entry should be dropped because every column it depends
 * on has been excluded.
 *
 * @param {string[]} dependsOn - Column names this entry depends on. An empty or
 *   missing list means the entry has no dependencies and is never excluded.
 * @param {string[]} excludedColumns - Excluded column names to check against.
 *   A missing list is treated as "nothing excluded".
 * @returns {boolean} True only if ALL dependsOn columns are excluded.
 */
const isExcluded = (dependsOn, excludedColumns) => {
    if (!dependsOn || dependsOn.length === 0) return false;
    // Without an exclusion list nothing can be excluded.
    if (!excludedColumns) return false;
    return dependsOn.every((col) => excludedColumns.includes(col));
};
|
|
184
|
-
|
|
185
|
-
/**
|
|
186
|
-
* Composes the full table description for ga4_events_enhanced, including
|
|
187
|
-
* AI agent instructions (key fields, synonyms, filtering guidance, event vocabulary)
|
|
188
|
-
* and the existing table features and config JSON dump.
|
|
189
|
-
*
|
|
190
|
-
* @param {Object} config - The merged configuration object.
|
|
191
|
-
* @returns {string} The composed table description.
|
|
192
|
-
*/
|
|
193
|
-
const getTableDescription = (config) => {
|
|
194
|
-
// Only use user-configured excludedColumns for filtering AI instructions.
|
|
195
|
-
// defaultExcludedColumns refers to raw GA4 export columns excluded during extraction
|
|
196
|
-
// (e.g. session_id is excluded from the raw export but exists as a derived column in the final table).
|
|
197
|
-
const excludedColumns = config.excludedColumns || [];
|
|
198
|
-
|
|
199
|
-
const excludedEvents = [
|
|
200
|
-
...(config.defaultExcludedEvents || []),
|
|
201
|
-
...(config.excludedEvents || []),
|
|
202
|
-
];
|
|
203
|
-
|
|
204
|
-
const sections = [];
|
|
205
|
-
|
|
206
|
-
// 1. Overview
|
|
207
|
-
const overviewLines = [
|
|
208
|
-
'GA4 Events Enhanced',
|
|
209
|
-
'',
|
|
210
|
-
'An enhanced version of the GA4 BigQuery export. Each row is one event.',
|
|
211
|
-
];
|
|
212
|
-
if (config.timezone) {
|
|
213
|
-
overviewLines.push(`Timezone: ${config.timezone}.`);
|
|
214
|
-
}
|
|
215
|
-
sections.push(overviewLines.join('\n'));
|
|
216
|
-
|
|
217
|
-
// 2. Key Fields
|
|
218
|
-
const keyFieldLines = tableAgentInstructions.keyFields
|
|
219
|
-
.filter(kf => !isExcluded(kf.dependsOn, excludedColumns))
|
|
220
|
-
.map(kf => `- ${kf.field}: ${kf.note}`);
|
|
221
|
-
|
|
222
|
-
// Add promoted event params
|
|
223
|
-
if (config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
|
|
224
|
-
config.eventParamsToColumns.forEach(p => {
|
|
225
|
-
const columnName = p.columnName || p.name;
|
|
226
|
-
keyFieldLines.push(`- ${columnName}: Promoted event parameter '${p.name}'. Available as a top-level column for direct filtering.`);
|
|
227
|
-
});
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
if (keyFieldLines.length > 0) {
|
|
231
|
-
sections.push('KEY FIELDS:\n' + keyFieldLines.join('\n'));
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// 3. Synonyms
|
|
235
|
-
const synonymLines = tableAgentInstructions.synonyms
|
|
236
|
-
.filter(s => !isExcluded(s.dependsOn, excludedColumns))
|
|
237
|
-
.map(s => `- "${s.terms.join('" / "')}" → ${s.sql}`);
|
|
238
|
-
|
|
239
|
-
if (synonymLines.length > 0) {
|
|
240
|
-
sections.push('SYNONYMS:\n' + synonymLines.join('\n'));
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// 4. Filtering and Grouping
|
|
244
|
-
const guidanceLines = tableAgentInstructions.filteringGuidance
|
|
245
|
-
.filter(g => !isExcluded(g.dependsOn, excludedColumns))
|
|
246
|
-
.map(g => `- ${g.text}`);
|
|
247
|
-
|
|
248
|
-
if (guidanceLines.length > 0) {
|
|
249
|
-
sections.push('FILTERING AND GROUPING:\n' + guidanceLines.join('\n'));
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
// 5. Event Vocabulary
|
|
253
|
-
const vocabParts = [];
|
|
254
|
-
const autoEvents = tableAgentInstructions.eventVocabulary.autoCollectedAndEnhanced
|
|
255
|
-
.filter(e => !excludedEvents.includes(e));
|
|
256
|
-
if (autoEvents.length > 0) {
|
|
257
|
-
vocabParts.push(`Auto-collected and enhanced measurement: ${autoEvents.join(', ')}`);
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
if (!isExcluded(['ecommerce'], excludedColumns)) {
|
|
261
|
-
const ecomEvents = tableAgentInstructions.eventVocabulary.ecommerce
|
|
262
|
-
.filter(e => !excludedEvents.includes(e));
|
|
263
|
-
if (ecomEvents.length > 0) {
|
|
264
|
-
vocabParts.push(`Ecommerce (recommended): ${ecomEvents.join(', ')}`);
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
if (vocabParts.length > 0) {
|
|
269
|
-
sections.push('COMMON EVENT NAMES:\n' + vocabParts.join('\n'));
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// 6. Table Features
|
|
273
|
-
const featureLines = [
|
|
274
|
-
'Combines daily, intraday, and fresh exports; the best available version of each event is used.',
|
|
275
|
-
'Incremental updates: non-final data is replaced with the latest available data on every run.',
|
|
276
|
-
'Promotes key fields (e.g. page_location, session_id) to top-level columns for faster queries.',
|
|
277
|
-
'Session-level fields: landing_page, user_id resolution, and configurable session parameters.',
|
|
278
|
-
];
|
|
279
|
-
sections.push('TABLE FEATURES:\n' + featureLines.map(f => `- ${f}`).join('\n'));
|
|
280
|
-
|
|
281
|
-
// 7. Package Attribution
|
|
282
|
-
sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);
|
|
283
|
-
|
|
284
|
-
// 8. Config JSON dump
|
|
285
|
-
const
|
|
286
|
-
Object.
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
sections.
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
buildConfigNotes,
|
|
306
|
-
};
|
|
1
|
+
const columnDescriptions = require('./columns/columnDescriptions.json');
|
|
2
|
+
const columnLineage = require('./columns/columnLineage.json');
|
|
3
|
+
const columnTypicalUse = require('./columns/columnTypicalUse.json');
|
|
4
|
+
const tableAgentInstructions = require('./columns/tableAgentInstructions.json');
|
|
5
|
+
const constants = require('./constants');
|
|
6
|
+
|
|
7
|
+
/**
 * Composes a multi-section column description from its individual sections.
 *
 * Sections are emitted in a fixed order (base, lineage, typical use, config).
 * Falsy sections (null, undefined, empty string) are omitted and the remaining
 * ones are separated by a blank line.
 *
 * @param {Object} [sections] - { base, lineage, typicalUse, config }. A missing
 *   or null argument is treated as an object without any sections.
 * @returns {string} Composed description; an empty string when no section is set.
 */
const composeDescription = (sections) => {
    // Guard against a missing argument so callers get '' instead of a TypeError.
    const { base, lineage, typicalUse, config } = sections || {};

    // Each entry pairs the prefix a section is rendered with and its value.
    const labelled = [
        ['', base],
        ['Lineage: ', lineage],
        ['Typical use: ', typicalUse],
        ['Config: ', config],
    ];

    return labelled
        .filter(([, value]) => value)
        .map(([prefix, value]) => `${prefix}${value}`)
        .join('\n\n');
};
|
|
36
|
+
|
|
37
|
+
/**
 * Returns the formatted lineage text for a column, or null when the lineage
 * data has no entry for it.
 *
 * The entry's `source` is translated to a human-readable label (unknown
 * sources are shown verbatim). When the entry has a `note`, it is appended
 * after " -- ".
 *
 * @param {string} columnName - The column name to look up.
 * @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ...", or null.
 */
const getLineageText = (columnName) => {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    // Only own keys count: names such as "constructor" or "toString" would
    // otherwise resolve to members inherited from Object.prototype.
    if (!hasOwn(columnLineage, columnName)) return null;

    const entry = columnLineage[columnName];
    if (!entry) return null;

    const sourceLabels = {
        'ga4_export': 'Standard GA4 export field',
        'ga4_export_modified': 'GA4 export field (modified)',
        'derived': 'Derived',
    };

    // Unknown sources fall back to the raw source value.
    const label = hasOwn(sourceLabels, entry.source) ? sourceLabels[entry.source] : entry.source;
    return entry.note ? `${label} -- ${entry.note}` : label;
};
|
|
56
|
+
|
|
57
|
+
/**
 * Builds a map of configuration-specific notes for columns.
 *
 * Every recognised setting contributes a short note to the column it affects;
 * when several settings touch the same column their notes are joined with ". ".
 *
 * @param {Object} config - The merged configuration object. A missing config yields an empty map.
 * @returns {Object} Map of column names to config note strings.
 */
const buildConfigNotes = (config) => {
    const notes = {};

    if (!config) return notes;

    const append = (key, text) => {
        notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
    };

    const {
        timezone,
        customTimestampParam,
        dataIsFinal,
        excludedEvents,
        excludedEventParams,
        sessionParams,
        includedExportTypes,
    } = config;

    // timezone
    if (timezone) {
        append('event_datetime', `Timezone: ${timezone}`);
    }

    // customTimestampParam
    if (customTimestampParam) {
        append('event_datetime', `Custom timestamp parameter: '${customTimestampParam}'`);
        append('event_custom_timestamp', `Source parameter: '${customTimestampParam}'`);
    }

    // data_is_final
    if (dataIsFinal) {
        if (dataIsFinal.detectionMethod === 'DAY_THRESHOLD') {
            const { dayThreshold } = dataIsFinal;
            // Leave the day count out when no threshold is set rather than printing "undefined days".
            const hasThreshold = dayThreshold !== undefined && dayThreshold !== null;
            const suffix = hasThreshold ? ` (${dayThreshold} days)` : '';
            append('data_is_final', `Detection method: DAY_THRESHOLD${suffix}`);
        } else {
            append('data_is_final', 'Detection method: EXPORT_TYPE');
        }
    }

    // excludedEvents, excludedEventParams, sessionParams: [column, label, values]
    const listNotes = [
        ['event_name', 'Excluded events', excludedEvents],
        ['event_params', 'Excluded parameters', excludedEventParams],
        ['session_params', 'Configured parameters', sessionParams],
    ];
    listNotes.forEach(([column, label, values]) => {
        if (values && values.length > 0) {
            append(column, `${label}: ${values.join(', ')}`);
        }
    });

    // includedExportTypes: list only the export types whose flag is truthy
    if (includedExportTypes) {
        const types = Object.entries(includedExportTypes)
            .filter(([, enabled]) => enabled)
            .map(([type]) => type);
        if (types.length > 0) {
            append('export_type', `Included export types: ${types.join(', ')}`);
        }
    }

    return notes;
};
|
|
121
|
+
|
|
122
|
+
/**
 * Returns a deep copy of the default column descriptions in which every
 * top-level description is recomposed from its base text plus lineage,
 * typical-use and configuration-specific sections.
 *
 * Entries are either plain strings or objects carrying a `description`
 * property; for the latter only `description` is rewritten. Entries without a
 * usable base description (null, empty string, object without description)
 * are left untouched. Columns promoted via `config.eventParamsToColumns` get
 * a generated description.
 *
 * @param {Object} config - The merged configuration object from ga4EventsEnhanced.
 * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
 */
const getColumnDescriptions = (config) => {
    // JSON round-trip: the source is JSON data, so this is a faithful deep copy
    // and the shared module-level object is never mutated.
    const descriptions = JSON.parse(JSON.stringify(columnDescriptions));

    const configNotes = buildConfigNotes(config);

    // Compose multi-section descriptions for each top-level column
    for (const [key, value] of Object.entries(descriptions)) {
        // typeof null is 'object', so null must be ruled out before reading .description.
        const isStruct = value !== null && typeof value === 'object' && Boolean(value.description);

        let baseDesc = null;
        if (isStruct) {
            baseDesc = value.description;
        } else if (typeof value === 'string') {
            baseDesc = value;
        }

        if (!baseDesc) continue;

        const composed = composeDescription({
            base: baseDesc,
            lineage: getLineageText(key),
            typicalUse: columnTypicalUse[key] || null,
            config: configNotes[key] || null,
        });

        if (isStruct) {
            value.description = composed;
        } else {
            descriptions[key] = composed;
        }
    }

    // Add descriptions for dynamically promoted event parameter columns
    if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
        config.eventParamsToColumns.forEach((p) => {
            const columnName = p.columnName || p.name;
            const type = p.type ? ` (${p.type})` : ' (any data type)';
            descriptions[columnName] = composeDescription({
                base: `Promoted from event parameter '${p.name}'${type}`,
                lineage: 'Derived -- Promoted from the event_params array',
                typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
                config: null,
            });
        });
    }

    return descriptions;
};
|
|
172
|
+
|
|
173
|
+
/**
 * Checks whether an entry should be dropped because every column it depends
 * on has been excluded.
 *
 * @param {string[]} dependsOn - Column names this entry depends on. An empty or
 *   missing list means the entry has no dependencies and is never excluded.
 * @param {string[]} excludedColumns - Excluded column names to check against.
 *   A missing list is treated as "nothing excluded".
 * @returns {boolean} True only if ALL dependsOn columns are excluded.
 */
const isExcluded = (dependsOn, excludedColumns) => {
    if (!dependsOn || dependsOn.length === 0) return false;
    // Without an exclusion list nothing can be excluded.
    if (!excludedColumns) return false;
    return dependsOn.every((col) => excludedColumns.includes(col));
};
|
|
184
|
+
|
|
185
|
+
/**
 * Composes the full table description for ga4_events_enhanced.
 *
 * The description is a sequence of sections separated by blank lines:
 * overview, AI agent instructions (key fields, synonyms, filtering guidance,
 * common event names), table features, package attribution and a JSON dump of
 * the configuration. Agent instruction entries whose `dependsOn` columns are
 * all excluded, and event names that are excluded, are left out; a section
 * that ends up empty is omitted.
 *
 * @param {Object} config - The merged configuration object. A missing config
 *   is treated as an empty configuration.
 * @returns {string} The composed table description.
 */
const getTableDescription = (config) => {
    // Tolerate a missing config instead of failing on the first property access.
    const settings = config || {};

    // Only use user-configured excludedColumns for filtering AI instructions.
    // defaultExcludedColumns refers to raw GA4 export columns excluded during extraction
    // (e.g. session_id is excluded from the raw export but exists as a derived column in the final table).
    const excludedColumns = settings.excludedColumns || [];

    // A Set keeps the per-event membership checks below constant-time.
    const excludedEvents = new Set([
        ...(settings.defaultExcludedEvents || []),
        ...(settings.excludedEvents || []),
    ]);

    const promotedParams = settings.eventParamsToColumns || [];

    const isAvailable = (item) => !isExcluded(item.dependsOn, excludedColumns);
    const isKeptEvent = (eventName) => !excludedEvents.has(eventName);

    const sections = [];

    // Adds a "TITLE:" section made of the given lines; empty sections are skipped.
    const pushSection = (title, lines) => {
        if (lines.length > 0) {
            sections.push(`${title}:\n${lines.join('\n')}`);
        }
    };

    // 1. Overview
    const overviewLines = [
        'GA4 Events Enhanced',
        '',
        'An enhanced version of the GA4 BigQuery export. Each row is one event.',
    ];
    if (settings.timezone) {
        overviewLines.push(`Timezone: ${settings.timezone}.`);
    }
    sections.push(overviewLines.join('\n'));

    // 2. Key Fields (static key fields followed by promoted event params)
    const keyFieldLines = [
        ...tableAgentInstructions.keyFields
            .filter(isAvailable)
            .map((kf) => `- ${kf.field}: ${kf.note}`),
        ...promotedParams.map(
            (p) => `- ${p.columnName || p.name}: Promoted event parameter '${p.name}'. Available as a top-level column for direct filtering.`
        ),
    ];
    pushSection('KEY FIELDS', keyFieldLines);

    // 3. Synonyms
    const synonymLines = tableAgentInstructions.synonyms
        .filter(isAvailable)
        .map((s) => `- "${s.terms.join('" / "')}" → ${s.sql}`);
    pushSection('SYNONYMS', synonymLines);

    // 4. Filtering and Grouping
    const guidanceLines = tableAgentInstructions.filteringGuidance
        .filter(isAvailable)
        .map((g) => `- ${g.text}`);
    pushSection('FILTERING AND GROUPING', guidanceLines);

    // 5. Event Vocabulary
    const vocabParts = [];
    const autoEvents = tableAgentInstructions.eventVocabulary.autoCollectedAndEnhanced.filter(isKeptEvent);
    if (autoEvents.length > 0) {
        vocabParts.push(`Auto-collected and enhanced measurement: ${autoEvents.join(', ')}`);
    }

    // Ecommerce event names are listed only when the ecommerce column is not excluded.
    if (!isExcluded(['ecommerce'], excludedColumns)) {
        const ecomEvents = tableAgentInstructions.eventVocabulary.ecommerce.filter(isKeptEvent);
        if (ecomEvents.length > 0) {
            vocabParts.push(`Ecommerce (recommended): ${ecomEvents.join(', ')}`);
        }
    }
    pushSection('COMMON EVENT NAMES', vocabParts);

    // 6. Table Features
    const featureLines = [
        'Combines daily, intraday, and fresh exports; the best available version of each event is used.',
        'Incremental updates: non-final data is replaced with the latest available data on every run.',
        'Promotes key fields (e.g. page_location, session_id) to top-level columns for faster queries.',
        'Session-level fields: landing_page, user_id resolution, and configurable session parameters.',
    ];
    pushSection('TABLE FEATURES', featureLines.map((f) => `- ${f}`));

    // 7. Package Attribution
    sections.push(`${constants.TABLE_DESCRIPTION_SUFFIX}\n${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}`);

    // 8. Config JSON dump: keys starting with "default" and dataformTableConfig are left out.
    const configJson = JSON.stringify(
        Object.fromEntries(
            Object.entries(settings).filter(([key]) => !key.startsWith('default') && key !== 'dataformTableConfig')
        ),
        null,
        2
    );
    sections.push(`The last full table refresh was done using this configuration:\n${configJson}`);

    return sections.join('\n\n');
};
|
|
296
|
+
|
|
297
|
+
module.exports = {
|
|
298
|
+
columnDescriptions,
|
|
299
|
+
getColumnDescriptions,
|
|
300
|
+
getTableDescription,
|
|
301
|
+
composeDescription,
|
|
302
|
+
getLineageText,
|
|
303
|
+
buildConfigNotes,
|
|
304
|
+
};
|
package/package.json
CHANGED
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.4.3-dev.1",
|
|
4
|
-
"description": "",
|
|
5
|
-
"main": "index.js",
|
|
6
|
-
"files": [
|
|
7
|
-
"index.js",
|
|
8
|
-
"helpers",
|
|
9
|
-
"utils.js",
|
|
10
|
-
"preOperations.js",
|
|
11
|
-
"constants.js",
|
|
12
|
-
"tables",
|
|
13
|
-
"inputValidation.js",
|
|
14
|
-
"defaultConfig.js",
|
|
15
|
-
"config.js",
|
|
16
|
-
"columns",
|
|
17
|
-
"documentation.js"
|
|
18
|
-
],
|
|
19
|
-
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js",
|
|
21
|
-
"test:docs": "node tests/documentation.test.js",
|
|
22
|
-
"test:preops": "node tests/preOperations.test.js",
|
|
23
|
-
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
24
|
-
"test:merge": "node tests/mergeSQLConfigurations.test.js",
|
|
25
|
-
"readme": "node scripts/updateReadme.js",
|
|
26
|
-
"prepublishOnly": "node scripts/updateReadme.js"
|
|
27
|
-
},
|
|
28
|
-
"repository": {
|
|
29
|
-
"type": "git",
|
|
30
|
-
"url": "git+https://github.com/tanelytics/ga4-export-fixer.git"
|
|
31
|
-
},
|
|
32
|
-
"author": "Taneli Salonen",
|
|
33
|
-
"license": "MIT",
|
|
34
|
-
"bugs": {
|
|
35
|
-
"url": "https://github.com/tanelytics/ga4-export-fixer/issues"
|
|
36
|
-
},
|
|
37
|
-
"homepage": "https://github.com/tanelytics/ga4-export-fixer#readme",
|
|
38
|
-
"devDependencies": {
|
|
39
|
-
"@google-cloud/bigquery": "^8.1.1",
|
|
40
|
-
"dotenv": "^17.3.1"
|
|
41
|
-
}
|
|
42
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "ga4-export-fixer",
|
|
3
|
+
"version": "0.4.3-dev.3",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"files": [
|
|
7
|
+
"index.js",
|
|
8
|
+
"helpers",
|
|
9
|
+
"utils.js",
|
|
10
|
+
"preOperations.js",
|
|
11
|
+
"constants.js",
|
|
12
|
+
"tables",
|
|
13
|
+
"inputValidation.js",
|
|
14
|
+
"defaultConfig.js",
|
|
15
|
+
"config.js",
|
|
16
|
+
"columns",
|
|
17
|
+
"documentation.js"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js",
|
|
21
|
+
"test:docs": "node tests/documentation.test.js",
|
|
22
|
+
"test:preops": "node tests/preOperations.test.js",
|
|
23
|
+
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
24
|
+
"test:merge": "node tests/mergeSQLConfigurations.test.js",
|
|
25
|
+
"readme": "node scripts/updateReadme.js",
|
|
26
|
+
"prepublishOnly": "node scripts/updateReadme.js"
|
|
27
|
+
},
|
|
28
|
+
"repository": {
|
|
29
|
+
"type": "git",
|
|
30
|
+
"url": "git+https://github.com/tanelytics/ga4-export-fixer.git"
|
|
31
|
+
},
|
|
32
|
+
"author": "Taneli Salonen",
|
|
33
|
+
"license": "MIT",
|
|
34
|
+
"bugs": {
|
|
35
|
+
"url": "https://github.com/tanelytics/ga4-export-fixer/issues"
|
|
36
|
+
},
|
|
37
|
+
"homepage": "https://github.com/tanelytics/ga4-export-fixer#readme",
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@google-cloud/bigquery": "^8.1.1",
|
|
40
|
+
"dotenv": "^17.3.1"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -319,16 +319,9 @@ ${excludedEventsSQL}`,
|
|
|
319
319
|
* @returns {Object} The Dataform publish() object for the enhanced events table, supporting chaining (e.g. .preOps, .query).
|
|
320
320
|
*/
|
|
321
321
|
const createEnhancedEventsTable = (dataformPublish, config) => {
|
|
322
|
-
|
|
323
|
-
// mergeSQLConfigurations overwrites arrays (tags), so passing user overrides through it
|
|
324
|
-
// would lose the default tags. By stripping it here, user overrides are applied exactly
|
|
325
|
-
// once via mergeDataformTableConfigurations.
|
|
326
|
-
const { dataformTableConfig: userDataformTableConfig, ...sqlConfig } = config;
|
|
327
|
-
|
|
328
|
-
const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, sqlConfig);
|
|
322
|
+
const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
|
|
329
323
|
|
|
330
|
-
|
|
331
|
-
const staticDefaults = mergedConfig.dataformTableConfig || {};
|
|
324
|
+
const tableDescription = documentation.getTableDescription(mergedConfig);
|
|
332
325
|
|
|
333
326
|
// Compute dynamic fields from merged SQL config
|
|
334
327
|
const getDatasetName = (sourceTable) => {
|
|
@@ -343,26 +336,21 @@ const createEnhancedEventsTable = (dataformPublish, config) => {
|
|
|
343
336
|
|
|
344
337
|
const dataset = getDatasetName(mergedConfig.sourceTable);
|
|
345
338
|
|
|
346
|
-
|
|
339
|
+
const dynamicFields = {
|
|
340
|
+
name: `${constants.DEFAULT_EVENTS_TABLE_NAME}_${dataset.replace('analytics_', '')}`,
|
|
341
|
+
schema: dataset,
|
|
342
|
+
description: tableDescription,
|
|
343
|
+
columns: documentation.getColumnDescriptions(mergedConfig),
|
|
344
|
+
};
|
|
345
|
+
|
|
346
|
+
// Build dataformTableConfig: static defaults (from defaultConfig.js) → dynamic fields → user overrides.
|
|
347
|
+
// Deep-clone defaults to prevent Dataform's publish() from mutating nested objects (e.g. bigquery)
|
|
348
|
+
// across multiple createTable calls in the same process.
|
|
347
349
|
const dataformTableConfig = utils.mergeDataformTableConfigurations(
|
|
348
|
-
{
|
|
349
|
-
|
|
350
|
-
name: `${constants.DEFAULT_EVENTS_TABLE_NAME}_${dataset.replace('analytics_', '')}`,
|
|
351
|
-
schema: dataset,
|
|
352
|
-
columns: documentation.getColumnDescriptions(mergedConfig),
|
|
353
|
-
},
|
|
354
|
-
userDataformTableConfig
|
|
350
|
+
{ ...JSON.parse(JSON.stringify(defaultConfig.dataformTableConfig || {})), ...dynamicFields },
|
|
351
|
+
config.dataformTableConfig
|
|
355
352
|
);
|
|
356
353
|
|
|
357
|
-
// Include the final dataformTableConfig in mergedConfig for the description's config dump
|
|
358
|
-
mergedConfig.dataformTableConfig = dataformTableConfig;
|
|
359
|
-
const tableDescription = documentation.getTableDescription(mergedConfig);
|
|
360
|
-
|
|
361
|
-
// Set description (user override from the merge wins if provided)
|
|
362
|
-
if (!dataformTableConfig.description) {
|
|
363
|
-
dataformTableConfig.description = tableDescription;
|
|
364
|
-
}
|
|
365
|
-
|
|
366
354
|
// create the table using Dataform publish()
|
|
367
355
|
return dataformPublish(dataformTableConfig.name, dataformTableConfig).preOps(ctx => {
|
|
368
356
|
return preOperations.setPreOperations(utils.setDataformContext(ctx, mergedConfig));
|