ga4-export-fixer 0.4.6 → 0.4.7-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/createTable.js +58 -0
- package/defaultConfig.js +0 -66
- package/documentation.js +222 -306
- package/index.js +1 -1
- package/inputValidation.js +0 -184
- package/package.json +3 -3
- package/tables/ga4EventsEnhanced/config.js +68 -0
- package/tables/{ga4EventsEnhanced.js → ga4EventsEnhanced/index.js} +28 -54
- package/tables/ga4EventsEnhanced/tableDescription.js +103 -0
- package/tables/ga4EventsEnhanced/validation.js +185 -0
- package/utils.js +22 -1
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnDescriptions.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnLineage.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnTypicalUse.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/tableAgentInstructions.json +0 -0
package/inputValidation.js
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
const { isDataformTableReferenceObject } = require('./utils.js');
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* Validates the base configuration fields shared across all table types.
|
|
5
3
|
* These correspond to the fields defined in baseConfig (defaultConfig.js):
|
|
@@ -92,188 +90,6 @@ const validateBaseConfig = (config, options = {}) => {
|
|
|
92
90
|
}
|
|
93
91
|
};
|
|
94
92
|
|
|
95
|
-
/**
 * Validates a GA4 export fixer configuration object.
 * Validation is performed on mergedConfig (default values merged with user input).
 * All fields are required in the merged config; optional fields are only optional for user input
 * and receive their values from the default configuration during merge.
 *
 * @param {Object} config - The merged configuration object to validate.
 * @param {Object} [options={}] - Forwarded unchanged to validateBaseConfig.
 * @throws {Error} If any configuration value is invalid or missing. Every error raised inside
 *   the validation has its message prefixed with "Config validation: ".
 */
const validateEnhancedEventsConfig = (config, options = {}) => {
    try {
        if (!config || typeof config !== 'object' || Array.isArray(config)) {
            throw new Error(`config must be a non-null object. Received: ${JSON.stringify(config)}`);
        }

        // base config fields (self, incremental, test, testConfig, preOperations)
        validateBaseConfig(config, options);

        /*
        Rest of the validations are related to ga4_events_enhanced table specific fields
        */

        // sourceTable - required; string or Dataform table reference
        if (config.sourceTable === undefined || config.sourceTable === null) {
            throw new Error("config.sourceTable is required. Provide a Dataform table reference (using the ref() function) or a string in format '`project.dataset.table`'.");
        }
        if (isDataformTableReferenceObject(config.sourceTable)) {
            // Valid Dataform reference
        } else if (typeof config.sourceTable === 'string') {
            if (!config.sourceTable.trim()) {
                throw new Error("config.sourceTable must be a non-empty string. Received empty string.");
            }
            if (!/^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(config.sourceTable.trim())) {
                throw new Error(`config.sourceTable must be in the format '\`project.dataset.table\`' (with backticks). Received: ${JSON.stringify(config.sourceTable)}`);
            }
        } else {
            throw new Error(`config.sourceTable must be a Dataform table reference object or a string in format '\`project.dataset.table\`'. Received: ${JSON.stringify(config.sourceTable)}`);
        }

        // schemaLock - optional; must be undefined or a string in "YYYYMMDD" format (e.g., "20260101")
        if (typeof config.schemaLock !== 'undefined') {
            if (typeof config.schemaLock !== 'string' || !/^\d{8}$/.test(config.schemaLock)) {
                throw new Error(`config.schemaLock must be a string in "YYYYMMDD" format (e.g., "20260101"). Received: ${JSON.stringify(config.schemaLock)}`);
            }
            // Must be a valid date: Date() silently rolls over impossible dates (e.g. Feb 30),
            // so the parsed parts are compared against what the Date object reports back.
            const year = parseInt(config.schemaLock.slice(0, 4), 10);
            const month = parseInt(config.schemaLock.slice(4, 6), 10);
            const day = parseInt(config.schemaLock.slice(6, 8), 10);
            const date = new Date(year, month - 1, day);
            if (date.getFullYear() !== year || date.getMonth() !== month - 1 || date.getDate() !== day) {
                throw new Error(`config.schemaLock must be a valid date. Received: ${JSON.stringify(config.schemaLock)}`);
            }
            // Must be at least 20241009 (fixed-width digit strings compare correctly as strings)
            if (config.schemaLock < "20241009") {
                throw new Error(`config.schemaLock must be a date string equal to or greater than "20241009". Received: ${JSON.stringify(config.schemaLock)}`);
            }
        }

        // includedExportTypes - required
        if (typeof config.includedExportTypes === 'undefined') {
            throw new Error("config.includedExportTypes is required.");
        }
        if (!config.includedExportTypes || typeof config.includedExportTypes !== 'object' || Array.isArray(config.includedExportTypes)) {
            throw new Error(`config.includedExportTypes must be an object. Received: ${JSON.stringify(config.includedExportTypes)}`);
        }
        for (const key of ['daily', 'fresh', 'intraday']) {
            if (!(key in config.includedExportTypes)) {
                throw new Error(`config.includedExportTypes.${key} is required.`);
            }
            if (typeof config.includedExportTypes[key] !== 'boolean') {
                throw new Error(`config.includedExportTypes.${key} must be a boolean. Received: ${JSON.stringify(config.includedExportTypes[key])}`);
            }
        }
        if (!config.includedExportTypes.daily && !config.includedExportTypes.fresh && !config.includedExportTypes.intraday) {
            throw new Error("At least one of config.includedExportTypes.daily, config.includedExportTypes.fresh, or config.includedExportTypes.intraday must be true.");
        }

        // timezone - required
        if (typeof config.timezone === 'undefined') {
            throw new Error("config.timezone is required.");
        }
        if (typeof config.timezone !== 'string' || !config.timezone.trim()) {
            throw new Error(`config.timezone must be a non-empty string (e.g. 'Etc/UTC', 'Europe/Helsinki'). Received: ${JSON.stringify(config.timezone)}`);
        }

        // customTimestampParam - optional; must be undefined or a non-empty string
        if (typeof config.customTimestampParam !== 'undefined') {
            if (typeof config.customTimestampParam !== 'string' || !config.customTimestampParam.trim()) {
                throw new Error(`config.customTimestampParam must be a non-empty string when provided. Received: ${JSON.stringify(config.customTimestampParam)}`);
            }
        }

        // dataIsFinal - required
        if (typeof config.dataIsFinal === 'undefined') {
            throw new Error("config.dataIsFinal is required.");
        }
        // typeof null === 'object', so null has to be rejected explicitly; otherwise the
        // property reads below would fail with an unrelated TypeError instead of this message.
        if (!config.dataIsFinal || typeof config.dataIsFinal !== 'object' || Array.isArray(config.dataIsFinal)) {
            throw new Error(`config.dataIsFinal must be an object. Received: ${JSON.stringify(config.dataIsFinal)}`);
        }
        if (typeof config.dataIsFinal.detectionMethod === 'undefined') {
            throw new Error("config.dataIsFinal.detectionMethod is required.");
        }
        if (typeof config.dataIsFinal.detectionMethod !== 'string' || (config.dataIsFinal.detectionMethod !== 'EXPORT_TYPE' && config.dataIsFinal.detectionMethod !== 'DAY_THRESHOLD')) {
            throw new Error(`config.dataIsFinal.detectionMethod must be 'EXPORT_TYPE' or 'DAY_THRESHOLD'. Received: ${JSON.stringify(config.dataIsFinal.detectionMethod)}`);
        }
        if (
            config.dataIsFinal.detectionMethod === 'DAY_THRESHOLD' &&
            typeof config.dataIsFinal.dayThreshold === 'undefined'
        ) {
            throw new Error("config.dataIsFinal.dayThreshold is required when detectionMethod is 'DAY_THRESHOLD'.");
        }
        if (
            config.dataIsFinal.detectionMethod === 'DAY_THRESHOLD' &&
            (typeof config.dataIsFinal.dayThreshold !== 'number' || !Number.isInteger(config.dataIsFinal.dayThreshold) || config.dataIsFinal.dayThreshold < 0)
        ) {
            throw new Error(`config.dataIsFinal.dayThreshold must be a non-negative integer. Received: ${JSON.stringify(config.dataIsFinal.dayThreshold)}`);
        }
        // EXPORT_TYPE detection relies on daily export tables to mark data as final.
        // When daily is not enabled, all data would be marked as not final under EXPORT_TYPE,
        // so DAY_THRESHOLD must be used instead.
        if (
            !config.includedExportTypes.daily &&
            config.dataIsFinal.detectionMethod !== 'DAY_THRESHOLD'
        ) {
            throw new Error(`config.dataIsFinal.detectionMethod must be 'DAY_THRESHOLD' when daily export is not enabled (config.includedExportTypes.daily is false). A dayThreshold of 1 is recommended for intraday only setups. With fresh export, the GA4 data is subject to possible changes for up to 72 hours. Received: ${JSON.stringify(config.dataIsFinal.detectionMethod)}`);
        }

        // bufferDays - required
        if (typeof config.bufferDays !== 'number' || !Number.isInteger(config.bufferDays) || config.bufferDays < 0) {
            throw new Error(`config.bufferDays must be a non-negative integer. Received: ${JSON.stringify(config.bufferDays)}`);
        }

        // Array fields - all required; every element must be a non-empty string
        const stringArrayKeys = ['defaultExcludedEventParams', 'excludedEventParams', 'sessionParams', 'defaultExcludedEvents', 'excludedEvents', 'excludedColumns'];
        for (const key of stringArrayKeys) {
            if (config[key] === undefined) {
                throw new Error(`config.${key} is required.`);
            }
            if (!Array.isArray(config[key])) {
                throw new Error(`config.${key} must be an array. Received: ${JSON.stringify(config[key])}`);
            }
            for (let i = 0; i < config[key].length; i++) {
                if (typeof config[key][i] !== 'string' || !config[key][i].trim()) {
                    throw new Error(`config.${key}[${i}] must be a non-empty string. Received: ${JSON.stringify(config[key][i])}`);
                }
            }
        }

        // eventParamsToColumns - required; array of { name, type?, columnName? } objects
        if (config.eventParamsToColumns === undefined) {
            throw new Error("config.eventParamsToColumns is required.");
        }
        if (!Array.isArray(config.eventParamsToColumns)) {
            throw new Error(`config.eventParamsToColumns must be an array. Received: ${JSON.stringify(config.eventParamsToColumns)}`);
        }
        const validEventParamTypes = ['string', 'int', 'int64', 'double', 'float', 'float64'];
        for (let i = 0; i < config.eventParamsToColumns.length; i++) {
            const item = config.eventParamsToColumns[i];
            if (!item || typeof item !== 'object' || Array.isArray(item)) {
                throw new Error(`config.eventParamsToColumns[${i}] must be an object with 'name' and 'type' properties. Received: ${JSON.stringify(item)}`);
            }
            if (!item.name || typeof item.name !== 'string' || !item.name.trim()) {
                throw new Error(`config.eventParamsToColumns[${i}].name must be a non-empty string. Received: ${JSON.stringify(item.name)}`);
            }
            // type is only checked when it is neither undefined nor null
            if (item.type !== undefined && item.type !== null) {
                if (!validEventParamTypes.includes(item.type)) {
                    throw new Error(`config.eventParamsToColumns[${i}].type must be one of: ${validEventParamTypes.join(', ')}. Received: ${JSON.stringify(item.type)}`);
                }
            }
            // columnName is only checked when it is not undefined, null or the empty string
            if (item.columnName !== undefined && item.columnName !== null && item.columnName !== '') {
                if (typeof item.columnName !== 'string' || !item.columnName.trim()) {
                    throw new Error(`config.eventParamsToColumns[${i}].columnName must be a non-empty string when provided. Received: ${JSON.stringify(item.columnName)}`);
                }
            }
        }
    } catch (e) {
        // Prefix in place and rethrow the same error object so the original stack is kept.
        e.message = `Config validation: ${e.message}`;
        throw e;
    }
};
|
|
275
|
-
|
|
276
93
|
module.exports = {
|
|
277
94
|
validateBaseConfig,
|
|
278
|
-
validateEnhancedEventsConfig
|
|
279
95
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.4.6",
|
|
3
|
+
"version": "0.4.7-dev.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
"inputValidation.js",
|
|
14
14
|
"defaultConfig.js",
|
|
15
15
|
"config.js",
|
|
16
|
-
"
|
|
17
|
-
"
|
|
16
|
+
"documentation.js",
|
|
17
|
+
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
20
|
"test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js",
|
|
package/tables/ga4EventsEnhanced/config.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
const { baseConfig } = require('../../defaultConfig.js');
|
|
2
|
+
|
|
3
|
+
/*
Defaults for the GA4 Events Enhanced table, layered on top of the shared baseConfig.
*/
const ga4EventsEnhancedConfig = {
    ...baseConfig,

    sourceTable: undefined,
    // lets the pre operations detect whether GA4-export-specific pre operations are needed
    sourceTableType: 'GA4_EXPORT',

    // optional, but setting it is recommended
    schemaLock: undefined,

    // applies to js tables only
    dataformTableConfig: {
        type: 'incremental',
        bigquery: {
            partitionBy: 'event_date',
            clusterBy: ['event_name', 'session_id', 'page_location', 'data_is_final'],
            labels: {
                ga4_export_fixer: 'true',
            },
        },
        onSchemaChange: 'EXTEND',
        tags: ['ga4_export_fixer'],
    },

    // optional
    includedExportTypes: {
        daily: true,
        fresh: false,
        intraday: true,
    },
    timezone: 'Etc/UTC',
    customTimestampParam: undefined,
    dataIsFinal: {
        // either 'EXPORT_TYPE' or 'DAY_THRESHOLD'
        detectionMethod: 'DAY_THRESHOLD',
        // only read when detectionMethod is 'DAY_THRESHOLD'.
        // GA4 documentation: data up to 72 hours old is subject to possible changes.
        // In practice data has been seen to change after 72 hours as well
        // (a 4 day window would have covered those cases).
        dayThreshold: 3,
    },

    // extra days pulled in so that sessions spanning a day boundary are taken into account
    bufferDays: 1,

    // excluded by default because the same values are already exposed in other columns
    defaultExcludedEventParams: [
        'page_location',
        'ga_session_id',
        // 'custom_event_timestamp' -- removed if customTimestampParam is used
    ],
    excludedEventParams: [],
    eventParamsToColumns: [
        // example entry: { name: 'page_location', type: 'string', columnName: 'page_location2' }
    ],
    sessionParams: [],
    defaultExcludedEvents: [],

    // session_start and first_visit are excluded through excludedEvents (not the default list),
    // which allows the user to include them if needed
    excludedEvents: [
        'session_start',
        'first_visit',
    ],
    defaultExcludedColumns: [
        'event_dimensions', // legacy column, not needed
        'traffic_source',   // renamed to user_traffic_source
        'session_id',
    ],

    // columns left out when extracting raw data from the export tables
    excludedColumns: [],
};
|
|
67
|
+
|
|
68
|
+
module.exports = { ga4EventsEnhancedConfig };
|
|
package/tables/{ga4EventsEnhanced.js → ga4EventsEnhanced/index.js}
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
|
-
const helpers = require('
|
|
2
|
-
const utils = require('
|
|
3
|
-
const
|
|
4
|
-
const
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
1
|
+
const helpers = require('../../helpers/index.js');
|
|
2
|
+
const utils = require('../../utils.js');
|
|
3
|
+
const constants = require('../../constants.js');
|
|
4
|
+
const { ga4EventsEnhancedConfig } = require('./config.js');
|
|
5
|
+
const { validateEnhancedEventsConfig } = require('./validation.js');
|
|
6
|
+
const documentation = require('../../documentation.js');
|
|
7
|
+
const { createTable } = require('../../createTable.js');
|
|
8
|
+
const { getTableDescriptionSections } = require('./tableDescription.js');
|
|
9
|
+
|
|
10
|
+
// Column metadata for the GA4 Events Enhanced table
|
|
11
|
+
const columnMetadata = {
|
|
12
|
+
descriptions: require('./columns/columnDescriptions.json'),
|
|
13
|
+
lineage: require('./columns/columnLineage.json'),
|
|
14
|
+
typicalUse: require('./columns/columnTypicalUse.json'),
|
|
15
|
+
};
|
|
8
16
|
|
|
9
17
|
// default configuration for the GA4 Events Enhanced table
|
|
10
18
|
const defaultConfig = {
|
|
@@ -291,7 +299,7 @@ ${excludedEventsSQL}`,
|
|
|
291
299
|
// Exported wrapper: merge config, validate, then delegate to the internal function
|
|
292
300
|
const generateEnhancedEventsSQL = (config) => {
|
|
293
301
|
const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
|
|
294
|
-
|
|
302
|
+
validateEnhancedEventsConfig(mergedConfig);
|
|
295
303
|
return _generateEnhancedEventsSQL(mergedConfig);
|
|
296
304
|
};
|
|
297
305
|
|
|
@@ -316,58 +324,24 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
316
324
|
*
|
|
317
325
|
* @returns {Object} The Dataform publish() object for the enhanced events table, supporting chaining (e.g. .preOps, .query).
|
|
318
326
|
*/
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if (typeof sourceTable === 'string' && /^`[^\.]+\.[^\.]+\.[^\.]+`$/.test(sourceTable)) {
|
|
329
|
-
return sourceTable.split('.')[1];
|
|
330
|
-
}
|
|
331
|
-
throw new Error(`Unable to extract the dataset name from sourceTable, received: ${JSON.stringify(sourceTable)}`);
|
|
332
|
-
};
|
|
333
|
-
|
|
334
|
-
const dataset = getDatasetName(mergedConfig.sourceTable);
|
|
335
|
-
|
|
336
|
-
// Build dataformTableConfig: static defaults (from defaultConfig.js) → dynamic fields → user overrides.
|
|
337
|
-
// Deep-clone defaults to prevent Dataform's publish() from mutating nested objects (e.g. bigquery)
|
|
338
|
-
// across multiple createTable calls in the same process.
|
|
339
|
-
const dataformTableConfig = utils.mergeDataformTableConfigurations(
|
|
340
|
-
{
|
|
341
|
-
...JSON.parse(JSON.stringify(defaultConfig.dataformTableConfig || {})),
|
|
342
|
-
name: `${constants.DEFAULT_EVENTS_TABLE_NAME}_${dataset.replace('analytics_', '')}`,
|
|
343
|
-
schema: dataset,
|
|
344
|
-
columns: documentation.getColumnDescriptions(mergedConfig),
|
|
345
|
-
},
|
|
346
|
-
config.dataformTableConfig
|
|
347
|
-
);
|
|
348
|
-
|
|
349
|
-
// Pass dataformTableConfig to getTableDescription via a new object to avoid mutating mergedConfig
|
|
350
|
-
// (Dataform's sandboxed runtime may freeze objects returned by mergeSQLConfigurations)
|
|
351
|
-
const tableDescription = documentation.getTableDescription({ ...mergedConfig, dataformTableConfig });
|
|
352
|
-
|
|
353
|
-
// Set description (user override from the merge wins if provided)
|
|
354
|
-
if (!dataformTableConfig.description) {
|
|
355
|
-
dataformTableConfig.description = tableDescription;
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
// create the table using Dataform publish()
|
|
359
|
-
return dataformPublish(dataformTableConfig.name, dataformTableConfig).preOps(ctx => {
|
|
360
|
-
return preOperations.setPreOperations(utils.setDataformContext(ctx, mergedConfig));
|
|
361
|
-
}).query(ctx => {
|
|
362
|
-
return _generateEnhancedEventsSQL(utils.setDataformContext(ctx, mergedConfig));
|
|
363
|
-
});
|
|
327
|
+
// Descriptor handed to the shared createTable(): bundles this table's defaults,
// default name, validator, SQL generator and documentation builders.
const tableModule = {
    defaultConfig,
    defaultTableName: constants.DEFAULT_EVENTS_TABLE_NAME,
    validate: validateEnhancedEventsConfig,
    generateSql: _generateEnhancedEventsSQL,
    // column docs are built from this table's own column metadata
    getColumnDescriptions: (tableConfig) => {
        return documentation.getColumnDescriptions(tableConfig, columnMetadata);
    },
    // table-specific sections are computed first, then passed to the shared description builder
    getTableDescription: (tableConfig) => {
        const sections = getTableDescriptionSections(tableConfig);
        return documentation.buildTableDescription(tableConfig, sections);
    },
};
|
|
364
336
|
|
|
337
|
+
// Delegates to the shared createTable(), supplying this table's module definition.
const createEnhancedEventsTable = (dataformPublish, config) => createTable(dataformPublish, config, tableModule);
|
|
366
340
|
|
|
367
341
|
// Exported wrapper: merge config, validate, then delegate to preOperations module
|
|
368
342
|
const setPreOperations = (config) => {
|
|
369
343
|
const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
|
|
370
|
-
|
|
344
|
+
validateEnhancedEventsConfig(mergedConfig);
|
|
371
345
|
return preOperations.setPreOperations(mergedConfig);
|
|
372
346
|
};
|
|
373
347
|
|
|
package/tables/ga4EventsEnhanced/tableDescription.js
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
const { isExcluded } = require('../../documentation.js');
|
|
2
|
+
const tableAgentInstructions = require('./columns/tableAgentInstructions.json');
|
|
3
|
+
|
|
4
|
+
/**
 * Assembles the ga4_events_enhanced-specific sections of the table description.
 * The returned sections are meant to be handed to buildTableDescription(), which adds
 * the shared sections (package attribution, config JSON dump).
 *
 * Sections are emitted in this order: overview, key fields, synonyms, filtering and grouping,
 * common event names, table features. The overview and table features are always present;
 * the others are skipped when they would have no lines.
 *
 * @param {Object} config - The merged configuration object.
 * @returns {string[]} Array of description section strings.
 */
const getTableDescriptionSections = (config) => {
    // Only the user-configured excludedColumns filter the AI instructions.
    // defaultExcludedColumns names raw GA4 export columns dropped during extraction
    // (e.g. session_id is dropped from the raw export but exists as a derived column in the final table).
    const userExcludedColumns = config.excludedColumns || [];

    const droppedEvents = [
        ...(config.defaultExcludedEvents || []),
        ...(config.excludedEvents || []),
    ];

    const result = [];

    // Entry filter shared by the instruction lists: keep entries whose dependencies are not excluded.
    const dependenciesAvailable = (entry) => !isExcluded(entry.dependsOn, userExcludedColumns);
    const eventIsKept = (eventName) => !droppedEvents.includes(eventName);
    // Appends "<heading><lines joined by newline>" unless there is nothing to list.
    const addSection = (heading, lines) => {
        if (lines.length > 0) {
            result.push(heading + lines.join('\n'));
        }
    };

    // 1. Overview
    const overview = [
        'GA4 Events Enhanced',
        '',
        'An enhanced version of the GA4 BigQuery export. Each row is one event.',
    ];
    if (config.timezone) {
        overview.push(`Timezone: ${config.timezone}.`);
    }
    result.push(overview.join('\n'));

    // 2. Key Fields (static key fields first, then promoted event params)
    const keyFields = tableAgentInstructions.keyFields
        .filter((entry) => dependenciesAvailable(entry))
        .map((entry) => `- ${entry.field}: ${entry.note}`);

    const promotedParams = config.eventParamsToColumns;
    if (promotedParams && promotedParams.length > 0) {
        promotedParams.forEach((param) => {
            // the column is named after the parameter when columnName is not set
            const targetColumn = param.columnName || param.name;
            keyFields.push(`- ${targetColumn}: Promoted event parameter '${param.name}'. Available as a top-level column for direct filtering.`);
        });
    }
    addSection('KEY FIELDS:\n', keyFields);

    // 3. Synonyms
    const synonyms = tableAgentInstructions.synonyms
        .filter((entry) => dependenciesAvailable(entry))
        .map((entry) => `- "${entry.terms.join('" / "')}" → ${entry.sql}`);
    addSection('SYNONYMS:\n', synonyms);

    // 4. Filtering and Grouping
    const guidance = tableAgentInstructions.filteringGuidance
        .filter((entry) => dependenciesAvailable(entry))
        .map((entry) => `- ${entry.text}`);
    addSection('FILTERING AND GROUPING:\n', guidance);

    // 5. Event Vocabulary
    const vocabulary = [];
    const keptAutoEvents = tableAgentInstructions.eventVocabulary.autoCollectedAndEnhanced
        .filter((eventName) => eventIsKept(eventName));
    if (keptAutoEvents.length > 0) {
        vocabulary.push(`Auto-collected and enhanced measurement: ${keptAutoEvents.join(', ')}`);
    }

    // ecommerce event names are listed only while the ecommerce column is not excluded
    if (!isExcluded(['ecommerce'], userExcludedColumns)) {
        const keptEcommerceEvents = tableAgentInstructions.eventVocabulary.ecommerce
            .filter((eventName) => eventIsKept(eventName));
        if (keptEcommerceEvents.length > 0) {
            vocabulary.push(`Ecommerce (recommended): ${keptEcommerceEvents.join(', ')}`);
        }
    }
    addSection('COMMON EVENT NAMES:\n', vocabulary);

    // 6. Table Features
    const features = [
        'Combines daily, intraday, and fresh exports; the best available version of each event is used.',
        'Incremental updates: non-final data is replaced with the latest available data on every run.',
        'Promotes key fields (e.g. page_location, session_id) to top-level columns for faster queries.',
        'Session-level fields: landing_page, user_id resolution, and configurable session parameters.',
    ];
    addSection('TABLE FEATURES:\n', features.map((feature) => `- ${feature}`));

    return result;
};
|
|
102
|
+
|
|
103
|
+
module.exports = { getTableDescriptionSections };
|