ga4-export-fixer 0.4.6 → 0.4.7-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/createTable.js +58 -0
- package/defaultConfig.js +0 -66
- package/documentation.js +222 -306
- package/index.js +1 -1
- package/inputValidation.js +0 -184
- package/package.json +3 -3
- package/tables/ga4EventsEnhanced/config.js +68 -0
- package/tables/{ga4EventsEnhanced.js → ga4EventsEnhanced/index.js} +28 -54
- package/tables/ga4EventsEnhanced/tableDescription.js +103 -0
- package/tables/ga4EventsEnhanced/validation.js +185 -0
- package/utils.js +22 -1
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnDescriptions.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnLineage.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/columnTypicalUse.json +0 -0
- /package/{columns → tables/ga4EventsEnhanced/columns}/tableAgentInstructions.json +0 -0
package/createTable.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const utils = require('./utils.js');
|
|
2
|
+
const preOperations = require('./preOperations.js');
|
|
3
|
+
|
|
4
|
+
/**
 * Shared createTable lifecycle for all table modules.
 *
 * Steps, in order:
 *   1. Merge the module defaults with the user configuration and validate the result.
 *   2. Build the Dataform table config: static defaults → dynamic fields → user overrides.
 *   3. Add the generated table description unless the merged config already carries one.
 *   4. Publish the table with pre-operations and the module's SQL query.
 *
 * @param {Function} dataformPublish - Dataform publish() function.
 * @param {Object} userConfig - User-provided configuration.
 * @param {Object} tableModule - Table module definition conforming to the table module interface:
 * @param {Object} tableModule.defaultConfig - Default config extending baseConfig.
 * @param {string} tableModule.defaultTableName - Default table name (e.g. 'ga4_events_enhanced').
 * @param {Function} tableModule.validate - (mergedConfig, options?) => void.
 * @param {Function} tableModule.generateSql - (mergedConfig) => string.
 * @param {Function} tableModule.getColumnDescriptions - (mergedConfig) => Dataform columns object.
 * @param {Function} tableModule.getTableDescription - (mergedConfig) => string.
 * @returns {Object} The Dataform publish() object for the table.
 */
const createTable = (dataformPublish, userConfig, tableModule) => {
    const mergedConfig = utils.mergeSQLConfigurations(tableModule.defaultConfig, userConfig);
    tableModule.validate(mergedConfig, { skipDataformContextFields: true });

    const dataset = utils.getDatasetName(mergedConfig.sourceTable);

    // Build the table config: static defaults → dynamic fields → user overrides.
    // The defaults are deep-cloned so that Dataform's publish() cannot mutate nested objects
    // (e.g. bigquery) shared across multiple createTable calls in the same process.
    const mergedTableConfig = utils.mergeDataformTableConfigurations(
        {
            ...JSON.parse(JSON.stringify(tableModule.defaultConfig.dataformTableConfig || {})),
            name: `${tableModule.defaultTableName}_${dataset.replace('analytics_', '')}`,
            schema: dataset,
            columns: tableModule.getColumnDescriptions(mergedConfig),
        },
        userConfig.dataformTableConfig
    );

    // Hand the table config to getTableDescription through a fresh object rather than by
    // mutating mergedConfig (Dataform's sandboxed runtime may freeze objects returned by
    // mergeSQLConfigurations).
    const tableDescription = tableModule.getTableDescription({
        ...mergedConfig,
        dataformTableConfig: mergedTableConfig,
    });

    // A description that survived the merge (i.e. a user override) wins. Otherwise attach the
    // generated one by creating a new object: the merge result may be frozen as well, in which
    // case an in-place assignment would be dropped silently or throw in strict mode.
    const dataformTableConfig = mergedTableConfig.description
        ? mergedTableConfig
        : { ...mergedTableConfig, description: tableDescription };

    // Create the table using Dataform publish()
    return dataformPublish(dataformTableConfig.name, dataformTableConfig)
        .preOps(ctx => preOperations.setPreOperations(utils.setDataformContext(ctx, mergedConfig)))
        .query(ctx => tableModule.generateSql(utils.setDataformContext(ctx, mergedConfig)));
};
|
|
57
|
+
|
|
58
|
+
module.exports = { createTable };
|
package/defaultConfig.js
CHANGED
|
@@ -28,72 +28,6 @@ const baseConfig = {
|
|
|
28
28
|
},
|
|
29
29
|
};
|
|
30
30
|
|
|
31
|
-
/*
|
|
32
|
-
The default configuration for the GA4 Events Enhanced table.
|
|
33
|
-
*/
|
|
34
|
-
const ga4EventsEnhancedConfig = {
|
|
35
|
-
...baseConfig,
|
|
36
|
-
sourceTable: undefined,
|
|
37
|
-
sourceTableType: 'GA4_EXPORT', // used with pre operations to detect if ga4 export specific pre operations are needed
|
|
38
|
-
// optional but recommended
|
|
39
|
-
schemaLock: undefined,
|
|
40
|
-
// only used with js tables
|
|
41
|
-
dataformTableConfig: {
|
|
42
|
-
type: 'incremental',
|
|
43
|
-
bigquery: {
|
|
44
|
-
partitionBy: 'event_date',
|
|
45
|
-
clusterBy: ['event_name', 'session_id', 'page_location', 'data_is_final'],
|
|
46
|
-
labels: {
|
|
47
|
-
'ga4_export_fixer': 'true'
|
|
48
|
-
}
|
|
49
|
-
},
|
|
50
|
-
onSchemaChange: 'EXTEND',
|
|
51
|
-
tags: ['ga4_export_fixer'],
|
|
52
|
-
},
|
|
53
|
-
// optional
|
|
54
|
-
includedExportTypes: {
|
|
55
|
-
daily: true,
|
|
56
|
-
fresh: false,
|
|
57
|
-
intraday: true,
|
|
58
|
-
},
|
|
59
|
-
timezone: 'Etc/UTC',
|
|
60
|
-
customTimestampParam: undefined,
|
|
61
|
-
dataIsFinal: {
|
|
62
|
-
detectionMethod: 'DAY_THRESHOLD', // 'EXPORT_TYPE' or 'DAY_THRESHOLD'
|
|
63
|
-
dayThreshold: 3 // only used if detectionMethod is 'DAY_THRESHOLD'
|
|
64
|
-
// according to GA4 documentation, the data up to 72 hours old is subject to possible changes
|
|
65
|
-
// in reality, there have been cases where the data has changed even after 72 hours (4 day window would have covered these)
|
|
66
|
-
},
|
|
67
|
-
// number of additional days to take in for taking into account sessions that overlap days
|
|
68
|
-
bufferDays: 1,
|
|
69
|
-
// these parameters are excluded by default because they've been made available in other columns
|
|
70
|
-
defaultExcludedEventParams: [
|
|
71
|
-
'page_location',
|
|
72
|
-
'ga_session_id',
|
|
73
|
-
//'custom_event_timestamp', // removed if customTimestampParam is used
|
|
74
|
-
],
|
|
75
|
-
excludedEventParams: [],
|
|
76
|
-
eventParamsToColumns: [
|
|
77
|
-
//{name: 'page_location', type: 'string', columnName: 'page_location2'},
|
|
78
|
-
],
|
|
79
|
-
sessionParams: [],
|
|
80
|
-
defaultExcludedEvents: [],
|
|
81
|
-
// session_start and first_visit are excluded via the excludedEvents array
|
|
82
|
-
// this allows the user to include them if needed
|
|
83
|
-
excludedEvents: [
|
|
84
|
-
'session_start',
|
|
85
|
-
'first_visit'
|
|
86
|
-
],
|
|
87
|
-
defaultExcludedColumns: [
|
|
88
|
-
'event_dimensions', // legacy column, not needed
|
|
89
|
-
'traffic_source', // renamed to user_traffic_source
|
|
90
|
-
'session_id'
|
|
91
|
-
],
|
|
92
|
-
// exclude these columns when extracting raw data from the export tables
|
|
93
|
-
excludedColumns: [],
|
|
94
|
-
};
|
|
95
|
-
|
|
96
31
|
module.exports = {
|
|
97
32
|
baseConfig,
|
|
98
|
-
ga4EventsEnhancedConfig,
|
|
99
33
|
};
|