ga4-export-fixer 0.1.4 → 0.1.5-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -1
- package/helpers.js +56 -32
- package/package.json +3 -2
- package/preOperations.js +3 -3
- package/tables/ga4EventsEnhanced.js +7 -8
- package/utils.js +13 -3
package/README.md
CHANGED
|
@@ -2,6 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
**ga4-export-fixer** is a **Dataform NPM package** that transforms raw GA4 BigQuery export data into a cleaner, more queryable incremental table. It combines daily and intraday exports so the best available version of each event is always in use, adds session-level fields like `session_id` and `landing_page`, promotes key event parameters to columns, and fixes known GA4 export issues — handling the boilerplate transformations that are otherwise tedious to include in every GA4 query.
|
|
4
4
|
|
|
5
|
+
The goal of the package is to **speed up development** when building data models and pipelines on top of GA4 export data, allowing you to focus on your use case instead of wrestling with the raw export format.
|
|
6
|
+
|
|
7
|
+
### Planned Features
|
|
8
|
+
|
|
9
|
+
- Support for using only intraday export data
|
|
10
|
+
- Tools for building on top of the enhanced events table
|
|
11
|
+
- Column descriptions
|
|
12
|
+
- Default configurations for app and web properties
|
|
13
|
+
- Item list attribution
|
|
14
|
+
- Data enrichment (item-level, session-level, event-level)
|
|
15
|
+
- Support for fresh export (GA4 360)
|
|
16
|
+
- Custom processing steps (additional CTEs)
|
|
17
|
+
- Custom traffic source attribution
|
|
18
|
+
|
|
5
19
|
## Installation
|
|
6
20
|
|
|
7
21
|
### Bash
|
|
@@ -19,7 +33,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
19
33
|
{
|
|
20
34
|
"dependencies": {
|
|
21
35
|
"@dataform/core": "3.0.42",
|
|
22
|
-
"ga4-export-fixer": "0.1.
|
|
36
|
+
"ga4-export-fixer": "0.1.4"
|
|
23
37
|
}
|
|
24
38
|
}
|
|
25
39
|
```
|
package/helpers.js
CHANGED
|
@@ -139,66 +139,90 @@ const ga4ExportDateFilter = (exportType, start, end) => {
|
|
|
139
139
|
}
|
|
140
140
|
};
|
|
141
141
|
|
|
142
|
-
// Filter the export tables by date range for both intraday and daily exports
|
|
143
142
|
/**
|
|
144
|
-
*
|
|
145
|
-
*
|
|
146
|
-
*
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
*
|
|
150
|
-
*
|
|
151
|
-
*
|
|
152
|
-
*
|
|
153
|
-
*
|
|
154
|
-
*
|
|
155
|
-
*
|
|
156
|
-
*
|
|
157
|
-
*
|
|
158
|
-
*
|
|
159
|
-
*
|
|
160
|
-
*
|
|
161
|
-
*
|
|
162
|
-
*
|
|
163
|
-
* @param {
|
|
164
|
-
*
|
|
165
|
-
*
|
|
166
|
-
*
|
|
167
|
-
*
|
|
168
|
-
* @param {Object} [config.includedExportTypes] Should contain `intraday` (boolean).
|
|
169
|
-
* @param {number} [config.bufferDays] Number of buffer days to extend date range for daily exports.
|
|
170
|
-
* @returns {string} SQL condition as a string that can be injected into a WHERE clause.
|
|
143
|
+
* Builds a `_table_suffix` WHERE clause for GA4 BigQuery export tables (daily and/or intraday).
|
|
144
|
+
*
|
|
145
|
+
* Date boundaries are resolved differently depending on the mode:
|
|
146
|
+
* - **test** -- literal dates from `config.testConfig`
|
|
147
|
+
* - **incremental** -- BigQuery variable placeholders set by pre-operations
|
|
148
|
+
* - **full refresh** -- static dates from `config.preOperations`
|
|
149
|
+
*
|
|
150
|
+
* `bufferDays` is subtracted from the daily start date so sessions that span
|
|
151
|
+
* midnight are not partially excluded.
|
|
152
|
+
*
|
|
153
|
+
* When both daily and intraday exports are enabled, the intraday start date
|
|
154
|
+
* comes from a dedicated variable (`INTRADAY_DATE_RANGE_START_VARIABLE`) so
|
|
155
|
+
* intraday tables that already have a corresponding daily table are excluded.
|
|
156
|
+
* When only intraday is enabled, the daily start-date logic (including buffer
|
|
157
|
+
* days) is reused instead.
|
|
158
|
+
*
|
|
159
|
+
* @param {Object} config
|
|
160
|
+
* @param {boolean} config.test - Use literal test dates.
|
|
161
|
+
* @param {Object} config.testConfig - `{ dateRangeStart, dateRangeEnd }`.
|
|
162
|
+
* @param {boolean} config.incremental - Use BigQuery variable placeholders.
|
|
163
|
+
* @param {Object} config.preOperations - `{ dateRangeStartFullRefresh, dateRangeEnd }`.
|
|
164
|
+
* @param {Object} config.includedExportTypes - `{ daily: boolean, intraday: boolean }`.
|
|
165
|
+
* @param {number} [config.bufferDays=0] - Extra days subtracted from the start date.
|
|
166
|
+
* @returns {string} SQL fragment for a WHERE clause.
|
|
171
167
|
*/
|
|
172
168
|
const ga4ExportDateFilters = (config) => {
|
|
173
169
|
const bufferDays = config.bufferDays || 0;
|
|
174
170
|
|
|
175
171
|
const getStartDate = () => {
|
|
172
|
+
//test mode
|
|
176
173
|
if (config.test) {
|
|
177
174
|
return config.testConfig.dateRangeStart;
|
|
178
175
|
}
|
|
179
176
|
if (config.incremental) {
|
|
180
177
|
return constants.DATE_RANGE_START_VARIABLE;
|
|
181
178
|
}
|
|
179
|
+
// full refresh
|
|
182
180
|
return config.preOperations.dateRangeStartFullRefresh;
|
|
183
181
|
};
|
|
184
182
|
|
|
185
183
|
const getEndDate = () => {
|
|
184
|
+
// test mode, avoid using a BigQuery variable
|
|
186
185
|
if (config.test) {
|
|
187
186
|
return config.testConfig.dateRangeEnd;
|
|
188
187
|
}
|
|
188
|
+
// use checkpoint variable with incremental refresh -> allows pre processing any part of the table without having to do a full refresh
|
|
189
189
|
if (config.incremental) {
|
|
190
190
|
return constants.DATE_RANGE_END_VARIABLE;
|
|
191
191
|
}
|
|
192
|
+
// full refresh
|
|
192
193
|
return config.preOperations.dateRangeEnd;
|
|
193
194
|
};
|
|
194
195
|
|
|
195
|
-
const
|
|
196
|
+
const getIntradayStartDate = () => {
|
|
197
|
+
// In test mode, skip pre-operations even though intraday and daily tables may temporarily overlap.
|
|
198
|
+
if (config.test) {
|
|
199
|
+
return config.testConfig.dateRangeStart;
|
|
200
|
+
}
|
|
201
|
+
// Dedicated variable excludes intraday tables that overlap with already-processed daily tables.
|
|
202
|
+
if (config.includedExportTypes.intraday && config.includedExportTypes.daily) {
|
|
203
|
+
return constants.INTRADAY_DATE_RANGE_START_VARIABLE;
|
|
204
|
+
}
|
|
205
|
+
// Without daily export, reuse the daily start-date logic and apply bufferDays
|
|
206
|
+
// (buffer is normally only applied to the daily start date).
|
|
207
|
+
if (config.includedExportTypes.intraday && !config.includedExportTypes.daily) {
|
|
208
|
+
// use the same start date as if daily export was in use
|
|
209
|
+
// include the buffer days as well (not included otherwise for intraday data)
|
|
210
|
+
return `${getStartDate()}-${bufferDays}`;
|
|
211
|
+
}
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
const dailyStart = `${getStartDate()}-${bufferDays}`;
|
|
215
|
+
const intradayStart = getIntradayStartDate();
|
|
196
216
|
const end = getEndDate();
|
|
197
|
-
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
const dateFilters = [
|
|
220
|
+
config.includedExportTypes.daily ? ga4ExportDateFilter('daily', dailyStart, end) : null,
|
|
221
|
+
config.includedExportTypes.intraday ? ga4ExportDateFilter('intraday', intradayStart, end) : null,
|
|
222
|
+
];
|
|
198
223
|
|
|
199
224
|
return `(
|
|
200
|
-
${
|
|
201
|
-
${config.includedExportTypes.intraday ? `or ${ga4ExportDateFilter('intraday', intradayStart, end)}` : ''}
|
|
225
|
+
${dateFilters.filter(filter => !!filter).join(' or ')}
|
|
202
226
|
)`;
|
|
203
227
|
};
|
|
204
228
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5-dev.1",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -14,11 +14,12 @@
|
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "node tests/ga4EventsEnhanced.test.js",
|
|
16
16
|
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
17
|
+
"test:merge": "node tests/mergeSQLConfigurations.test.js",
|
|
17
18
|
"prepublishOnly": "node scripts/updateReadme.js"
|
|
18
19
|
},
|
|
19
20
|
"repository": {
|
|
20
21
|
"type": "git",
|
|
21
|
-
"url": "git+https://github.com/tanelytics/ga4-export-fixer"
|
|
22
|
+
"url": "git+https://github.com/tanelytics/ga4-export-fixer.git"
|
|
22
23
|
},
|
|
23
24
|
"author": "Taneli Salonen",
|
|
24
25
|
"license": "MIT",
|
package/preOperations.js
CHANGED
|
@@ -140,9 +140,9 @@ const setPreOperations = (config) => {
|
|
|
140
140
|
{
|
|
141
141
|
type: 'variable',
|
|
142
142
|
name: constants.INTRADAY_DATE_RANGE_START_VARIABLE,
|
|
143
|
-
// variable only needed
|
|
144
|
-
value: config.includedExportTypes.intraday ? getDateRangeStartIntraday(config) : undefined,
|
|
145
|
-
comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data.',
|
|
143
|
+
// variable only needed if intraday export tables are included together with daily export tables
|
|
144
|
+
value: config.includedExportTypes.intraday && config.includedExportTypes.daily ? getDateRangeStartIntraday(config) : undefined,
|
|
145
|
+
comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data. Only needed if intraday export tables are included together with daily export tables.',
|
|
146
146
|
},
|
|
147
147
|
{
|
|
148
148
|
type: 'variable',
|
|
@@ -52,11 +52,13 @@ const defaultConfig = {
|
|
|
52
52
|
//{name: 'page_location', type: 'string', columnName: 'page_location2'},
|
|
53
53
|
],
|
|
54
54
|
sessionParams: [],
|
|
55
|
-
defaultExcludedEvents: [
|
|
55
|
+
defaultExcludedEvents: [],
|
|
56
|
+
// session_start and first_visit are excluded via the excludedEvents array
|
|
57
|
+
// this allows the user to include them if needed
|
|
58
|
+
excludedEvents: [
|
|
56
59
|
'session_start',
|
|
57
60
|
'first_visit'
|
|
58
61
|
],
|
|
59
|
-
excludedEvents: [],
|
|
60
62
|
defaultExcludedColumns: [
|
|
61
63
|
'event_dimensions', // legacy column, not needed
|
|
62
64
|
'traffic_source', // renamed to user_traffic_source
|
|
@@ -214,7 +216,7 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
214
216
|
const mainTimestampColumn = mergedConfig.customTimestampParam ? 'event_custom_timestamp' : 'event_timestamp';
|
|
215
217
|
|
|
216
218
|
// exclude these events from the table
|
|
217
|
-
const excludedEvents =
|
|
219
|
+
const excludedEvents = mergedConfig.excludedEvents;
|
|
218
220
|
const excludedEventsSQL = excludedEvents.length > 0 ? `and event_name not in (${excludedEvents.map(event => `'${event}'`).join(',')})` : '';
|
|
219
221
|
|
|
220
222
|
// promote these event parameters to columns
|
|
@@ -228,7 +230,7 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
228
230
|
};
|
|
229
231
|
|
|
230
232
|
const getExcludedColumns = () => {
|
|
231
|
-
const allExcludedColumns =
|
|
233
|
+
const allExcludedColumns = mergedConfig.excludedColumns;
|
|
232
234
|
const excludedColumns = {};
|
|
233
235
|
allExcludedColumns.forEach(c => {
|
|
234
236
|
excludedColumns[c] = undefined;
|
|
@@ -259,10 +261,7 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
259
261
|
page: helpers.extractPageDetails(),
|
|
260
262
|
// event parameters and user properties
|
|
261
263
|
...promotedEventParameters(),
|
|
262
|
-
event_params: helpers.filterEventParams(
|
|
263
|
-
mergedConfig.defaultExcludedEventParams, mergedConfig.excludedEventParams),
|
|
264
|
-
'exclude'
|
|
265
|
-
),
|
|
264
|
+
event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
|
|
266
265
|
user_properties: 'user_properties',
|
|
267
266
|
// traffic source
|
|
268
267
|
collected_traffic_source: 'collected_traffic_source',
|
package/utils.js
CHANGED
|
@@ -87,7 +87,10 @@ ${groupByClause}`;
|
|
|
87
87
|
*
|
|
88
88
|
* Rules:
|
|
89
89
|
* - Nested objects are merged recursively key by key
|
|
90
|
-
* - Arrays
|
|
90
|
+
* - Arrays with a "default" counterpart (e.g. excludedEvents + defaultExcludedEvents)
|
|
91
|
+
* are merged with mergeUniqueArrays, with user values taking precedence
|
|
92
|
+
* - Arrays that are themselves a "default" version, or have no default counterpart,
|
|
93
|
+
* are overwritten by user input
|
|
91
94
|
* - Default values are preserved unless explicitly overridden (including with undefined)
|
|
92
95
|
* - Explicitly setting a value to undefined in inputConfig will override the default
|
|
93
96
|
* - Date fields: after merging, specific date fields (listed in dateFields) are processed via processDate().
|
|
@@ -120,9 +123,16 @@ const mergeSQLConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
120
123
|
continue;
|
|
121
124
|
}
|
|
122
125
|
|
|
123
|
-
// Handle arrays:
|
|
126
|
+
// Handle arrays: merge with default counterpart if one exists, otherwise overwrite
|
|
124
127
|
if (Array.isArray(defaultValue) && Array.isArray(inputValue)) {
|
|
125
|
-
|
|
128
|
+
// check if the array has a "default" counterpart
|
|
129
|
+
// for example, excludedEvents and defaultExcludedEvents
|
|
130
|
+
const defaultKey = 'default' + key.charAt(0).toUpperCase() + key.slice(1);
|
|
131
|
+
if (!key.startsWith('default') && defaultKey in result) {
|
|
132
|
+
result[key] = mergeUniqueArrays(inputValue, result[defaultKey]);
|
|
133
|
+
} else {
|
|
134
|
+
result[key] = inputValue;
|
|
135
|
+
}
|
|
126
136
|
continue;
|
|
127
137
|
}
|
|
128
138
|
|