ga4-export-fixer 0.1.1 → 0.1.3-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +173 -9
- package/helpers.js +8 -6
- package/package.json +3 -2
- package/tables/ga4EventsEnhanced.js +7 -5
- package/utils.js +29 -7
package/README.md
CHANGED
|
@@ -17,13 +17,15 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
17
17
|
**`package.json`**
|
|
18
18
|
```json
|
|
19
19
|
{
|
|
20
|
-
"name": "my_dataform_repo",
|
|
21
20
|
"dependencies": {
|
|
22
|
-
"@dataform/core": "3.0.
|
|
23
|
-
"ga4-export-fixer": "0.1.
|
|
21
|
+
"@dataform/core": "3.0.42",
|
|
22
|
+
"ga4-export-fixer": "0.1.2"
|
|
24
23
|
}
|
|
25
24
|
}
|
|
26
25
|
```
|
|
26
|
+
|
|
27
|
+
**Note:** The best practice is to specify the package version explicitly (e.g. `"0.1.2"`) rather than using `"latest"` or `"*"`, to avoid unexpected breaking changes when the package is updated.
|
|
28
|
+
|
|
27
29
|
In Google Cloud Dataform, click "Install Packages" to install it in your development workspace.
|
|
28
30
|
|
|
29
31
|
If your Dataform repository does not have a package.json file, see this guide: https://docs.cloud.google.com/dataform/docs/manage-repository#move-to-package-json
|
|
@@ -46,6 +48,8 @@ The main features include:
|
|
|
46
48
|
|
|
47
49
|
Create a new **ga4_events_enhanced** table using a **.js** file in your repository's **definitions** folder.
|
|
48
50
|
|
|
51
|
+
##### Using Defaults
|
|
52
|
+
|
|
49
53
|
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
50
54
|
```javascript
|
|
51
55
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
@@ -57,6 +61,53 @@ const config = {
|
|
|
57
61
|
ga4EventsEnhanced.createTable(publish, config);
|
|
58
62
|
```
|
|
59
63
|
|
|
64
|
+
##### With Custom Configuration
|
|
65
|
+
|
|
66
|
+
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
67
|
+
```javascript
|
|
68
|
+
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
69
|
+
|
|
70
|
+
const config = {
|
|
71
|
+
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
|
|
72
|
+
schemaLock: '20260101', // prevent possible issues from updates to the export schema
|
|
73
|
+
customTimestampParam: 'custom_event_timestamp', // custom timestamp collected as an event param
|
|
74
|
+
timezone: 'Europe/Helsinki',
|
|
75
|
+
// not needed data
|
|
76
|
+
excludedColumns: [
|
|
77
|
+
'app_info',
|
|
78
|
+
'publisher'
|
|
79
|
+
],
|
|
80
|
+
// not needed events
|
|
81
|
+
excludedEvents: [
|
|
82
|
+
'user_engagement'
|
|
83
|
+
],
|
|
84
|
+
// transform to session-level
|
|
85
|
+
sessionParams: [
|
|
86
|
+
'user_agent'
|
|
87
|
+
],
|
|
88
|
+
// promote as columns
|
|
89
|
+
eventParamsToColumns: [
|
|
90
|
+
{name: 'session_engaged'},
|
|
91
|
+
{name: 'ga_session_number', type: 'int'},
|
|
92
|
+
{name: 'page_type', type: 'string'},
|
|
93
|
+
],
|
|
94
|
+
// not needed in the event_params array
|
|
95
|
+
excludedEventParams: [
|
|
96
|
+
'session_engaged',
|
|
97
|
+
'ga_session_number',
|
|
98
|
+
'page_type',
|
|
99
|
+
'user_agent'
|
|
100
|
+
],
|
|
101
|
+
// use day threshold for data_is_final
|
|
102
|
+
dataIsFinal: {
|
|
103
|
+
detectionMethod: 'DAY_THRESHOLD',
|
|
104
|
+
dayThreshold: 4
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
|
|
108
|
+
ga4EventsEnhanced.createTable(publish, config);
|
|
109
|
+
```
|
|
110
|
+
|
|
60
111
|
#### SQLX Deployment
|
|
61
112
|
|
|
62
113
|
Alternatively, you can create the **ga4_events_enhanced** table using a .SQLX file.
|
|
@@ -91,19 +142,132 @@ pre_operations {
|
|
|
91
142
|
}
|
|
92
143
|
```
|
|
93
144
|
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
#### Configuration Object
|
|
148
|
+
|
|
149
|
+
All fields are optional except `sourceTable`. Default values are applied automatically, so you only need to specify the fields you want to override.
|
|
150
|
+
|
|
151
|
+
| Field | Type | Default | Description |
|
|
152
|
+
|-------|------|---------|-------------|
|
|
153
|
+
| `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in the format `` `project.dataset.table` `` |
|
|
154
|
+
| `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
|
|
155
|
+
| `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
|
|
156
|
+
| `schemaLock` | string (YYYYMMDD) | `undefined` | Lock the table schema to a specific date. Must be a valid date >= `"20241009"` |
|
|
157
|
+
| `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
|
|
158
|
+
| `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
|
|
159
|
+
| `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight |
|
|
160
|
+
| `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
|
|
161
|
+
| `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
|
|
162
|
+
| `excludedEvents` | string[] | `[]` | Event names to exclude from the table |
|
|
163
|
+
| `excludedColumns` | string[] | `[]` | Default GA4 export columns to exclude from the final table, for example `'app_info'` or `'publisher'` |
|
|
164
|
+
| `sessionParams` | string[] | `[]` | Event parameter names to aggregate as session-level parameters |
|
|
165
|
+
|
|
166
|
+
**`includedExportTypes`** — which GA4 export types to include:
|
|
167
|
+
|
|
168
|
+
| Field | Type | Default | Description |
|
|
169
|
+
|-------|------|---------|-------------|
|
|
170
|
+
| `includedExportTypes.daily` | boolean | `true` | Include daily (processed) export |
|
|
171
|
+
| `includedExportTypes.intraday` | boolean | `true` | Include intraday export |
|
|
172
|
+
|
|
173
|
+
**`dataIsFinal`** — how to determine whether data is final (not expected to change):
|
|
174
|
+
|
|
175
|
+
| Field | Type | Default | Description |
|
|
176
|
+
|-------|------|---------|-------------|
|
|
177
|
+
| `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses table suffix, all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses days since event) |
|
|
178
|
+
| `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
|
|
179
|
+
|
|
180
|
+
**`testConfig`** — date range used when `test` is `true`:
|
|
181
|
+
|
|
182
|
+
| Field | Type | Default | Description |
|
|
183
|
+
|-------|------|---------|-------------|
|
|
184
|
+
| `testConfig.dateRangeStart` | string (SQL date) | `'current_date()-1'` | Start date for test queries |
|
|
185
|
+
| `testConfig.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for test queries |
|
|
186
|
+
|
|
187
|
+
**`preOperations`** — date range and incremental refresh configuration:
|
|
188
|
+
|
|
189
|
+
| Field | Type | Default | Description |
|
|
190
|
+
|-------|------|---------|-------------|
|
|
191
|
+
| `preOperations.dateRangeStartFullRefresh` | string (SQL date) | `'date(2000, 1, 1)'` | Start date for full refresh |
|
|
192
|
+
| `preOperations.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for queries |
|
|
193
|
+
| `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of previous days to scan from the result table when determining the incremental refresh start checkpoint. A higher value is required if the table updates have fallen behind for some reason |
|
|
194
|
+
| `preOperations.incrementalStartOverride` | string (SQL date) | `undefined` | Override the incremental start date to re-process a specific range |
|
|
195
|
+
| `preOperations.incrementalEndOverride` | string (SQL date) | `undefined` | Override the incremental end date to re-process a specific range |
|
|
196
|
+
|
|
197
|
+
**`eventParamsToColumns`** — each item in the array is an object:
|
|
198
|
+
|
|
199
|
+
| Field | Type | Required | Description |
|
|
200
|
+
|-------|------|----------|-------------|
|
|
201
|
+
| `name` | string | Yes | Event parameter name |
|
|
202
|
+
| `type` | string | No | Data type: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, or `'float64'`. If omitted, returns the value converted to a string |
|
|
203
|
+
| `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
|
|
204
|
+
|
|
205
|
+
Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
|
|
206
|
+
|
|
94
207
|
### Helpers
|
|
95
208
|
|
|
96
|
-
The helpers contain templates for common SQL
|
|
209
|
+
The helpers contain templates for common SQL expressions. The functions are referenced by **ga4EventsEnhanced** but can also be imported as utility functions for working with GA4 data.
|
|
97
210
|
|
|
98
211
|
```javascript
|
|
99
212
|
const { helpers } = require('ga4-export-fixer');
|
|
100
|
-
|
|
101
|
-
// Unnest event parameters, date filters, URL extraction, session aggregation, etc.
|
|
102
|
-
helpers.unnestEventParam('page_location', 'string');
|
|
103
|
-
helpers.ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()');
|
|
104
|
-
helpers.extractPageDetails();
|
|
105
213
|
```
|
|
106
214
|
|
|
215
|
+
#### SQL Templates
|
|
216
|
+
|
|
217
|
+
| Name | Example | Description |
|
|
218
|
+
|------|---------|-------------|
|
|
219
|
+
| `eventDate` | `helpers.eventDate` | Casts `event_date` string to a DATE using YYYYMMDD format |
|
|
220
|
+
| `sessionId` | `helpers.sessionId` | Builds a session ID by concatenating `user_pseudo_id` and `ga_session_id` |
|
|
221
|
+
|
|
222
|
+
#### Functions
|
|
223
|
+
|
|
224
|
+
**Unnesting parameters**
|
|
225
|
+
|
|
226
|
+
| Function | Example | Description |
|
|
227
|
+
|----------|---------|-------------|
|
|
228
|
+
| `unnestEventParam` | `unnestEventParam('page_location', 'string')` | Extracts a value from the `event_params` array by key. Supported types: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, `'float64'`. Omit the type to get the value converted to a string |
|
|
229
|
+
|
|
230
|
+
**Date and time**
|
|
231
|
+
|
|
232
|
+
| Function | Example | Description |
|
|
233
|
+
|----------|---------|-------------|
|
|
234
|
+
| `getEventTimestampMicros` | `getEventTimestampMicros('custom_ts')` | Returns SQL for the event timestamp in microseconds. When a custom parameter name is given, that parameter is used (converted from milliseconds), falling back to `event_timestamp` |
|
|
235
|
+
| `getEventDateTime` | `getEventDateTime({ timezone: 'Europe/Helsinki' })` | Returns SQL for event datetime in the given timezone. Defaults to `'Etc/UTC'` |
|
|
236
|
+
|
|
237
|
+
**Date filters**
|
|
238
|
+
|
|
239
|
+
| Function | Example | Description |
|
|
240
|
+
|----------|---------|-------------|
|
|
241
|
+
| `ga4ExportDateFilter` | `ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()')` | Generates a `_table_suffix` filter for a single export type (`'daily'` or `'intraday'`) and date range |
|
|
242
|
+
|
|
243
|
+
**Page details**
|
|
244
|
+
|
|
245
|
+
| Function | Example | Description |
|
|
246
|
+
|----------|---------|-------------|
|
|
247
|
+
| `extractUrlHostname` | `extractUrlHostname('page_location')` | Extracts hostname from a URL column |
|
|
248
|
+
| `extractUrlPath` | `extractUrlPath('page_location')` | Extracts the path component from a URL column |
|
|
249
|
+
| `extractUrlQuery` | `extractUrlQuery('page_location')` | Extracts the query string (including `?`) from a URL column |
|
|
250
|
+
| `extractUrlQueryParams` | `extractUrlQueryParams('page_location')` | Parses URL query parameters into `ARRAY<STRUCT<key STRING, value STRING>>` |
|
|
251
|
+
| `extractPageDetails` | `extractPageDetails()` | Returns a struct with `hostname`, `path`, `query`, and `query_params`. Defaults to the `page_location` event parameter |
|
|
252
|
+
|
|
253
|
+
**Aggregation**
|
|
254
|
+
|
|
255
|
+
| Function | Example | Description |
|
|
256
|
+
|----------|---------|-------------|
|
|
257
|
+
| `aggregateValue` | `aggregateValue('user_id', 'last', 'event_timestamp')` | Aggregates a column using `'max'`, `'min'`, `'first'`, `'last'`, or `'any'`. `'first'` and `'last'` use the timestamp column for ordering |
|
|
258
|
+
|
|
259
|
+
**Ecommerce**
|
|
260
|
+
|
|
261
|
+
| Function | Example | Description |
|
|
262
|
+
|----------|---------|-------------|
|
|
263
|
+
| `fixEcommerceStruct` | `fixEcommerceStruct()` | Cleans the ecommerce struct: sets `transaction_id` to null when `'(not set)'`, and fixes missing/NaN `purchase_revenue` for purchase events |
|
|
264
|
+
|
|
265
|
+
**Data freshness**
|
|
266
|
+
|
|
267
|
+
| Function | Example | Description |
|
|
268
|
+
|----------|---------|-------------|
|
|
269
|
+
| `isFinalData` | `isFinalData('DAY_THRESHOLD', 4)` | Returns SQL that evaluates to `true` when data is final. `'EXPORT_TYPE'` checks table suffix; `'DAY_THRESHOLD'` uses days since event (`dayThreshold` is required and must be a non-negative integer) |
|
|
270
|
+
|
|
107
271
|
## License
|
|
108
272
|
|
|
109
273
|
MIT
|
package/helpers.js
CHANGED
|
@@ -634,19 +634,21 @@ const isFinalData = (detectionMethod, dayThreshold) => {
|
|
|
634
634
|
throw new Error(`isFinalData: Unsupported detectionMethod '${detectionMethod}'. Supported values are 'EXPORT_TYPE' and 'DAY_THRESHOLD'.`);
|
|
635
635
|
}
|
|
636
636
|
|
|
637
|
-
if (
|
|
638
|
-
|
|
637
|
+
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
638
|
+
if (typeof dayThreshold === 'undefined') {
|
|
639
|
+
throw new Error("isFinalData: 'dayThreshold' is required when using 'DAY_THRESHOLD' detectionMethod.");
|
|
640
|
+
}
|
|
641
|
+
if (!Number.isInteger(dayThreshold) || dayThreshold < 0) {
|
|
642
|
+
throw new Error("isFinalData: 'dayThreshold' must be an integer greater than or equal to 0 when using 'DAY_THRESHOLD' detectionMethod.");
|
|
643
|
+
}
|
|
639
644
|
}
|
|
640
645
|
|
|
641
|
-
const defaultDayThreshold = 3;
|
|
642
|
-
const threshold = typeof dayThreshold !== 'undefined' ? dayThreshold : defaultDayThreshold;
|
|
643
|
-
|
|
644
646
|
if (detectionMethod === 'EXPORT_TYPE') {
|
|
645
647
|
return 'if(_table_suffix like \'intraday_%\' or _table_suffix like \'fresh_%\', false, true)';
|
|
646
648
|
}
|
|
647
649
|
|
|
648
650
|
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
649
|
-
return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${
|
|
651
|
+
return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${dayThreshold}, true, false)`;
|
|
650
652
|
}
|
|
651
653
|
};
|
|
652
654
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3-dev.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
],
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "node tests/ga4EventsEnhanced.test.js",
|
|
16
|
-
"test:events": "node tests/ga4EventsEnhanced.test.js"
|
|
16
|
+
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
17
|
+
"prepublishOnly": "node scripts/updateReadme.js"
|
|
17
18
|
},
|
|
18
19
|
"repository": {
|
|
19
20
|
"type": "git",
|
|
@@ -39,13 +39,13 @@ const defaultConfig = {
|
|
|
39
39
|
// this is useful if you want to re-process only a specific date range
|
|
40
40
|
incrementalStartOverride: undefined,
|
|
41
41
|
incrementalEndOverride: undefined,
|
|
42
|
-
numberOfPreviousDaysToScan:
|
|
42
|
+
numberOfPreviousDaysToScan: 10,
|
|
43
43
|
},
|
|
44
44
|
// these parameters are excluded by default because they've been made available in other columns
|
|
45
45
|
defaultExcludedEventParams: [
|
|
46
46
|
'page_location',
|
|
47
47
|
'ga_session_id',
|
|
48
|
-
//'custom_event_timestamp', //
|
|
48
|
+
//'custom_event_timestamp', // removed if customTimestampParam is used
|
|
49
49
|
],
|
|
50
50
|
excludedEventParams: [],
|
|
51
51
|
eventParamsToColumns: [
|
|
@@ -57,12 +57,13 @@ const defaultConfig = {
|
|
|
57
57
|
'first_visit'
|
|
58
58
|
],
|
|
59
59
|
excludedEvents: [],
|
|
60
|
-
|
|
61
|
-
excludedColumns: [
|
|
60
|
+
defaultExcludedColumns: [
|
|
62
61
|
'event_dimensions', // legacy column, not needed
|
|
63
62
|
'traffic_source', // renamed to user_traffic_source
|
|
64
63
|
'session_id'
|
|
65
64
|
],
|
|
65
|
+
// exclude these columns when extracting raw data from the export tables
|
|
66
|
+
excludedColumns: [],
|
|
66
67
|
};
|
|
67
68
|
|
|
68
69
|
// List the columns in the order they should be in the final table
|
|
@@ -227,8 +228,9 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
227
228
|
};
|
|
228
229
|
|
|
229
230
|
const getExcludedColumns = () => {
|
|
231
|
+
const allExcludedColumns = utils.mergeUniqueArrays(mergedConfig.defaultExcludedColumns, mergedConfig.excludedColumns);
|
|
230
232
|
const excludedColumns = {};
|
|
231
|
-
|
|
233
|
+
allExcludedColumns.forEach(c => {
|
|
232
234
|
excludedColumns[c] = undefined;
|
|
233
235
|
});
|
|
234
236
|
return excludedColumns;
|
package/utils.js
CHANGED
|
@@ -105,6 +105,7 @@ const mergeSQLConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
105
105
|
return defaultConfig;
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
+
// the merged configuration object
|
|
108
109
|
const result = { ...defaultConfig };
|
|
109
110
|
|
|
110
111
|
for (const key in inputConfig) {
|
|
@@ -170,6 +171,27 @@ const mergeSQLConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
170
171
|
}
|
|
171
172
|
}
|
|
172
173
|
|
|
174
|
+
// support different formats for passing the sourceTable path
|
|
175
|
+
const fixSourceTable = (sourceTable) => {
    // Dataform table reference objects are handed back untouched.
    if (isDataformTableReferenceObject(sourceTable)) {
        return sourceTable;
    }

    if (typeof sourceTable === 'string') {
        // Strip any backticks or quotes wrapped around the path, then surrounding whitespace.
        const tablePath = sourceTable.replace(/[`"']/g, '').trim();
        // Accepts "project.dataset" with an optional third, dot-free segment.
        const isValidPath = /^[a-zA-Z0-9-]+\.[a-zA-Z0-9_]+(\.[^\.]+)?$/.test(tablePath);

        if (isValidPath) {
            // Only project and dataset are kept; the table part is always
            // replaced with the events_* wildcard.
            const [project, dataset] = tablePath.split('.');
            return `\`${project}.${dataset}.events_*\``;
        }
    }

    // Anything else (unsupported type or malformed string) is rejected.
    throw new Error(`sourceTable must be a Dataform table reference or a string in the format '\`project.dataset.table\`'. Received: ${JSON.stringify(sourceTable)}`);
};
|
|
189
|
+
|
|
190
|
+
// process the sourceTable to support different formats
|
|
191
|
+
if (result.sourceTable) {
|
|
192
|
+
result.sourceTable = fixSourceTable(result.sourceTable);
|
|
193
|
+
}
|
|
194
|
+
|
|
173
195
|
return result;
|
|
174
196
|
};
|
|
175
197
|
|
|
@@ -178,15 +200,15 @@ const mergeSQLConfigurations = (defaultConfig, inputConfig = {}) => {
|
|
|
178
200
|
*
|
|
179
201
|
* A Dataform table reference object is expected to have the properties: 'name', 'project', and 'dataset'.
|
|
180
202
|
*
|
|
181
|
-
* @param {Object}
|
|
203
|
+
* @param {Object} obj - The object to check.
|
|
182
204
|
* @returns {boolean} True if the object is a Dataform table reference, false otherwise.
|
|
183
205
|
*/
|
|
184
|
-
const isDataformTableReferenceObject = (
|
|
185
|
-
return
|
|
186
|
-
typeof
|
|
187
|
-
Object.hasOwn(
|
|
188
|
-
Object.hasOwn(
|
|
189
|
-
Object.hasOwn(
|
|
206
|
+
const isDataformTableReferenceObject = (obj) => {
    // Coerce the first operand so that falsy inputs (null, undefined, 0, '')
    // yield `false` instead of leaking the input value itself through `&&`;
    // the documented return type is a strict boolean.
    return Boolean(obj) &&
        typeof obj === 'object' &&
        Object.hasOwn(obj, 'name') &&
        Object.hasOwn(obj, 'project') &&
        Object.hasOwn(obj, 'dataset');
};
|
|
191
213
|
|
|
192
214
|
|