ga4-export-fixer 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -7
- package/helpers.js +8 -6
- package/package.json +3 -2
- package/tables/ga4EventsEnhanced.js +7 -5
package/README.md
CHANGED
|
@@ -20,7 +20,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
20
20
|
"name": "my_dataform_repo",
|
|
21
21
|
"dependencies": {
|
|
22
22
|
"@dataform/core": "3.0.39",
|
|
23
|
-
"ga4-export-fixer": "0.1.
|
|
23
|
+
"ga4-export-fixer": "0.1.1"
|
|
24
24
|
}
|
|
25
25
|
}
|
|
26
26
|
```
|
|
@@ -46,6 +46,8 @@ The main features include:
|
|
|
46
46
|
|
|
47
47
|
Create a new **ga4_events_enhanced** table using a **.js** file in your repository's **definitions** folder.
|
|
48
48
|
|
|
49
|
+
##### Using Defaults
|
|
50
|
+
|
|
49
51
|
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
50
52
|
```javascript
|
|
51
53
|
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
@@ -57,6 +59,48 @@ const config = {
|
|
|
57
59
|
ga4EventsEnhanced.createTable(publish, config);
|
|
58
60
|
```
|
|
59
61
|
|
|
62
|
+
##### With Custom Configuration
|
|
63
|
+
|
|
64
|
+
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
65
|
+
```javascript
|
|
66
|
+
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
67
|
+
|
|
68
|
+
const config = {
|
|
69
|
+
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
|
|
70
|
+
schemaLock: '20260101', // prevent possible issues from updates to the export schema
|
|
71
|
+
customTimestampParam: 'custom_event_timestamp', // custom timestamp collected as an event param
|
|
72
|
+
timezone: 'Europe/Helsinki',
|
|
73
|
+
// not needed data
|
|
74
|
+
excludedColumns: [
|
|
75
|
+
'app_info',
|
|
76
|
+
'publisher'
|
|
77
|
+
],
|
|
78
|
+
// not needed events
|
|
79
|
+
excludedEvents: [
|
|
80
|
+
'user_engagement'
|
|
81
|
+
],
|
|
82
|
+
// transform to session-level
|
|
83
|
+
sessionParams: [
|
|
84
|
+
'user_agent'
|
|
85
|
+
],
|
|
86
|
+
// promote as columns
|
|
87
|
+
eventParamsToColumns: [
|
|
88
|
+
{name: 'session_engaged'},
|
|
89
|
+
{name: 'ga_session_number', type: 'int'},
|
|
90
|
+
{name: 'page_type', type: 'string'},
|
|
91
|
+
],
|
|
92
|
+
// not needed in the event_params array
|
|
93
|
+
excludedEventParams: [
|
|
94
|
+
'session_engaged',
|
|
95
|
+
'ga_session_number',
|
|
96
|
+
'page_type',
|
|
97
|
+
'user_agent'
|
|
98
|
+
]
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
ga4EventsEnhanced.createTable(publish, config);
|
|
102
|
+
```
|
|
103
|
+
|
|
60
104
|
#### SQLX Deployment
|
|
61
105
|
|
|
62
106
|
Alternatively, you can create the **ga4_events_enhanced** table using a .SQLX file.
|
|
@@ -91,19 +135,132 @@ pre_operations {
|
|
|
91
135
|
}
|
|
92
136
|
```
|
|
93
137
|
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
#### Configuration Object
|
|
141
|
+
|
|
142
|
+
All fields are optional except `sourceTable`. Default values are applied automatically, so you only need to specify the fields you want to override.
|
|
143
|
+
|
|
144
|
+
| Field | Type | Default | Description |
|
|
145
|
+
|-------|------|---------|-------------|
|
|
146
|
+
| `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in the format `` `project.dataset.table` `` |
|
|
147
|
+
| `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
|
|
148
|
+
| `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
|
|
149
|
+
| `schemaLock` | string (YYYYMMDD) | `undefined` | Lock the table schema to a specific date. Must be a valid date >= `"20241009"` |
|
|
150
|
+
| `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
|
|
151
|
+
| `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
|
|
152
|
+
| `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight |
|
|
153
|
+
| `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
|
|
154
|
+
| `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
|
|
155
|
+
| `excludedEvents` | string[] | `[]` | Event names to exclude from the table |
|
|
156
|
+
| `excludedColumns` | string[] | `[]` | Default GA4 export columns to exclude from the final table, for example `'app_info'` or `'publisher'` |
|
|
157
|
+
| `sessionParams` | string[] | `[]` | Event parameter names to aggregate as session-level parameters |
|
|
158
|
+
|
|
159
|
+
**`includedExportTypes`** — which GA4 export types to include:
|
|
160
|
+
|
|
161
|
+
| Field | Type | Default | Description |
|
|
162
|
+
|-------|------|---------|-------------|
|
|
163
|
+
| `includedExportTypes.daily` | boolean | `true` | Include daily (processed) export |
|
|
164
|
+
| `includedExportTypes.intraday` | boolean | `true` | Include intraday export |
|
|
165
|
+
|
|
166
|
+
**`dataIsFinal`** — how to determine whether data is final (not expected to change):
|
|
167
|
+
|
|
168
|
+
| Field | Type | Default | Description |
|
|
169
|
+
|-------|------|---------|-------------|
|
|
170
|
+
| `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses table suffix, all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses days since event) |
|
|
171
|
+
| `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
|
|
172
|
+
|
|
173
|
+
**`testConfig`** — date range used when `test` is `true`:
|
|
174
|
+
|
|
175
|
+
| Field | Type | Default | Description |
|
|
176
|
+
|-------|------|---------|-------------|
|
|
177
|
+
| `testConfig.dateRangeStart` | string (SQL date) | `'current_date()-1'` | Start date for test queries |
|
|
178
|
+
| `testConfig.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for test queries |
|
|
179
|
+
|
|
180
|
+
**`preOperations`** — date range and incremental refresh configuration:
|
|
181
|
+
|
|
182
|
+
| Field | Type | Default | Description |
|
|
183
|
+
|-------|------|---------|-------------|
|
|
184
|
+
| `preOperations.dateRangeStartFullRefresh` | string (SQL date) | `'date(2000, 1, 1)'` | Start date for full refresh |
|
|
185
|
+
| `preOperations.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for queries |
|
|
186
|
+
| `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of previous days to scan from the result table when determining the incremental refresh start checkpoint. A higher value is required if the table updates have fallen behind for some reason |
|
|
187
|
+
| `preOperations.incrementalStartOverride` | string (SQL date) | `undefined` | Override the incremental start date to re-process a specific range |
|
|
188
|
+
| `preOperations.incrementalEndOverride` | string (SQL date) | `undefined` | Override the incremental end date to re-process a specific range |
|
|
189
|
+
|
|
190
|
+
**`eventParamsToColumns`** — each item in the array is an object:
|
|
191
|
+
|
|
192
|
+
| Field | Type | Required | Description |
|
|
193
|
+
|-------|------|----------|-------------|
|
|
194
|
+
| `name` | string | Yes | Event parameter name |
|
|
195
|
+
| `type` | string | No | Data type: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, or `'float64'`. If omitted, returns the value converted to a string |
|
|
196
|
+
| `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
|
|
197
|
+
|
|
198
|
+
Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
|
|
199
|
+
|
|
94
200
|
### Helpers
|
|
95
201
|
|
|
96
|
-
The helpers contain templates for common SQL
|
|
202
|
+
The helpers contain templates for common SQL expressions. The functions are referenced by **ga4EventsEnhanced** but can also be imported as utility functions for working with GA4 data.
|
|
97
203
|
|
|
98
204
|
```javascript
|
|
99
205
|
const { helpers } = require('ga4-export-fixer');
|
|
100
|
-
|
|
101
|
-
// Unnest event parameters, date filters, URL extraction, session aggregation, etc.
|
|
102
|
-
helpers.unnestEventParam('page_location', 'string');
|
|
103
|
-
helpers.ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()');
|
|
104
|
-
helpers.extractPageDetails();
|
|
105
206
|
```
|
|
106
207
|
|
|
208
|
+
#### SQL Templates
|
|
209
|
+
|
|
210
|
+
| Name | Example | Description |
|
|
211
|
+
|------|---------|-------------|
|
|
212
|
+
| `eventDate` | `helpers.eventDate` | Casts `event_date` string to a DATE using YYYYMMDD format |
|
|
213
|
+
| `sessionId` | `helpers.sessionId` | Builds a session ID by concatenating `user_pseudo_id` and `ga_session_id` |
|
|
214
|
+
|
|
215
|
+
#### Functions
|
|
216
|
+
|
|
217
|
+
**Unnesting parameters**
|
|
218
|
+
|
|
219
|
+
| Function | Example | Description |
|
|
220
|
+
|----------|---------|-------------|
|
|
221
|
+
| `unnestEventParam` | `unnestEventParam('page_location', 'string')` | Extracts a value from the `event_params` array by key. Supported types: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, `'float64'`. Omit the type to get the value converted to a string |
|
|
222
|
+
|
|
223
|
+
**Date and time**
|
|
224
|
+
|
|
225
|
+
| Function | Example | Description |
|
|
226
|
+
|----------|---------|-------------|
|
|
227
|
+
| `getEventTimestampMicros` | `getEventTimestampMicros('custom_ts')` | Returns SQL for event timestamp in microseconds. With a custom parameter, uses it (converted from ms) with fallback to `event_timestamp` |
|
|
228
|
+
| `getEventDateTime` | `getEventDateTime({ timezone: 'Europe/Helsinki' })` | Returns SQL for event datetime in the given timezone. Defaults to `'Etc/UTC'` |
|
|
229
|
+
|
|
230
|
+
**Date filters**
|
|
231
|
+
|
|
232
|
+
| Function | Example | Description |
|
|
233
|
+
|----------|---------|-------------|
|
|
234
|
+
| `ga4ExportDateFilter` | `ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()')` | Generates a `_table_suffix` filter for a single export type (`'daily'` or `'intraday'`) and date range |
|
|
235
|
+
|
|
236
|
+
**Page details**
|
|
237
|
+
|
|
238
|
+
| Function | Example | Description |
|
|
239
|
+
|----------|---------|-------------|
|
|
240
|
+
| `extractUrlHostname` | `extractUrlHostname('page_location')` | Extracts hostname from a URL column |
|
|
241
|
+
| `extractUrlPath` | `extractUrlPath('page_location')` | Extracts the path component from a URL column |
|
|
242
|
+
| `extractUrlQuery` | `extractUrlQuery('page_location')` | Extracts the query string (including `?`) from a URL column |
|
|
243
|
+
| `extractUrlQueryParams` | `extractUrlQueryParams('page_location')` | Parses URL query parameters into `ARRAY<STRUCT<key STRING, value STRING>>` |
|
|
244
|
+
| `extractPageDetails` | `extractPageDetails()` | Returns a struct with `hostname`, `path`, `query`, and `query_params`. Defaults to `page_location` event parameter |
|
|
245
|
+
|
|
246
|
+
**Aggregation**
|
|
247
|
+
|
|
248
|
+
| Function | Example | Description |
|
|
249
|
+
|----------|---------|-------------|
|
|
250
|
+
| `aggregateValue` | `aggregateValue('user_id', 'last', 'event_timestamp')` | Aggregates a column using `'max'`, `'min'`, `'first'`, `'last'`, or `'any'`. `'first'` and `'last'` use the timestamp column for ordering |
|
|
251
|
+
|
|
252
|
+
**Ecommerce**
|
|
253
|
+
|
|
254
|
+
| Function | Example | Description |
|
|
255
|
+
|----------|---------|-------------|
|
|
256
|
+
| `fixEcommerceStruct` | `fixEcommerceStruct()` | Cleans the ecommerce struct: sets `transaction_id` to null when `'(not set)'`, and fixes missing/NaN `purchase_revenue` for purchase events |
|
|
257
|
+
|
|
258
|
+
**Data freshness**
|
|
259
|
+
|
|
260
|
+
| Function | Example | Description |
|
|
261
|
+
|----------|---------|-------------|
|
|
262
|
+
| `isFinalData` | `isFinalData('DAY_THRESHOLD', 4)` | Returns SQL that evaluates to `true` when data is final. `'EXPORT_TYPE'` checks table suffix; `'DAY_THRESHOLD'` uses days since event (`dayThreshold` is required and must be a non-negative integer) |
|
|
263
|
+
|
|
107
264
|
## License
|
|
108
265
|
|
|
109
266
|
MIT
|
package/helpers.js
CHANGED
|
@@ -634,19 +634,21 @@ const isFinalData = (detectionMethod, dayThreshold) => {
|
|
|
634
634
|
throw new Error(`isFinalData: Unsupported detectionMethod '${detectionMethod}'. Supported values are 'EXPORT_TYPE' and 'DAY_THRESHOLD'.`);
|
|
635
635
|
}
|
|
636
636
|
|
|
637
|
-
if (
|
|
638
|
-
|
|
637
|
+
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
638
|
+
if (typeof dayThreshold === 'undefined') {
|
|
639
|
+
throw new Error("isFinalData: 'dayThreshold' is required when using 'DAY_THRESHOLD' detectionMethod.");
|
|
640
|
+
}
|
|
641
|
+
if (!Number.isInteger(dayThreshold) || dayThreshold < 0) {
|
|
642
|
+
throw new Error("isFinalData: 'dayThreshold' must be an integer greater than or equal to 0 when using 'DAY_THRESHOLD' detectionMethod.");
|
|
643
|
+
}
|
|
639
644
|
}
|
|
640
645
|
|
|
641
|
-
const defaultDayThreshold = 3;
|
|
642
|
-
const threshold = typeof dayThreshold !== 'undefined' ? dayThreshold : defaultDayThreshold;
|
|
643
|
-
|
|
644
646
|
if (detectionMethod === 'EXPORT_TYPE') {
|
|
645
647
|
return 'if(_table_suffix like \'intraday_%\' or _table_suffix like \'fresh_%\', false, true)';
|
|
646
648
|
}
|
|
647
649
|
|
|
648
650
|
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
649
|
-
return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${threshold}, true, false)`;
|
|
651
|
+
return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${dayThreshold}, true, false)`;
|
|
650
652
|
}
|
|
651
653
|
};
|
|
652
654
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
],
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "node tests/ga4EventsEnhanced.test.js",
|
|
16
|
-
"test:events": "node tests/ga4EventsEnhanced.test.js"
|
|
16
|
+
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
17
|
+
"prepublishOnly": "node scripts/updateReadme.js"
|
|
17
18
|
},
|
|
18
19
|
"repository": {
|
|
19
20
|
"type": "git",
|
|
package/tables/ga4EventsEnhanced.js
CHANGED
|
@@ -39,13 +39,13 @@ const defaultConfig = {
|
|
|
39
39
|
// this is useful if you want to re-process only a specific date range
|
|
40
40
|
incrementalStartOverride: undefined,
|
|
41
41
|
incrementalEndOverride: undefined,
|
|
42
|
-
numberOfPreviousDaysToScan:
|
|
42
|
+
numberOfPreviousDaysToScan: 10,
|
|
43
43
|
},
|
|
44
44
|
// these parameters are excluded by default because they've been made available in other columns
|
|
45
45
|
defaultExcludedEventParams: [
|
|
46
46
|
'page_location',
|
|
47
47
|
'ga_session_id',
|
|
48
|
-
//'custom_event_timestamp', //
|
|
48
|
+
//'custom_event_timestamp', // removed if customTimestampParam is used
|
|
49
49
|
],
|
|
50
50
|
excludedEventParams: [],
|
|
51
51
|
eventParamsToColumns: [
|
|
@@ -57,12 +57,13 @@ const defaultConfig = {
|
|
|
57
57
|
'first_visit'
|
|
58
58
|
],
|
|
59
59
|
excludedEvents: [],
|
|
60
|
-
|
|
61
|
-
excludedColumns: [
|
|
60
|
+
defaultExcludedColumns: [
|
|
62
61
|
'event_dimensions', // legacy column, not needed
|
|
63
62
|
'traffic_source', // renamed to user_traffic_source
|
|
64
63
|
'session_id'
|
|
65
64
|
],
|
|
65
|
+
// exclude these columns when extracting raw data from the export tables
|
|
66
|
+
excludedColumns: [],
|
|
66
67
|
};
|
|
67
68
|
|
|
68
69
|
// List the columns in the order they should be in the final table
|
|
@@ -227,8 +228,9 @@ const generateEnhancedEventsSQL = (config) => {
|
|
|
227
228
|
};
|
|
228
229
|
|
|
229
230
|
const getExcludedColumns = () => {
|
|
231
|
+
const allExcludedColumns = utils.mergeUniqueArrays(mergedConfig.defaultExcludedColumns, mergedConfig.excludedColumns);
|
|
230
232
|
const excludedColumns = {};
|
|
231
|
-
|
|
233
|
+
allExcludedColumns.forEach(c => {
|
|
232
234
|
excludedColumns[c] = undefined;
|
|
233
235
|
});
|
|
234
236
|
return excludedColumns;
|