ga4-export-fixer 0.1.6-dev.9 → 0.2.1-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +446 -321
- package/package.json +2 -1
- package/preOperations.js +1 -1
- package/tables/ga4EventsEnhanced.js +4 -1
package/README.md
CHANGED
|
@@ -1,321 +1,446 @@
|
|
|
1
|
-
# ga4-export-fixer
|
|
2
|
-
|
|
3
|
-
**ga4-export-fixer** is a **Dataform NPM package** that transforms raw GA4 BigQuery export data into a cleaner, more queryable incremental table. It combines daily and intraday exports so the best available version of each event is always in use, adds session-level fields like `session_id` and `landing_page`, promotes key event parameters to columns, and fixes known GA4 export issues — handling the boilerplate transformations that are otherwise tedious to include in every GA4 query.
|
|
4
|
-
|
|
5
|
-
The goal of the package is to **speed up development** when building data models and pipelines on top of GA4 export data, allowing you to focus on your use case instead of wrestling with the raw export format.
|
|
6
|
-
|
|
7
|
-
###
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
//
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
|
266
|
-
|
|
267
|
-
| `
|
|
268
|
-
| `
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
|
275
|
-
|
|
276
|
-
| `
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
|
316
|
-
|
|
317
|
-
| `
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
1
|
+
# ga4-export-fixer
|
|
2
|
+
|
|
3
|
+
**ga4-export-fixer** is a **Dataform NPM package** that transforms raw GA4 BigQuery export data into a cleaner, more queryable incremental table. It combines daily and intraday exports so the best available version of each event is always in use, adds session-level fields like `session_id` and `landing_page`, promotes key event parameters to columns, and fixes known GA4 export issues — handling the boilerplate transformations that are otherwise tedious to include in every GA4 query.
|
|
4
|
+
|
|
5
|
+
The goal of the package is to **speed up development** when building data models and pipelines on top of GA4 export data, allowing you to focus on your use case instead of wrestling with the raw export format.
|
|
6
|
+
|
|
7
|
+
### Table of Contents
|
|
8
|
+
<!-- TOC -->
|
|
9
|
+
- [Planned, Upcoming Features](#planned-upcoming-features)
|
|
10
|
+
- [Installation](#installation)
|
|
11
|
+
- [Bash](#bash)
|
|
12
|
+
- [In Google Cloud Dataform](#in-google-cloud-dataform)
|
|
13
|
+
- [Usage](#usage)
|
|
14
|
+
- [Create GA4 Events Enhanced Table](#create-ga4-events-enhanced-table)
|
|
15
|
+
- [Building on top of the ga4_events_enhanced table](#building-on-top-of-the-ga4_events_enhanced-table)
|
|
16
|
+
- [Configuration Object](#configuration-object)
|
|
17
|
+
- [Helpers](#helpers)
|
|
18
|
+
- [License](#license)
|
|
19
|
+
<!-- /TOC -->
|
|
20
|
+
|
|
21
|
+
### Planned, Upcoming Features
|
|
22
|
+
|
|
23
|
+
- Column descriptions
|
|
24
|
+
- Web and app specific default configurations
|
|
25
|
+
- Ecommerce item list attribution
|
|
26
|
+
- Custom channel grouping
|
|
27
|
+
- Data enrichment (item-level, session-level, event-level)
|
|
28
|
+
- Support for fresh export (GA4 360)
|
|
29
|
+
- Custom processing steps (additional CTEs)
|
|
30
|
+
- Custom traffic source attribution
|
|
31
|
+
- Default assertions
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
### Bash
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npm install ga4-export-fixer
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### In Google Cloud Dataform
|
|
42
|
+
|
|
43
|
+
Include the package in the package.json file in your Dataform repository.
|
|
44
|
+
|
|
45
|
+
**`package.json`**
|
|
46
|
+
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"dependencies": {
|
|
50
|
+
"@dataform/core": "3.0.42",
|
|
51
|
+
"ga4-export-fixer": "0.2.0"
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Note:** The best practice is to specify the package version explicitly (e.g. `"0.1.2"`) rather than using `"latest"` or `"*"`, to avoid unexpected breaking changes when the package is updated.
|
|
57
|
+
|
|
58
|
+
In Google Cloud Dataform, click "Install Packages" to install it in your development workspace.
|
|
59
|
+
|
|
60
|
+
If your Dataform repository does not have a package.json file, see this guide: [https://docs.cloud.google.com/dataform/docs/manage-repository#move-to-package-json](https://docs.cloud.google.com/dataform/docs/manage-repository#move-to-package-json)
|
|
61
|
+
|
|
62
|
+
## Usage
|
|
63
|
+
|
|
64
|
+
### Create GA4 Events Enhanced Table
|
|
65
|
+
|
|
66
|
+
Creates an **enhanced** version of the GA4 BigQuery export (daily & intraday).
|
|
67
|
+
|
|
68
|
+
The main features include:
|
|
69
|
+
|
|
70
|
+
- **Best available data at any time** – Combines daily (processed) and intraday exports so the most complete, accurate version of the data is always available
|
|
71
|
+
- **Robust incremental updates** – Run on any schedule (daily, hourly, or custom)
|
|
72
|
+
- **Flexible schema, better optimized for building data models** – Keeps the flexible structure of the original export while promoting key fields (e.g. `page_location`, `session_id`) to columns for better query performance; **partitioning and clustering** enabled
|
|
73
|
+
- **Event parameter handling** – Promote event params to columns; include or exclude by name
|
|
74
|
+
- **Session parameters** – Promote selected event parameters as session-level parameters
|
|
75
|
+
|
|
76
|
+
#### JS Deployment (Recommended)
|
|
77
|
+
|
|
78
|
+
Create a new **ga4_events_enhanced** table using a **.js** file in your repository's **definitions** folder.
|
|
79
|
+
|
|
80
|
+
##### Using Defaults
|
|
81
|
+
|
|
82
|
+
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
83
|
+
|
|
84
|
+
```javascript
|
|
85
|
+
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
86
|
+
|
|
87
|
+
const config = {
|
|
88
|
+
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
ga4EventsEnhanced.createTable(publish, config);
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
##### With Custom Configuration
|
|
95
|
+
|
|
96
|
+
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
97
|
+
|
|
98
|
+
```javascript
|
|
99
|
+
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
100
|
+
|
|
101
|
+
const config = {
|
|
102
|
+
sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
|
|
103
|
+
// use dataformTableConfig to make changes to the default Dataform table configuration
|
|
104
|
+
dataformTableConfig: {
|
|
105
|
+
schema: 'ga4'
|
|
106
|
+
},
|
|
107
|
+
// test configurations
|
|
108
|
+
test: false,
|
|
109
|
+
testConfig: {
|
|
110
|
+
dateRangeStart: 'current_date()-1',
|
|
111
|
+
dateRangeEnd: 'current_date()',
|
|
112
|
+
},
|
|
113
|
+
schemaLock: '20260101', // prevent possible issues from updates to the export schema
|
|
114
|
+
customTimestampParam: 'custom_event_timestamp', // custom timestamp collected as an event param
|
|
115
|
+
timezone: 'Europe/Helsinki',
|
|
116
|
+
// not needed data
|
|
117
|
+
excludedColumns: [
|
|
118
|
+
'app_info',
|
|
119
|
+
'publisher'
|
|
120
|
+
],
|
|
121
|
+
// not needed events
|
|
122
|
+
excludedEvents: [
|
|
123
|
+
'session_start',
|
|
124
|
+
'first_visit',
|
|
125
|
+
'user_engagement'
|
|
126
|
+
],
|
|
127
|
+
// transform to session-level
|
|
128
|
+
sessionParams: [
|
|
129
|
+
'user_agent'
|
|
130
|
+
],
|
|
131
|
+
// promote as columns
|
|
132
|
+
eventParamsToColumns: [
|
|
133
|
+
{name: 'session_engaged'},
|
|
134
|
+
{name: 'ga_session_number', type: 'int'},
|
|
135
|
+
{name: 'page_type', type: 'string'},
|
|
136
|
+
],
|
|
137
|
+
// not needed in the event_params array
|
|
138
|
+
excludedEventParams: [
|
|
139
|
+
'session_engaged',
|
|
140
|
+
'ga_session_number',
|
|
141
|
+
'page_type',
|
|
142
|
+
'user_agent'
|
|
143
|
+
],
|
|
144
|
+
// use day threshold for data_is_final
|
|
145
|
+
dataIsFinal: {
|
|
146
|
+
detectionMethod: 'DAY_THRESHOLD',
|
|
147
|
+
dayThreshold: 4
|
|
148
|
+
},
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
ga4EventsEnhanced.createTable(publish, config);
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
#### SQLX Deployment
|
|
155
|
+
|
|
156
|
+
Alternatively, you can create the **ga4_events_enhanced** table using a .SQLX file.
|
|
157
|
+
|
|
158
|
+
**`definitions/ga4/ga4_events_enhanced.sqlx`**
|
|
159
|
+
|
|
160
|
+
```javascript
|
|
161
|
+
config {
|
|
162
|
+
type: "incremental",
|
|
163
|
+
description: "GA4 Events Enhanced table",
|
|
164
|
+
schema: "ga4",
|
|
165
|
+
bigquery: {
|
|
166
|
+
partitionBy: "event_date",
|
|
167
|
+
clusterBy: ['event_name', 'session_id', 'page_location', 'data_is_final'],
|
|
168
|
+
},
|
|
169
|
+
tags: ['ga4_export_fixer']
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
js {
|
|
173
|
+
const { ga4EventsEnhanced } = require('ga4-export-fixer');
|
|
174
|
+
|
|
175
|
+
const config = {
|
|
176
|
+
sourceTable: ref(constants.GA4_TABLES.MY_GA4_EXPORT),
|
|
177
|
+
self: self(),
|
|
178
|
+
incremental: incremental()
|
|
179
|
+
};
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
${ga4EventsEnhanced.generateSql(config)}
|
|
183
|
+
|
|
184
|
+
pre_operations {
|
|
185
|
+
${ga4EventsEnhanced.setPreOperations(config)}
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Building on top of the ga4_events_enhanced table
|
|
190
|
+
|
|
191
|
+
Setting up incremental updates is easy using the **setPreOperations()** function. Just ensure that your result table includes the **data_is_final** flag from the **ga4_events_enhanced** table.
|
|
192
|
+
|
|
193
|
+
The **incrementalDateFilter()** function applies the same date filtering used by **ga4_events_enhanced**, based on the **config** options and the variables declared by **setPreOperations()**.
|
|
194
|
+
|
|
195
|
+
Key fields such as session_id, user_id, and session_traffic_source_last_click are available as clean, sessionized versions that handle edge cases like sessions spanning midnight.
|
|
196
|
+
|
|
197
|
+
**`definitions/ga4/ga4_sessions.sqlx`**
|
|
198
|
+
|
|
199
|
+
```javascript
|
|
200
|
+
config {
|
|
201
|
+
type: "incremental",
|
|
202
|
+
description: "GA4 sessions table",
|
|
203
|
+
schema: "ga4_export_fixer",
|
|
204
|
+
bigquery: {
|
|
205
|
+
partitionBy: "event_date",
|
|
206
|
+
clusterBy: ['session_id', 'data_is_final'],
|
|
207
|
+
},
|
|
208
|
+
tags: ['ga4_export_fixer']
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
js {
|
|
212
|
+
const { setPreOperations, helpers } = require('ga4-export-fixer');
|
|
213
|
+
|
|
214
|
+
const config = {
|
|
215
|
+
self: self(),
|
|
216
|
+
incremental: incremental(),
|
|
217
|
+
/*
|
|
218
|
+
Default options that can be overridden:
|
|
219
|
+
test: false,
|
|
220
|
+
testConfig: {
|
|
221
|
+
dateRangeStart: 'current_date()-1',
|
|
222
|
+
dateRangeEnd: 'current_date()',
|
|
223
|
+
},
|
|
224
|
+
preOperations: {
|
|
225
|
+
dateRangeStartFullRefresh: 'date(2000, 1, 1)',
|
|
226
|
+
dateRangeEnd: 'current_date()',
|
|
227
|
+
// incremental date range overrides allow re-processing only a subset of the data:
|
|
228
|
+
//incrementalStartOverride: undefined,
|
|
229
|
+
//incrementalEndOverride: undefined,
|
|
230
|
+
},
|
|
231
|
+
*/
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
select
|
|
236
|
+
event_date,
|
|
237
|
+
session_id,
|
|
238
|
+
user_pseudo_id,
|
|
239
|
+
user_id,
|
|
240
|
+
any_value(session_traffic_source_last_click.cross_channel_campaign) as session_traffic_source,
|
|
241
|
+
any_value(landing_page) as landing_page,
|
|
242
|
+
current_datetime() as row_inserted_timestamp,
|
|
243
|
+
min(data_is_final) as data_is_final
|
|
244
|
+
from
|
|
245
|
+
${ref('ga4_events_enhanced_298233330')}
|
|
246
|
+
where
|
|
247
|
+
${helpers.incrementalDateFilter(config)}
|
|
248
|
+
group by
|
|
249
|
+
event_date,
|
|
250
|
+
session_id,
|
|
251
|
+
user_pseudo_id,
|
|
252
|
+
user_id
|
|
253
|
+
|
|
254
|
+
pre_operations {
|
|
255
|
+
${setPreOperations(config)}
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Configuration Object
|
|
260
|
+
|
|
261
|
+
All fields are optional except `sourceTable` (and, for .SQLX deployment, `self` and `incremental`). Default values are applied automatically, so you only need to specify the fields you want to override.
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
| Field | Type | Default/Required | Description |
|
|
265
|
+
| ---------------------- | ----------------------- | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
266
|
+
| `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in format ``project.dataset.table`` |
|
|
267
|
+
| `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
|
|
268
|
+
| `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
|
|
269
|
+
| `dataformTableConfig` | object | **In JS deployment only.** [See default](#default-dataformtableconfig) | Override the default Dataform table configuration for JS deployment. See: [ITableConfig reference](https://docs.cloud.google.com/dataform/docs/reference/dataform-core-reference#itableconfig) |
|
|
270
|
+
| `schemaLock` | string (YYYYMMDD) | `undefined` | Lock the table schema to a specific date. Must be a valid date >= `"20241009"` |
|
|
271
|
+
| `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
|
|
272
|
+
| `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
|
|
273
|
+
| `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight |
|
|
274
|
+
| `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
|
|
275
|
+
| `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
|
|
276
|
+
| `excludedEvents` | string[] | `['session_start', 'first_visit']` | Event names to exclude from the table. These events are excluded by default because they have no use for analysis purposes. Override this to include them if needed |
|
|
277
|
+
| `excludedColumns` | string[] | `[]` | Default GA4 export columns to exclude from the final table, for example `'app_info'` or `'publisher'` |
|
|
278
|
+
| `sessionParams` | string[] | `[]` | Event parameter names to aggregate as session-level parameters |
|
|
279
|
+
|
|
280
|
+
<a id="default-dataformtableconfig"></a>
|
|
281
|
+
<details>
|
|
282
|
+
<summary><strong>Default dataformTableConfig</strong></summary>
|
|
283
|
+
|
|
284
|
+
```json
|
|
285
|
+
{
|
|
286
|
+
"name": "ga4_events_enhanced_<dataset_id>",
|
|
287
|
+
"type": "incremental",
|
|
288
|
+
"schema": "ga4_export_fixer",
|
|
289
|
+
"description": "<default description>",
|
|
290
|
+
"bigquery": {
|
|
291
|
+
"partitionBy": "event_date",
|
|
292
|
+
"clusterBy": [
|
|
293
|
+
"event_name",
|
|
294
|
+
"session_id",
|
|
295
|
+
"page_location",
|
|
296
|
+
"data_is_final"
|
|
297
|
+
],
|
|
298
|
+
"labels": {
|
|
299
|
+
"ga4_export_fixer": "true"
|
|
300
|
+
}
|
|
301
|
+
},
|
|
302
|
+
"tags": [
|
|
303
|
+
"ga4_export_fixer"
|
|
304
|
+
]
|
|
305
|
+
}
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
</details>
|
|
309
|
+
<br>
|
|
310
|
+
|
|
311
|
+
**`includedExportTypes`** — which GA4 export types to include:
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
| Field | Type | Default | Description |
|
|
315
|
+
| ------------------------------ | ------- | ------- | -------------------------------- |
|
|
316
|
+
| `includedExportTypes.daily` | boolean | `true` | Include daily (processed) export |
|
|
317
|
+
| `includedExportTypes.intraday` | boolean | `true` | Include intraday export |
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
> **Intraday-only mode:** Set `daily` to `false` and `intraday` to `true` to use only intraday export tables. When using intraday-only mode, `dataIsFinal.detectionMethod` must be set to `'DAY_THRESHOLD'`.
|
|
321
|
+
|
|
322
|
+
**`dataIsFinal`** — how to determine whether data is final (not expected to change):
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
| Field | Type | Default | Description |
|
|
326
|
+
| ----------------------------- | ------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
327
|
+
| `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses table suffix; all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses days since event). Must be `'DAY_THRESHOLD'` when only intraday export is enabled |
|
|
328
|
+
| `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
**`testConfig`** — date range used when `test` is `true`:
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
| Field | Type | Default | Description |
|
|
335
|
+
| --------------------------- | ----------------- | -------------------- | --------------------------- |
|
|
336
|
+
| `testConfig.dateRangeStart` | string (SQL date) | `'current_date()-1'` | Start date for test queries |
|
|
337
|
+
| `testConfig.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for test queries |
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
**`preOperations`** — date range and incremental refresh configuration:
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
| Field | Type | Default | Description |
|
|
344
|
+
| ------------------------------------------ | ----------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
345
|
+
| `preOperations.dateRangeStartFullRefresh` | string (SQL date) | `'date(2000, 1, 1)'` | Start date for full refresh |
|
|
346
|
+
| `preOperations.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for queries |
|
|
347
|
+
| `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of previous days to scan from the result table when determining the incremental refresh start checkpoint. A higher value is required if the table updates have fallen behind for some reason |
|
|
348
|
+
| `preOperations.incrementalStartOverride` | string (SQL date) | `undefined` | Override the incremental start date to re-process a specific range |
|
|
349
|
+
| `preOperations.incrementalEndOverride` | string (SQL date) | `undefined` | Override the incremental end date to re-process a specific range |
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
**`eventParamsToColumns`** — each item in the array is an object:
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
| Field | Type | Required | Description |
|
|
356
|
+
| ------------ | ------ | -------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
357
|
+
| `name` | string | Yes | Event parameter name |
|
|
358
|
+
| `type` | string | No | Data type: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, or `'float64'`. If omitted, returns the value converted to a string |
|
|
359
|
+
| `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
|
|
363
|
+
|
|
364
|
+
### Helpers
|
|
365
|
+
|
|
366
|
+
The helpers contain templates for common SQL expressions. The functions are referenced by **ga4EventsEnhanced** but can also be imported as utility functions for working with GA4 data.
|
|
367
|
+
|
|
368
|
+
```javascript
|
|
369
|
+
const { helpers } = require('ga4-export-fixer');
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
#### SQL Templates
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
| Name | Example | Description |
|
|
376
|
+
| ----------- | ------------------- | ------------------------------------------------------------------------- |
|
|
377
|
+
| `eventDate` | `helpers.eventDate` | Casts `event_date` string to a DATE using YYYYMMDD format |
|
|
378
|
+
| `sessionId` | `helpers.sessionId` | Builds a session ID by concatenating `user_pseudo_id` and `ga_session_id` |
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
#### Functions
|
|
382
|
+
|
|
383
|
+
**Unnesting parameters**
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
| Function | Example | Description |
|
|
387
|
+
| ------------------ | --------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
388
|
+
| `unnestEventParam` | `unnestEventParam('page_location', 'string')` | Extracts a value from the `event_params` array by key. Supported types: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, `'float64'`. Omit type to get the value converted to a string |
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
**Date and time**
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
| Function | Example | Description |
|
|
395
|
+
| ------------------------- | --------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
|
396
|
+
| `getEventTimestampMicros` | `getEventTimestampMicros('custom_ts')` | Returns SQL for event timestamp in microseconds. With a custom parameter, uses it (converted from ms) with fallback to `event_timestamp` |
|
|
397
|
+
| `getEventDateTime` | `getEventDateTime({ timezone: 'Europe/Helsinki' })` | Returns SQL for event datetime in the given timezone. Defaults to `'Etc/UTC'` |
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
**Date filters**
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
| Function | Example | Description |
|
|
404
|
+
| --------------------- | -------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
|
|
405
|
+
| `ga4ExportDateFilter` | `ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()')` | Generates a `_table_suffix` filter for a single export type (`'daily'` or `'intraday'`) and date range |
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
**Page details**
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
| Function | Example | Description |
|
|
412
|
+
| ----------------------- | ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
|
|
413
|
+
| `extractUrlHostname` | `extractUrlHostname('page_location')` | Extracts hostname from a URL column |
|
|
414
|
+
| `extractUrlPath` | `extractUrlPath('page_location')` | Extracts the path component from a URL column |
|
|
415
|
+
| `extractUrlQuery` | `extractUrlQuery('page_location')` | Extracts the query string (including `?`) from a URL column |
|
|
416
|
+
| `extractUrlQueryParams` | `extractUrlQueryParams('page_location')` | Parses URL query parameters into `ARRAY<STRUCT<key STRING, value STRING>>` |
|
|
417
|
+
| `extractPageDetails` | `extractPageDetails()` | Returns a struct with `hostname`, `path`, `query`, and `query_params`. Defaults to `page_location` event parameter |
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
**Aggregation**
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
| Function | Example | Description |
|
|
424
|
+
| ---------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------- |
|
|
425
|
+
| `aggregateValue` | `aggregateValue('user_id', 'last', 'event_timestamp')` | Aggregates a column using `'max'`, `'min'`, `'first'`, `'last'`, or `'any'`. `'first'` and `'last'` use the timestamp column for ordering |
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
**Ecommerce**
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
| Function | Example | Description |
|
|
432
|
+
| -------------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
433
|
+
| `fixEcommerceStruct` | `fixEcommerceStruct()` | Cleans the ecommerce struct: sets `transaction_id` to null when `'(not set)'`, and fixes missing/NaN `purchase_revenue` for purchase events |
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
**Data freshness**
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
| Function | Example | Description |
|
|
440
|
+
| ------------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
441
|
+
| `isFinalData` | `isFinalData('DAY_THRESHOLD', 4)` | Returns SQL that evaluates to `true` when data is final. `'EXPORT_TYPE'` checks table suffix; `'DAY_THRESHOLD'` uses days since event (`dayThreshold` is required and must be a non-negative integer) |
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
## License
|
|
445
|
+
|
|
446
|
+
MIT
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.1.6-dev.9",
|
|
3
|
+
"version": "0.2.1-dev.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
"test": "node tests/ga4EventsEnhanced.test.js",
|
|
19
19
|
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
20
20
|
"test:merge": "node tests/mergeSQLConfigurations.test.js",
|
|
21
|
+
"readme": "node scripts/updateReadme.js",
|
|
21
22
|
"prepublishOnly": "node scripts/updateReadme.js"
|
|
22
23
|
},
|
|
23
24
|
"repository": {
|
package/preOperations.js
CHANGED
|
@@ -329,7 +329,10 @@ const createEnhancedEventsTable = (dataformPublish, config) => {
|
|
|
329
329
|
- Other improvements and refinements based on configuration
|
|
330
330
|
|
|
331
331
|
${constants.TABLE_DESCRIPTION_SUFFIX}
|
|
332
|
-
${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}
|
|
332
|
+
${constants.TABLE_DESCRIPTION_DOCUMENTATION_LINK}
|
|
333
|
+
|
|
334
|
+
The last full table refresh was done using this configuration:
|
|
335
|
+
${JSON.stringify(mergedConfig, null, 2)}`;
|
|
333
336
|
|
|
334
337
|
// the defaults for the dataform table config
|
|
335
338
|
const defaultDataformTableConfig = {
|