ga4-export-fixer 0.6.2-dev.4 → 0.7.1-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -20
- package/helpers/ga4Transforms.js +35 -0
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/index.js +1 -6
package/README.md
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
<img src="docs/images/header.svg" alt="ga4-export-fixer">
|
|
2
|
+
|
|
3
|
+
# An enhanced, incremental GA4 events table, built with Dataform
|
|
2
4
|
|
|
3
5
|
[](https://www.npmjs.com/package/ga4-export-fixer)
|
|
6
|
+
[](https://github.com/tanelytics/ga4-export-fixer/blob/main/LICENSE)
|
|
7
|
+

|
|
4
8
|
|
|
5
9
|
**ga4-export-fixer** is a **Dataform NPM package** that transforms raw GA4 BigQuery export data into a cleaner, more queryable incremental table. It combines **daily, fresh (360), and intraday exports** so the best available version of each event is always in use, adds session-level fields like `session_id` and `landing_page`, promotes key event parameters to columns, and fixes known GA4 export issues — handling the boilerplate transformations that are otherwise tedious to include in every GA4 query.
|
|
6
10
|
|
|
@@ -10,10 +14,10 @@ The goal of the package is to **speed up development** when building data models
|
|
|
10
14
|
|
|
11
15
|
*Example data model built with ga4-export-fixer*
|
|
12
16
|
|
|
13
|
-
|
|
17
|
+
## Table of Contents
|
|
14
18
|
<!-- TOC -->
|
|
15
|
-
|
|
16
|
-
|
|
19
|
+
- [Main Features](#main-features)
|
|
20
|
+
- [Planned Features](#planned-features)
|
|
17
21
|
- [Installation](#installation)
|
|
18
22
|
- [Bash](#bash)
|
|
19
23
|
- [In Google Cloud Dataform](#in-google-cloud-dataform)
|
|
@@ -21,12 +25,12 @@ The goal of the package is to **speed up development** when building data models
|
|
|
21
25
|
- [Create GA4 Events Enhanced Table](#create-ga4-events-enhanced-table)
|
|
22
26
|
- [Configuration Object](#configuration-object)
|
|
23
27
|
- [Assertions](#assertions)
|
|
24
|
-
- [Creating Incremental Downstream Tables from ga4_events_enhanced](#creating-incremental-downstream-tables-from-ga4_events_enhanced)
|
|
28
|
+
- [Creating Incremental Downstream Tables from `ga4_events_enhanced`](#creating-incremental-downstream-tables-from-ga4_events_enhanced)
|
|
25
29
|
- [Helpers](#helpers)
|
|
26
30
|
- [License](#license)
|
|
27
31
|
<!-- /TOC -->
|
|
28
32
|
|
|
29
|
-
|
|
33
|
+
## Main Features
|
|
30
34
|
|
|
31
35
|
<table>
|
|
32
36
|
<tr>
|
|
@@ -105,31 +109,33 @@ The goal of the package is to **speed up development** when building data models
|
|
|
105
109
|
<code>data_is_final</code> flag and <code>export_type</code> label on every row
|
|
106
110
|
</td>
|
|
107
111
|
<td valign="top">
|
|
108
|
-
<b
|
|
109
|
-
|
|
112
|
+
<b>🔍 Data Quality Assertions</b><br>
|
|
113
|
+
Built-in daily assertion reconciles sessions, events, and revenue between the enhanced table and raw export
|
|
110
114
|
</td>
|
|
111
115
|
</tr>
|
|
112
116
|
<tr>
|
|
117
|
+
<td valign="top">
|
|
118
|
+
<b>🔃 Selective Re-processing</b><br>
|
|
119
|
+
Re-process a date range without a full table rebuild using <code>incrementalStartOverride</code> and <code>incrementalEndOverride</code>
|
|
120
|
+
</td>
|
|
113
121
|
<td valign="top">
|
|
114
122
|
<b>📑 Batch Processing</b><br>
|
|
115
123
|
Process large exports in smaller batches via <code>numberOfDaysToProcess</code>
|
|
116
124
|
</td>
|
|
125
|
+
</tr>
|
|
126
|
+
<tr>
|
|
117
127
|
<td valign="top">
|
|
118
128
|
<b>🕐 Timezone-Aware Datetime</b><br>
|
|
119
129
|
<code>event_datetime</code> converted to a configurable IANA timezone
|
|
120
130
|
</td>
|
|
121
|
-
</tr>
|
|
122
|
-
<tr>
|
|
123
131
|
<td valign="top">
|
|
124
132
|
<b>🛡️ Zero Dependencies</b><br>
|
|
125
133
|
No additional external dependencies added to your Dataform repository
|
|
126
134
|
</td>
|
|
127
|
-
<td valign="top">
|
|
128
|
-
</td>
|
|
129
135
|
</tr>
|
|
130
136
|
</table>
|
|
131
137
|
|
|
132
|
-
|
|
138
|
+
## Planned Features
|
|
133
139
|
|
|
134
140
|
Features under consideration for future releases:
|
|
135
141
|
|
|
@@ -138,7 +144,6 @@ Features under consideration for future releases:
|
|
|
138
144
|
- Data enrichment (item-level, session-level, event-level)
|
|
139
145
|
- Custom processing steps (additional CTEs)
|
|
140
146
|
- Custom traffic source attribution
|
|
141
|
-
- Default assertions
|
|
142
147
|
|
|
143
148
|
## Installation
|
|
144
149
|
|
|
@@ -158,7 +163,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
158
163
|
{
|
|
159
164
|
"dependencies": {
|
|
160
165
|
"@dataform/core": "3.0.42",
|
|
161
|
-
"ga4-export-fixer": "0.
|
|
166
|
+
"ga4-export-fixer": "0.7.0"
|
|
162
167
|
}
|
|
163
168
|
}
|
|
164
169
|
```
|
|
@@ -175,11 +180,11 @@ If your Dataform repository does not have a package.json file, see this guide: [
|
|
|
175
180
|
|
|
176
181
|
Creates an **enhanced** version of the GA4 BigQuery export (daily & intraday).
|
|
177
182
|
|
|
178
|
-
#### JS Deployment (Recommended)
|
|
183
|
+
#### JS Deployment (Recommended) 
|
|
179
184
|
|
|
180
185
|
Create a new **ga4_events_enhanced** table using a **.js** file in your repository's **definitions** folder.
|
|
181
186
|
|
|
182
|
-
|
|
187
|
+
**Using Defaults**
|
|
183
188
|
|
|
184
189
|
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
185
190
|
|
|
@@ -193,7 +198,7 @@ const config = {
|
|
|
193
198
|
ga4EventsEnhanced.createTable(publish, config);
|
|
194
199
|
```
|
|
195
200
|
|
|
196
|
-
|
|
201
|
+
**With Custom Configuration**
|
|
197
202
|
|
|
198
203
|
**`definitions/ga4/ga4_events_enhanced.js`**
|
|
199
204
|
|
|
@@ -256,7 +261,7 @@ const config = {
|
|
|
256
261
|
ga4EventsEnhanced.createTable(publish, config);
|
|
257
262
|
```
|
|
258
263
|
|
|
259
|
-
#### SQLX Deployment
|
|
264
|
+
#### SQLX Deployment 
|
|
260
265
|
|
|
261
266
|
Alternatively, you can create the **ga4_events_enhanced** table using a .SQLX file.
|
|
262
267
|
|
|
@@ -292,6 +297,10 @@ pre_operations {
|
|
|
292
297
|
}
|
|
293
298
|
```
|
|
294
299
|
|
|
300
|
+
<br>
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
295
304
|
### Configuration Object
|
|
296
305
|
|
|
297
306
|
All fields are optional except `sourceTable`. Default values are applied automatically, so you only need to specify the fields you want to override.
|
|
@@ -445,6 +454,10 @@ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
|
|
|
445
454
|
|
|
446
455
|
> **Note:** This feature adds a compute-heavy CTE with a window function over unnested items. Only enable it if you need item list attribution for ecommerce analysis.
|
|
447
456
|
|
|
457
|
+
<br>
|
|
458
|
+
|
|
459
|
+
---
|
|
460
|
+
|
|
448
461
|
### Assertions
|
|
449
462
|
|
|
450
463
|
The package includes built-in data quality assertions that can be automatically created alongside the enhanced events table. Pass Dataform's `assert` function as the third argument to `createTable`:
|
|
@@ -503,7 +516,11 @@ assert('daily_quality_check', {
|
|
|
503
516
|
});
|
|
504
517
|
```
|
|
505
518
|
|
|
506
|
-
|
|
519
|
+
<br>
|
|
520
|
+
|
|
521
|
+
---
|
|
522
|
+
|
|
523
|
+
### Creating Incremental Downstream Tables from `ga4_events_enhanced`
|
|
507
524
|
|
|
508
525
|
Setting up incremental updates is easy using the **setPreOperations()** function. Just ensure that your result table includes the **data_is_final** flag from the **ga4_events_enhanced** table.
|
|
509
526
|
|
|
@@ -573,6 +590,10 @@ pre_operations {
|
|
|
573
590
|
}
|
|
574
591
|
```
|
|
575
592
|
|
|
593
|
+
<br>
|
|
594
|
+
|
|
595
|
+
---
|
|
596
|
+
|
|
576
597
|
### Helpers
|
|
577
598
|
|
|
578
599
|
The helpers contain templates for common SQL expressions. The functions are referenced by **ga4EventsEnhanced** but can also be imported as utility functions for working with GA4 data.
|
package/helpers/ga4Transforms.js
CHANGED
|
@@ -195,6 +195,40 @@ const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs)
|
|
|
195
195
|
)`;
|
|
196
196
|
};
|
|
197
197
|
|
|
198
|
+
/**
|
|
199
|
+
* Generates a SQL expression for a deterministic hash-based row id used by the
|
|
200
|
+
* item list attribution join. Only computed for events in `ecommerceEventsFilter`;
|
|
201
|
+
* other events get NULL. NOTE(review): BigQuery concat() returns NULL when any argument is NULL, so a NULL user_pseudo_id would presumably also yield a NULL id — confirm against upstream data.
|
|
202
|
+
*
|
|
203
|
+
* The explicit ORDER BY inside the row_number() window keeps the id stable across CTE re-evaluations:
|
|
204
|
+
* BigQuery may inline the CTE and re-run the window per reference, so without
|
|
205
|
+
* a stable ordering the two sides of the downstream join could hash differently.
|
|
206
|
+
* Partitioning by event_name and user_pseudo_id avoids a single-partition bottleneck.
|
|
207
|
+
* Residual collisions (identical event_timestamp + identical items within the same partition) are safe —
|
|
208
|
+
* the rows are interchangeable, so arbitrary row number assignment between them
|
|
209
|
+
* produces the same result.
|
|
210
|
+
*
|
|
211
|
+
* @param {string} ecommerceEventsFilter - Comma-separated, quoted list of event names
|
|
212
|
+
* (e.g., "'purchase', 'add_to_cart'"). Interpolated verbatim into the `event_name in (...)` list.
|
|
213
|
+
* @returns {string} SQL expression that evaluates to the row id or NULL.
|
|
214
|
+
*/
|
|
215
|
+
const itemListAttributionRowId = (ecommerceEventsFilter) => {
|
|
216
|
+
return `if(
|
|
217
|
+
event_name in (${ecommerceEventsFilter}),
|
|
218
|
+
farm_fingerprint(concat(
|
|
219
|
+
user_pseudo_id,
|
|
220
|
+
cast(event_timestamp as string),
|
|
221
|
+
event_name,
|
|
222
|
+
to_json_string(items),
|
|
223
|
+
cast(row_number() over(
|
|
224
|
+
partition by event_name, user_pseudo_id
|
|
225
|
+
order by event_timestamp, to_json_string(items)
|
|
226
|
+
) as string)
|
|
227
|
+
)),
|
|
228
|
+
null
|
|
229
|
+
)`;
|
|
230
|
+
};
|
|
231
|
+
|
|
198
232
|
/**
|
|
199
233
|
* Official GA4 ecommerce events that carry item data.
|
|
200
234
|
* Based on: https://developers.google.com/analytics/devguides/collection/ga4/ecommerce
|
|
@@ -223,5 +257,6 @@ module.exports = {
|
|
|
223
257
|
isGa4ExportColumn,
|
|
224
258
|
getGa4ExportType,
|
|
225
259
|
itemListAttributionExpr,
|
|
260
|
+
itemListAttributionRowId,
|
|
226
261
|
ga4EcommerceEvents
|
|
227
262
|
};
|
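package/package.json
CHANGED
(diff body for this file was not captured in this extract; summary above lists +1 -1)
package/tables/ga4EventsEnhanced/index.js
CHANGED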
|
@@ -229,12 +229,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
229
229
|
// ecommerce
|
|
230
230
|
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
231
231
|
items: 'items',
|
|
232
|
-
|
|
233
|
-
// row_number() breaks hash collisions for batched events with identical data.
|
|
234
|
-
// partition by event_name avoids a single-partition bottleneck in the window function.
|
|
235
|
-
// Non-determinism is safe: colliding rows have identical items (to_json_string(items) is in the hash),
|
|
236
|
-
// so swapping row numbers between them produces the same final result.
|
|
237
|
-
_item_list_attribution_row_id: itemListAttribution ? `if(event_name in (${ecommerceEventsFilter}), farm_fingerprint(concat(user_pseudo_id, cast(event_timestamp as string), event_name, to_json_string(items), cast(row_number() over(partition by event_name, user_pseudo_id) as string))), null)` : undefined,
|
|
232
|
+
_item_list_attribution_row_id: itemListAttribution ? helpers.itemListAttributionRowId(ecommerceEventsFilter) : undefined,
|
|
238
233
|
// flag if the data is "final" and is not expected to change anymore
|
|
239
234
|
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
240
235
|
export_type: helpers.getGa4ExportType('_table_suffix'),
|