ga4-export-fixer 0.5.1-dev.4 → 0.5.2-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -150,7 +150,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
150
150
|
{
|
|
151
151
|
"dependencies": {
|
|
152
152
|
"@dataform/core": "3.0.42",
|
|
153
|
-
"ga4-export-fixer": "0.5.
|
|
153
|
+
"ga4-export-fixer": "0.5.1"
|
|
154
154
|
}
|
|
155
155
|
}
|
|
156
156
|
```
|
|
@@ -294,7 +294,8 @@ All fields are optional except `sourceTable`. Default values are applied automat
|
|
|
294
294
|
| `schemaLock` | string | `undefined` | Lock the table schema to a specific GA4 export table suffix. Accepts `"YYYYMMDD"` (daily), `"intraday_YYYYMMDD"`, or `"fresh_YYYYMMDD"`. Date must be >= `"20241009"` |
|
|
295
295
|
| `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
|
|
296
296
|
| `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
|
|
297
|
-
| `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight
|
|
297
|
+
| `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight. Auto-adjusted when `itemListAttribution.lookbackType` is `'TIME'` and the lookback exceeds `bufferDays` |
|
|
298
|
+
| `itemListAttribution` | object | `undefined` | Enable item list attribution. See [Item List Attribution](#item-list-attribution) |
|
|
298
299
|
| `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
|
|
299
300
|
| `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
|
|
300
301
|
| `excludedEvents` | string[] | `['session_start', 'first_visit']` | Event names to exclude from the table. These events are excluded by default because they have no use for analysis purposes. Override this to include them if needed |
|
|
@@ -412,6 +413,25 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
|
|
|
412
413
|
| `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
|
|
413
414
|
|
|
414
415
|
|
|
416
|
+
<a id="item-list-attribution"></a>
|
|
417
|
+
|
|
418
|
+
**`itemListAttribution`** — when set to an object, enables attribution of `item_list_name`, `item_list_id`, and `item_list_index` from `select_item`/`select_promotion` events to downstream ecommerce events (e.g. `add_to_cart`, `purchase`). Disabled by default.
|
|
419
|
+
|
|
420
|
+
| Field | Type | Required | Description |
|
|
421
|
+
| ---------------- | ------- | --------------------------- | --------------------------------------------------------------------- |
|
|
422
|
+
| `lookbackType` | string | Yes | `'SESSION'` (partition by session) or `'TIME'` (time-based window) |
|
|
423
|
+
| `lookbackTimeMs` | integer | When `lookbackType: 'TIME'` | Lookback window in milliseconds (e.g. `86400000` for 24h) |
|
|
424
|
+
|
|
425
|
+
```javascript
|
|
426
|
+
// Session-based: attribute within the same session
|
|
427
|
+
itemListAttribution: { lookbackType: 'SESSION' }
|
|
428
|
+
|
|
429
|
+
// Time-based: attribute within a 24-hour window across sessions
|
|
430
|
+
itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
> **Note:** This feature adds a compute-heavy CTE with a window function over unnested items. Only enable it if you need item list attribution for ecommerce analysis.
|
|
434
|
+
|
|
415
435
|
Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
|
|
416
436
|
|
|
417
437
|
### Creating Incremental Downstream Tables from ga4_events_enhanced
|
package/helpers/ga4Transforms.js
CHANGED
|
@@ -157,10 +157,71 @@ const getGa4ExportType = (tableSuffix) => {
|
|
|
157
157
|
end`;
|
|
158
158
|
};
|
|
159
159
|
|
|
160
|
+
/**
 * Generates a SQL LAST_VALUE window function that attributes item list fields
 * (item_list_name, item_list_id, item_list_index) from select_item/select_promotion
 * events to downstream ecommerce events using a lookback window.
 *
 * Returns a struct containing all three attributed fields via a single window sort.
 * The expression references `event_name`, `item`, and the partition columns
 * (`session_id` or `user_pseudo_id`), so it must be used in a query where those are in scope.
 *
 * @param {'SESSION'|'TIME'} lookbackType - Window scope: session-based or time-based
 * @param {string} timestampColumn - Column to order by ('event_timestamp' or 'event_custom_timestamp')
 * @param {number} [lookbackTimeMs] - Lookback window in milliseconds (required when lookbackType is 'TIME')
 * @returns {string} SQL expression that evaluates to a struct with item_list_name, item_list_id, item_list_index
 * @throws {Error} When lookbackType is neither 'SESSION' nor 'TIME', or when lookbackType is 'TIME'
 *                 and lookbackTimeMs is not a positive integer
 */
const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs) => {
    // only rows from these events contribute a value to carry forward
    const selectEvents = `event_name in ('select_item', 'select_promotion')`;
    const structExpr = `struct(item.item_list_name, item.item_list_id, item.item_list_index)`;

    let partitionBy;
    let frameBounds;

    if (lookbackType === 'SESSION') {
        // SESSION-based: everything earlier within the same session and item
        partitionBy = 'session_id, item.item_id';
        frameBounds = 'rows between unbounded preceding and current row';
    } else if (lookbackType === 'TIME') {
        // fail fast instead of emitting "range between NaN preceding" into the generated SQL
        if (!Number.isInteger(lookbackTimeMs) || lookbackTimeMs <= 0) {
            throw new Error(`itemListAttributionExpr: lookbackTimeMs must be a positive integer when lookbackType is 'TIME'. Received: ${JSON.stringify(lookbackTimeMs)}`);
        }
        // TIME-based: range window in microseconds
        // (assumes timestampColumn holds microsecond values - confirm for custom timestamp columns)
        const lookbackMicros = lookbackTimeMs * 1000;
        partitionBy = 'user_pseudo_id, item.item_id';
        frameBounds = `range between ${lookbackMicros} preceding and current row`;
    } else {
        // previously any unknown value was silently treated as 'TIME'
        throw new Error(`itemListAttributionExpr: lookbackType must be 'SESSION' or 'TIME'. Received: ${JSON.stringify(lookbackType)}`);
    }

    return `last_value(
        if(${selectEvents}, ${structExpr}, null) ignore nulls
    ) over(
        partition by ${partitionBy}
        order by ${timestampColumn} asc
        ${frameBounds}
    )`;
};
|
|
197
|
+
|
|
198
|
+
/**
 * Official GA4 ecommerce events that carry item data.
 * Based on: https://developers.google.com/analytics/devguides/collection/ga4/ecommerce
 *
 * Frozen because the list is exported and shared: consumers should derive their own
 * copies (e.g. via `.filter()` / `.map()`) instead of mutating it in place.
 */
const ga4EcommerceEvents = Object.freeze([
    'view_item_list',
    'select_item',
    'view_promotion',
    'select_promotion',
    'view_item',
    'add_to_wishlist',
    'add_to_cart',
    'remove_from_cart',
    'view_cart',
    'begin_checkout',
    'add_shipping_info',
    'add_payment_info',
    'purchase',
    'refund',
]);
|
|
218
|
+
|
|
160
219
|
module.exports = {
|
|
161
220
|
sessionId,
|
|
162
221
|
fixEcommerceStruct,
|
|
163
222
|
isFinalData,
|
|
164
223
|
isGa4ExportColumn,
|
|
165
|
-
getGa4ExportType
|
|
224
|
+
getGa4ExportType,
|
|
225
|
+
itemListAttributionExpr,
|
|
226
|
+
ga4EcommerceEvents
|
|
166
227
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.2-dev.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"test:validation": "node tests/inputValidation.test.js",
|
|
27
27
|
"test:createTable": "node tests/createTable.test.js",
|
|
28
28
|
"test:integration": "node tests/integration/integration.test.js",
|
|
29
|
+
"release:dev": "./scripts/release-dev.sh",
|
|
29
30
|
"readme": "node scripts/updateReadme.js",
|
|
30
31
|
"prepublishOnly": "node scripts/updateReadme.js"
|
|
31
32
|
},
|
|
@@ -36,6 +36,8 @@ const ga4EventsEnhancedConfig = {
|
|
|
36
36
|
// according to GA4 documentation, the data up to 72 hours old is subject to possible changes
|
|
37
37
|
// in reality, there have been cases where the data has changed even after 72 hours (4 day window would have covered these)
|
|
38
38
|
},
|
|
39
|
+
// optional item list attribution - disabled by default (compute-heavy, only useful for ecommerce sites)
|
|
40
|
+
itemListAttribution: undefined,
|
|
39
41
|
// number of additional days to include to account for sessions that overlap days
|
|
40
42
|
bufferDays: 1,
|
|
41
43
|
// these parameters are excluded by default because they've been made available in other columns
|
|
@@ -159,6 +159,17 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
159
159
|
// the most accurate available timestamp column
|
|
160
160
|
const timestampColumn = mergedConfig.customTimestampParam ? 'event_custom_timestamp' : 'event_timestamp';
|
|
161
161
|
|
|
162
|
+
// item list attribution config
|
|
163
|
+
const itemListAttribution = mergedConfig.itemListAttribution;
|
|
164
|
+
|
|
165
|
+
// auto-adjust bufferDays for time-based item list attribution lookback
|
|
166
|
+
const effectiveBufferDays = (itemListAttribution && itemListAttribution.lookbackType === 'TIME')
|
|
167
|
+
? Math.max(mergedConfig.bufferDays, Math.ceil(itemListAttribution.lookbackTimeMs / (24 * 60 * 60 * 1000)))
|
|
168
|
+
: mergedConfig.bufferDays;
|
|
169
|
+
const dateFilterConfig = effectiveBufferDays !== mergedConfig.bufferDays
|
|
170
|
+
? { ...mergedConfig, bufferDays: effectiveBufferDays }
|
|
171
|
+
: mergedConfig;
|
|
172
|
+
|
|
162
173
|
// exclude these events from the table
|
|
163
174
|
const excludedEvents = mergedConfig.excludedEvents;
|
|
164
175
|
const excludedEventsSQL = excludedEvents.length > 0 ? `and event_name not in (${excludedEvents.map(event => `'${event}'`).join(',')})` : '';
|
|
@@ -214,6 +225,8 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
214
225
|
// ecommerce
|
|
215
226
|
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
216
227
|
items: 'items',
|
|
228
|
+
// unique row id for item list attribution join
|
|
229
|
+
_event_row_id: itemListAttribution ? 'row_number() over()' : undefined,
|
|
217
230
|
// flag if the data is "final" and is not expected to change anymore
|
|
218
231
|
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
219
232
|
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
@@ -227,7 +240,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
227
240
|
},
|
|
228
241
|
},
|
|
229
242
|
from: mergedConfig.sourceTable,
|
|
230
|
-
where: `${helpers.ga4ExportDateFilters(
|
|
243
|
+
where: `${helpers.ga4ExportDateFilters(dateFilterConfig)}
|
|
231
244
|
${excludedEventsSQL}`,
|
|
232
245
|
};
|
|
233
246
|
|
|
@@ -248,28 +261,64 @@ ${excludedEventsSQL}`,
|
|
|
248
261
|
groupBy: ['session_id']
|
|
249
262
|
};
|
|
250
263
|
|
|
264
|
+
// item list attribution CTE: unnest items, attribute via window function, re-aggregate
|
|
265
|
+
const itemListDataStep = itemListAttribution ? (() => {
|
|
266
|
+
const attrExpr = helpers.itemListAttributionExpr(
|
|
267
|
+
itemListAttribution.lookbackType,
|
|
268
|
+
timestampColumn,
|
|
269
|
+
itemListAttribution.lookbackTimeMs
|
|
270
|
+
);
|
|
271
|
+
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
272
|
+
const ecommerceFilter = helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ');
|
|
273
|
+
|
|
274
|
+
return {
|
|
275
|
+
name: 'item_list_data',
|
|
276
|
+
columns: {
|
|
277
|
+
'_event_row_id': '_event_row_id',
|
|
278
|
+
'[sql]items': `array_agg(
|
|
279
|
+
(select as struct item.* replace(
|
|
280
|
+
coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
|
|
281
|
+
coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
|
|
282
|
+
coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
|
|
283
|
+
))
|
|
284
|
+
) as items`,
|
|
285
|
+
},
|
|
286
|
+
from: `(select _event_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceFilter}))`,
|
|
287
|
+
groupBy: ['_event_row_id'],
|
|
288
|
+
};
|
|
289
|
+
})() : null;
|
|
290
|
+
|
|
251
291
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
252
292
|
|
|
293
|
+
// When item list attribution is enabled, override the items column and exclude _event_row_id
|
|
294
|
+
// COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
|
|
295
|
+
const itemListOverrides = itemListDataStep ? {
|
|
296
|
+
items: 'coalesce(item_list_data.items, event_data.items)',
|
|
297
|
+
} : {};
|
|
298
|
+
const itemListExcludedColumns = itemListDataStep ? ['_event_row_id'] : [];
|
|
299
|
+
|
|
253
300
|
// Join event_data and session_data, include additional logic
|
|
254
301
|
const finalStep = {
|
|
255
302
|
name: 'final',
|
|
256
303
|
columns: {
|
|
257
304
|
// get the most important columns in the correct order
|
|
258
305
|
...finalColumnOrder,
|
|
306
|
+
...itemListOverrides,
|
|
259
307
|
// get the rest of the event_data columns
|
|
260
308
|
'[sql]event_data': utils.selectOtherColumns(
|
|
261
|
-
eventDataStep,
|
|
309
|
+
eventDataStep,
|
|
262
310
|
Object.keys(finalColumnOrder),
|
|
263
311
|
[
|
|
264
312
|
'entrances',
|
|
265
313
|
mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
|
|
266
314
|
'data_is_final',
|
|
267
315
|
'export_type',
|
|
316
|
+
...itemListExcludedColumns,
|
|
268
317
|
]
|
|
269
318
|
),
|
|
270
|
-
// get the rest of the session_data columns
|
|
319
|
+
// get the rest of the session_data columns
|
|
271
320
|
'[sql]session_data': utils.selectOtherColumns(
|
|
272
|
-
sessionDataStep,
|
|
321
|
+
sessionDataStep,
|
|
273
322
|
Object.keys(finalColumnOrder),
|
|
274
323
|
[]
|
|
275
324
|
),
|
|
@@ -280,6 +329,10 @@ ${excludedEventsSQL}`,
|
|
|
280
329
|
},
|
|
281
330
|
from: 'event_data',
|
|
282
331
|
leftJoin: [
|
|
332
|
+
...(itemListDataStep ? [{
|
|
333
|
+
table: 'item_list_data',
|
|
334
|
+
condition: 'using(_event_row_id)'
|
|
335
|
+
}] : []),
|
|
283
336
|
{
|
|
284
337
|
table: 'session_data',
|
|
285
338
|
condition: 'using(session_id)'
|
|
@@ -290,6 +343,7 @@ ${excludedEventsSQL}`,
|
|
|
290
343
|
|
|
291
344
|
const steps = [
|
|
292
345
|
eventDataStep,
|
|
346
|
+
...(itemListDataStep ? [itemListDataStep] : []),
|
|
293
347
|
sessionDataStep,
|
|
294
348
|
finalStep,
|
|
295
349
|
];
|
|
@@ -129,6 +129,29 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
|
|
|
129
129
|
throw new Error(`config.dataIsFinal.detectionMethod must be 'DAY_THRESHOLD' when daily export is not enabled (config.includedExportTypes.daily is false). A dayThreshold of 1 is recommended for intraday only setups. With fresh export, the GA4 data is subject to possible changes for up to 72 hours. Received: ${JSON.stringify(config.dataIsFinal.detectionMethod)}`);
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
// itemListAttribution - optional; must be undefined or a valid config object
|
|
133
|
+
if (typeof config.itemListAttribution !== 'undefined') {
|
|
134
|
+
if (!config.itemListAttribution || typeof config.itemListAttribution !== 'object' || Array.isArray(config.itemListAttribution)) {
|
|
135
|
+
throw new Error(`config.itemListAttribution must be an object when provided. Received: ${JSON.stringify(config.itemListAttribution)}`);
|
|
136
|
+
}
|
|
137
|
+
if (typeof config.itemListAttribution.lookbackType === 'undefined') {
|
|
138
|
+
throw new Error("config.itemListAttribution.lookbackType is required. Must be 'SESSION' or 'TIME'.");
|
|
139
|
+
}
|
|
140
|
+
if (config.itemListAttribution.lookbackType !== 'SESSION' && config.itemListAttribution.lookbackType !== 'TIME') {
|
|
141
|
+
throw new Error(`config.itemListAttribution.lookbackType must be 'SESSION' or 'TIME'. Received: ${JSON.stringify(config.itemListAttribution.lookbackType)}`);
|
|
142
|
+
}
|
|
143
|
+
if (config.itemListAttribution.lookbackType === 'TIME') {
|
|
144
|
+
if (typeof config.itemListAttribution.lookbackTimeMs === 'undefined') {
|
|
145
|
+
throw new Error("config.itemListAttribution.lookbackTimeMs is required when lookbackType is 'TIME'.");
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
if (typeof config.itemListAttribution.lookbackTimeMs !== 'undefined') {
|
|
149
|
+
if (typeof config.itemListAttribution.lookbackTimeMs !== 'number' || !Number.isInteger(config.itemListAttribution.lookbackTimeMs) || config.itemListAttribution.lookbackTimeMs <= 0) {
|
|
150
|
+
throw new Error(`config.itemListAttribution.lookbackTimeMs must be a positive integer. Received: ${JSON.stringify(config.itemListAttribution.lookbackTimeMs)}`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
132
155
|
// bufferDays - required
|
|
133
156
|
if (typeof config.bufferDays !== 'number' || !Number.isInteger(config.bufferDays) || config.bufferDays < 0) {
|
|
134
157
|
throw new Error(`config.bufferDays must be a non-negative integer. Received: ${JSON.stringify(config.bufferDays)}`);
|