ga4-export-fixer 0.5.1 → 0.5.2-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -294,7 +294,8 @@ All fields are optional except `sourceTable`. Default values are applied automat
294
294
  | `schemaLock` | string | `undefined` | Lock the table schema to a specific GA4 export table suffix. Accepts `"YYYYMMDD"` (daily), `"intraday_YYYYMMDD"`, or `"fresh_YYYYMMDD"`. Date must be >= `"20241009"` |
295
295
  | `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
296
296
  | `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
297
- | `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight |
297
+ | `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight. Automatically raised to the lookback window rounded up to whole days when `itemListAttribution.lookbackType` is `'TIME'` and `lookbackTimeMs` covers more days than `bufferDays` |
298
+ | `itemListAttribution` | object | `undefined` | Enable item list attribution. See [Item List Attribution](#item-list-attribution) |
298
299
  | `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
299
300
  | `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
300
301
  | `excludedEvents` | string[] | `['session_start', 'first_visit']` | Event names to exclude from the table. These events are excluded by default because they have no use for analysis purposes. Override this to include them if needed |
@@ -412,6 +413,25 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
412
413
  | `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
413
414
 
414
415
 
416
+ <a id="item-list-attribution"></a>
417
+
418
+ **`itemListAttribution`** — when set to an object, enables attribution of `item_list_name`, `item_list_id`, and `item_list_index` from `select_item`/`select_promotion` events to downstream ecommerce events (e.g. `add_to_cart`, `purchase`). Disabled by default.
419
+
420
+ | Field | Type | Required | Description |
421
+ | ---------------- | ------- | --------------------------- | --------------------------------------------------------------------- |
422
+ | `lookbackType` | string | Yes | `'SESSION'` (look back only within the same session) or `'TIME'` (look back across the same user's events within `lookbackTimeMs`) |
423
+ | `lookbackTimeMs` | integer | When `lookbackType: 'TIME'` | Lookback window in milliseconds (e.g. `86400000` for 24h) |
424
+
425
+ ```javascript
426
+ // Session-based: attribute within the same session
427
+ itemListAttribution: { lookbackType: 'SESSION' }
428
+
429
+ // Time-based: attribute within a 24-hour window across sessions
430
+ itemListAttribution: { lookbackType: 'TIME', lookbackTimeMs: 86400000 }
431
+ ```
432
+
433
+ > **Note:** This feature adds a compute-heavy CTE with a window function over unnested items. Only enable it if you need item list attribution for ecommerce analysis.
434
+
415
435
  Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
416
436
 
417
437
  ### Creating Incremental Downstream Tables from ga4_events_enhanced
@@ -157,10 +157,71 @@ const getGa4ExportType = (tableSuffix) => {
157
157
  end`;
158
158
  };
159
159
 
160
/**
 * Generates a SQL LAST_VALUE window function that attributes item list fields
 * (item_list_name, item_list_id, item_list_index) from select_item/select_promotion
 * events to downstream ecommerce events using a lookback window.
 *
 * All three fields are packed into one struct so that a single window sort
 * produces every attributed value.
 *
 * - 'SESSION': partitions by session_id and item.item_id and looks at every
 *   preceding row of the partition.
 * - 'TIME': partitions by user_pseudo_id and item.item_id and uses a RANGE frame
 *   of lookbackTimeMs converted to microseconds (assumes timestampColumn holds
 *   microsecond values - confirm for custom timestamp columns).
 *
 * @param {'SESSION'|'TIME'} lookbackType - Window scope: session-based or time-based
 * @param {string} timestampColumn - Column to order by ('event_timestamp' or 'event_custom_timestamp')
 * @param {number} [lookbackTimeMs] - Lookback window in milliseconds (required when lookbackType is 'TIME')
 * @returns {string} SQL expression that evaluates to a struct with item_list_name, item_list_id, item_list_index
 * @throws {Error} If lookbackType is not 'SESSION' or 'TIME', or if lookbackType is 'TIME'
 *   and lookbackTimeMs is not a positive integer
 */
const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs) => {
    // Fail fast instead of silently treating an unknown type as 'TIME'.
    if (lookbackType !== 'SESSION' && lookbackType !== 'TIME') {
        throw new Error(`lookbackType must be 'SESSION' or 'TIME'. Received: ${JSON.stringify(lookbackType)}`);
    }

    const selectEvents = `event_name in ('select_item', 'select_promotion')`;
    const structExpr = `struct(item.item_list_name, item.item_list_id, item.item_list_index)`;

    let partitionBy;
    let frameBounds;

    if (lookbackType === 'SESSION') {
        partitionBy = 'session_id, item.item_id';
        frameBounds = 'rows between unbounded preceding and current row';
    } else {
        // Without this guard a missing value would render "range between NaN preceding".
        if (!Number.isInteger(lookbackTimeMs) || lookbackTimeMs <= 0) {
            throw new Error(`lookbackTimeMs must be a positive integer when lookbackType is 'TIME'. Received: ${JSON.stringify(lookbackTimeMs)}`);
        }
        // TIME-based: range window in microseconds
        const lookbackMicros = lookbackTimeMs * 1000;
        partitionBy = 'user_pseudo_id, item.item_id';
        frameBounds = `range between ${lookbackMicros} preceding and current row`;
    }

    // Only select_item/select_promotion rows contribute a value; "ignore nulls"
    // makes every other row pick up the latest one inside the frame.
    return `last_value(
    if(${selectEvents}, ${structExpr}, null) ignore nulls
) over(
    partition by ${partitionBy}
    order by ${timestampColumn} asc
    ${frameBounds}
)`;
};
197
+
198
/**
 * Official GA4 ecommerce events that carry item data.
 * Based on: https://developers.google.com/analytics/devguides/collection/ga4/ecommerce
 *
 * The array is exported and shared between modules, so it is frozen to stop
 * one consumer from accidentally changing the list for everyone else.
 * Derive variations with non-mutating methods such as filter() or map().
 */
const ga4EcommerceEvents = Object.freeze([
    'view_item_list',
    'select_item',
    'view_promotion',
    'select_promotion',
    'view_item',
    'add_to_wishlist',
    'add_to_cart',
    'remove_from_cart',
    'view_cart',
    'begin_checkout',
    'add_shipping_info',
    'add_payment_info',
    'purchase',
    'refund',
]);
218
+
160
219
  module.exports = {
161
220
  sessionId,
162
221
  fixEcommerceStruct,
163
222
  isFinalData,
164
223
  isGa4ExportColumn,
165
- getGa4ExportType
224
+ getGa4ExportType,
225
+ itemListAttributionExpr,
226
+ ga4EcommerceEvents
166
227
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.5.1",
3
+ "version": "0.5.2-dev.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -26,6 +26,7 @@
26
26
  "test:validation": "node tests/inputValidation.test.js",
27
27
  "test:createTable": "node tests/createTable.test.js",
28
28
  "test:integration": "node tests/integration/integration.test.js",
29
+ "release:dev": "./scripts/release-dev.sh",
29
30
  "readme": "node scripts/updateReadme.js",
30
31
  "prepublishOnly": "node scripts/updateReadme.js"
31
32
  },
@@ -36,6 +36,8 @@ const ga4EventsEnhancedConfig = {
36
36
  // according to GA4 documentation, the data up to 72 hours old is subject to possible changes
37
37
  // in reality, there have been cases where the data has changed even after 72 hours (4 day window would have covered these)
38
38
  },
39
+ // optional item list attribution - disabled by default (compute-heavy, only useful for ecommerce sites)
40
+ itemListAttribution: undefined,
39
41
  // number of additional days to take in for taking into account sessions that overlap days
40
42
  bufferDays: 1,
41
43
  // these parameters are excluded by default because they've been made available in other columns
@@ -159,6 +159,17 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
159
159
  // the most accurate available timestamp column
160
160
  const timestampColumn = mergedConfig.customTimestampParam ? 'event_custom_timestamp' : 'event_timestamp';
161
161
 
162
+ // item list attribution config
163
+ const itemListAttribution = mergedConfig.itemListAttribution;
164
+
165
+ // auto-adjust bufferDays for time-based item list attribution lookback
166
+ const effectiveBufferDays = (itemListAttribution && itemListAttribution.lookbackType === 'TIME')
167
+ ? Math.max(mergedConfig.bufferDays, Math.ceil(itemListAttribution.lookbackTimeMs / (24 * 60 * 60 * 1000)))
168
+ : mergedConfig.bufferDays;
169
+ const dateFilterConfig = effectiveBufferDays !== mergedConfig.bufferDays
170
+ ? { ...mergedConfig, bufferDays: effectiveBufferDays }
171
+ : mergedConfig;
172
+
162
173
  // exclude these events from the table
163
174
  const excludedEvents = mergedConfig.excludedEvents;
164
175
  const excludedEventsSQL = excludedEvents.length > 0 ? `and event_name not in (${excludedEvents.map(event => `'${event}'`).join(',')})` : '';
@@ -214,6 +225,8 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
214
225
  // ecommerce
215
226
  ecommerce: helpers.fixEcommerceStruct('ecommerce'),
216
227
  items: 'items',
228
+ // unique row id for item list attribution join
229
+ _event_row_id: itemListAttribution ? 'row_number() over()' : undefined,
217
230
  // flag if the data is "final" and is not expected to change anymore
218
231
  data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
219
232
  export_type: helpers.getGa4ExportType('_table_suffix'),
@@ -227,7 +240,7 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
227
240
  },
228
241
  },
229
242
  from: mergedConfig.sourceTable,
230
- where: `${helpers.ga4ExportDateFilters(mergedConfig)}
243
+ where: `${helpers.ga4ExportDateFilters(dateFilterConfig)}
231
244
  ${excludedEventsSQL}`,
232
245
  };
233
246
 
@@ -248,28 +261,64 @@ ${excludedEventsSQL}`,
248
261
  groupBy: ['session_id']
249
262
  };
250
263
 
264
+ // item list attribution CTE: unnest items, attribute via window function, re-aggregate
265
+ const itemListDataStep = itemListAttribution ? (() => {
266
+ const attrExpr = helpers.itemListAttributionExpr(
267
+ itemListAttribution.lookbackType,
268
+ timestampColumn,
269
+ itemListAttribution.lookbackTimeMs
270
+ );
271
+ const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
272
+ const ecommerceFilter = helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ');
273
+
274
+ return {
275
+ name: 'item_list_data',
276
+ columns: {
277
+ '_event_row_id': '_event_row_id',
278
+ 'items': `array_agg(
279
+ (select as struct item.* replace(
280
+ coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
281
+ coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
282
+ coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
283
+ ))
284
+ )`,
285
+ },
286
+ from: `(select _event_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceFilter}))`,
287
+ groupBy: ['_event_row_id'],
288
+ };
289
+ })() : null;
290
+
251
291
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
252
292
 
293
+ // When item list attribution is enabled, override the items column and exclude _event_row_id
294
+ // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
295
+ const itemListOverrides = itemListDataStep ? {
296
+ items: 'coalesce(item_list_data.items, event_data.items)',
297
+ } : {};
298
+ const itemListExcludedColumns = itemListDataStep ? ['_event_row_id'] : [];
299
+
253
300
  // Join event_data and session_data, include additional logic
254
301
  const finalStep = {
255
302
  name: 'final',
256
303
  columns: {
257
304
  // get the most important columns in the correct order
258
305
  ...finalColumnOrder,
306
+ ...itemListOverrides,
259
307
  // get the rest of the event_data columns
260
308
  '[sql]event_data': utils.selectOtherColumns(
261
- eventDataStep,
309
+ eventDataStep,
262
310
  Object.keys(finalColumnOrder),
263
311
  [
264
312
  'entrances',
265
313
  mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
266
314
  'data_is_final',
267
315
  'export_type',
316
+ ...itemListExcludedColumns,
268
317
  ]
269
318
  ),
270
- // get the rest of the session_data columns
319
+ // get the rest of the session_data columns
271
320
  '[sql]session_data': utils.selectOtherColumns(
272
- sessionDataStep,
321
+ sessionDataStep,
273
322
  Object.keys(finalColumnOrder),
274
323
  []
275
324
  ),
@@ -280,6 +329,10 @@ ${excludedEventsSQL}`,
280
329
  },
281
330
  from: 'event_data',
282
331
  leftJoin: [
332
+ ...(itemListDataStep ? [{
333
+ table: 'item_list_data',
334
+ condition: 'using(_event_row_id)'
335
+ }] : []),
283
336
  {
284
337
  table: 'session_data',
285
338
  condition: 'using(session_id)'
@@ -290,6 +343,7 @@ ${excludedEventsSQL}`,
290
343
 
291
344
  const steps = [
292
345
  eventDataStep,
346
+ ...(itemListDataStep ? [itemListDataStep] : []),
293
347
  sessionDataStep,
294
348
  finalStep,
295
349
  ];
@@ -129,6 +129,29 @@ const validateEnhancedEventsConfig = (config, options = {}) => {
129
129
  throw new Error(`config.dataIsFinal.detectionMethod must be 'DAY_THRESHOLD' when daily export is not enabled (config.includedExportTypes.daily is false). A dayThreshold of 1 is recommended for intraday only setups. With fresh export, the GA4 data is subject to possible changes for up to 72 hours. Received: ${JSON.stringify(config.dataIsFinal.detectionMethod)}`);
130
130
  }
131
131
 
132
+ // itemListAttribution - optional; must be undefined or a valid config object
133
+ if (typeof config.itemListAttribution !== 'undefined') {
134
+ if (!config.itemListAttribution || typeof config.itemListAttribution !== 'object' || Array.isArray(config.itemListAttribution)) {
135
+ throw new Error(`config.itemListAttribution must be an object when provided. Received: ${JSON.stringify(config.itemListAttribution)}`);
136
+ }
137
+ if (typeof config.itemListAttribution.lookbackType === 'undefined') {
138
+ throw new Error("config.itemListAttribution.lookbackType is required. Must be 'SESSION' or 'TIME'.");
139
+ }
140
+ if (config.itemListAttribution.lookbackType !== 'SESSION' && config.itemListAttribution.lookbackType !== 'TIME') {
141
+ throw new Error(`config.itemListAttribution.lookbackType must be 'SESSION' or 'TIME'. Received: ${JSON.stringify(config.itemListAttribution.lookbackType)}`);
142
+ }
143
+ if (config.itemListAttribution.lookbackType === 'TIME') {
144
+ if (typeof config.itemListAttribution.lookbackTimeMs === 'undefined') {
145
+ throw new Error("config.itemListAttribution.lookbackTimeMs is required when lookbackType is 'TIME'.");
146
+ }
147
+ }
148
+ if (typeof config.itemListAttribution.lookbackTimeMs !== 'undefined') {
149
+ if (typeof config.itemListAttribution.lookbackTimeMs !== 'number' || !Number.isInteger(config.itemListAttribution.lookbackTimeMs) || config.itemListAttribution.lookbackTimeMs <= 0) {
150
+ throw new Error(`config.itemListAttribution.lookbackTimeMs must be a positive integer. Received: ${JSON.stringify(config.itemListAttribution.lookbackTimeMs)}`);
151
+ }
152
+ }
153
+ }
154
+
132
155
  // bufferDays - required
133
156
  if (typeof config.bufferDays !== 'number' || !Number.isInteger(config.bufferDays) || config.bufferDays < 0) {
134
157
  throw new Error(`config.bufferDays must be a non-negative integer. Received: ${JSON.stringify(config.bufferDays)}`);