ga4-export-fixer 0.7.1 → 0.8.0-dev.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.7.1",
3
+ "version": "0.8.0-dev.2",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,7 @@
17
17
  "createTable.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js",
21
21
  "test:summary": "node tests/testRunner.js",
22
22
  "test:docs": "node tests/documentation.test.js",
23
23
  "test:preops": "node tests/preOperations.test.js",
@@ -26,6 +26,8 @@
26
26
  "test:validation": "node tests/inputValidation.test.js",
27
27
  "test:assertions": "node tests/assertions.test.js",
28
28
  "test:createTable": "node tests/createTable.test.js",
29
+ "test:queryBuilder": "node tests/queryBuilder.test.js",
30
+ "test:customSteps": "node tests/customSteps.test.js",
29
31
  "test:integration": "node tests/integration/integration.test.js",
30
32
  "release:dev": "./scripts/release-dev.sh",
31
33
  "readme": "node scripts/updateReadme.js",
@@ -1,70 +1,73 @@
1
- const { baseConfig } = require('../../defaultConfig.js');
2
-
3
- /*
4
- The default configuration for the GA4 Events Enhanced table.
5
- */
6
- const ga4EventsEnhancedConfig = {
7
- ...baseConfig,
8
- sourceTable: undefined,
9
- sourceTableType: 'GA4_EXPORT', // used with pre operations to detect if ga4 export specific pre operations are needed
10
- // optional but recommended
11
- schemaLock: undefined,
12
- // only used with js tables
13
- dataformTableConfig: {
14
- type: 'incremental',
15
- bigquery: {
16
- partitionBy: 'event_date',
17
- clusterBy: ['event_name', 'session_id', 'page_location', 'data_is_final'],
18
- labels: {
19
- 'ga4_export_fixer': 'true'
20
- }
21
- },
22
- onSchemaChange: 'EXTEND',
23
- tags: ['ga4_export_fixer'],
24
- },
25
- // optional
26
- includedExportTypes: {
27
- daily: true,
28
- fresh: false,
29
- intraday: true,
30
- },
31
- timezone: 'Etc/UTC',
32
- customTimestampParam: undefined,
33
- dataIsFinal: {
34
- detectionMethod: 'DAY_THRESHOLD', // 'EXPORT_TYPE' or 'DAY_THRESHOLD'
35
- dayThreshold: 3 // only used if detectionMethod is 'DAY_THRESHOLD'
36
- // according to GA4 documentation, the data up to 72 hours old is subject to possible changes
37
- // in reality, there have been cases where the data has changed even after 72 hours (4 day window would have covered these)
38
- },
39
- // optional item list attribution - disabled by default (compute-heavy, only useful for ecommerce sites)
40
- itemListAttribution: undefined,
41
- // number of additional days to take in for taking into account sessions that overlap days
42
- bufferDays: 1,
43
- // these parameters are excluded by default because they've been made available in other columns
44
- defaultExcludedEventParams: [
45
- 'page_location',
46
- 'ga_session_id',
47
- //'custom_event_timestamp', // removed if customTimestampParam is used
48
- ],
49
- excludedEventParams: [],
50
- eventParamsToColumns: [
51
- //{name: 'page_location', type: 'string', columnName: 'page_location2'},
52
- ],
53
- sessionParams: [],
54
- defaultExcludedEvents: [],
55
- // session_start and first_visit are excluded via the excludedEvents array
56
- // this allows the user to include them if needed
57
- excludedEvents: [
58
- 'session_start',
59
- 'first_visit'
60
- ],
61
- defaultExcludedColumns: [
62
- 'event_dimensions', // legacy column, not needed
63
- 'traffic_source', // renamed to user_traffic_source
64
- 'session_id'
65
- ],
66
- // exclude these columns when extracting raw data from the export tables
67
- excludedColumns: [],
68
- };
69
-
70
- module.exports = { ga4EventsEnhancedConfig };
1
+ const { baseConfig } = require('../../defaultConfig.js');
2
+
3
+ /*
4
+ The default configuration for the GA4 Events Enhanced table.
5
+ */
6
+ const ga4EventsEnhancedConfig = {
7
+ ...baseConfig,
8
+ sourceTable: undefined,
9
+ sourceTableType: 'GA4_EXPORT', // used with pre operations to detect if ga4 export specific pre operations are needed
10
+ // optional but recommended
11
+ schemaLock: undefined,
12
+ // only used with js tables
13
+ dataformTableConfig: {
14
+ type: 'incremental',
15
+ bigquery: {
16
+ partitionBy: 'event_date',
17
+ clusterBy: ['event_name', 'session_id', 'page_location', 'data_is_final'],
18
+ labels: {
19
+ 'ga4_export_fixer': 'true'
20
+ }
21
+ },
22
+ onSchemaChange: 'EXTEND',
23
+ tags: ['ga4_export_fixer'],
24
+ },
25
+ // optional
26
+ includedExportTypes: {
27
+ daily: true,
28
+ fresh: false,
29
+ intraday: true,
30
+ },
31
+ timezone: 'Etc/UTC',
32
+ customTimestampParam: undefined,
33
+ dataIsFinal: {
34
+ detectionMethod: 'DAY_THRESHOLD', // 'EXPORT_TYPE' or 'DAY_THRESHOLD'
35
+ dayThreshold: 3 // only used if detectionMethod is 'DAY_THRESHOLD'
36
+ // according to GA4 documentation, the data up to 72 hours old is subject to possible changes
37
+ // in reality, there have been cases where the data has changed even after 72 hours (a 4-day window would have covered these)
38
+ },
39
+ // optional item list attribution - disabled by default (compute-heavy, only useful for ecommerce sites)
40
+ itemListAttribution: undefined,
41
+ // number of additional days of data to include, to account for sessions that overlap days
42
+ bufferDays: 1,
43
+ // these parameters are excluded by default because they've been made available in other columns
44
+ defaultExcludedEventParams: [
45
+ 'page_location',
46
+ 'ga_session_id',
47
+ //'custom_event_timestamp', // removed if customTimestampParam is used
48
+ ],
49
+ excludedEventParams: [],
50
+ eventParamsToColumns: [
51
+ //{name: 'page_location', type: 'string', columnName: 'page_location2'},
52
+ ],
53
+ sessionParams: [],
54
+ defaultExcludedEvents: [],
55
+ // session_start and first_visit are excluded via the excludedEvents array
56
+ // this allows the user to include them if needed
57
+ excludedEvents: [
58
+ 'session_start',
59
+ 'first_visit'
60
+ ],
61
+ defaultExcludedColumns: [
62
+ 'event_dimensions', // legacy column, not needed
63
+ 'traffic_source', // renamed to user_traffic_source
64
+ 'session_id'
65
+ ],
66
+ // exclude these columns when extracting raw data from the export tables
67
+ excludedColumns: [],
68
+ // user-defined CTEs appended to the pipeline after enhanced_events
69
+ // each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
70
+ customSteps: [],
71
+ };
72
+
73
+ module.exports = { ga4EventsEnhancedConfig };
@@ -106,9 +106,9 @@ const getFinalColumnOrder = (eventDataStep, sessionDataStep) => {
106
106
  // Construct the columns object: key is column name, value is {step.name}.{column}
107
107
  const columnOrder = {};
108
108
  for (const col of finalColumnOrder) {
109
- if (sessionDataStep?.columns?.hasOwnProperty(col) && sessionDataStep.columns[col] !== undefined) {
109
+ if (sessionDataStep?.select?.columns?.hasOwnProperty(col) && sessionDataStep.select.columns[col] !== undefined) {
110
110
  columnOrder[col] = `${sessionDataStep.name}.${col}`;
111
- } else if (eventDataStep?.columns?.hasOwnProperty(col) && eventDataStep.columns[col] !== undefined) {
111
+ } else if (eventDataStep?.select?.columns?.hasOwnProperty(col) && eventDataStep.select.columns[col] !== undefined) {
112
112
  columnOrder[col] = `${eventDataStep.name}.${col}`;
113
113
  }
114
114
  }
@@ -200,46 +200,48 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
200
200
  // initial step: extract data from the export tables
201
201
  const eventDataStep = {
202
202
  name: 'event_data',
203
- columns: {
204
- // exclude default export columns that are not needed
205
- // do this first so that the columns defined later are not excluded
206
- ...getExcludedColumns(),
207
- // date and time
208
- event_date: helpers.eventDate,
209
- event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
210
- event_timestamp: 'event_timestamp',
211
- event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
212
- // event name
213
- event_name: 'event_name',
214
- // identifiers
215
- session_id: helpers.sessionId,
216
- user_pseudo_id: 'user_pseudo_id',
217
- user_id: 'user_id',
218
- // page
219
- page_location: helpers.unnestEventParam('page_location', 'string'),
220
- page: helpers.extractPageDetails(),
221
- // event parameters and user properties
222
- ...promotedEventParameters(),
223
- event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
224
- user_properties: 'user_properties',
225
- // traffic source
226
- collected_traffic_source: 'collected_traffic_source',
227
- session_traffic_source_last_click: 'session_traffic_source_last_click',
228
- user_traffic_source: 'traffic_source',
229
- // ecommerce
230
- ecommerce: helpers.fixEcommerceStruct('ecommerce'),
231
- items: 'items',
232
- _item_list_attribution_row_id: itemListAttribution ? helpers.itemListAttributionRowId(ecommerceEventsFilter) : undefined,
233
- // flag if the data is "final" and is not expected to change anymore
234
- data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
235
- export_type: helpers.getGa4ExportType('_table_suffix'),
236
- // prep columns for later steps
237
- entrances: helpers.unnestEventParam('entrances', 'int'),
238
- session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
239
- // include all other columns from the export data
240
- get '[sql]other_columns'() {
241
- const definedColumns = Object.keys(this);
242
- return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
203
+ select: {
204
+ columns: {
205
+ // exclude default export columns that are not needed
206
+ // do this first so that the columns defined later are not excluded
207
+ ...getExcludedColumns(),
208
+ // date and time
209
+ event_date: helpers.eventDate,
210
+ event_datetime: `extract(datetime from timestamp_micros(${helpers.getEventTimestampMicros(mergedConfig.customTimestampParam)}) at time zone '${mergedConfig.timezone}')`,
211
+ event_timestamp: 'event_timestamp',
212
+ event_custom_timestamp: mergedConfig.customTimestampParam ? helpers.getEventTimestampMicros(mergedConfig.customTimestampParam) : undefined,
213
+ // event name
214
+ event_name: 'event_name',
215
+ // identifiers
216
+ session_id: helpers.sessionId,
217
+ user_pseudo_id: 'user_pseudo_id',
218
+ user_id: 'user_id',
219
+ // page
220
+ page_location: helpers.unnestEventParam('page_location', 'string'),
221
+ page: helpers.extractPageDetails(),
222
+ // event parameters and user properties
223
+ ...promotedEventParameters(),
224
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
225
+ user_properties: 'user_properties',
226
+ // traffic source
227
+ collected_traffic_source: 'collected_traffic_source',
228
+ session_traffic_source_last_click: 'session_traffic_source_last_click',
229
+ user_traffic_source: 'traffic_source',
230
+ // ecommerce
231
+ ecommerce: helpers.fixEcommerceStruct('ecommerce'),
232
+ items: 'items',
233
+ _item_list_attribution_row_id: itemListAttribution ? helpers.itemListAttributionRowId(ecommerceEventsFilter) : undefined,
234
+ // flag if the data is "final" and is not expected to change anymore
235
+ data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
236
+ export_type: helpers.getGa4ExportType('_table_suffix'),
237
+ // prep columns for later steps
238
+ entrances: helpers.unnestEventParam('entrances', 'int'),
239
+ session_params_prep: mergedConfig.sessionParams.length > 0 ? helpers.filterEventParams(mergedConfig.sessionParams, 'include') : undefined,
240
+ // include all other columns from the export data
241
+ get '[sql]other_columns'() {
242
+ const definedColumns = Object.keys(this);
243
+ return `* except (${definedColumns.filter(column => helpers.isGa4ExportColumn(column)).join(', ')})`;
244
+ },
243
245
  },
244
246
  },
245
247
  from: mergedConfig.sourceTable,
@@ -250,18 +252,20 @@ ${excludedEventsSQL}`,
250
252
  // Do session-level data aggregation
251
253
  const sessionDataStep = {
252
254
  name: 'session_data',
253
- columns: {
254
- session_id: 'session_id',
255
- user_id: helpers.aggregateValue('user_id', 'last', timestampColumn),
256
- merged_user_id: `ifnull(${helpers.aggregateValue('user_id', 'last', timestampColumn)}, any_value(user_pseudo_id))`,
257
- session_params: helpers.aggregateSessionParams(mergedConfig.sessionParams, 'session_params_prep', timestampColumn),
258
- session_traffic_source_last_click: helpers.aggregateValue('session_traffic_source_last_click', 'first', timestampColumn),
259
- session_first_traffic_source: `array_agg(collected_traffic_source order by ${timestampColumn} limit 1)[safe_offset(0)]`, // don't ignore nulls
260
- landing_page: helpers.aggregateValue(`if(entrances > 0, page, null)`, 'first', timestampColumn),
255
+ select: {
256
+ columns: {
257
+ session_id: 'session_id',
258
+ user_id: helpers.aggregateValue('user_id', 'last', timestampColumn),
259
+ merged_user_id: `ifnull(${helpers.aggregateValue('user_id', 'last', timestampColumn)}, any_value(user_pseudo_id))`,
260
+ session_params: helpers.aggregateSessionParams(mergedConfig.sessionParams, 'session_params_prep', timestampColumn),
261
+ session_traffic_source_last_click: helpers.aggregateValue('session_traffic_source_last_click', 'first', timestampColumn),
262
+ session_first_traffic_source: `array_agg(collected_traffic_source order by ${timestampColumn} limit 1)[safe_offset(0)]`, // don't ignore nulls
263
+ landing_page: helpers.aggregateValue(`if(entrances > 0, page, null)`, 'first', timestampColumn),
264
+ },
261
265
  },
262
266
  from: 'event_data',
263
267
  where: `session_id is not null`,
264
- groupBy: ['session_id']
268
+ 'group by': 'session_id',
265
269
  };
266
270
 
267
271
  // item list attribution CTEs:
@@ -277,11 +281,13 @@ ${excludedEventsSQL}`,
277
281
 
278
282
  const attributionStep = {
279
283
  name: 'item_list_attribution',
280
- columns: {
281
- '_item_list_attribution_row_id': '_item_list_attribution_row_id',
282
- 'event_name': 'event_name',
283
- 'item': 'item',
284
- '_item_list_attr': attrExpr,
284
+ select: {
285
+ columns: {
286
+ '_item_list_attribution_row_id': '_item_list_attribution_row_id',
287
+ 'event_name': 'event_name',
288
+ 'item': 'item',
289
+ '_item_list_attr': attrExpr,
290
+ },
285
291
  },
286
292
  from: 'event_data, unnest(items) as item',
287
293
  where: `event_name in (${ecommerceEventsFilter})`,
@@ -289,18 +295,20 @@ ${excludedEventsSQL}`,
289
295
 
290
296
  const dataStep = {
291
297
  name: 'item_list_data',
292
- columns: {
293
- '_item_list_attribution_row_id': '_item_list_attribution_row_id',
294
- 'items': `array_agg(
298
+ select: {
299
+ columns: {
300
+ '_item_list_attribution_row_id': '_item_list_attribution_row_id',
301
+ 'items': `array_agg(
295
302
  (select as struct item.* replace(
296
303
  coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
297
304
  coalesce(if(${passthroughEvents}, item.item_list_id, _item_list_attr.item_list_id), '(not set)') as item_list_id,
298
305
  coalesce(if(${passthroughEvents}, item.item_list_index, _item_list_attr.item_list_index)) as item_list_index
299
306
  ))
300
307
  )`,
308
+ },
301
309
  },
302
310
  from: 'item_list_attribution',
303
- groupBy: ['_item_list_attribution_row_id'],
311
+ 'group by': '_item_list_attribution_row_id',
304
312
  };
305
313
 
306
314
  return [attributionStep, dataStep];
@@ -316,56 +324,79 @@ ${excludedEventsSQL}`,
316
324
  const itemListExcludedColumns = itemListSteps ? ['_item_list_attribution_row_id'] : [];
317
325
 
318
326
  // Join event_data and session_data, include additional logic
319
- const finalStep = {
320
- name: 'final',
321
- columns: {
322
- // get the most important columns in the correct order
323
- ...finalColumnOrder,
324
- ...itemListOverrides,
325
- // get the rest of the event_data columns
326
- '[sql]event_data': utils.selectOtherColumns(
327
- eventDataStep,
328
- Object.keys(finalColumnOrder),
329
- [
330
- 'entrances',
331
- mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
332
- 'data_is_final',
333
- 'export_type',
334
- ...itemListExcludedColumns,
335
- ]
336
- ),
337
- // get the rest of the session_data columns
338
- '[sql]session_data': utils.selectOtherColumns(
339
- sessionDataStep,
340
- Object.keys(finalColumnOrder),
341
- []
342
- ),
343
- // include additional columns
344
- row_inserted_timestamp: 'current_timestamp()',
345
- data_is_final: 'data_is_final',
346
- export_type: 'export_type',
327
+ // Named 'enhanced_events' so user-supplied customSteps can reference it as a stable handle.
328
+ const enhancedEventsStep = {
329
+ name: 'enhanced_events',
330
+ select: {
331
+ columns: {
332
+ // get the most important columns in the correct order
333
+ ...finalColumnOrder,
334
+ ...itemListOverrides,
335
+ // get the rest of the event_data columns
336
+ '[sql]event_data': utils.selectOtherColumns(
337
+ eventDataStep,
338
+ Object.keys(finalColumnOrder),
339
+ [
340
+ 'entrances',
341
+ mergedConfig.sessionParams.length > 0 ? 'session_params_prep' : undefined,
342
+ 'data_is_final',
343
+ 'export_type',
344
+ ...itemListExcludedColumns,
345
+ ]
346
+ ),
347
+ // get the rest of the session_data columns
348
+ '[sql]session_data': utils.selectOtherColumns(
349
+ sessionDataStep,
350
+ Object.keys(finalColumnOrder),
351
+ []
352
+ ),
353
+ // include additional columns
354
+ row_inserted_timestamp: 'current_timestamp()',
355
+ data_is_final: 'data_is_final',
356
+ export_type: 'export_type',
357
+ },
347
358
  },
348
359
  from: 'event_data',
349
- leftJoin: [
360
+ joins: [
350
361
  ...(itemListSteps ? [{
362
+ type: 'left',
351
363
  table: 'item_list_data',
352
- condition: 'using(_item_list_attribution_row_id)'
364
+ on: 'using(_item_list_attribution_row_id)'
353
365
  }] : []),
354
366
  {
367
+ type: 'left',
355
368
  table: 'session_data',
356
- condition: 'using(session_id)'
369
+ on: 'using(session_id)'
357
370
  }
358
371
  ],
359
372
  where: helpers.incrementalDateFilter(mergedConfig)
360
373
  };
361
374
 
362
- const steps = [
375
+ const packageSteps = [
363
376
  eventDataStep,
364
377
  ...(itemListSteps ?? []),
365
378
  sessionDataStep,
366
- finalStep,
379
+ enhancedEventsStep,
367
380
  ];
368
381
 
382
+ // Layer 2 validation: the name of each customSteps entry must not collide with a package step name.
383
+ // Reserved set is derived from packageSteps at runtime (single source of truth) — what
384
+ // is reserved depends on config (e.g. item_list_* exist only when itemListAttribution is on).
385
+ const customSteps = mergedConfig.customSteps ?? [];
386
+ if (customSteps.length > 0) {
387
+ const reservedNames = new Set(packageSteps.map(s => s.name));
388
+ for (const [i, step] of customSteps.entries()) {
389
+ if (reservedNames.has(step.name)) {
390
+ throw new Error(
391
+ `config.customSteps[${i}].name '${step.name}' collides with a reserved package CTE name. ` +
392
+ `Reserved names (active for this config): ${[...reservedNames].join(', ')}. Choose a different name.`
393
+ );
394
+ }
395
+ }
396
+ }
397
+
398
+ const steps = [...packageSteps, ...customSteps];
399
+
369
400
  return utils.queryBuilder(steps);
370
401
  };
371
402