ga4-export-fixer 0.5.2-dev.3 → 0.5.2-dev.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.5.2-dev.3",
3
+ "version": "0.5.2-dev.5",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,13 +17,14 @@
17
17
  "createTable.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js",
21
21
  "test:summary": "node tests/testRunner.js",
22
22
  "test:docs": "node tests/documentation.test.js",
23
23
  "test:preops": "node tests/preOperations.test.js",
24
24
  "test:events": "node tests/ga4EventsEnhanced.test.js",
25
25
  "test:merge": "node tests/mergeSQLConfigurations.test.js",
26
26
  "test:validation": "node tests/inputValidation.test.js",
27
+ "test:assertions": "node tests/assertions.test.js",
27
28
  "test:createTable": "node tests/createTable.test.js",
28
29
  "test:integration": "node tests/integration/integration.test.js",
29
30
  "release:dev": "./scripts/release-dev.sh",
@@ -0,0 +1,5 @@
1
// Registry of assertion SQL generators exposed by this directory,
// keyed by the short assertion name callers use.
const { generateItemRevenueAssertionSql: itemRevenue } = require('./itemRevenue.js');

module.exports = { itemRevenue };
@@ -0,0 +1,135 @@
1
+ const helpers = require('../../../helpers/index.js');
2
+ const utils = require('../../../utils.js');
3
+ const { ga4EventsEnhancedConfig } = require('../config.js');
4
+ const { validateEnhancedEventsConfig } = require('../validation.js');
5
+
6
+ const defaultConfig = { ...ga4EventsEnhancedConfig };
7
+
8
+ // Ecommerce events that carry item data. Refund is intentionally included:
9
+ // refunds reverse revenue and are handled separately in some pipelines, but
10
+ // item_revenue on refund rows should still reconcile 1:1 between enhanced and raw.
11
+ const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
12
+
13
/**
 * Builds a _table_suffix date filter for the assertion's raw-side query.
 *
 * Calls the low-level helpers.ga4ExportDateFilter() once per enabled export
 * type, always with the same fixed window (5 days back through the current
 * date), and ORs the resulting fragments together. This is intentionally
 * separate from the pipeline's ga4ExportDateFilters(), which depends on
 * incremental state and BigQuery pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @returns {string} Non-empty SQL fragment for a WHERE clause
 * @throws {Error} If no export type yields a filter. An empty fragment would be
 *   wrapped in parentheses by the caller and produce invalid SQL ("()").
 */
const buildAssertionDateFilter = (includedExportTypes) => {
    const start = 'date_sub(current_date(), interval 5 day)';
    const end = 'current_date()';

    // Fixed order keeps the generated SQL stable: daily, fresh, intraday.
    const filters = ['daily', 'fresh', 'intraday']
        .filter((exportType) => includedExportTypes && includedExportTypes[exportType])
        .map((exportType) => helpers.ga4ExportDateFilter(exportType, start, end))
        .filter(Boolean);

    if (filters.length === 0) {
        throw new Error('assertions: includedExportTypes must enable at least one of daily, fresh or intraday to build the raw-side date filter.');
    }

    return filters.join(' or ');
};
36
+
37
/**
 * Generates a SQL assertion query that reconciles item_revenue between the
 * enhanced events table and the raw GA4 export data.
 *
 * Both sides are aggregated to (event_date, item_id) over the last 5 days,
 * restricted to ecommerce events and to final data, and then full-outer-joined.
 * The query returns only the groups whose rounded revenue or item count differ,
 * or that exist on one side only — 0 rows means the assertion passes.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 *   (e.g., ctx.ref('ga4_events_enhanced_123456789') in Dataform, or a backtick-quoted string).
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
 *   Reads excludedEvents, dataIsFinal, includedExportTypes and sourceTable.
 * @returns {string} SQL query returning violating rows
 */
const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
    // Render a value as a single-quoted SQL string literal. Backslashes and
    // quotes are escaped so an unusual event name cannot break out of the literal.
    const toSqlStringLiteral = (value) =>
        `'${String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;

    // Events excluded from the enhanced table must be dropped from the raw side
    // as well, otherwise their items would show up as raw-only rows.
    const excludedEvents = mergedConfig.excludedEvents || [];
    const excludedEventsSQL = excludedEvents.length > 0
        ? `and event_name not in (${excludedEvents.map(toSqlStringLiteral).join(', ')})`
        : '';

    // data_is_final condition for the raw side
    const dataIsFinalCondition = helpers.isFinalData(
        mergedConfig.dataIsFinal.detectionMethod,
        mergedConfig.dataIsFinal.dayThreshold
    );

    // date filter for the raw side (per-export-type, fixed 5-day window)
    const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);

    // Join key for items. NULL never equals NULL in a USING join, so a NULL
    // item_id would be reported as a mismatch on both sides even when the data
    // agrees. The same normalization is applied to both CTEs to keep them symmetric.
    const itemIdKey = `coalesce(item.item_id, '(not set)')`;

    return `with enhanced_revenue as (
  select
    event_date,
    ${itemIdKey} as item_id,
    sum(item.item_revenue) as total_item_revenue,
    count(*) as item_count
  from
    ${tableRef},
    unnest(items) as item
  where
    data_is_final = true
    and event_date >= date_sub(current_date(), interval 5 day)
    and event_name in (${ecommerceEvents})
  group by 1, 2
),
raw_revenue as (
  select
    cast(event_date as date format 'YYYYMMDD') as event_date,
    ${itemIdKey} as item_id,
    sum(item.item_revenue) as total_item_revenue,
    count(*) as item_count
  from
    ${mergedConfig.sourceTable},
    unnest(items) as item
  where
    (${dateFilter})
    ${excludedEventsSQL}
    and event_name in (${ecommerceEvents})
    and ${dataIsFinalCondition}
    and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
  group by 1, 2
)
select
  coalesce(e.event_date, r.event_date) as event_date,
  coalesce(e.item_id, r.item_id) as item_id,
  e.total_item_revenue as enhanced_revenue,
  r.total_item_revenue as raw_revenue,
  e.item_count as enhanced_count,
  r.item_count as raw_count
from
  enhanced_revenue e
full outer join
  raw_revenue r using(event_date, item_id)
where
  round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
  or e.item_count != r.item_count
  or e.event_date is null
  or r.event_date is null`;
};
115
+
116
/**
 * Public entry point for the item_revenue reconciliation assertion.
 *
 * Rejects an unusable table reference up front, then merges the caller's
 * config over the defaults, validates the merged result, and hands it to the
 * SQL generator.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} If tableRef is not a string or is empty/whitespace-only.
 */
const generateItemRevenueAssertionSql = (tableRef, config) => {
    const hasUsableTableRef = typeof tableRef === 'string' && tableRef.trim() !== '';
    if (!hasUsableTableRef) {
        throw new Error('assertions.itemRevenue: tableRef is required and must be a non-empty string (e.g., ctx.ref(\'table_name\') or \'`project.dataset.table`\').');
    }

    const effectiveConfig = utils.mergeSQLConfigurations(defaultConfig, config);
    // Validation runs on the merged config with the Dataform-context fields skipped.
    validateEnhancedEventsConfig(effectiveConfig, { skipDataformContextFields: true });

    return _generateItemRevenueAssertionSql(tableRef, effectiveConfig);
};
134
+
135
+ module.exports = { generateItemRevenueAssertionSql };
@@ -7,6 +7,7 @@ const { validateEnhancedEventsConfig } = require('./validation.js');
7
7
  const documentation = require('../../documentation.js');
8
8
  const { createTable } = require('../../createTable.js');
9
9
  const { getTableDescriptionSections } = require('./tableDescription.js');
10
+ const assertions = require('./assertions/index.js');
10
11
 
11
12
  // Column metadata for the GA4 Events Enhanced table
12
13
  const columnMetadata = {
@@ -161,6 +162,9 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
161
162
 
162
163
  // item list attribution config
163
164
  const itemListAttribution = mergedConfig.itemListAttribution;
165
+ const ecommerceEventsFilter = itemListAttribution
166
+ ? helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ')
167
+ : null;
164
168
 
165
169
  // auto-adjust bufferDays for time-based item list attribution lookback
166
170
  const effectiveBufferDays = (itemListAttribution && itemListAttribution.lookbackType === 'TIME')
@@ -225,11 +229,12 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
225
229
  // ecommerce
226
230
  ecommerce: helpers.fixEcommerceStruct('ecommerce'),
227
231
  items: 'items',
228
- // unique row id for item list attribution join.
229
- // row_number() over() breaks hash collisions for batched events with identical data.
232
+ // unique row id for item list attribution join. Only computed for ecommerce events.
233
+ // row_number() breaks hash collisions for batched events with identical data.
234
+ // partition by event_name avoids a single-partition bottleneck in the window function.
230
235
  // Non-determinism is safe: colliding rows have identical items (to_json_string(items) is in the hash),
231
236
  // so swapping row numbers between them produces the same final result.
232
- _event_row_id: itemListAttribution ? `farm_fingerprint(concat(user_pseudo_id, cast(event_timestamp as string), event_name, to_json_string(items), cast(row_number() over() as string)))` : undefined,
237
+ _item_list_attribution_row_id: itemListAttribution ? `if(event_name in (${ecommerceEventsFilter}), farm_fingerprint(concat(user_pseudo_id, cast(event_timestamp as string), event_name, to_json_string(items), cast(row_number() over(partition by event_name, user_pseudo_id) as string))), null)` : undefined,
233
238
  // flag if the data is "final" and is not expected to change anymore
234
239
  data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
235
240
  export_type: helpers.getGa4ExportType('_table_suffix'),
@@ -272,12 +277,11 @@ ${excludedEventsSQL}`,
272
277
  itemListAttribution.lookbackTimeMs
273
278
  );
274
279
  const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
275
- const ecommerceFilter = helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ');
276
280
 
277
281
  return {
278
282
  name: 'item_list_data',
279
283
  columns: {
280
- '_event_row_id': '_event_row_id',
284
+ '_item_list_attribution_row_id': '_item_list_attribution_row_id',
281
285
  'items': `array_agg(
282
286
  (select as struct item.* replace(
283
287
  coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
@@ -286,19 +290,19 @@ ${excludedEventsSQL}`,
286
290
  ))
287
291
  )`,
288
292
  },
289
- from: `(select _event_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceFilter}))`,
290
- groupBy: ['_event_row_id'],
293
+ from: `(select _item_list_attribution_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceEventsFilter}))`,
294
+ groupBy: ['_item_list_attribution_row_id'],
291
295
  };
292
296
  })() : null;
293
297
 
294
298
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
295
299
 
296
- // When item list attribution is enabled, override the items column and exclude _event_row_id
300
+ // When item list attribution is enabled, override the items column and exclude _item_list_attribution_row_id
297
301
  // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
298
302
  const itemListOverrides = itemListDataStep ? {
299
303
  items: 'coalesce(item_list_data.items, event_data.items)',
300
304
  } : {};
301
- const itemListExcludedColumns = itemListDataStep ? ['_event_row_id'] : [];
305
+ const itemListExcludedColumns = itemListDataStep ? ['_item_list_attribution_row_id'] : [];
302
306
 
303
307
  // Join event_data and session_data, include additional logic
304
308
  const finalStep = {
@@ -334,7 +338,7 @@ ${excludedEventsSQL}`,
334
338
  leftJoin: [
335
339
  ...(itemListDataStep ? [{
336
340
  table: 'item_list_data',
337
- condition: 'using(_event_row_id)'
341
+ condition: 'using(_item_list_attribution_row_id)'
338
342
  }] : []),
339
343
  {
340
344
  table: 'session_data',
@@ -418,5 +422,6 @@ module.exports = {
418
422
  generateSql: generateEnhancedEventsSQL,
419
423
  setPreOperations: setPreOperations,
420
424
  getColumnDescriptions: getColumnDescriptions,
421
- getTableDescription: getTableDescription
425
+ getTableDescription: getTableDescription,
426
+ assertions,
422
427
  }