ga4-export-fixer 0.5.2-dev.6 → 0.5.2-dev.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.5.2-dev.6",
3
+ "version": "0.5.2-dev.8",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -0,0 +1,168 @@
1
+ const helpers = require('../../../helpers/index.js');
2
+ const utils = require('../../../utils.js');
3
+ const { ga4EventsEnhancedConfig } = require('../config.js');
4
+ const { validateEnhancedEventsConfig } = require('../validation.js');
5
+
6
+ const defaultConfig = { ...ga4EventsEnhancedConfig };
7
+
8
/**
 * Builds a _table_suffix date filter for the assertion's raw-side query.
 *
 * Calls the low-level helpers.ga4ExportDateFilter() once per enabled export
 * type (in the order daily, fresh, intraday) with a fixed lookback window
 * that ends at current_date(), and joins the resulting fragments with " or ".
 * This is intentionally separate from the pipeline's ga4ExportDateFilters(),
 * which depends on incremental state and BigQuery pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @param {number} [lookbackDays=5] - Size of the lookback window in days; must be a non-negative integer
 *   because it is interpolated directly into the SQL text.
 * @returns {string} SQL fragment for a WHERE clause (never empty)
 * @throws {Error} If lookbackDays is not a non-negative integer, or if no export type is enabled
 *   (an empty filter would otherwise render as "()" in the caller's WHERE clause, which is invalid SQL).
 */
const buildAssertionDateFilter = (includedExportTypes, lookbackDays = 5) => {
    if (!Number.isInteger(lookbackDays) || lookbackDays < 0) {
        throw new Error('assertions.dailyQuality: lookbackDays must be a non-negative integer.');
    }

    const start = `date_sub(current_date(), interval ${lookbackDays} day)`;
    const end = 'current_date()';

    const filters = ['daily', 'fresh', 'intraday']
        // Optional chaining lets a missing includedExportTypes fall through to the descriptive error below.
        .filter((exportType) => includedExportTypes?.[exportType])
        .map((exportType) => helpers.ga4ExportDateFilter(exportType, start, end))
        // Drop any empty fragment the helper may return so the join never produces a dangling " or ".
        .filter(Boolean);

    if (filters.length === 0) {
        throw new Error(
            'assertions.dailyQuality: config.includedExportTypes must enable at least one of daily, fresh or intraday.'
        );
    }

    return filters.join(' or ');
};
31
+
32
/**
 * Generates a SQL assertion query that validates daily data quality between the
 * enhanced events table and the raw GA4 export data.
 *
 * The query compares session count, event count, and total item_revenue
 * aggregated per (event_date, data_is_final) for the last 5 days.
 * Returns violating rows -- 0 rows means the assertion passes.
 *
 * Five violation types are detected:
 * - MISSING_DAY: Raw data has events but enhanced table has none for this day
 * - SESSION_COUNT_MISMATCH: Final data session count differs
 * - EVENT_COUNT_MISMATCH: Final data event count differs
 * - REVENUE_MISMATCH: Final data total item_revenue differs (compared after rounding to 2 decimals)
 * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
 *
 * Excluded event names are rendered as escaped SQL string literals, so a name
 * containing a quote or backslash cannot break out of the literal.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
 *   Reads excludedEvents, dataIsFinal.detectionMethod, dataIsFinal.dayThreshold,
 *   includedExportTypes and sourceTable.
 * @returns {string} SQL query returning violating rows
 */
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
    // Render a value as a single-quoted SQL string literal. Backslashes are escaped
    // first so the backslash added for a quote is not escaped a second time.
    const toSqlStringLiteral = (value) =>
        `'${String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;

    // A missing list is treated the same as an empty one: no exclusion clause.
    const excludedEvents = mergedConfig.excludedEvents ?? [];
    const excludedEventsSQL = excludedEvents.length > 0
        ? `and event_name not in (${excludedEvents.map(toSqlStringLiteral).join(', ')})`
        : '';

    const dataIsFinalCondition = helpers.isFinalData(
        mergedConfig.dataIsFinal.detectionMethod,
        mergedConfig.dataIsFinal.dayThreshold
    );

    const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);

    // Single definition of the window start so the enhanced and raw sides cannot drift apart.
    const lookbackStart = 'date_sub(current_date(), interval 5 day)';

    return `with enhanced_daily as (
  select
    event_date,
    data_is_final,
    count(distinct session_id) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${tableRef}
  where
    event_date >= ${lookbackStart}
  group by event_date, data_is_final
),
raw_daily as (
  select
    cast(event_date as date format 'YYYYMMDD') as event_date,
    ${dataIsFinalCondition} as data_is_final,
    count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${mergedConfig.sourceTable}
  where
    (${dateFilter})
    ${excludedEventsSQL}
    and cast(event_date as date format 'YYYYMMDD') >= ${lookbackStart}
  group by event_date, data_is_final
)
select
  coalesce(e.event_date, r.event_date) as event_date,
  coalesce(e.data_is_final, r.data_is_final) as data_is_final,
  e.session_count as enhanced_sessions,
  r.session_count as raw_sessions,
  e.event_count as enhanced_events,
  r.event_count as raw_events,
  round(e.total_item_revenue, 2) as enhanced_revenue,
  round(r.total_item_revenue, 2) as raw_revenue,
  case
    when e.event_date is null and r.event_count > 0
      then 'MISSING_DAY'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.session_count != r.session_count
      then 'SESSION_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.event_count != r.event_count
      then 'EVENT_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
      then 'REVENUE_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = false
      and coalesce(e.event_count, 0) > coalesce(r.event_count, 0)
      then 'NON_FINAL_EXCESS_EVENTS'
  end as violation_type
from
  enhanced_daily e
full outer join
  raw_daily r using(event_date, data_is_final)
where
  (coalesce(e.data_is_final, r.data_is_final) = true and (
    e.session_count != r.session_count
    or e.event_count != r.event_count
    or round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
    or e.event_date is null
  ))
  or
  (e.event_date is null and r.event_count > 0)
  or
  (coalesce(e.data_is_final, r.data_is_final) = false
    and coalesce(e.event_count, 0) > coalesce(r.event_count, 0))`;
};
134
+
135
/**
 * Public entry point for the daily quality assertion.
 *
 * Steps, in order:
 *  1. Reject a missing, non-string or blank tableRef.
 *  2. Merge the caller's config over the module defaults.
 *  3. Reject a sourceTable that is still a Dataform table reference object,
 *     since this function has no Dataform context to resolve it with.
 *  4. Validate the merged config (Dataform-context fields are skipped).
 *  5. Delegate SQL generation to _generateDailyQualityAssertionSql().
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} When tableRef is unusable or sourceTable is an unresolved reference object.
 */
const generateDailyQualityAssertionSql = (tableRef, config) => {
    const hasUsableTableRef = typeof tableRef === 'string' && tableRef.trim() !== '';
    if (!hasUsableTableRef) {
        throw new Error(
            "assertions.dailyQuality: tableRef is required and must be a non-empty string (e.g., ctx.ref('table_name') or '`project.dataset.table`')."
        );
    }

    const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);

    const sourceTableIsUnresolved = utils.isDataformTableReferenceObject(mergedConfig.sourceTable);
    if (sourceTableIsUnresolved) {
        // Message body is assembled line by line; the usage example follows a blank line.
        const messageLines = [
            'assertions.dailyQuality: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. Resolve it with ctx.ref() before passing it to the assertion:',
            '',
            ' .query(ctx => ga4EventsEnhanced.assertions.dailyQuality(',
            " ctx.ref('enhanced_table_name'),",
            ' { ...config, sourceTable: ctx.ref(config.sourceTable) }',
            ' ))',
        ];
        throw new Error(messageLines.join('\n'));
    }

    validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });

    return _generateDailyQualityAssertionSql(tableRef, mergedConfig);
};
167
+
168
+ module.exports = { generateDailyQualityAssertionSql };
@@ -1,5 +1,7 @@
1
1
  const { generateItemRevenueAssertionSql } = require('./itemRevenue.js');
2
+ const { generateDailyQualityAssertionSql } = require('./dailyQuality.js');
2
3
 
3
4
// Assertion SQL generators exposed by this module, keyed by their public name.
const assertionGenerators = {
    itemRevenue: generateItemRevenueAssertionSql,
    dailyQuality: generateDailyQualityAssertionSql,
};

module.exports = assertionGenerators;
@@ -269,8 +269,10 @@ ${excludedEventsSQL}`,
269
269
  groupBy: ['session_id']
270
270
  };
271
271
 
272
- // item list attribution CTE: unnest items, attribute via window function, re-aggregate
273
- const itemListDataStep = itemListAttribution ? (() => {
272
+ // item list attribution CTEs:
273
+ // 1. item_list_attribution: unnest items from ecommerce events, compute attribution via window function
274
+ // 2. item_list_data: re-aggregate items with attributed list fields
275
+ const itemListSteps = itemListAttribution ? (() => {
274
276
  const attrExpr = helpers.itemListAttributionExpr(
275
277
  itemListAttribution.lookbackType,
276
278
  timestampColumn,
@@ -278,7 +280,19 @@ ${excludedEventsSQL}`,
278
280
  );
279
281
  const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
280
282
 
281
- return {
283
+ const attributionStep = {
284
+ name: 'item_list_attribution',
285
+ columns: {
286
+ '_item_list_attribution_row_id': '_item_list_attribution_row_id',
287
+ 'event_name': 'event_name',
288
+ 'item': 'item',
289
+ '_item_list_attr': attrExpr,
290
+ },
291
+ from: 'event_data, unnest(items) as item',
292
+ where: `event_name in (${ecommerceEventsFilter})`,
293
+ };
294
+
295
+ const dataStep = {
282
296
  name: 'item_list_data',
283
297
  columns: {
284
298
  '_item_list_attribution_row_id': '_item_list_attribution_row_id',
@@ -290,19 +304,21 @@ ${excludedEventsSQL}`,
290
304
  ))
291
305
  )`,
292
306
  },
293
- from: `(select _item_list_attribution_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceEventsFilter}))`,
307
+ from: 'item_list_attribution',
294
308
  groupBy: ['_item_list_attribution_row_id'],
295
309
  };
310
+
311
+ return [attributionStep, dataStep];
296
312
  })() : null;
297
313
 
298
314
  const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
299
315
 
300
316
  // When item list attribution is enabled, override the items column and exclude _item_list_attribution_row_id
301
317
  // COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
302
- const itemListOverrides = itemListDataStep ? {
318
+ const itemListOverrides = itemListSteps ? {
303
319
  items: 'coalesce(item_list_data.items, event_data.items)',
304
320
  } : {};
305
- const itemListExcludedColumns = itemListDataStep ? ['_item_list_attribution_row_id'] : [];
321
+ const itemListExcludedColumns = itemListSteps ? ['_item_list_attribution_row_id'] : [];
306
322
 
307
323
  // Join event_data and session_data, include additional logic
308
324
  const finalStep = {
@@ -336,7 +352,7 @@ ${excludedEventsSQL}`,
336
352
  },
337
353
  from: 'event_data',
338
354
  leftJoin: [
339
- ...(itemListDataStep ? [{
355
+ ...(itemListSteps ? [{
340
356
  table: 'item_list_data',
341
357
  condition: 'using(_item_list_attribution_row_id)'
342
358
  }] : []),
@@ -350,7 +366,7 @@ ${excludedEventsSQL}`,
350
366
 
351
367
  const steps = [
352
368
  eventDataStep,
353
- ...(itemListDataStep ? [itemListDataStep] : []),
369
+ ...(itemListSteps ?? []),
354
370
  sessionDataStep,
355
371
  finalStep,
356
372
  ];