ga4-export-fixer 0.5.2-dev.6 → 0.5.2-dev.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.5.2-dev.6",
3
+ "version": "0.5.2-dev.7",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -0,0 +1,168 @@
1
+ const helpers = require('../../../helpers/index.js');
2
+ const utils = require('../../../utils.js');
3
+ const { ga4EventsEnhancedConfig } = require('../config.js');
4
+ const { validateEnhancedEventsConfig } = require('../validation.js');
5
+
6
+ const defaultConfig = { ...ga4EventsEnhancedConfig };
7
+
/**
 * Builds a _table_suffix date filter for the assertion's raw-side query.
 *
 * Calls the low-level helpers.ga4ExportDateFilter() once per enabled export
 * type (daily, fresh, intraday) with a fixed 5-day lookback window that ends
 * at current_date(), then joins the resulting fragments with " or ".
 * This is intentionally separate from the pipeline's ga4ExportDateFilters(),
 * which depends on incremental state and BigQuery pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @returns {string} Non-empty SQL fragment for a WHERE clause
 * @throws {Error} If no export type is enabled (or no filter fragment could be
 *   built), because an empty fragment would render as "where ()" in the
 *   assertion query, which is invalid SQL.
 */
const buildAssertionDateFilter = (includedExportTypes) => {
  const start = 'date_sub(current_date(), interval 5 day)';
  const end = 'current_date()';

  // Tolerate a missing/undefined map here so the caller gets the descriptive
  // error below instead of an opaque TypeError on property access.
  const enabledFlags = includedExportTypes || {};

  const filters = ['daily', 'fresh', 'intraday']
    .filter((exportType) => enabledFlags[exportType])
    .map((exportType) => helpers.ga4ExportDateFilter(exportType, start, end))
    // Drop empty fragments so they never produce a dangling " or ".
    .filter(Boolean);

  if (filters.length === 0) {
    throw new Error(
      'assertions.dailyQuality: config.includedExportTypes must enable at least one export type (daily, fresh or intraday) to build the raw data date filter.'
    );
  }

  return filters.join(' or ');
};
31
+
/**
 * Generates a SQL assertion query that validates daily data quality between the
 * enhanced events table and the raw GA4 export data.
 *
 * The query compares session count, event count, and total item_revenue
 * aggregated per (event_date, data_is_final) for the last 5 days.
 * Returns violating rows -- 0 rows means the assertion passes.
 *
 * Five violation types are detected:
 * - MISSING_DAY: Raw data has events but enhanced table has none for this day
 * - SESSION_COUNT_MISMATCH: Final data session count differs
 * - EVENT_COUNT_MISMATCH: Final data event count differs
 * - REVENUE_MISMATCH: Final data total item_revenue differs (rounded to 2 decimals)
 * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
 *
 * Excluded event names are emitted as SQL string literals with backslashes and
 * single quotes escaped, so a name containing either cannot break out of the
 * literal and corrupt the query.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
 *   Reads excludedEvents, dataIsFinal.detectionMethod, dataIsFinal.dayThreshold,
 *   includedExportTypes and sourceTable.
 * @returns {string} SQL query returning violating rows
 */
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
  // Escape the backslash first so the backslash added for quotes is not doubled.
  const toSqlStringLiteral = (value) => {
    const escaped = String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
    return `'${escaped}'`;
  };

  const excludedEvents = mergedConfig.excludedEvents;
  const excludedEventsSQL = excludedEvents.length > 0
    ? `and event_name not in (${excludedEvents.map(toSqlStringLiteral).join(', ')})`
    : '';

  // Boolean SQL expression used to classify raw rows as final / non-final.
  const dataIsFinalCondition = helpers.isFinalData(
    mergedConfig.dataIsFinal.detectionMethod,
    mergedConfig.dataIsFinal.dayThreshold
  );

  const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);

  // The final WHERE mirrors the CASE branches so only violating rows are returned.
  return `with enhanced_daily as (
  select
    event_date,
    data_is_final,
    count(distinct session_id) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${tableRef}
  where
    event_date >= date_sub(current_date(), interval 5 day)
  group by event_date, data_is_final
),
raw_daily as (
  select
    cast(event_date as date format 'YYYYMMDD') as event_date,
    ${dataIsFinalCondition} as data_is_final,
    count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${mergedConfig.sourceTable}
  where
    (${dateFilter})
    ${excludedEventsSQL}
    and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
  group by event_date, data_is_final
)
select
  coalesce(e.event_date, r.event_date) as event_date,
  coalesce(e.data_is_final, r.data_is_final) as data_is_final,
  e.session_count as enhanced_sessions,
  r.session_count as raw_sessions,
  e.event_count as enhanced_events,
  r.event_count as raw_events,
  round(e.total_item_revenue, 2) as enhanced_revenue,
  round(r.total_item_revenue, 2) as raw_revenue,
  case
    when e.event_date is null and r.event_count > 0
      then 'MISSING_DAY'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.session_count != r.session_count
      then 'SESSION_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.event_count != r.event_count
      then 'EVENT_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
      then 'REVENUE_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = false
      and coalesce(e.event_count, 0) > coalesce(r.event_count, 0)
      then 'NON_FINAL_EXCESS_EVENTS'
  end as violation_type
from
  enhanced_daily e
full outer join
  raw_daily r using(event_date, data_is_final)
where
  (coalesce(e.data_is_final, r.data_is_final) = true and (
    e.session_count != r.session_count
    or e.event_count != r.event_count
    or round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
    or e.event_date is null
  ))
  or
  (e.event_date is null and r.event_count > 0)
  or
  (coalesce(e.data_is_final, r.data_is_final) = false
    and coalesce(e.event_count, 0) > coalesce(r.event_count, 0))`;
};
134
+
/**
 * Public entry point for the daily quality assertion.
 *
 * Steps, in order:
 *  1. Rejects a missing, non-string or blank tableRef.
 *  2. Merges the user config over the module defaults with
 *     utils.mergeSQLConfigurations().
 *  3. Rejects a sourceTable that is still a Dataform table reference object,
 *     since no Dataform context is available here to resolve it.
 *  4. Validates the merged config with skipDataformContextFields enabled.
 *  5. Delegates SQL generation to _generateDailyQualityAssertionSql().
 *
 * The generated query compares daily aggregates (session count, event count,
 * item_revenue) between the enhanced table and the raw export data, and checks
 * for missing days and non-final data inflation.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} If tableRef is unusable or config.sourceTable is an
 *   unresolved Dataform table reference object.
 */
const generateDailyQualityAssertionSql = (tableRef, config) => {
  const isUsableTableRef = typeof tableRef === 'string' && tableRef.trim() !== '';
  if (!isUsableTableRef) {
    throw new Error(
      "assertions.dailyQuality: tableRef is required and must be a non-empty string (e.g., ctx.ref('table_name') or '`project.dataset.table`')."
    );
  }

  const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);

  const sourceTableIsUnresolved = utils.isDataformTableReferenceObject(mergedConfig.sourceTable);
  if (sourceTableIsUnresolved) {
    const messageParts = [
      'assertions.dailyQuality: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ',
      'Resolve it with ctx.ref() before passing it to the assertion:\n\n',
      ' .query(ctx => ga4EventsEnhanced.assertions.dailyQuality(\n',
      " ctx.ref('enhanced_table_name'),\n",
      ' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n',
      ' ))',
    ];
    throw new Error(messageParts.join(''));
  }

  // Validation runs only after the sourceTable check so the more specific
  // error above is reported first.
  const validationOptions = { skipDataformContextFields: true };
  validateEnhancedEventsConfig(mergedConfig, validationOptions);

  return _generateDailyQualityAssertionSql(tableRef, mergedConfig);
};
167
+
168
+ module.exports = { generateDailyQualityAssertionSql };
@@ -1,5 +1,7 @@
1
1
  const { generateItemRevenueAssertionSql } = require('./itemRevenue.js');
2
+ const { generateDailyQualityAssertionSql } = require('./dailyQuality.js');
2
3
 
3
4
  module.exports = {
4
5
  itemRevenue: generateItemRevenueAssertionSql,
6
+ dailyQuality: generateDailyQualityAssertionSql,
5
7
  };