ga4-export-fixer 0.5.2-dev.6 → 0.5.2-dev.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
const helpers = require('../../../helpers/index.js');
|
|
2
|
+
const utils = require('../../../utils.js');
|
|
3
|
+
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
|
+
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
|
|
6
|
+
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the date filter used by the raw-side query of the daily quality assertion.
 *
 * One filter fragment is requested from the low-level helpers.ga4ExportDateFilter()
 * helper for every enabled export type, always with a fixed 5-day lookback window
 * (date_sub(current_date(), interval 5 day) .. current_date()). The fragments are
 * combined with " or ". This is intentionally separate from the pipeline's
 * ga4ExportDateFilters(), which depends on incremental state and BigQuery
 * pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @returns {string} SQL fragment for a WHERE clause (never empty)
 * @throws {Error} If no export type is enabled. An empty fragment would otherwise be
 *   wrapped in parentheses by the caller and produce invalid SQL ("where ()").
 */
const buildAssertionDateFilter = (includedExportTypes) => {
    const start = 'date_sub(current_date(), interval 5 day)';
    const end = 'current_date()';

    // Treat a missing argument like "nothing enabled" so the explicit error below is raised
    // instead of an opaque TypeError on property access.
    const enabledTypes = includedExportTypes || {};

    const filters = ['daily', 'fresh', 'intraday']
        .filter((exportType) => enabledTypes[exportType])
        .map((exportType) => helpers.ga4ExportDateFilter(exportType, start, end))
        // Drop empty fragments so they cannot produce a dangling " or ".
        .filter(Boolean);

    if (filters.length === 0) {
        throw new Error('assertions.dailyQuality: includedExportTypes must enable at least one of daily, fresh or intraday.');
    }

    return filters.join(' or ');
};
|
|
31
|
+
|
|
32
|
+
/**
 * Generates a SQL assertion query that validates daily data quality between the
 * enhanced events table and the raw GA4 export data.
 *
 * The query compares session count, event count, and total item_revenue
 * aggregated per (event_date, data_is_final) for the last 5 days.
 * Returns violating rows -- 0 rows means the assertion passes.
 *
 * Five violation types are detected:
 * - MISSING_DAY: Raw data has events but enhanced table has none for this day
 * - SESSION_COUNT_MISMATCH: Final data session count differs
 * - EVENT_COUNT_MISMATCH: Final data event count differs
 * - REVENUE_MISMATCH: Final data total item_revenue differs
 * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
 *
 * Excluded event names are emitted as escaped SQL string literals, so names containing
 * a single quote or a backslash cannot break or alter the generated query.
 *
 * NOTE(review): a final (event_date, data_is_final = true) group that exists only in the
 * enhanced table is reported solely when its revenue is non-zero, because the count
 * comparisons evaluate to NULL when the raw side is missing. Confirm this is intended.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation)
 * @returns {string} SQL query returning violating rows
 */
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
    // Backslash first, then the quote, so the escaping backslash is not doubled afterwards.
    const toSqlStringLiteral = (value) =>
        `'${String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;

    const excludedEvents = mergedConfig.excludedEvents || [];
    const excludedEventsSQL = excludedEvents.length > 0
        ? `and event_name not in (${excludedEvents.map(toSqlStringLiteral).join(', ')})`
        : '';

    const dataIsFinalCondition = helpers.isFinalData(
        mergedConfig.dataIsFinal.detectionMethod,
        mergedConfig.dataIsFinal.dayThreshold
    );

    const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);

    // raw_daily groups by ordinal position: its select list aliases the casted date as
    // event_date, which is also the name of the source column, so grouping by name would
    // be ambiguous between the raw string column and the casted alias.
    return `with enhanced_daily as (
  select
    event_date,
    data_is_final,
    count(distinct session_id) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${tableRef}
  where
    event_date >= date_sub(current_date(), interval 5 day)
  group by event_date, data_is_final
),
raw_daily as (
  select
    cast(event_date as date format 'YYYYMMDD') as event_date,
    ${dataIsFinalCondition} as data_is_final,
    count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${mergedConfig.sourceTable}
  where
    (${dateFilter})
    ${excludedEventsSQL}
    and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
  group by 1, 2
)
select
  coalesce(e.event_date, r.event_date) as event_date,
  coalesce(e.data_is_final, r.data_is_final) as data_is_final,
  e.session_count as enhanced_sessions,
  r.session_count as raw_sessions,
  e.event_count as enhanced_events,
  r.event_count as raw_events,
  round(e.total_item_revenue, 2) as enhanced_revenue,
  round(r.total_item_revenue, 2) as raw_revenue,
  case
    when e.event_date is null and r.event_count > 0
      then 'MISSING_DAY'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.session_count != r.session_count
      then 'SESSION_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.event_count != r.event_count
      then 'EVENT_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
      then 'REVENUE_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = false
      and coalesce(e.event_count, 0) > coalesce(r.event_count, 0)
      then 'NON_FINAL_EXCESS_EVENTS'
  end as violation_type
from
  enhanced_daily e
full outer join
  raw_daily r using(event_date, data_is_final)
where
  (coalesce(e.data_is_final, r.data_is_final) = true and (
    e.session_count != r.session_count
    or e.event_count != r.event_count
    or round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
    or e.event_date is null
  ))
  or
  (e.event_date is null and r.event_count > 0)
  or
  (coalesce(e.data_is_final, r.data_is_final) = false
    and coalesce(e.event_count, 0) > coalesce(r.event_count, 0))`;
};
|
|
134
|
+
|
|
135
|
+
/**
 * Public entry point for the daily quality assertion.
 *
 * Steps, in order:
 *   1. Reject a tableRef that is not a non-blank string.
 *   2. Merge the caller's config over the module defaults.
 *   3. Reject a sourceTable that is still a Dataform table reference object,
 *      since no Dataform context is available here to resolve it.
 *   4. Validate the merged config (Dataform-context fields are skipped).
 *   5. Build the SQL comparing daily aggregates (session count, event count,
 *      item_revenue) between the enhanced table and the raw export data.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} When tableRef is unusable or sourceTable is an unresolved Dataform reference.
 */
const generateDailyQualityAssertionSql = (tableRef, config) => {
    const hasUsableTableRef = typeof tableRef === 'string' && tableRef.trim().length > 0;
    if (!hasUsableTableRef) {
        throw new Error("assertions.dailyQuality: tableRef is required and must be a non-empty string (e.g., ctx.ref('table_name') or '`project.dataset.table`').");
    }

    const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);

    const sourceIsUnresolvedReference = utils.isDataformTableReferenceObject(mergedConfig.sourceTable);
    if (sourceIsUnresolvedReference) {
        const messageParts = [
            'assertions.dailyQuality: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ',
            'Resolve it with ctx.ref() before passing it to the assertion:\n\n',
            ' .query(ctx => ga4EventsEnhanced.assertions.dailyQuality(\n',
            " ctx.ref('enhanced_table_name'),\n",
            ' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n',
            ' ))',
        ];
        throw new Error(messageParts.join(''));
    }

    // Validation runs only after the reference check so the caller sees the more specific error first.
    validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });

    return _generateDailyQualityAssertionSql(tableRef, mergedConfig);
};
|
|
167
|
+
|
|
168
|
+
module.exports = { generateDailyQualityAssertionSql };
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const { generateItemRevenueAssertionSql } = require('./itemRevenue.js');
|
|
2
|
+
const { generateDailyQualityAssertionSql } = require('./dailyQuality.js');
|
|
2
3
|
|
|
3
4
|
module.exports = {
|
|
4
5
|
itemRevenue: generateItemRevenueAssertionSql,
|
|
6
|
+
dailyQuality: generateDailyQualityAssertionSql,
|
|
5
7
|
};
|