ga4-export-fixer 0.5.2-dev.6 → 0.5.2-dev.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
const helpers = require('../../../helpers/index.js');
|
|
2
|
+
const utils = require('../../../utils.js');
|
|
3
|
+
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
|
+
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
|
|
6
|
+
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the _table_suffix date filter for the raw-side query of the daily
 * quality assertion.
 *
 * For every enabled export type the low-level helpers.ga4ExportDateFilter()
 * helper is called with a fixed window starting 5 days before the current
 * date and ending on the current date; the resulting fragments are OR-ed
 * together. This is intentionally separate from the pipeline's
 * ga4ExportDateFilters(), which depends on incremental state and BigQuery
 * pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @returns {string} SQL fragment for a WHERE clause
 * @throws {Error} If no export type is enabled (or the argument is missing),
 *   because an empty fragment would be wrapped in parentheses by the caller
 *   and produce invalid SQL.
 */
const buildAssertionDateFilter = (includedExportTypes) => {
    const start = 'date_sub(current_date(), interval 5 day)';
    const end = 'current_date()';

    // Treat a missing argument like "nothing enabled" so the failure below is
    // a descriptive error instead of a property access on undefined.
    const enabledTypes = includedExportTypes ?? {};

    const filters = ['daily', 'fresh', 'intraday']
        .filter((exportType) => enabledTypes[exportType])
        .map((exportType) => helpers.ga4ExportDateFilter(exportType, start, end))
        // Drop empty fragments so they cannot produce a dangling " or ".
        .filter(Boolean);

    if (filters.length === 0) {
        throw new Error(
            'assertions.dailyQuality: includedExportTypes must enable at least one export type (daily, fresh, or intraday); no date filter could be built.'
        );
    }

    return filters.join(' or ');
};
|
|
31
|
+
|
|
32
|
+
/**
 * Generates a SQL assertion query that validates daily data quality between the
 * enhanced events table and the raw GA4 export data.
 *
 * The query compares session count, event count, and total item_revenue
 * aggregated per (event_date, data_is_final) for the last 5 days.
 * Returns violating rows -- 0 rows means the assertion passes.
 *
 * Five violation types are detected:
 * - MISSING_DAY: Raw data has events but enhanced table has none for this day
 * - SESSION_COUNT_MISMATCH: Final data session count differs
 * - EVENT_COUNT_MISMATCH: Final data event count differs
 * - REVENUE_MISMATCH: Final data total item_revenue differs
 * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
 *
 * Excluded event names are emitted as escaped single-quoted SQL string
 * literals; a missing excludedEvents list is treated as empty.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation)
 * @returns {string} SQL query returning violating rows
 */
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
    // Tolerate a config without excludedEvents instead of failing on `.length`.
    const excludedEvents = mergedConfig.excludedEvents ?? [];

    // Escape backslashes first, then single quotes, so a name containing either
    // character cannot terminate the SQL string literal early.
    const excludedEventLiterals = excludedEvents.map(
        (eventName) => `'${String(eventName).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`
    );
    const excludedEventsSQL = excludedEventLiterals.length > 0
        ? `and event_name not in (${excludedEventLiterals.join(', ')})`
        : '';

    const dataIsFinalCondition = helpers.isFinalData(
        mergedConfig.dataIsFinal.detectionMethod,
        mergedConfig.dataIsFinal.dayThreshold
    );

    // Raw-side table-suffix filter; its window is the same 5-day lookback that
    // is hard-coded in the event_date predicates of the query below.
    const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);

    return `with enhanced_daily as (
  select
    event_date,
    data_is_final,
    count(distinct session_id) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${tableRef}
  where
    event_date >= date_sub(current_date(), interval 5 day)
  group by event_date, data_is_final
),
raw_daily as (
  select
    cast(event_date as date format 'YYYYMMDD') as event_date,
    ${dataIsFinalCondition} as data_is_final,
    count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
    count(*) as event_count,
    coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
  from
    ${mergedConfig.sourceTable}
  where
    (${dateFilter})
    ${excludedEventsSQL}
    and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
  group by event_date, data_is_final
)
select
  coalesce(e.event_date, r.event_date) as event_date,
  coalesce(e.data_is_final, r.data_is_final) as data_is_final,
  e.session_count as enhanced_sessions,
  r.session_count as raw_sessions,
  e.event_count as enhanced_events,
  r.event_count as raw_events,
  round(e.total_item_revenue, 2) as enhanced_revenue,
  round(r.total_item_revenue, 2) as raw_revenue,
  case
    when e.event_date is null and r.event_count > 0
      then 'MISSING_DAY'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.session_count != r.session_count
      then 'SESSION_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and e.event_count != r.event_count
      then 'EVENT_COUNT_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = true
      and round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
      then 'REVENUE_MISMATCH'
    when coalesce(e.data_is_final, r.data_is_final) = false
      and coalesce(e.event_count, 0) > coalesce(r.event_count, 0)
      then 'NON_FINAL_EXCESS_EVENTS'
  end as violation_type
from
  enhanced_daily e
full outer join
  raw_daily r using(event_date, data_is_final)
where
  (coalesce(e.data_is_final, r.data_is_final) = true and (
    e.session_count != r.session_count
    or e.event_count != r.event_count
    or round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
    or e.event_date is null
  ))
  or
  (e.event_date is null and r.event_count > 0)
  or
  (coalesce(e.data_is_final, r.data_is_final) = false
    and coalesce(e.event_count, 0) > coalesce(r.event_count, 0))`;
};
|
|
134
|
+
|
|
135
|
+
/**
 * Public entry point for the daily quality assertion.
 *
 * Checks that a usable table reference was supplied, merges the caller's
 * configuration over the defaults, rejects a sourceTable that is still a
 * Dataform table reference object, validates the merged configuration and
 * then delegates SQL generation. The generated query compares daily
 * aggregates (session count, event count, item_revenue) between the enhanced
 * table and the raw export data, and checks for missing days and non-final
 * data inflation.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} If tableRef is not a non-blank string, or if
 *   config.sourceTable is an unresolved Dataform table reference object.
 */
const generateDailyQualityAssertionSql = (tableRef, config) => {
    const hasUsableTableRef = typeof tableRef === 'string' && tableRef.trim() !== '';
    if (!hasUsableTableRef) {
        throw new Error(
            "assertions.dailyQuality: tableRef is required and must be a non-empty string (e.g., ctx.ref('table_name') or '`project.dataset.table`')."
        );
    }

    const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);

    // No Dataform context is available here, so a reference object cannot be
    // resolved into a table name; the caller has to resolve it beforehand.
    const sourceIsUnresolvedReference = utils.isDataformTableReferenceObject(mergedConfig.sourceTable);
    if (sourceIsUnresolvedReference) {
        const guidance = [
            'assertions.dailyQuality: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ',
            'Resolve it with ctx.ref() before passing it to the assertion:\n\n',
            ' .query(ctx => ga4EventsEnhanced.assertions.dailyQuality(\n',
            " ctx.ref('enhanced_table_name'),\n",
            ' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n',
            ' ))',
        ].join('');
        throw new Error(guidance);
    }

    validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });

    return _generateDailyQualityAssertionSql(tableRef, mergedConfig);
};
|
|
167
|
+
|
|
168
|
+
module.exports = { generateDailyQualityAssertionSql };
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const { generateItemRevenueAssertionSql } = require('./itemRevenue.js');
|
|
2
|
+
const { generateDailyQualityAssertionSql } = require('./dailyQuality.js');
|
|
2
3
|
|
|
3
4
|
module.exports = {
|
|
4
5
|
itemRevenue: generateItemRevenueAssertionSql,
|
|
6
|
+
dailyQuality: generateDailyQualityAssertionSql,
|
|
5
7
|
};
|
|
@@ -269,8 +269,10 @@ ${excludedEventsSQL}`,
|
|
|
269
269
|
groupBy: ['session_id']
|
|
270
270
|
};
|
|
271
271
|
|
|
272
|
-
// item list attribution
|
|
273
|
-
|
|
272
|
+
// item list attribution CTEs:
|
|
273
|
+
// 1. item_list_attribution: unnest items from ecommerce events, compute attribution via window function
|
|
274
|
+
// 2. item_list_data: re-aggregate items with attributed list fields
|
|
275
|
+
const itemListSteps = itemListAttribution ? (() => {
|
|
274
276
|
const attrExpr = helpers.itemListAttributionExpr(
|
|
275
277
|
itemListAttribution.lookbackType,
|
|
276
278
|
timestampColumn,
|
|
@@ -278,7 +280,19 @@ ${excludedEventsSQL}`,
|
|
|
278
280
|
);
|
|
279
281
|
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
280
282
|
|
|
281
|
-
|
|
283
|
+
const attributionStep = {
|
|
284
|
+
name: 'item_list_attribution',
|
|
285
|
+
columns: {
|
|
286
|
+
'_item_list_attribution_row_id': '_item_list_attribution_row_id',
|
|
287
|
+
'event_name': 'event_name',
|
|
288
|
+
'item': 'item',
|
|
289
|
+
'_item_list_attr': attrExpr,
|
|
290
|
+
},
|
|
291
|
+
from: 'event_data, unnest(items) as item',
|
|
292
|
+
where: `event_name in (${ecommerceEventsFilter})`,
|
|
293
|
+
};
|
|
294
|
+
|
|
295
|
+
const dataStep = {
|
|
282
296
|
name: 'item_list_data',
|
|
283
297
|
columns: {
|
|
284
298
|
'_item_list_attribution_row_id': '_item_list_attribution_row_id',
|
|
@@ -290,19 +304,21 @@ ${excludedEventsSQL}`,
|
|
|
290
304
|
))
|
|
291
305
|
)`,
|
|
292
306
|
},
|
|
293
|
-
from:
|
|
307
|
+
from: 'item_list_attribution',
|
|
294
308
|
groupBy: ['_item_list_attribution_row_id'],
|
|
295
309
|
};
|
|
310
|
+
|
|
311
|
+
return [attributionStep, dataStep];
|
|
296
312
|
})() : null;
|
|
297
313
|
|
|
298
314
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
299
315
|
|
|
300
316
|
// When item list attribution is enabled, override the items column and exclude _item_list_attribution_row_id
|
|
301
317
|
// COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
|
|
302
|
-
const itemListOverrides =
|
|
318
|
+
const itemListOverrides = itemListSteps ? {
|
|
303
319
|
items: 'coalesce(item_list_data.items, event_data.items)',
|
|
304
320
|
} : {};
|
|
305
|
-
const itemListExcludedColumns =
|
|
321
|
+
const itemListExcludedColumns = itemListSteps ? ['_item_list_attribution_row_id'] : [];
|
|
306
322
|
|
|
307
323
|
// Join event_data and session_data, include additional logic
|
|
308
324
|
const finalStep = {
|
|
@@ -336,7 +352,7 @@ ${excludedEventsSQL}`,
|
|
|
336
352
|
},
|
|
337
353
|
from: 'event_data',
|
|
338
354
|
leftJoin: [
|
|
339
|
-
...(
|
|
355
|
+
...(itemListSteps ? [{
|
|
340
356
|
table: 'item_list_data',
|
|
341
357
|
condition: 'using(_item_list_attribution_row_id)'
|
|
342
358
|
}] : []),
|
|
@@ -350,7 +366,7 @@ ${excludedEventsSQL}`,
|
|
|
350
366
|
|
|
351
367
|
const steps = [
|
|
352
368
|
eventDataStep,
|
|
353
|
-
...(
|
|
369
|
+
...(itemListSteps ?? []),
|
|
354
370
|
sessionDataStep,
|
|
355
371
|
finalStep,
|
|
356
372
|
];
|