ga4-export-fixer 0.6.2-dev.1 → 0.6.2-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -2,32 +2,11 @@ const helpers = require('../../../helpers/index.js');
|
|
|
2
2
|
const utils = require('../../../utils.js');
|
|
3
3
|
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
4
|
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
const { buildDedupedRawSource } = require('./shared.js');
|
|
5
6
|
|
|
6
7
|
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
10
|
-
*
|
|
11
|
-
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
12
|
-
* with a fixed 5-day lookback window. This is intentionally separate from
|
|
13
|
-
* the pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
14
|
-
* and BigQuery pre-operation variables.
|
|
15
|
-
*
|
|
16
|
-
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
17
|
-
* @returns {string} SQL fragment for a WHERE clause
|
|
18
|
-
*/
|
|
19
|
-
const buildAssertionDateFilter = (includedExportTypes) => {
|
|
20
|
-
const start = 'date_sub(current_date(), interval 5 day)';
|
|
21
|
-
const end = 'current_date()';
|
|
22
|
-
|
|
23
|
-
const filters = [
|
|
24
|
-
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
25
|
-
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
26
|
-
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
27
|
-
].filter(Boolean);
|
|
28
|
-
|
|
29
|
-
return filters.join(' or ');
|
|
30
|
-
};
|
|
9
|
+
const ASSERTION_LOOKBACK_DAYS = 5;
|
|
31
10
|
|
|
32
11
|
/**
|
|
33
12
|
* Generates a SQL assertion query that validates daily data quality between the
|
|
@@ -51,15 +30,15 @@ const buildAssertionDateFilter = (includedExportTypes) => {
|
|
|
51
30
|
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
|
|
52
31
|
const excludedEvents = mergedConfig.excludedEvents;
|
|
53
32
|
const excludedEventsSQL = excludedEvents.length > 0
|
|
54
|
-
? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
55
|
-
: '';
|
|
33
|
+
? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
34
|
+
: 'true';
|
|
56
35
|
|
|
57
36
|
const dataIsFinalCondition = helpers.isFinalData(
|
|
58
37
|
mergedConfig.dataIsFinal.detectionMethod,
|
|
59
38
|
mergedConfig.dataIsFinal.dayThreshold
|
|
60
39
|
);
|
|
61
40
|
|
|
62
|
-
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
|
|
41
|
+
const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
|
|
63
42
|
|
|
64
43
|
return `with enhanced_daily as (
|
|
65
44
|
select
|
|
@@ -71,7 +50,7 @@ const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
|
|
|
71
50
|
from
|
|
72
51
|
${tableRef}
|
|
73
52
|
where
|
|
74
|
-
event_date >= date_sub(current_date(), interval 5 day)
|
|
53
|
+
event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
75
54
|
group by event_date, data_is_final
|
|
76
55
|
),
|
|
77
56
|
raw_daily as (
|
|
@@ -82,11 +61,9 @@ raw_daily as (
|
|
|
82
61
|
count(*) as event_count,
|
|
83
62
|
coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
|
|
84
63
|
from
|
|
85
|
-
${mergedConfig.sourceTable}
|
|
64
|
+
${dedupedRawSource}
|
|
86
65
|
where
|
|
87
|
-
(${dateFilter})
|
|
88
66
|
${excludedEventsSQL}
|
|
89
|
-
and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
|
|
90
67
|
group by event_date, data_is_final
|
|
91
68
|
),
|
|
92
69
|
daily_comparison as (
|
|
@@ -2,38 +2,17 @@ const helpers = require('../../../helpers/index.js');
|
|
|
2
2
|
const utils = require('../../../utils.js');
|
|
3
3
|
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
4
|
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
const { buildDedupedRawSource } = require('./shared.js');
|
|
5
6
|
|
|
6
7
|
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
8
|
|
|
9
|
+
const ASSERTION_LOOKBACK_DAYS = 5;
|
|
10
|
+
|
|
8
11
|
// Ecommerce events that carry item data (excluding refund — refunds reverse revenue
|
|
9
12
|
// and are handled separately in some pipelines, but item_revenue on refund rows
|
|
10
13
|
// should still reconcile 1:1 between enhanced and raw).
|
|
11
14
|
const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
|
|
12
15
|
|
|
13
|
-
/**
|
|
14
|
-
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
15
|
-
*
|
|
16
|
-
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
17
|
-
* with a fixed 5-day lookback window. This is intentionally separate from
|
|
18
|
-
* the pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
19
|
-
* and BigQuery pre-operation variables.
|
|
20
|
-
*
|
|
21
|
-
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
22
|
-
* @returns {string} SQL fragment for a WHERE clause
|
|
23
|
-
*/
|
|
24
|
-
const buildAssertionDateFilter = (includedExportTypes) => {
|
|
25
|
-
const start = 'date_sub(current_date(), interval 5 day)';
|
|
26
|
-
const end = 'current_date()';
|
|
27
|
-
|
|
28
|
-
const filters = [
|
|
29
|
-
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
30
|
-
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
31
|
-
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
32
|
-
].filter(Boolean);
|
|
33
|
-
|
|
34
|
-
return filters.join(' or ');
|
|
35
|
-
};
|
|
36
|
-
|
|
37
16
|
/**
|
|
38
17
|
* Generates a SQL assertion query that reconciles item_revenue between the
|
|
39
18
|
* enhanced events table and the raw GA4 export data.
|
|
@@ -51,8 +30,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
51
30
|
// excluded events filter (same logic as the enhanced table pipeline)
|
|
52
31
|
const excludedEvents = mergedConfig.excludedEvents;
|
|
53
32
|
const excludedEventsSQL = excludedEvents.length > 0
|
|
54
|
-
? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
55
|
-
: '';
|
|
33
|
+
? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
34
|
+
: 'true';
|
|
56
35
|
|
|
57
36
|
// data_is_final condition for the raw side
|
|
58
37
|
const dataIsFinalCondition = helpers.isFinalData(
|
|
@@ -60,8 +39,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
60
39
|
mergedConfig.dataIsFinal.dayThreshold
|
|
61
40
|
);
|
|
62
41
|
|
|
63
|
-
//
|
|
64
|
-
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
|
|
42
|
+
// deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
|
|
43
|
+
const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
|
|
65
44
|
|
|
66
45
|
return `with enhanced_revenue as (
|
|
67
46
|
select
|
|
@@ -74,7 +53,7 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
74
53
|
unnest(items) as item
|
|
75
54
|
where
|
|
76
55
|
data_is_final = true
|
|
77
|
-
and event_date >= date_sub(current_date(), interval 5 day)
|
|
56
|
+
and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
78
57
|
and event_name in (${ecommerceEvents})
|
|
79
58
|
group by event_date, item.item_id
|
|
80
59
|
),
|
|
@@ -85,14 +64,12 @@ raw_revenue as (
|
|
|
85
64
|
sum(item.item_revenue) as total_item_revenue,
|
|
86
65
|
count(*) as item_count
|
|
87
66
|
from
|
|
88
|
-
${mergedConfig.sourceTable},
|
|
67
|
+
${dedupedRawSource},
|
|
89
68
|
unnest(items) as item
|
|
90
69
|
where
|
|
91
|
-
(${dateFilter})
|
|
92
70
|
${excludedEventsSQL}
|
|
93
71
|
and event_name in (${ecommerceEvents})
|
|
94
72
|
and ${dataIsFinalCondition}
|
|
95
|
-
and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
|
|
96
73
|
group by event_date, item.item_id
|
|
97
74
|
)
|
|
98
75
|
select
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
const helpers = require('../../../helpers/index.js');
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
5
|
+
*
|
|
6
|
+
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
7
|
+
* over a caller-provided lookback window. Intentionally separate from the
|
|
8
|
+
* pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
9
|
+
* and BigQuery pre-operation variables.
|
|
10
|
+
*
|
|
11
|
+
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
12
|
+
* @param {number} lookbackDays - Number of days to look back from current_date().
|
|
13
|
+
* @returns {string} SQL fragment for a WHERE clause
|
|
14
|
+
*/
|
|
15
|
+
const buildAssertionDateFilter = (includedExportTypes, lookbackDays) => {
|
|
16
|
+
const start = `date_sub(current_date(), interval ${lookbackDays} day)`;
|
|
17
|
+
const end = 'current_date()';
|
|
18
|
+
|
|
19
|
+
const filters = [
|
|
20
|
+
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
21
|
+
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
22
|
+
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
23
|
+
].filter(Boolean);
|
|
24
|
+
|
|
25
|
+
return filters.join(' or ');
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Builds a deduplicated raw-source subquery for assertion use.
|
|
30
|
+
*
|
|
31
|
+
* Replicates what setPreOperations() does at pipeline time, without access
|
|
32
|
+
* to its BigQuery variables. Covers all seven combinations of
|
|
33
|
+
* includedExportTypes {daily, fresh, intraday}:
|
|
34
|
+
*
|
|
35
|
+
* - qualify dense_rank() over (partition by date, order by _table_suffix) = 1
|
|
36
|
+
* picks the highest-priority table per day. Alphabetical order gives
|
|
37
|
+
* daily ('20260115') < fresh ('fresh_20260115') < intraday ('intraday_20260115'),
|
|
38
|
+
* matching the pipeline's daily > fresh > intraday priority.
|
|
39
|
+
* - When fresh and intraday are both enabled, intraday rows with
|
|
40
|
+
* event_timestamp > max(fresh.event_timestamp) for the same date are
|
|
41
|
+
* additionally admitted — matching the FRESH_MAX_EVENT_TIMESTAMP boundary.
|
|
42
|
+
*
|
|
43
|
+
* @param {Object} mergedConfig - Merged table configuration.
|
|
44
|
+
* @param {number} lookbackDays - Number of days to look back from current_date().
|
|
45
|
+
* @returns {string} SQL fragment: a parenthesized subquery usable in a FROM clause.
|
|
46
|
+
*/
|
|
47
|
+
const buildDedupedRawSource = (mergedConfig, lookbackDays) => {
|
|
48
|
+
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes, lookbackDays);
|
|
49
|
+
const freshAndIntraday = mergedConfig.includedExportTypes.fresh && mergedConfig.includedExportTypes.intraday;
|
|
50
|
+
|
|
51
|
+
const intradayException = freshAndIntraday
|
|
52
|
+
? `
|
|
53
|
+
or (
|
|
54
|
+
starts_with(_table_suffix, 'intraday_')
|
|
55
|
+
and dense_rank() over (
|
|
56
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
57
|
+
order by _table_suffix
|
|
58
|
+
) = 2
|
|
59
|
+
and event_timestamp > max(if(starts_with(_table_suffix, 'fresh_'), event_timestamp, null)) over (
|
|
60
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
61
|
+
)
|
|
62
|
+
)`
|
|
63
|
+
: '';
|
|
64
|
+
|
|
65
|
+
// _table_suffix is a pseudo-column and not propagated by SELECT *; select it
|
|
66
|
+
// explicitly so downstream CTEs (e.g., isFinalData('EXPORT_TYPE')) can still reference it.
|
|
67
|
+
return `(
|
|
68
|
+
select
|
|
69
|
+
*,
|
|
70
|
+
_table_suffix
|
|
71
|
+
from
|
|
72
|
+
${mergedConfig.sourceTable}
|
|
73
|
+
where
|
|
74
|
+
(${dateFilter})
|
|
75
|
+
qualify
|
|
76
|
+
dense_rank() over (
|
|
77
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
78
|
+
order by _table_suffix
|
|
79
|
+
) = 1${intradayException}
|
|
80
|
+
)`;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
module.exports = { buildAssertionDateFilter, buildDedupedRawSource };
|