ga4-export-fixer 0.6.2-dev.1 → 0.6.2-dev.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -457,7 +457,7 @@ This creates the table along with the default-enabled assertions, using the same
457
457
 
458
458
  | Assertion | Name | Enabled by default | Description |
459
459
  | --------- | ---- | ------------------ | ----------- |
460
- | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
460
+ | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and raw export. Detects missing days, count and revenue mismatches, and non-final data inflation |
461
461
  | `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
462
462
 
463
463
  Assertions inherit the table's schema and tags from `dataformTableConfig`. Each assertion queries the last 5 days of data.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.6.2-dev.1",
3
+ "version": "0.6.2-dev.3",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -2,46 +2,27 @@ const helpers = require('../../../helpers/index.js');
2
2
  const utils = require('../../../utils.js');
3
3
  const { ga4EventsEnhancedConfig } = require('../config.js');
4
4
  const { validateEnhancedEventsConfig } = require('../validation.js');
5
+ const { buildDedupedRawSource } = require('./shared.js');
5
6
 
6
7
  const defaultConfig = { ...ga4EventsEnhancedConfig };
7
8
 
8
- /**
9
- * Builds a _table_suffix date filter for the assertion's raw-side query.
10
- *
11
- * Uses the low-level ga4ExportDateFilter() helper per enabled export type
12
- * with a fixed 5-day lookback window. This is intentionally separate from
13
- * the pipeline's ga4ExportDateFilters() which depends on incremental state
14
- * and BigQuery pre-operation variables.
15
- *
16
- * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
17
- * @returns {string} SQL fragment for a WHERE clause
18
- */
19
- const buildAssertionDateFilter = (includedExportTypes) => {
20
- const start = 'date_sub(current_date(), interval 5 day)';
21
- const end = 'current_date()';
22
-
23
- const filters = [
24
- includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
25
- includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
26
- includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
27
- ].filter(Boolean);
28
-
29
- return filters.join(' or ');
30
- };
9
+ const ASSERTION_LOOKBACK_DAYS = 5;
31
10
 
32
11
  /**
33
12
  * Generates a SQL assertion query that validates daily data quality between the
34
13
  * enhanced events table and the raw GA4 export data.
35
14
  *
36
- * The query compares session count, event count, and total item_revenue
37
- * aggregated per (event_date, data_is_final) for the last 5 days.
38
- * Returns violating rows -- 0 rows means the assertion passes.
15
+ * The query compares session count, event count, total item_revenue, and total
16
+ * purchase_revenue aggregated per (event_date, data_is_final) for the last 5
17
+ * days. Returns violating rows -- 0 rows means the assertion passes.
39
18
  *
40
- * Five violation types are detected:
19
+ * Six violation types are detected:
41
20
  * - MISSING_DAY: Raw data has events but enhanced table has none for this day
42
21
  * - SESSION_COUNT_MISMATCH: Final data session count differs
43
22
  * - EVENT_COUNT_MISMATCH: Final data event count differs
44
- * - REVENUE_MISMATCH: Final data total item_revenue differs
23
+ * - ITEM_REVENUE_MISMATCH: Final data total item_revenue differs
24
+ * - PURCHASE_REVENUE_MISMATCH: Final data total ecommerce.purchase_revenue differs
25
+ * (raw side applies fixEcommerceStruct() to mirror the enhanced pipeline's fix)
45
26
  * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
46
27
  *
47
28
  * @param {string} tableRef - Fully qualified reference to the enhanced table
@@ -51,15 +32,15 @@ const buildAssertionDateFilter = (includedExportTypes) => {
51
32
  const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
52
33
  const excludedEvents = mergedConfig.excludedEvents;
53
34
  const excludedEventsSQL = excludedEvents.length > 0
54
- ? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
55
- : '';
35
+ ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
36
+ : 'true';
56
37
 
57
38
  const dataIsFinalCondition = helpers.isFinalData(
58
39
  mergedConfig.dataIsFinal.detectionMethod,
59
40
  mergedConfig.dataIsFinal.dayThreshold
60
41
  );
61
42
 
62
- const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
43
+ const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
63
44
 
64
45
  return `with enhanced_daily as (
65
46
  select
@@ -67,11 +48,12 @@ const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
67
48
  data_is_final,
68
49
  count(distinct session_id) as session_count,
69
50
  count(*) as event_count,
70
- coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
51
+ coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue,
52
+ coalesce(sum(ecommerce.purchase_revenue), 0) as total_purchase_revenue
71
53
  from
72
54
  ${tableRef}
73
55
  where
74
- event_date >= date_sub(current_date(), interval 5 day)
56
+ event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
75
57
  group by event_date, data_is_final
76
58
  ),
77
59
  raw_daily as (
@@ -80,13 +62,12 @@ raw_daily as (
80
62
  ${dataIsFinalCondition} as data_is_final,
81
63
  count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
82
64
  count(*) as event_count,
83
- coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
65
+ coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue,
66
+ coalesce(sum(${helpers.fixEcommerceStruct()}.purchase_revenue), 0) as total_purchase_revenue
84
67
  from
85
- ${mergedConfig.sourceTable}
68
+ ${dedupedRawSource}
86
69
  where
87
- (${dateFilter})
88
70
  ${excludedEventsSQL}
89
- and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
90
71
  group by event_date, data_is_final
91
72
  ),
92
73
  daily_comparison as (
@@ -97,8 +78,10 @@ daily_comparison as (
97
78
  r.session_count as raw_sessions,
98
79
  e.event_count as enhanced_events,
99
80
  r.event_count as raw_events,
100
- round(e.total_item_revenue, 2) as enhanced_revenue,
101
- round(r.total_item_revenue, 2) as raw_revenue
81
+ round(e.total_item_revenue, 2) as enhanced_item_revenue,
82
+ round(r.total_item_revenue, 2) as raw_item_revenue,
83
+ round(e.total_purchase_revenue, 2) as enhanced_purchase_revenue,
84
+ round(r.total_purchase_revenue, 2) as raw_purchase_revenue
102
85
  from
103
86
  enhanced_daily e
104
87
  full outer join
@@ -111,8 +94,10 @@ select
111
94
  raw_sessions,
112
95
  enhanced_events,
113
96
  raw_events,
114
- enhanced_revenue,
115
- raw_revenue,
97
+ enhanced_item_revenue,
98
+ raw_item_revenue,
99
+ enhanced_purchase_revenue,
100
+ raw_purchase_revenue,
116
101
  violation_type
117
102
  from
118
103
  daily_comparison,
@@ -120,7 +105,8 @@ from
120
105
  if(enhanced_events is null and raw_events > 0, 'MISSING_DAY', null),
121
106
  if(data_is_final = true and enhanced_sessions != raw_sessions, 'SESSION_COUNT_MISMATCH', null),
122
107
  if(data_is_final = true and enhanced_events != raw_events, 'EVENT_COUNT_MISMATCH', null),
123
- if(data_is_final = true and enhanced_revenue != raw_revenue, 'REVENUE_MISMATCH', null),
108
+ if(data_is_final = true and enhanced_item_revenue != raw_item_revenue, 'ITEM_REVENUE_MISMATCH', null),
109
+ if(data_is_final = true and enhanced_purchase_revenue != raw_purchase_revenue, 'PURCHASE_REVENUE_MISMATCH', null),
124
110
  if(data_is_final = false and coalesce(enhanced_events, 0) > coalesce(raw_events, 0), 'NON_FINAL_EXCESS_EVENTS', null)
125
111
  ]) as violation_type
126
112
  where
@@ -131,9 +117,9 @@ where
131
117
  * Generates a daily quality assertion SQL query.
132
118
  *
133
119
  * Merges the provided config with defaults, validates, then generates a SQL
134
- * query comparing daily aggregates (session count, event count, item_revenue)
135
- * between the enhanced table and raw export data, and checks for missing days
136
- * and non-final data inflation.
120
+ * query comparing daily aggregates (session count, event count, item_revenue,
121
+ * ecommerce.purchase_revenue) between the enhanced table and raw export data,
122
+ * and checks for missing days and non-final data inflation.
137
123
  *
138
124
  * @param {string} tableRef - Fully qualified reference to the enhanced table.
139
125
  * @param {Object} config - User-provided table configuration.
@@ -2,38 +2,17 @@ const helpers = require('../../../helpers/index.js');
2
2
  const utils = require('../../../utils.js');
3
3
  const { ga4EventsEnhancedConfig } = require('../config.js');
4
4
  const { validateEnhancedEventsConfig } = require('../validation.js');
5
+ const { buildDedupedRawSource } = require('./shared.js');
5
6
 
6
7
  const defaultConfig = { ...ga4EventsEnhancedConfig };
7
8
 
9
+ const ASSERTION_LOOKBACK_DAYS = 5;
10
+
8
11
  // Ecommerce events that carry item data (excluding refund — refunds reverse revenue
9
12
  // and are handled separately in some pipelines, but item_revenue on refund rows
10
13
  // should still reconcile 1:1 between enhanced and raw).
11
14
  const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
12
15
 
13
- /**
14
- * Builds a _table_suffix date filter for the assertion's raw-side query.
15
- *
16
- * Uses the low-level ga4ExportDateFilter() helper per enabled export type
17
- * with a fixed 5-day lookback window. This is intentionally separate from
18
- * the pipeline's ga4ExportDateFilters() which depends on incremental state
19
- * and BigQuery pre-operation variables.
20
- *
21
- * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
22
- * @returns {string} SQL fragment for a WHERE clause
23
- */
24
- const buildAssertionDateFilter = (includedExportTypes) => {
25
- const start = 'date_sub(current_date(), interval 5 day)';
26
- const end = 'current_date()';
27
-
28
- const filters = [
29
- includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
30
- includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
31
- includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
32
- ].filter(Boolean);
33
-
34
- return filters.join(' or ');
35
- };
36
-
37
16
  /**
38
17
  * Generates a SQL assertion query that reconciles item_revenue between the
39
18
  * enhanced events table and the raw GA4 export data.
@@ -51,8 +30,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
51
30
  // excluded events filter (same logic as the enhanced table pipeline)
52
31
  const excludedEvents = mergedConfig.excludedEvents;
53
32
  const excludedEventsSQL = excludedEvents.length > 0
54
- ? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
55
- : '';
33
+ ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
34
+ : 'true';
56
35
 
57
36
  // data_is_final condition for the raw side
58
37
  const dataIsFinalCondition = helpers.isFinalData(
@@ -60,8 +39,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
60
39
  mergedConfig.dataIsFinal.dayThreshold
61
40
  );
62
41
 
63
- // date filter for the raw side (per-export-type, fixed 5-day window)
64
- const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
42
+ // deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
43
+ const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
65
44
 
66
45
  return `with enhanced_revenue as (
67
46
  select
@@ -74,7 +53,7 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
74
53
  unnest(items) as item
75
54
  where
76
55
  data_is_final = true
77
- and event_date >= date_sub(current_date(), interval 5 day)
56
+ and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
78
57
  and event_name in (${ecommerceEvents})
79
58
  group by event_date, item.item_id
80
59
  ),
@@ -85,14 +64,12 @@ raw_revenue as (
85
64
  sum(item.item_revenue) as total_item_revenue,
86
65
  count(*) as item_count
87
66
  from
88
- ${mergedConfig.sourceTable},
67
+ ${dedupedRawSource},
89
68
  unnest(items) as item
90
69
  where
91
- (${dateFilter})
92
70
  ${excludedEventsSQL}
93
71
  and event_name in (${ecommerceEvents})
94
72
  and ${dataIsFinalCondition}
95
- and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
96
73
  group by event_date, item.item_id
97
74
  )
98
75
  select
@@ -0,0 +1,83 @@
1
+ const helpers = require('../../../helpers/index.js');
2
+
3
+ /**
4
+ * Builds a _table_suffix date filter for the assertion's raw-side query.
5
+ *
6
+ * Uses the low-level ga4ExportDateFilter() helper per enabled export type
7
+ * over a caller-provided lookback window. Intentionally separate from the
8
+ * pipeline's ga4ExportDateFilters() which depends on incremental state
9
+ * and BigQuery pre-operation variables.
10
+ *
11
+ * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
12
+ * @param {number} lookbackDays - Number of days to look back from current_date().
13
+ * @returns {string} SQL fragment for a WHERE clause
14
+ */
15
+ const buildAssertionDateFilter = (includedExportTypes, lookbackDays) => {
16
+ const start = `date_sub(current_date(), interval ${lookbackDays} day)`;
17
+ const end = 'current_date()';
18
+
19
+ const filters = [
20
+ includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
21
+ includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
22
+ includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
23
+ ].filter(Boolean);
24
+
25
+ return filters.join(' or ');
26
+ };
27
+
28
+ /**
29
+ * Builds a deduplicated raw-source subquery for assertion use.
30
+ *
31
+ * Replicates what setPreOperations() does at pipeline time, without access
32
+ * to its BigQuery variables. Covers all seven non-empty combinations of
33
+ * includedExportTypes {daily, fresh, intraday}:
34
+ *
35
+ * - qualify dense_rank() over (partition by date, order by _table_suffix) = 1
36
+ * picks the highest-priority table per day. Alphabetical order gives
37
+ * daily ('20260115') < fresh ('fresh_20260115') < intraday ('intraday_20260115'),
38
+ * matching the pipeline's daily > fresh > intraday priority.
39
+ * - When fresh and intraday are both enabled, intraday rows that rank second
40
+ * for their date (directly behind fresh, i.e. no daily table) with event_timestamp >
41
+ * max(fresh.event_timestamp) are additionally admitted — matching the FRESH_MAX_EVENT_TIMESTAMP boundary.
42
+ *
43
+ * @param {Object} mergedConfig - Merged table configuration.
44
+ * @param {number} lookbackDays - Number of days to look back from current_date().
45
+ * @returns {string} SQL fragment: a parenthesized subquery usable in a FROM clause.
46
+ */
47
+ const buildDedupedRawSource = (mergedConfig, lookbackDays) => {
48
+ const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes, lookbackDays);
49
+ const freshAndIntraday = mergedConfig.includedExportTypes.fresh && mergedConfig.includedExportTypes.intraday;
50
+
51
+ const intradayException = freshAndIntraday
52
+ ? `
53
+ or (
54
+ starts_with(_table_suffix, 'intraday_')
55
+ and dense_rank() over (
56
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
57
+ order by _table_suffix
58
+ ) = 2
59
+ and event_timestamp > max(if(starts_with(_table_suffix, 'fresh_'), event_timestamp, null)) over (
60
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
61
+ )
62
+ )`
63
+ : '';
64
+
65
+ // _table_suffix is a pseudo-column and not propagated by SELECT *; select it
66
+ // explicitly so downstream CTEs (e.g., isFinalData('EXPORT_TYPE')) can still reference it.
67
+ return `(
68
+ select
69
+ *,
70
+ _table_suffix
71
+ from
72
+ ${mergedConfig.sourceTable}
73
+ where
74
+ (${dateFilter})
75
+ qualify
76
+ dense_rank() over (
77
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
78
+ order by _table_suffix
79
+ ) = 1${intradayException}
80
+ )`;
81
+ };
82
+
83
+ module.exports = { buildAssertionDateFilter, buildDedupedRawSource };