ga4-export-fixer 0.6.2-dev.2 → 0.6.2-dev.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -457,19 +457,18 @@ This creates the table along with the default-enabled assertions, using the same
457
457
 
458
458
  | Assertion | Name | Enabled by default | Description |
459
459
  | --------- | ---- | ------------------ | ----------- |
460
- | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
461
- | `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
460
+ | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and the raw export. Also reconciles item_revenue and item-row counts at the (event_date, item_id) grain on purchase events, for days that both the enhanced table and the raw export treat as final. Detects missing days, count mismatches, and non-final data inflation. |
462
461
 
463
- Assertions inherit the table's schema and tags from `dataformTableConfig`. Each assertion queries the last 5 days of data.
462
+ The assertion inherits the table's schema and tags from `dataformTableConfig` and queries the last 5 days of data.
464
463
 
465
464
  #### Selective Assertions
466
465
 
467
- Enable opt-in assertions by setting them to `true`, or disable default-enabled ones by setting them to `false`:
466
+ Disable the assertion by setting it to `false`:
468
467
 
469
468
  ```javascript
470
469
  ga4EventsEnhanced.createTable(publish, config, {
471
470
  assert,
472
- assertions: { dailyQuality: true, itemRevenue: true },
471
+ assertions: { dailyQuality: false },
473
472
  });
474
473
  ```
475
474
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.6.2-dev.2",
3
+ "version": "0.6.2-dev.4",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -12,16 +12,27 @@ const ASSERTION_LOOKBACK_DAYS = 5;
12
12
  * Generates a SQL assertion query that validates daily data quality between the
13
13
  * enhanced events table and the raw GA4 export data.
14
14
  *
15
- * The query compares session count, event count, and total item_revenue
16
- * aggregated per (event_date, data_is_final) for the last 5 days.
15
+ * The query runs two aggregations for the last 5 days and unions the violations:
16
+ * - Day grain: (event_date, data_is_final) -- session/event counts, item_revenue,
17
+ * ecommerce.purchase_revenue.
18
+ * - Item-id grain: (event_date, item_id) on purchase events for days both sides
19
+ * consider final -- validates per-item_id revenue and item-row count.
20
+ *
17
21
  * Returns violating rows -- 0 rows means the assertion passes.
18
22
  *
19
- * Five violation types are detected:
23
+ * Eight violation types are detected:
20
24
  * - MISSING_DAY: Raw data has events but enhanced table has none for this day
21
25
  * - SESSION_COUNT_MISMATCH: Final data session count differs
22
26
  * - EVENT_COUNT_MISMATCH: Final data event count differs
23
- * - REVENUE_MISMATCH: Final data total item_revenue differs
27
+ * - ITEM_REVENUE_MISMATCH: Final data total item_revenue differs
28
+ * - PURCHASE_REVENUE_MISMATCH: Final data total ecommerce.purchase_revenue differs
29
+ * (raw side applies fixEcommerceStruct() to mirror the enhanced pipeline's fix)
24
30
  * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
31
+ * - ITEM_REVENUE_MISMATCH_BY_ID: Per-item_id item_revenue differs on a shared-final day
32
+ * - ITEM_COUNT_MISMATCH_BY_ID: Per-item_id purchase item-row count differs on a shared-final day
33
+ *
34
+ * Day-level rows leave item_id / enhanced_item_count / raw_item_count NULL.
35
+ * Item-id-level rows leave session / event / purchase_revenue columns NULL.
25
36
  *
26
37
  * @param {string} tableRef - Fully qualified reference to the enhanced table
27
38
  * @param {Object} mergedConfig - Merged table configuration (after merge + validation)
@@ -46,7 +57,8 @@ const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
46
57
  data_is_final,
47
58
  count(distinct session_id) as session_count,
48
59
  count(*) as event_count,
49
- coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
60
+ coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue,
61
+ coalesce(sum(ecommerce.purchase_revenue), 0) as total_purchase_revenue
50
62
  from
51
63
  ${tableRef}
52
64
  where
@@ -59,7 +71,8 @@ raw_daily as (
59
71
  ${dataIsFinalCondition} as data_is_final,
60
72
  count(distinct concat(user_pseudo_id, cast((select value.int_value from unnest(event_params) where key = 'ga_session_id') as string))) as session_count,
61
73
  count(*) as event_count,
62
- coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
74
+ coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue,
75
+ coalesce(sum(${helpers.fixEcommerceStruct()}.purchase_revenue), 0) as total_purchase_revenue
63
76
  from
64
77
  ${dedupedRawSource}
65
78
  where
@@ -74,22 +87,81 @@ daily_comparison as (
74
87
  r.session_count as raw_sessions,
75
88
  e.event_count as enhanced_events,
76
89
  r.event_count as raw_events,
77
- round(e.total_item_revenue, 2) as enhanced_revenue,
78
- round(r.total_item_revenue, 2) as raw_revenue
90
+ round(e.total_item_revenue, 2) as enhanced_item_revenue,
91
+ round(r.total_item_revenue, 2) as raw_item_revenue,
92
+ round(e.total_purchase_revenue, 2) as enhanced_purchase_revenue,
93
+ round(r.total_purchase_revenue, 2) as raw_purchase_revenue
79
94
  from
80
95
  enhanced_daily e
81
96
  full outer join
82
97
  raw_daily r using(event_date, data_is_final)
98
+ ),
99
+ enhanced_items as (
100
+ select
101
+ event_date,
102
+ item.item_id,
103
+ sum(item.item_revenue) as total_item_revenue,
104
+ count(*) as item_count
105
+ from
106
+ ${tableRef},
107
+ unnest(items) as item
108
+ where
109
+ data_is_final = true
110
+ and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
111
+ and event_name = 'purchase'
112
+ group by event_date, item.item_id
113
+ ),
114
+ raw_items as (
115
+ select
116
+ cast(event_date as date format 'YYYYMMDD') as event_date,
117
+ item.item_id,
118
+ sum(item.item_revenue) as total_item_revenue,
119
+ count(*) as item_count
120
+ from
121
+ ${dedupedRawSource},
122
+ unnest(items) as item
123
+ where
124
+ ${excludedEventsSQL}
125
+ and event_name = 'purchase'
126
+ and ${dataIsFinalCondition}
127
+ group by event_date, item.item_id
128
+ ),
129
+ shared_final_days as (
130
+ select event_date
131
+ from daily_comparison
132
+ where data_is_final = true
133
+ and enhanced_events is not null
134
+ and raw_events is not null
135
+ ),
136
+ item_comparison as (
137
+ select
138
+ coalesce(e.event_date, r.event_date) as event_date,
139
+ coalesce(e.item_id, r.item_id) as item_id,
140
+ round(e.total_item_revenue, 2) as enhanced_item_revenue,
141
+ round(r.total_item_revenue, 2) as raw_item_revenue,
142
+ e.item_count as enhanced_item_count,
143
+ r.item_count as raw_item_count
144
+ from
145
+ enhanced_items e
146
+ full outer join
147
+ raw_items r using(event_date, item_id)
148
+ where
149
+ coalesce(e.event_date, r.event_date) in (select event_date from shared_final_days)
83
150
  )
84
151
  select
85
152
  event_date,
86
153
  data_is_final,
154
+ null as item_id,
87
155
  enhanced_sessions,
88
156
  raw_sessions,
89
157
  enhanced_events,
90
158
  raw_events,
91
- enhanced_revenue,
92
- raw_revenue,
159
+ enhanced_item_revenue,
160
+ raw_item_revenue,
161
+ enhanced_purchase_revenue,
162
+ raw_purchase_revenue,
163
+ cast(null as int64) as enhanced_item_count,
164
+ cast(null as int64) as raw_item_count,
93
165
  violation_type
94
166
  from
95
167
  daily_comparison,
@@ -97,9 +169,34 @@ from
97
169
  if(enhanced_events is null and raw_events > 0, 'MISSING_DAY', null),
98
170
  if(data_is_final = true and enhanced_sessions != raw_sessions, 'SESSION_COUNT_MISMATCH', null),
99
171
  if(data_is_final = true and enhanced_events != raw_events, 'EVENT_COUNT_MISMATCH', null),
100
- if(data_is_final = true and enhanced_revenue != raw_revenue, 'REVENUE_MISMATCH', null),
172
+ if(data_is_final = true and enhanced_item_revenue != raw_item_revenue, 'ITEM_REVENUE_MISMATCH', null),
173
+ if(data_is_final = true and enhanced_purchase_revenue != raw_purchase_revenue, 'PURCHASE_REVENUE_MISMATCH', null),
101
174
  if(data_is_final = false and coalesce(enhanced_events, 0) > coalesce(raw_events, 0), 'NON_FINAL_EXCESS_EVENTS', null)
102
175
  ]) as violation_type
176
+ where
177
+ violation_type is not null
178
+ union all
179
+ select
180
+ event_date,
181
+ true as data_is_final,
182
+ item_id,
183
+ cast(null as int64) as enhanced_sessions,
184
+ cast(null as int64) as raw_sessions,
185
+ cast(null as int64) as enhanced_events,
186
+ cast(null as int64) as raw_events,
187
+ enhanced_item_revenue,
188
+ raw_item_revenue,
189
+ cast(null as float64) as enhanced_purchase_revenue,
190
+ cast(null as float64) as raw_purchase_revenue,
191
+ enhanced_item_count,
192
+ raw_item_count,
193
+ violation_type
194
+ from
195
+ item_comparison,
196
+ unnest([
197
+ if(round(coalesce(enhanced_item_revenue, 0), 2) != round(coalesce(raw_item_revenue, 0), 2), 'ITEM_REVENUE_MISMATCH_BY_ID', null),
198
+ if(coalesce(enhanced_item_count, 0) != coalesce(raw_item_count, 0), 'ITEM_COUNT_MISMATCH_BY_ID', null)
199
+ ]) as violation_type
103
200
  where
104
201
  violation_type is not null`;
105
202
  };
@@ -108,9 +205,10 @@ where
108
205
  * Generates a daily quality assertion SQL query.
109
206
  *
110
207
  * Merges the provided config with defaults, validates, then generates a SQL
111
- * query comparing daily aggregates (session count, event count, item_revenue)
112
- * between the enhanced table and raw export data, and checks for missing days
113
- * and non-final data inflation.
208
+ * query comparing daily aggregates (session count, event count, item_revenue,
209
+ * ecommerce.purchase_revenue) between the enhanced table and raw export data,
210
+ * plus a per-item_id revenue/row-count check on purchase events for shared-final
211
+ * days. Also checks for missing days and non-final data inflation.
114
212
  *
115
213
  * @param {string} tableRef - Fully qualified reference to the enhanced table.
116
214
  * @param {Object} config - User-provided table configuration.
@@ -1,11 +1,8 @@
1
- const { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql } = require('./itemRevenue.js');
2
1
  const { generateDailyQualityAssertionSql, _generateDailyQualityAssertionSql } = require('./dailyQuality.js');
3
2
 
4
3
  module.exports = {
5
- itemRevenue: generateItemRevenueAssertionSql,
6
4
  dailyQuality: generateDailyQualityAssertionSql,
7
5
  _internal: {
8
6
  dailyQuality: { generate: _generateDailyQualityAssertionSql, defaultName: 'daily_quality' },
9
- itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue', enabledByDefault: false },
10
7
  },
11
8
  };
@@ -1,126 +0,0 @@
1
- const helpers = require('../../../helpers/index.js');
2
- const utils = require('../../../utils.js');
3
- const { ga4EventsEnhancedConfig } = require('../config.js');
4
- const { validateEnhancedEventsConfig } = require('../validation.js');
5
- const { buildDedupedRawSource } = require('./shared.js');
6
-
7
- const defaultConfig = { ...ga4EventsEnhancedConfig };
8
-
9
- const ASSERTION_LOOKBACK_DAYS = 5;
10
-
11
- // Ecommerce events that carry item data (excluding refund — refunds reverse revenue
12
- // and are handled separately in some pipelines, but item_revenue on refund rows
13
- // should still reconcile 1:1 between enhanced and raw).
14
- const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
15
-
16
- /**
17
- * Generates a SQL assertion query that reconciles item_revenue between the
18
- * enhanced events table and the raw GA4 export data.
19
- *
20
- * The query compares item_revenue grouped by (event_date, item_id) for the
21
- * last 5 days of final data. Returns mismatched rows — 0 rows means the
22
- * assertion passes.
23
- *
24
- * @param {string} tableRef - Fully qualified reference to the enhanced table
25
- * (e.g., ctx.ref('ga4_events_enhanced_123456789') in Dataform, or a backtick-quoted string).
26
- * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
27
- * @returns {string} SQL query returning violating rows
28
- */
29
- const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
30
- // excluded events filter (same logic as the enhanced table pipeline)
31
- const excludedEvents = mergedConfig.excludedEvents;
32
- const excludedEventsSQL = excludedEvents.length > 0
33
- ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
34
- : 'true';
35
-
36
- // data_is_final condition for the raw side
37
- const dataIsFinalCondition = helpers.isFinalData(
38
- mergedConfig.dataIsFinal.detectionMethod,
39
- mergedConfig.dataIsFinal.dayThreshold
40
- );
41
-
42
- // deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
43
- const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
44
-
45
- return `with enhanced_revenue as (
46
- select
47
- event_date,
48
- item.item_id,
49
- sum(item.item_revenue) as total_item_revenue,
50
- count(*) as item_count
51
- from
52
- ${tableRef},
53
- unnest(items) as item
54
- where
55
- data_is_final = true
56
- and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
57
- and event_name in (${ecommerceEvents})
58
- group by event_date, item.item_id
59
- ),
60
- raw_revenue as (
61
- select
62
- cast(event_date as date format 'YYYYMMDD') as event_date,
63
- item.item_id,
64
- sum(item.item_revenue) as total_item_revenue,
65
- count(*) as item_count
66
- from
67
- ${dedupedRawSource},
68
- unnest(items) as item
69
- where
70
- ${excludedEventsSQL}
71
- and event_name in (${ecommerceEvents})
72
- and ${dataIsFinalCondition}
73
- group by event_date, item.item_id
74
- )
75
- select
76
- coalesce(e.event_date, r.event_date) as event_date,
77
- coalesce(e.item_id, r.item_id) as item_id,
78
- e.total_item_revenue as enhanced_revenue,
79
- r.total_item_revenue as raw_revenue,
80
- e.item_count as enhanced_count,
81
- r.item_count as raw_count
82
- from
83
- enhanced_revenue e
84
- full outer join
85
- raw_revenue r using(event_date, item_id)
86
- where
87
- round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
88
- or e.item_count != r.item_count
89
- or e.event_date is null
90
- or r.event_date is null`;
91
- };
92
-
93
- /**
94
- * Generates an item_revenue reconciliation assertion SQL query.
95
- *
96
- * Merges the provided config with defaults, validates, then generates a SQL
97
- * query comparing item_revenue between the enhanced table and raw export data.
98
- *
99
- * @param {string} tableRef - Fully qualified reference to the enhanced table.
100
- * @param {Object} config - User-provided table configuration.
101
- * @returns {string} SQL query returning violating rows (0 rows = pass)
102
- */
103
- const generateItemRevenueAssertionSql = (tableRef, config) => {
104
- if (!tableRef || typeof tableRef !== 'string' || !tableRef.trim()) {
105
- throw new Error('assertions.itemRevenue: tableRef is required and must be a non-empty string (e.g., ctx.ref(\'table_name\') or \'`project.dataset.table`\').');
106
- }
107
- const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
108
-
109
- // The assertion interpolates sourceTable directly into SQL (no Dataform ctx available).
110
- // If sourceTable is still a Dataform reference object, it would render as [object Object].
111
- if (utils.isDataformTableReferenceObject(mergedConfig.sourceTable)) {
112
- throw new Error(
113
- 'assertions.itemRevenue: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ' +
114
- 'Resolve it with ctx.ref() before passing it to the assertion:\n\n' +
115
- ' .query(ctx => ga4EventsEnhanced.assertions.itemRevenue(\n' +
116
- ' ctx.ref(\'enhanced_table_name\'),\n' +
117
- ' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n' +
118
- ' ))'
119
- );
120
- }
121
-
122
- validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });
123
- return _generateItemRevenueAssertionSql(tableRef, mergedConfig);
124
- };
125
-
126
- module.exports = { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql };