ga4-export-fixer 0.6.2-dev.3 → 0.6.2-dev.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -457,19 +457,18 @@ This creates the table along with the default-enabled assertions, using the same
457
457
 
458
458
  | Assertion | Name | Enabled by default | Description |
459
459
  | --------- | ---- | ------------------ | ----------- |
460
- | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
461
- | `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
460
+ | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and raw export. Also reconciles item_revenue at the (event_date, item_id) grain on purchase events for days both sides consider final. Detects missing days, count mismatches, and non-final data inflation |
462
461
 
463
- Assertions inherit the table's schema and tags from `dataformTableConfig`. Each assertion queries the last 5 days of data.
462
+ The assertion inherits the table's schema and tags from `dataformTableConfig` and queries the last 5 days of data.
464
463
 
465
464
  #### Selective Assertions
466
465
 
467
- Enable opt-in assertions by setting them to `true`, or disable default-enabled ones by setting them to `false`:
466
+ Disable the assertion by setting it to `false`:
468
467
 
469
468
  ```javascript
470
469
  ga4EventsEnhanced.createTable(publish, config, {
471
470
  assert,
472
- assertions: { dailyQuality: true, itemRevenue: true },
471
+ assertions: { dailyQuality: false },
473
472
  });
474
473
  ```
475
474
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.6.2-dev.3",
3
+ "version": "0.6.2-dev.4",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -12,11 +12,15 @@ const ASSERTION_LOOKBACK_DAYS = 5;
12
12
  * Generates a SQL assertion query that validates daily data quality between the
13
13
  * enhanced events table and the raw GA4 export data.
14
14
  *
15
- * The query compares session count, event count, total item_revenue, and total
16
- * purchase_revenue aggregated per (event_date, data_is_final) for the last 5
17
- * days. Returns violating rows -- 0 rows means the assertion passes.
15
+ * The query runs two aggregations for the last 5 days and unions the violations:
16
+ * - Day grain: (event_date, data_is_final) -- session/event counts, item_revenue,
17
+ * ecommerce.purchase_revenue.
18
+ * - Item-id grain: (event_date, item_id) on purchase events for days both sides
19
+ * consider final -- validates per-item_id revenue and item-row count.
18
20
  *
19
- * Six violation types are detected:
21
+ * Returns violating rows -- 0 rows means the assertion passes.
22
+ *
23
+ * Eight violation types are detected:
20
24
  * - MISSING_DAY: Raw data has events but enhanced table has none for this day
21
25
  * - SESSION_COUNT_MISMATCH: Final data session count differs
22
26
  * - EVENT_COUNT_MISMATCH: Final data event count differs
@@ -24,6 +28,11 @@ const ASSERTION_LOOKBACK_DAYS = 5;
24
28
  * - PURCHASE_REVENUE_MISMATCH: Final data total ecommerce.purchase_revenue differs
25
29
  * (raw side applies fixEcommerceStruct() to mirror the enhanced pipeline's fix)
26
30
  * - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
31
+ * - ITEM_REVENUE_MISMATCH_BY_ID: Per-item_id item_revenue differs on a shared-final day
32
+ * - ITEM_COUNT_MISMATCH_BY_ID: Per-item_id purchase item-row count differs on a shared-final day
33
+ *
34
+ * Day-level rows leave item_id / enhanced_item_count / raw_item_count NULL.
35
+ * Item-id-level rows leave session / event / purchase_revenue columns NULL.
27
36
  *
28
37
  * @param {string} tableRef - Fully qualified reference to the enhanced table
29
38
  * @param {Object} mergedConfig - Merged table configuration (after merge + validation)
@@ -86,10 +95,63 @@ daily_comparison as (
86
95
  enhanced_daily e
87
96
  full outer join
88
97
  raw_daily r using(event_date, data_is_final)
98
+ ),
99
+ enhanced_items as (
100
+ select
101
+ event_date,
102
+ item.item_id,
103
+ sum(item.item_revenue) as total_item_revenue,
104
+ count(*) as item_count
105
+ from
106
+ ${tableRef},
107
+ unnest(items) as item
108
+ where
109
+ data_is_final = true
110
+ and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
111
+ and event_name = 'purchase'
112
+ group by event_date, item.item_id
113
+ ),
114
+ raw_items as (
115
+ select
116
+ cast(event_date as date format 'YYYYMMDD') as event_date,
117
+ item.item_id,
118
+ sum(item.item_revenue) as total_item_revenue,
119
+ count(*) as item_count
120
+ from
121
+ ${dedupedRawSource},
122
+ unnest(items) as item
123
+ where
124
+ ${excludedEventsSQL}
125
+ and event_name = 'purchase'
126
+ and ${dataIsFinalCondition}
127
+ group by event_date, item.item_id
128
+ ),
129
+ shared_final_days as (
130
+ select event_date
131
+ from daily_comparison
132
+ where data_is_final = true
133
+ and enhanced_events is not null
134
+ and raw_events is not null
135
+ ),
136
+ item_comparison as (
137
+ select
138
+ coalesce(e.event_date, r.event_date) as event_date,
139
+ coalesce(e.item_id, r.item_id) as item_id,
140
+ round(e.total_item_revenue, 2) as enhanced_item_revenue,
141
+ round(r.total_item_revenue, 2) as raw_item_revenue,
142
+ e.item_count as enhanced_item_count,
143
+ r.item_count as raw_item_count
144
+ from
145
+ enhanced_items e
146
+ full outer join
147
+ raw_items r using(event_date, item_id)
148
+ where
149
+ coalesce(e.event_date, r.event_date) in (select event_date from shared_final_days)
89
150
  )
90
151
  select
91
152
  event_date,
92
153
  data_is_final,
154
+ null as item_id,
93
155
  enhanced_sessions,
94
156
  raw_sessions,
95
157
  enhanced_events,
@@ -98,6 +160,8 @@ select
98
160
  raw_item_revenue,
99
161
  enhanced_purchase_revenue,
100
162
  raw_purchase_revenue,
163
+ cast(null as int64) as enhanced_item_count,
164
+ cast(null as int64) as raw_item_count,
101
165
  violation_type
102
166
  from
103
167
  daily_comparison,
@@ -109,6 +173,30 @@ from
109
173
  if(data_is_final = true and enhanced_purchase_revenue != raw_purchase_revenue, 'PURCHASE_REVENUE_MISMATCH', null),
110
174
  if(data_is_final = false and coalesce(enhanced_events, 0) > coalesce(raw_events, 0), 'NON_FINAL_EXCESS_EVENTS', null)
111
175
  ]) as violation_type
176
+ where
177
+ violation_type is not null
178
+ union all
179
+ select
180
+ event_date,
181
+ true as data_is_final,
182
+ item_id,
183
+ cast(null as int64) as enhanced_sessions,
184
+ cast(null as int64) as raw_sessions,
185
+ cast(null as int64) as enhanced_events,
186
+ cast(null as int64) as raw_events,
187
+ enhanced_item_revenue,
188
+ raw_item_revenue,
189
+ cast(null as float64) as enhanced_purchase_revenue,
190
+ cast(null as float64) as raw_purchase_revenue,
191
+ enhanced_item_count,
192
+ raw_item_count,
193
+ violation_type
194
+ from
195
+ item_comparison,
196
+ unnest([
197
+ if(round(coalesce(enhanced_item_revenue, 0), 2) != round(coalesce(raw_item_revenue, 0), 2), 'ITEM_REVENUE_MISMATCH_BY_ID', null),
198
+ if(coalesce(enhanced_item_count, 0) != coalesce(raw_item_count, 0), 'ITEM_COUNT_MISMATCH_BY_ID', null)
199
+ ]) as violation_type
112
200
  where
113
201
  violation_type is not null`;
114
202
  };
@@ -119,7 +207,8 @@ where
119
207
  * Merges the provided config with defaults, validates, then generates a SQL
120
208
  * query comparing daily aggregates (session count, event count, item_revenue,
121
209
  * ecommerce.purchase_revenue) between the enhanced table and raw export data,
122
- * and checks for missing days and non-final data inflation.
210
+ * plus a per-item_id revenue/row-count check on purchase events for shared-final
211
+ * days. Also checks for missing days and non-final data inflation.
123
212
  *
124
213
  * @param {string} tableRef - Fully qualified reference to the enhanced table.
125
214
  * @param {Object} config - User-provided table configuration.
@@ -1,11 +1,8 @@
1
- const { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql } = require('./itemRevenue.js');
2
1
  const { generateDailyQualityAssertionSql, _generateDailyQualityAssertionSql } = require('./dailyQuality.js');
3
2
 
4
3
  module.exports = {
5
- itemRevenue: generateItemRevenueAssertionSql,
6
4
  dailyQuality: generateDailyQualityAssertionSql,
7
5
  _internal: {
8
6
  dailyQuality: { generate: _generateDailyQualityAssertionSql, defaultName: 'daily_quality' },
9
- itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue', enabledByDefault: false },
10
7
  },
11
8
  };
@@ -1,126 +0,0 @@
1
- const helpers = require('../../../helpers/index.js');
2
- const utils = require('../../../utils.js');
3
- const { ga4EventsEnhancedConfig } = require('../config.js');
4
- const { validateEnhancedEventsConfig } = require('../validation.js');
5
- const { buildDedupedRawSource } = require('./shared.js');
6
-
7
- const defaultConfig = { ...ga4EventsEnhancedConfig };
8
-
9
- const ASSERTION_LOOKBACK_DAYS = 5;
10
-
11
- // Ecommerce events that carry item data (excluding refund — refunds reverse revenue
12
- // and are handled separately in some pipelines, but item_revenue on refund rows
13
- // should still reconcile 1:1 between enhanced and raw).
14
- const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
15
-
16
- /**
17
- * Generates a SQL assertion query that reconciles item_revenue between the
18
- * enhanced events table and the raw GA4 export data.
19
- *
20
- * The query compares item_revenue grouped by (event_date, item_id) for the
21
- * last 5 days of final data. Returns mismatched rows — 0 rows means the
22
- * assertion passes.
23
- *
24
- * @param {string} tableRef - Fully qualified reference to the enhanced table
25
- * (e.g., ctx.ref('ga4_events_enhanced_123456789') in Dataform, or a backtick-quoted string).
26
- * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
27
- * @returns {string} SQL query returning violating rows
28
- */
29
- const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
30
- // excluded events filter (same logic as the enhanced table pipeline)
31
- const excludedEvents = mergedConfig.excludedEvents;
32
- const excludedEventsSQL = excludedEvents.length > 0
33
- ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
34
- : 'true';
35
-
36
- // data_is_final condition for the raw side
37
- const dataIsFinalCondition = helpers.isFinalData(
38
- mergedConfig.dataIsFinal.detectionMethod,
39
- mergedConfig.dataIsFinal.dayThreshold
40
- );
41
-
42
- // deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
43
- const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
44
-
45
- return `with enhanced_revenue as (
46
- select
47
- event_date,
48
- item.item_id,
49
- sum(item.item_revenue) as total_item_revenue,
50
- count(*) as item_count
51
- from
52
- ${tableRef},
53
- unnest(items) as item
54
- where
55
- data_is_final = true
56
- and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
57
- and event_name in (${ecommerceEvents})
58
- group by event_date, item.item_id
59
- ),
60
- raw_revenue as (
61
- select
62
- cast(event_date as date format 'YYYYMMDD') as event_date,
63
- item.item_id,
64
- sum(item.item_revenue) as total_item_revenue,
65
- count(*) as item_count
66
- from
67
- ${dedupedRawSource},
68
- unnest(items) as item
69
- where
70
- ${excludedEventsSQL}
71
- and event_name in (${ecommerceEvents})
72
- and ${dataIsFinalCondition}
73
- group by event_date, item.item_id
74
- )
75
- select
76
- coalesce(e.event_date, r.event_date) as event_date,
77
- coalesce(e.item_id, r.item_id) as item_id,
78
- e.total_item_revenue as enhanced_revenue,
79
- r.total_item_revenue as raw_revenue,
80
- e.item_count as enhanced_count,
81
- r.item_count as raw_count
82
- from
83
- enhanced_revenue e
84
- full outer join
85
- raw_revenue r using(event_date, item_id)
86
- where
87
- round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
88
- or e.item_count != r.item_count
89
- or e.event_date is null
90
- or r.event_date is null`;
91
- };
92
-
93
- /**
94
- * Generates an item_revenue reconciliation assertion SQL query.
95
- *
96
- * Merges the provided config with defaults, validates, then generates a SQL
97
- * query comparing item_revenue between the enhanced table and raw export data.
98
- *
99
- * @param {string} tableRef - Fully qualified reference to the enhanced table.
100
- * @param {Object} config - User-provided table configuration.
101
- * @returns {string} SQL query returning violating rows (0 rows = pass)
102
- */
103
- const generateItemRevenueAssertionSql = (tableRef, config) => {
104
- if (!tableRef || typeof tableRef !== 'string' || !tableRef.trim()) {
105
- throw new Error('assertions.itemRevenue: tableRef is required and must be a non-empty string (e.g., ctx.ref(\'table_name\') or \'`project.dataset.table`\').');
106
- }
107
- const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
108
-
109
- // The assertion interpolates sourceTable directly into SQL (no Dataform ctx available).
110
- // If sourceTable is still a Dataform reference object, it would render as [object Object].
111
- if (utils.isDataformTableReferenceObject(mergedConfig.sourceTable)) {
112
- throw new Error(
113
- 'assertions.itemRevenue: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ' +
114
- 'Resolve it with ctx.ref() before passing it to the assertion:\n\n' +
115
- ' .query(ctx => ga4EventsEnhanced.assertions.itemRevenue(\n' +
116
- ' ctx.ref(\'enhanced_table_name\'),\n' +
117
- ' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n' +
118
- ' ))'
119
- );
120
- }
121
-
122
- validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });
123
- return _generateItemRevenueAssertionSql(tableRef, mergedConfig);
124
- };
125
-
126
- module.exports = { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql };