ga4-export-fixer 0.6.2-dev.3 → 0.6.2-dev.4
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -457,19 +457,18 @@ This creates the table along with the default-enabled assertions, using the same
|
|
|
457
457
|
|
|
458
458
|
| Assertion | Name | Enabled by default | Description |
|
|
459
459
|
| --------- | ---- | ------------------ | ----------- |
|
|
460
|
-
| `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
|
|
461
|
-
| `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
|
|
460
|
+
| `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, item revenue, and ecommerce purchase revenue per day between the enhanced table and raw export. Also reconciles item_revenue at the (event_date, item_id) grain on purchase events for days both sides consider final. Detects missing days, count mismatches, and non-final data inflation |
|
|
462
461
|
|
|
463
|
-
|
|
462
|
+
The assertion inherits the table's schema and tags from `dataformTableConfig` and queries the last 5 days of data.
|
|
464
463
|
|
|
465
464
|
#### Selective Assertions
|
|
466
465
|
|
|
467
|
-
|
|
466
|
+
Disable the assertion by setting it to `false`:
|
|
468
467
|
|
|
469
468
|
```javascript
|
|
470
469
|
ga4EventsEnhanced.createTable(publish, config, {
|
|
471
470
|
assert,
|
|
472
|
-
assertions: { dailyQuality:
|
|
471
|
+
assertions: { dailyQuality: false },
|
|
473
472
|
});
|
|
474
473
|
```
|
|
475
474
|
|
package/package.json
CHANGED
|
@@ -12,11 +12,15 @@ const ASSERTION_LOOKBACK_DAYS = 5;
|
|
|
12
12
|
* Generates a SQL assertion query that validates daily data quality between the
|
|
13
13
|
* enhanced events table and the raw GA4 export data.
|
|
14
14
|
*
|
|
15
|
-
* The query
|
|
16
|
-
*
|
|
17
|
-
*
|
|
15
|
+
* The query runs two aggregations for the last 5 days and unions the violations:
|
|
16
|
+
* - Day grain: (event_date, data_is_final) -- session/event counts, item_revenue,
|
|
17
|
+
* ecommerce.purchase_revenue.
|
|
18
|
+
* - Item-id grain: (event_date, item_id) on purchase events for days both sides
|
|
19
|
+
* consider final -- validates per-item_id revenue and item-row count.
|
|
18
20
|
*
|
|
19
|
-
*
|
|
21
|
+
* Returns violating rows -- 0 rows means the assertion passes.
|
|
22
|
+
*
|
|
23
|
+
* Eight violation types are detected:
|
|
20
24
|
* - MISSING_DAY: Raw data has events but enhanced table has none for this day
|
|
21
25
|
* - SESSION_COUNT_MISMATCH: Final data session count differs
|
|
22
26
|
* - EVENT_COUNT_MISMATCH: Final data event count differs
|
|
@@ -24,6 +28,11 @@ const ASSERTION_LOOKBACK_DAYS = 5;
|
|
|
24
28
|
* - PURCHASE_REVENUE_MISMATCH: Final data total ecommerce.purchase_revenue differs
|
|
25
29
|
* (raw side applies fixEcommerceStruct() to mirror the enhanced pipeline's fix)
|
|
26
30
|
* - NON_FINAL_EXCESS_EVENTS: Non-final enhanced data has more events than raw
|
|
31
|
+
* - ITEM_REVENUE_MISMATCH_BY_ID: Per-item_id item_revenue differs on a shared-final day
|
|
32
|
+
* - ITEM_COUNT_MISMATCH_BY_ID: Per-item_id purchase item-row count differs on a shared-final day
|
|
33
|
+
*
|
|
34
|
+
* Day-level rows leave item_id / enhanced_item_count / raw_item_count NULL.
|
|
35
|
+
* Item-id-level rows leave session / event / purchase_revenue columns NULL.
|
|
27
36
|
*
|
|
28
37
|
* @param {string} tableRef - Fully qualified reference to the enhanced table
|
|
29
38
|
* @param {Object} mergedConfig - Merged table configuration (after merge + validation)
|
|
@@ -86,10 +95,63 @@ daily_comparison as (
|
|
|
86
95
|
enhanced_daily e
|
|
87
96
|
full outer join
|
|
88
97
|
raw_daily r using(event_date, data_is_final)
|
|
98
|
+
),
|
|
99
|
+
enhanced_items as (
|
|
100
|
+
select
|
|
101
|
+
event_date,
|
|
102
|
+
item.item_id,
|
|
103
|
+
sum(item.item_revenue) as total_item_revenue,
|
|
104
|
+
count(*) as item_count
|
|
105
|
+
from
|
|
106
|
+
${tableRef},
|
|
107
|
+
unnest(items) as item
|
|
108
|
+
where
|
|
109
|
+
data_is_final = true
|
|
110
|
+
and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
111
|
+
and event_name = 'purchase'
|
|
112
|
+
group by event_date, item.item_id
|
|
113
|
+
),
|
|
114
|
+
raw_items as (
|
|
115
|
+
select
|
|
116
|
+
cast(event_date as date format 'YYYYMMDD') as event_date,
|
|
117
|
+
item.item_id,
|
|
118
|
+
sum(item.item_revenue) as total_item_revenue,
|
|
119
|
+
count(*) as item_count
|
|
120
|
+
from
|
|
121
|
+
${dedupedRawSource},
|
|
122
|
+
unnest(items) as item
|
|
123
|
+
where
|
|
124
|
+
${excludedEventsSQL}
|
|
125
|
+
and event_name = 'purchase'
|
|
126
|
+
and ${dataIsFinalCondition}
|
|
127
|
+
group by event_date, item.item_id
|
|
128
|
+
),
|
|
129
|
+
shared_final_days as (
|
|
130
|
+
select event_date
|
|
131
|
+
from daily_comparison
|
|
132
|
+
where data_is_final = true
|
|
133
|
+
and enhanced_events is not null
|
|
134
|
+
and raw_events is not null
|
|
135
|
+
),
|
|
136
|
+
item_comparison as (
|
|
137
|
+
select
|
|
138
|
+
coalesce(e.event_date, r.event_date) as event_date,
|
|
139
|
+
coalesce(e.item_id, r.item_id) as item_id,
|
|
140
|
+
round(e.total_item_revenue, 2) as enhanced_item_revenue,
|
|
141
|
+
round(r.total_item_revenue, 2) as raw_item_revenue,
|
|
142
|
+
e.item_count as enhanced_item_count,
|
|
143
|
+
r.item_count as raw_item_count
|
|
144
|
+
from
|
|
145
|
+
enhanced_items e
|
|
146
|
+
full outer join
|
|
147
|
+
raw_items r using(event_date, item_id)
|
|
148
|
+
where
|
|
149
|
+
coalesce(e.event_date, r.event_date) in (select event_date from shared_final_days)
|
|
89
150
|
)
|
|
90
151
|
select
|
|
91
152
|
event_date,
|
|
92
153
|
data_is_final,
|
|
154
|
+
null as item_id,
|
|
93
155
|
enhanced_sessions,
|
|
94
156
|
raw_sessions,
|
|
95
157
|
enhanced_events,
|
|
@@ -98,6 +160,8 @@ select
|
|
|
98
160
|
raw_item_revenue,
|
|
99
161
|
enhanced_purchase_revenue,
|
|
100
162
|
raw_purchase_revenue,
|
|
163
|
+
cast(null as int64) as enhanced_item_count,
|
|
164
|
+
cast(null as int64) as raw_item_count,
|
|
101
165
|
violation_type
|
|
102
166
|
from
|
|
103
167
|
daily_comparison,
|
|
@@ -109,6 +173,30 @@ from
|
|
|
109
173
|
if(data_is_final = true and enhanced_purchase_revenue != raw_purchase_revenue, 'PURCHASE_REVENUE_MISMATCH', null),
|
|
110
174
|
if(data_is_final = false and coalesce(enhanced_events, 0) > coalesce(raw_events, 0), 'NON_FINAL_EXCESS_EVENTS', null)
|
|
111
175
|
]) as violation_type
|
|
176
|
+
where
|
|
177
|
+
violation_type is not null
|
|
178
|
+
union all
|
|
179
|
+
select
|
|
180
|
+
event_date,
|
|
181
|
+
true as data_is_final,
|
|
182
|
+
item_id,
|
|
183
|
+
cast(null as int64) as enhanced_sessions,
|
|
184
|
+
cast(null as int64) as raw_sessions,
|
|
185
|
+
cast(null as int64) as enhanced_events,
|
|
186
|
+
cast(null as int64) as raw_events,
|
|
187
|
+
enhanced_item_revenue,
|
|
188
|
+
raw_item_revenue,
|
|
189
|
+
cast(null as float64) as enhanced_purchase_revenue,
|
|
190
|
+
cast(null as float64) as raw_purchase_revenue,
|
|
191
|
+
enhanced_item_count,
|
|
192
|
+
raw_item_count,
|
|
193
|
+
violation_type
|
|
194
|
+
from
|
|
195
|
+
item_comparison,
|
|
196
|
+
unnest([
|
|
197
|
+
if(round(coalesce(enhanced_item_revenue, 0), 2) != round(coalesce(raw_item_revenue, 0), 2), 'ITEM_REVENUE_MISMATCH_BY_ID', null),
|
|
198
|
+
if(coalesce(enhanced_item_count, 0) != coalesce(raw_item_count, 0), 'ITEM_COUNT_MISMATCH_BY_ID', null)
|
|
199
|
+
]) as violation_type
|
|
112
200
|
where
|
|
113
201
|
violation_type is not null`;
|
|
114
202
|
};
|
|
@@ -119,7 +207,8 @@ where
|
|
|
119
207
|
* Merges the provided config with defaults, validates, then generates a SQL
|
|
120
208
|
* query comparing daily aggregates (session count, event count, item_revenue,
|
|
121
209
|
* ecommerce.purchase_revenue) between the enhanced table and raw export data,
|
|
122
|
-
*
|
|
210
|
+
* plus a per-item_id revenue/row-count check on purchase events for shared-final
|
|
211
|
+
* days. Also checks for missing days and non-final data inflation.
|
|
123
212
|
*
|
|
124
213
|
* @param {string} tableRef - Fully qualified reference to the enhanced table.
|
|
125
214
|
* @param {Object} config - User-provided table configuration.
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
const { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql } = require('./itemRevenue.js');
|
|
2
1
|
const { generateDailyQualityAssertionSql, _generateDailyQualityAssertionSql } = require('./dailyQuality.js');
|
|
3
2
|
|
|
4
3
|
module.exports = {
|
|
5
|
-
itemRevenue: generateItemRevenueAssertionSql,
|
|
6
4
|
dailyQuality: generateDailyQualityAssertionSql,
|
|
7
5
|
_internal: {
|
|
8
6
|
dailyQuality: { generate: _generateDailyQualityAssertionSql, defaultName: 'daily_quality' },
|
|
9
|
-
itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue', enabledByDefault: false },
|
|
10
7
|
},
|
|
11
8
|
};
|
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
const helpers = require('../../../helpers/index.js');
|
|
2
|
-
const utils = require('../../../utils.js');
|
|
3
|
-
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
|
-
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
-
const { buildDedupedRawSource } = require('./shared.js');
|
|
6
|
-
|
|
7
|
-
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
8
|
-
|
|
9
|
-
const ASSERTION_LOOKBACK_DAYS = 5;
|
|
10
|
-
|
|
11
|
-
// Ecommerce events that carry item data (excluding refund — refunds reverse revenue
|
|
12
|
-
// and are handled separately in some pipelines, but item_revenue on refund rows
|
|
13
|
-
// should still reconcile 1:1 between enhanced and raw).
|
|
14
|
-
const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
|
|
15
|
-
|
|
16
|
-
/**
|
|
17
|
-
* Generates a SQL assertion query that reconciles item_revenue between the
|
|
18
|
-
* enhanced events table and the raw GA4 export data.
|
|
19
|
-
*
|
|
20
|
-
* The query compares item_revenue grouped by (event_date, item_id) for the
|
|
21
|
-
* last 5 days of final data. Returns mismatched rows — 0 rows means the
|
|
22
|
-
* assertion passes.
|
|
23
|
-
*
|
|
24
|
-
* @param {string} tableRef - Fully qualified reference to the enhanced table
|
|
25
|
-
* (e.g., ctx.ref('ga4_events_enhanced_123456789') in Dataform, or a backtick-quoted string).
|
|
26
|
-
* @param {Object} mergedConfig - Merged table configuration (after merge + validation).
|
|
27
|
-
* @returns {string} SQL query returning violating rows
|
|
28
|
-
*/
|
|
29
|
-
const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
30
|
-
// excluded events filter (same logic as the enhanced table pipeline)
|
|
31
|
-
const excludedEvents = mergedConfig.excludedEvents;
|
|
32
|
-
const excludedEventsSQL = excludedEvents.length > 0
|
|
33
|
-
? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
34
|
-
: 'true';
|
|
35
|
-
|
|
36
|
-
// data_is_final condition for the raw side
|
|
37
|
-
const dataIsFinalCondition = helpers.isFinalData(
|
|
38
|
-
mergedConfig.dataIsFinal.detectionMethod,
|
|
39
|
-
mergedConfig.dataIsFinal.dayThreshold
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
// deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
|
|
43
|
-
const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
|
|
44
|
-
|
|
45
|
-
return `with enhanced_revenue as (
|
|
46
|
-
select
|
|
47
|
-
event_date,
|
|
48
|
-
item.item_id,
|
|
49
|
-
sum(item.item_revenue) as total_item_revenue,
|
|
50
|
-
count(*) as item_count
|
|
51
|
-
from
|
|
52
|
-
${tableRef},
|
|
53
|
-
unnest(items) as item
|
|
54
|
-
where
|
|
55
|
-
data_is_final = true
|
|
56
|
-
and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
57
|
-
and event_name in (${ecommerceEvents})
|
|
58
|
-
group by event_date, item.item_id
|
|
59
|
-
),
|
|
60
|
-
raw_revenue as (
|
|
61
|
-
select
|
|
62
|
-
cast(event_date as date format 'YYYYMMDD') as event_date,
|
|
63
|
-
item.item_id,
|
|
64
|
-
sum(item.item_revenue) as total_item_revenue,
|
|
65
|
-
count(*) as item_count
|
|
66
|
-
from
|
|
67
|
-
${dedupedRawSource},
|
|
68
|
-
unnest(items) as item
|
|
69
|
-
where
|
|
70
|
-
${excludedEventsSQL}
|
|
71
|
-
and event_name in (${ecommerceEvents})
|
|
72
|
-
and ${dataIsFinalCondition}
|
|
73
|
-
group by event_date, item.item_id
|
|
74
|
-
)
|
|
75
|
-
select
|
|
76
|
-
coalesce(e.event_date, r.event_date) as event_date,
|
|
77
|
-
coalesce(e.item_id, r.item_id) as item_id,
|
|
78
|
-
e.total_item_revenue as enhanced_revenue,
|
|
79
|
-
r.total_item_revenue as raw_revenue,
|
|
80
|
-
e.item_count as enhanced_count,
|
|
81
|
-
r.item_count as raw_count
|
|
82
|
-
from
|
|
83
|
-
enhanced_revenue e
|
|
84
|
-
full outer join
|
|
85
|
-
raw_revenue r using(event_date, item_id)
|
|
86
|
-
where
|
|
87
|
-
round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)
|
|
88
|
-
or e.item_count != r.item_count
|
|
89
|
-
or e.event_date is null
|
|
90
|
-
or r.event_date is null`;
|
|
91
|
-
};
|
|
92
|
-
|
|
93
|
-
/**
|
|
94
|
-
* Generates an item_revenue reconciliation assertion SQL query.
|
|
95
|
-
*
|
|
96
|
-
* Merges the provided config with defaults, validates, then generates a SQL
|
|
97
|
-
* query comparing item_revenue between the enhanced table and raw export data.
|
|
98
|
-
*
|
|
99
|
-
* @param {string} tableRef - Fully qualified reference to the enhanced table.
|
|
100
|
-
* @param {Object} config - User-provided table configuration.
|
|
101
|
-
* @returns {string} SQL query returning violating rows (0 rows = pass)
|
|
102
|
-
*/
|
|
103
|
-
const generateItemRevenueAssertionSql = (tableRef, config) => {
|
|
104
|
-
if (!tableRef || typeof tableRef !== 'string' || !tableRef.trim()) {
|
|
105
|
-
throw new Error('assertions.itemRevenue: tableRef is required and must be a non-empty string (e.g., ctx.ref(\'table_name\') or \'`project.dataset.table`\').');
|
|
106
|
-
}
|
|
107
|
-
const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
|
|
108
|
-
|
|
109
|
-
// The assertion interpolates sourceTable directly into SQL (no Dataform ctx available).
|
|
110
|
-
// If sourceTable is still a Dataform reference object, it would render as [object Object].
|
|
111
|
-
if (utils.isDataformTableReferenceObject(mergedConfig.sourceTable)) {
|
|
112
|
-
throw new Error(
|
|
113
|
-
'assertions.itemRevenue: config.sourceTable is a Dataform table reference object, but assertions do not have access to Dataform context to resolve it. ' +
|
|
114
|
-
'Resolve it with ctx.ref() before passing it to the assertion:\n\n' +
|
|
115
|
-
' .query(ctx => ga4EventsEnhanced.assertions.itemRevenue(\n' +
|
|
116
|
-
' ctx.ref(\'enhanced_table_name\'),\n' +
|
|
117
|
-
' { ...config, sourceTable: ctx.ref(config.sourceTable) }\n' +
|
|
118
|
-
' ))'
|
|
119
|
-
);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });
|
|
123
|
-
return _generateItemRevenueAssertionSql(tableRef, mergedConfig);
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
module.exports = { generateItemRevenueAssertionSql, _generateItemRevenueAssertionSql };
|