ga4-export-fixer 0.6.2-dev.0 → 0.6.2-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/createTable.js +1 -0
- package/package.json +1 -1
- package/tables/ga4EventsEnhanced/assertions/dailyQuality.js +7 -30
- package/tables/ga4EventsEnhanced/assertions/index.js +1 -1
- package/tables/ga4EventsEnhanced/assertions/itemRevenue.js +9 -32
- package/tables/ga4EventsEnhanced/assertions/shared.js +83 -0
package/README.md
CHANGED
|
@@ -453,23 +453,23 @@ The package includes built-in data quality assertions that can be automatically
|
|
|
453
453
|
ga4EventsEnhanced.createTable(publish, config, { assert });
|
|
454
454
|
```
|
|
455
455
|
|
|
456
|
-
This creates the table
|
|
456
|
+
This creates the table along with the default-enabled assertions, using the same configuration:
|
|
457
457
|
|
|
458
|
-
| Assertion | Name | Description |
|
|
459
|
-
| --------- | ---- | ----------- |
|
|
460
|
-
| `dailyQuality` | `{tableName}_daily_quality` | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
|
|
461
|
-
| `itemRevenue` | `{tableName}_item_revenue` | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
|
|
458
|
+
| Assertion | Name | Enabled by default | Description |
|
|
459
|
+
| --------- | ---- | ------------------ | ----------- |
|
|
460
|
+
| `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
|
|
461
|
+
| `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
|
|
462
462
|
|
|
463
463
|
Assertions inherit the table's schema and tags from `dataformTableConfig`. Each assertion queries the last 5 days of data.
|
|
464
464
|
|
|
465
465
|
#### Selective Assertions
|
|
466
466
|
|
|
467
|
-
|
|
467
|
+
Enable opt-in assertions by setting them to `true`, or disable default-enabled ones by setting them to `false`:
|
|
468
468
|
|
|
469
469
|
```javascript
|
|
470
470
|
ga4EventsEnhanced.createTable(publish, config, {
|
|
471
471
|
assert,
|
|
472
|
-
assertions: { dailyQuality: true, itemRevenue:
|
|
472
|
+
assertions: { dailyQuality: true, itemRevenue: true },
|
|
473
473
|
});
|
|
474
474
|
```
|
|
475
475
|
|
package/createTable.js
CHANGED
|
@@ -67,6 +67,7 @@ const createTable = (dataformPublish, userConfig, tableModule, options) => {
|
|
|
67
67
|
for (const [key, assertionDef] of Object.entries(tableModule.assertions)) {
|
|
68
68
|
const assertionOption = options.assertions?.[key];
|
|
69
69
|
if (assertionOption === false) continue;
|
|
70
|
+
if (assertionOption === undefined && assertionDef.enabledByDefault === false) continue;
|
|
70
71
|
|
|
71
72
|
const assertionName = `${tableName}_${assertionDef.defaultName}`;
|
|
72
73
|
const assertionDataformConfig = {
|
package/package.json
CHANGED
|
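[package.json hunk not captured in this extract; the +1 -1 change is presumably the version bump 0.6.2-dev.0 → 0.6.2-dev.2]
package/tables/ga4EventsEnhanced/assertions/dailyQuality.js
CHANGED
|
@@ -2,32 +2,11 @@ const helpers = require('../../../helpers/index.js');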
|
|
|
2
2
|
const utils = require('../../../utils.js');
|
|
3
3
|
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
4
|
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
const { buildDedupedRawSource } = require('./shared.js');
|
|
5
6
|
|
|
6
7
|
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
10
|
-
*
|
|
11
|
-
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
12
|
-
* with a fixed 5-day lookback window. This is intentionally separate from
|
|
13
|
-
* the pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
14
|
-
* and BigQuery pre-operation variables.
|
|
15
|
-
*
|
|
16
|
-
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
17
|
-
* @returns {string} SQL fragment for a WHERE clause
|
|
18
|
-
*/
|
|
19
|
-
const buildAssertionDateFilter = (includedExportTypes) => {
|
|
20
|
-
const start = 'date_sub(current_date(), interval 5 day)';
|
|
21
|
-
const end = 'current_date()';
|
|
22
|
-
|
|
23
|
-
const filters = [
|
|
24
|
-
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
25
|
-
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
26
|
-
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
27
|
-
].filter(Boolean);
|
|
28
|
-
|
|
29
|
-
return filters.join(' or ');
|
|
30
|
-
};
|
|
9
|
+
const ASSERTION_LOOKBACK_DAYS = 5;
|
|
31
10
|
|
|
32
11
|
/**
|
|
33
12
|
* Generates a SQL assertion query that validates daily data quality between the
|
|
@@ -51,15 +30,15 @@ const buildAssertionDateFilter = (includedExportTypes) => {
|
|
|
51
30
|
const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
|
|
52
31
|
const excludedEvents = mergedConfig.excludedEvents;
|
|
53
32
|
const excludedEventsSQL = excludedEvents.length > 0
|
|
54
|
-
? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
55
|
-
: '';
|
|
33
|
+
? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
34
|
+
: 'true';
|
|
56
35
|
|
|
57
36
|
const dataIsFinalCondition = helpers.isFinalData(
|
|
58
37
|
mergedConfig.dataIsFinal.detectionMethod,
|
|
59
38
|
mergedConfig.dataIsFinal.dayThreshold
|
|
60
39
|
);
|
|
61
40
|
|
|
62
|
-
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
|
|
41
|
+
const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
|
|
63
42
|
|
|
64
43
|
return `with enhanced_daily as (
|
|
65
44
|
select
|
|
@@ -71,7 +50,7 @@ const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
|
|
|
71
50
|
from
|
|
72
51
|
${tableRef}
|
|
73
52
|
where
|
|
74
|
-
event_date >= date_sub(current_date(), interval 5 day)
|
|
53
|
+
event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
75
54
|
group by event_date, data_is_final
|
|
76
55
|
),
|
|
77
56
|
raw_daily as (
|
|
@@ -82,11 +61,9 @@ raw_daily as (
|
|
|
82
61
|
count(*) as event_count,
|
|
83
62
|
coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
|
|
84
63
|
from
|
|
85
|
-
${mergedConfig.sourceTable}
|
|
64
|
+
${dedupedRawSource}
|
|
86
65
|
where
|
|
87
|
-
(${dateFilter})
|
|
88
66
|
${excludedEventsSQL}
|
|
89
|
-
and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
|
|
90
67
|
group by event_date, data_is_final
|
|
91
68
|
),
|
|
92
69
|
daily_comparison as (
|
|
package/tables/ga4EventsEnhanced/assertions/index.js
CHANGED
|
@@ -6,6 +6,6 @@ module.exports = {
|
|
|
6
6
|
dailyQuality: generateDailyQualityAssertionSql,
|
|
7
7
|
_internal: {
|
|
8
8
|
dailyQuality: { generate: _generateDailyQualityAssertionSql, defaultName: 'daily_quality' },
|
|
9
|
-
itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue' },
|
|
9
|
+
itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue', enabledByDefault: false },
|
|
10
10
|
},
|
|
11
11
|
};
|
|
package/tables/ga4EventsEnhanced/assertions/itemRevenue.js
CHANGED
|
@@ -2,38 +2,17 @@ const helpers = require('../../../helpers/index.js');
|
|
|
2
2
|
const utils = require('../../../utils.js');
|
|
3
3
|
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
4
|
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
const { buildDedupedRawSource } = require('./shared.js');
|
|
5
6
|
|
|
6
7
|
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
8
|
|
|
9
|
+
const ASSERTION_LOOKBACK_DAYS = 5;
|
|
10
|
+
|
|
8
11
|
// Ecommerce events that carry item data (excluding refund — refunds reverse revenue
|
|
9
12
|
// and are handled separately in some pipelines, but item_revenue on refund rows
|
|
10
13
|
// should still reconcile 1:1 between enhanced and raw).
|
|
11
14
|
const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
|
|
12
15
|
|
|
13
|
-
/**
|
|
14
|
-
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
15
|
-
*
|
|
16
|
-
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
17
|
-
* with a fixed 5-day lookback window. This is intentionally separate from
|
|
18
|
-
* the pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
19
|
-
* and BigQuery pre-operation variables.
|
|
20
|
-
*
|
|
21
|
-
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
22
|
-
* @returns {string} SQL fragment for a WHERE clause
|
|
23
|
-
*/
|
|
24
|
-
const buildAssertionDateFilter = (includedExportTypes) => {
|
|
25
|
-
const start = 'date_sub(current_date(), interval 5 day)';
|
|
26
|
-
const end = 'current_date()';
|
|
27
|
-
|
|
28
|
-
const filters = [
|
|
29
|
-
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
30
|
-
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
31
|
-
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
32
|
-
].filter(Boolean);
|
|
33
|
-
|
|
34
|
-
return filters.join(' or ');
|
|
35
|
-
};
|
|
36
|
-
|
|
37
16
|
/**
|
|
38
17
|
* Generates a SQL assertion query that reconciles item_revenue between the
|
|
39
18
|
* enhanced events table and the raw GA4 export data.
|
|
@@ -51,8 +30,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
51
30
|
// excluded events filter (same logic as the enhanced table pipeline)
|
|
52
31
|
const excludedEvents = mergedConfig.excludedEvents;
|
|
53
32
|
const excludedEventsSQL = excludedEvents.length > 0
|
|
54
|
-
? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
55
|
-
: '';
|
|
33
|
+
? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
|
|
34
|
+
: 'true';
|
|
56
35
|
|
|
57
36
|
// data_is_final condition for the raw side
|
|
58
37
|
const dataIsFinalCondition = helpers.isFinalData(
|
|
@@ -60,8 +39,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
60
39
|
mergedConfig.dataIsFinal.dayThreshold
|
|
61
40
|
);
|
|
62
41
|
|
|
63
|
-
//
|
|
64
|
-
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
|
|
42
|
+
// deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
|
|
43
|
+
const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
|
|
65
44
|
|
|
66
45
|
return `with enhanced_revenue as (
|
|
67
46
|
select
|
|
@@ -74,7 +53,7 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
|
|
|
74
53
|
unnest(items) as item
|
|
75
54
|
where
|
|
76
55
|
data_is_final = true
|
|
77
|
-
and event_date >= date_sub(current_date(), interval 5 day)
|
|
56
|
+
and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
|
|
78
57
|
and event_name in (${ecommerceEvents})
|
|
79
58
|
group by event_date, item.item_id
|
|
80
59
|
),
|
|
@@ -85,14 +64,12 @@ raw_revenue as (
|
|
|
85
64
|
sum(item.item_revenue) as total_item_revenue,
|
|
86
65
|
count(*) as item_count
|
|
87
66
|
from
|
|
88
|
-
${mergedConfig.sourceTable},
|
|
67
|
+
${dedupedRawSource},
|
|
89
68
|
unnest(items) as item
|
|
90
69
|
where
|
|
91
|
-
(${dateFilter})
|
|
92
70
|
${excludedEventsSQL}
|
|
93
71
|
and event_name in (${ecommerceEvents})
|
|
94
72
|
and ${dataIsFinalCondition}
|
|
95
|
-
and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
|
|
96
73
|
group by event_date, item.item_id
|
|
97
74
|
)
|
|
98
75
|
select
|
|
package/tables/ga4EventsEnhanced/assertions/shared.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
const helpers = require('../../../helpers/index.js');
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Builds a _table_suffix date filter for the assertion's raw-side query.
|
|
5
|
+
*
|
|
6
|
+
* Uses the low-level ga4ExportDateFilter() helper per enabled export type
|
|
7
|
+
* over a caller-provided lookback window. Intentionally separate from the
|
|
8
|
+
* pipeline's ga4ExportDateFilters() which depends on incremental state
|
|
9
|
+
* and BigQuery pre-operation variables.
|
|
10
|
+
*
|
|
11
|
+
* @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
|
|
12
|
+
* @param {number} lookbackDays - Number of days to look back from current_date().
|
|
13
|
+
* @returns {string} SQL fragment for a WHERE clause
|
|
14
|
+
*/
|
|
15
|
+
const buildAssertionDateFilter = (includedExportTypes, lookbackDays) => {
|
|
16
|
+
const start = `date_sub(current_date(), interval ${lookbackDays} day)`;
|
|
17
|
+
const end = 'current_date()';
|
|
18
|
+
|
|
19
|
+
const filters = [
|
|
20
|
+
includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
|
|
21
|
+
includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
|
|
22
|
+
includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
|
|
23
|
+
].filter(Boolean);
|
|
24
|
+
|
|
25
|
+
return filters.join(' or ');
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Builds a deduplicated raw-source subquery for assertion use.
|
|
30
|
+
*
|
|
31
|
+
* Replicates what setPreOperations() does at pipeline time, without access
|
|
32
|
+
* to its BigQuery variables. Covers all seven combinations of
|
|
33
|
+
* includedExportTypes {daily, fresh, intraday}:
|
|
34
|
+
*
|
|
35
|
+
* - qualify dense_rank() over (partition by date, order by _table_suffix) = 1
|
|
36
|
+
* picks the highest-priority table per day. Alphabetical order gives
|
|
37
|
+
* daily ('20260115') < fresh ('fresh_20260115') < intraday ('intraday_20260115'),
|
|
38
|
+
* matching the pipeline's daily > fresh > intraday priority.
|
|
39
|
+
* - When fresh and intraday are both enabled, intraday rows with
|
|
40
|
+
* event_timestamp > max(fresh.event_timestamp) for the same date are
|
|
41
|
+
* additionally admitted — matching the FRESH_MAX_EVENT_TIMESTAMP boundary.
|
|
42
|
+
*
|
|
43
|
+
* @param {Object} mergedConfig - Merged table configuration.
|
|
44
|
+
* @param {number} lookbackDays - Number of days to look back from current_date().
|
|
45
|
+
* @returns {string} SQL fragment: a parenthesized subquery usable in a FROM clause.
|
|
46
|
+
*/
|
|
47
|
+
const buildDedupedRawSource = (mergedConfig, lookbackDays) => {
|
|
48
|
+
const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes, lookbackDays);
|
|
49
|
+
const freshAndIntraday = mergedConfig.includedExportTypes.fresh && mergedConfig.includedExportTypes.intraday;
|
|
50
|
+
|
|
51
|
+
const intradayException = freshAndIntraday
|
|
52
|
+
? `
|
|
53
|
+
or (
|
|
54
|
+
starts_with(_table_suffix, 'intraday_')
|
|
55
|
+
and dense_rank() over (
|
|
56
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
57
|
+
order by _table_suffix
|
|
58
|
+
) = 2
|
|
59
|
+
and event_timestamp > max(if(starts_with(_table_suffix, 'fresh_'), event_timestamp, null)) over (
|
|
60
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
61
|
+
)
|
|
62
|
+
)`
|
|
63
|
+
: '';
|
|
64
|
+
|
|
65
|
+
// _table_suffix is a pseudo-column and not propagated by SELECT *; select it
|
|
66
|
+
// explicitly so downstream CTEs (e.g., isFinalData('EXPORT_TYPE')) can still reference it.
|
|
67
|
+
return `(
|
|
68
|
+
select
|
|
69
|
+
*,
|
|
70
|
+
_table_suffix
|
|
71
|
+
from
|
|
72
|
+
${mergedConfig.sourceTable}
|
|
73
|
+
where
|
|
74
|
+
(${dateFilter})
|
|
75
|
+
qualify
|
|
76
|
+
dense_rank() over (
|
|
77
|
+
partition by regexp_extract(_table_suffix, r'[0-9]+')
|
|
78
|
+
order by _table_suffix
|
|
79
|
+
) = 1${intradayException}
|
|
80
|
+
)`;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
module.exports = { buildAssertionDateFilter, buildDedupedRawSource };
|