ga4-export-fixer 0.6.2-dev.0 → 0.6.2-dev.2

This diff shows the contents of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry.
package/README.md CHANGED
@@ -453,23 +453,23 @@ The package includes built-in data quality assertions that can be automatically
453
453
  ga4EventsEnhanced.createTable(publish, config, { assert });
454
454
  ```
455
455
 
456
- This creates the table and the following assertions using the same configuration:
456
+ This creates the table along with the default-enabled assertions, using the same configuration:
457
457
 
458
- | Assertion | Name | Description |
459
- | --------- | ---- | ----------- |
460
- | `dailyQuality` | `{tableName}_daily_quality` | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
461
- | `itemRevenue` | `{tableName}_item_revenue` | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
458
+ | Assertion | Name | Enabled by default | Description |
459
+ | --------- | ---- | ------------------ | ----------- |
460
+ | `dailyQuality` | `{tableName}_daily_quality` | Yes | Compares session count, event count, and item revenue per day between the enhanced table and raw export. Detects missing days, count mismatches, and non-final data inflation |
461
+ | `itemRevenue` | `{tableName}_item_revenue` | No (opt-in) | Reconciles item_revenue at the (event_date, item_id) grain between the enhanced table and raw export |
462
462
 
463
463
  Assertions inherit the table's schema and tags from `dataformTableConfig`. Each assertion queries the last 5 days of data.
464
464
 
465
465
  #### Selective Assertions
466
466
 
467
- Disable individual assertions by setting them to `false`:
467
+ Enable opt-in assertions by setting them to `true`, or disable default-enabled ones by setting them to `false`:
468
468
 
469
469
  ```javascript
470
470
  ga4EventsEnhanced.createTable(publish, config, {
471
471
  assert,
472
- assertions: { dailyQuality: true, itemRevenue: false },
472
+ assertions: { dailyQuality: true, itemRevenue: true },
473
473
  });
474
474
  ```
475
475
 
package/createTable.js CHANGED
@@ -67,6 +67,7 @@ const createTable = (dataformPublish, userConfig, tableModule, options) => {
67
67
  for (const [key, assertionDef] of Object.entries(tableModule.assertions)) {
68
68
  const assertionOption = options.assertions?.[key];
69
69
  if (assertionOption === false) continue;
70
+ if (assertionOption === undefined && assertionDef.enabledByDefault === false) continue;
70
71
 
71
72
  const assertionName = `${tableName}_${assertionDef.defaultName}`;
72
73
  const assertionDataformConfig = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.6.2-dev.0",
3
+ "version": "0.6.2-dev.2",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -2,32 +2,11 @@ const helpers = require('../../../helpers/index.js');
2
2
  const utils = require('../../../utils.js');
3
3
  const { ga4EventsEnhancedConfig } = require('../config.js');
4
4
  const { validateEnhancedEventsConfig } = require('../validation.js');
5
+ const { buildDedupedRawSource } = require('./shared.js');
5
6
 
6
7
  const defaultConfig = { ...ga4EventsEnhancedConfig };
7
8
 
8
- /**
9
- * Builds a _table_suffix date filter for the assertion's raw-side query.
10
- *
11
- * Uses the low-level ga4ExportDateFilter() helper per enabled export type
12
- * with a fixed 5-day lookback window. This is intentionally separate from
13
- * the pipeline's ga4ExportDateFilters() which depends on incremental state
14
- * and BigQuery pre-operation variables.
15
- *
16
- * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
17
- * @returns {string} SQL fragment for a WHERE clause
18
- */
19
- const buildAssertionDateFilter = (includedExportTypes) => {
20
- const start = 'date_sub(current_date(), interval 5 day)';
21
- const end = 'current_date()';
22
-
23
- const filters = [
24
- includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
25
- includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
26
- includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
27
- ].filter(Boolean);
28
-
29
- return filters.join(' or ');
30
- };
9
+ const ASSERTION_LOOKBACK_DAYS = 5;
31
10
 
32
11
  /**
33
12
  * Generates a SQL assertion query that validates daily data quality between the
@@ -51,15 +30,15 @@ const buildAssertionDateFilter = (includedExportTypes) => {
51
30
  const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
52
31
  const excludedEvents = mergedConfig.excludedEvents;
53
32
  const excludedEventsSQL = excludedEvents.length > 0
54
- ? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
55
- : '';
33
+ ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
34
+ : 'true';
56
35
 
57
36
  const dataIsFinalCondition = helpers.isFinalData(
58
37
  mergedConfig.dataIsFinal.detectionMethod,
59
38
  mergedConfig.dataIsFinal.dayThreshold
60
39
  );
61
40
 
62
- const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
41
+ const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
63
42
 
64
43
  return `with enhanced_daily as (
65
44
  select
@@ -71,7 +50,7 @@ const _generateDailyQualityAssertionSql = (tableRef, mergedConfig) => {
71
50
  from
72
51
  ${tableRef}
73
52
  where
74
- event_date >= date_sub(current_date(), interval 5 day)
53
+ event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
75
54
  group by event_date, data_is_final
76
55
  ),
77
56
  raw_daily as (
@@ -82,11 +61,9 @@ raw_daily as (
82
61
  count(*) as event_count,
83
62
  coalesce(sum((select sum(item.item_revenue) from unnest(items) as item)), 0) as total_item_revenue
84
63
  from
85
- ${mergedConfig.sourceTable}
64
+ ${dedupedRawSource}
86
65
  where
87
- (${dateFilter})
88
66
  ${excludedEventsSQL}
89
- and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
90
67
  group by event_date, data_is_final
91
68
  ),
92
69
  daily_comparison as (
@@ -6,6 +6,6 @@ module.exports = {
6
6
  dailyQuality: generateDailyQualityAssertionSql,
7
7
  _internal: {
8
8
  dailyQuality: { generate: _generateDailyQualityAssertionSql, defaultName: 'daily_quality' },
9
- itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue' },
9
+ itemRevenue: { generate: _generateItemRevenueAssertionSql, defaultName: 'item_revenue', enabledByDefault: false },
10
10
  },
11
11
  };
@@ -2,38 +2,17 @@ const helpers = require('../../../helpers/index.js');
2
2
  const utils = require('../../../utils.js');
3
3
  const { ga4EventsEnhancedConfig } = require('../config.js');
4
4
  const { validateEnhancedEventsConfig } = require('../validation.js');
5
+ const { buildDedupedRawSource } = require('./shared.js');
5
6
 
6
7
  const defaultConfig = { ...ga4EventsEnhancedConfig };
7
8
 
9
+ const ASSERTION_LOOKBACK_DAYS = 5;
10
+
8
11
  // Ecommerce events that carry item data (excluding refund — refunds reverse revenue
9
12
  // and are handled separately in some pipelines, but item_revenue on refund rows
10
13
  // should still reconcile 1:1 between enhanced and raw).
11
14
  const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
12
15
 
13
- /**
14
- * Builds a _table_suffix date filter for the assertion's raw-side query.
15
- *
16
- * Uses the low-level ga4ExportDateFilter() helper per enabled export type
17
- * with a fixed 5-day lookback window. This is intentionally separate from
18
- * the pipeline's ga4ExportDateFilters() which depends on incremental state
19
- * and BigQuery pre-operation variables.
20
- *
21
- * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
22
- * @returns {string} SQL fragment for a WHERE clause
23
- */
24
- const buildAssertionDateFilter = (includedExportTypes) => {
25
- const start = 'date_sub(current_date(), interval 5 day)';
26
- const end = 'current_date()';
27
-
28
- const filters = [
29
- includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
30
- includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
31
- includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
32
- ].filter(Boolean);
33
-
34
- return filters.join(' or ');
35
- };
36
-
37
16
  /**
38
17
  * Generates a SQL assertion query that reconciles item_revenue between the
39
18
  * enhanced events table and the raw GA4 export data.
@@ -51,8 +30,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
51
30
  // excluded events filter (same logic as the enhanced table pipeline)
52
31
  const excludedEvents = mergedConfig.excludedEvents;
53
32
  const excludedEventsSQL = excludedEvents.length > 0
54
- ? `and event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
55
- : '';
33
+ ? `event_name not in (${excludedEvents.map(e => `'${e}'`).join(', ')})`
34
+ : 'true';
56
35
 
57
36
  // data_is_final condition for the raw side
58
37
  const dataIsFinalCondition = helpers.isFinalData(
@@ -60,8 +39,8 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
60
39
  mergedConfig.dataIsFinal.dayThreshold
61
40
  );
62
41
 
63
- // date filter for the raw side (per-export-type, fixed 5-day window)
64
- const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes);
42
+ // deduplicated raw-source subquery (mirrors pipeline setPreOperations dedup)
43
+ const dedupedRawSource = buildDedupedRawSource(mergedConfig, ASSERTION_LOOKBACK_DAYS);
65
44
 
66
45
  return `with enhanced_revenue as (
67
46
  select
@@ -74,7 +53,7 @@ const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
74
53
  unnest(items) as item
75
54
  where
76
55
  data_is_final = true
77
- and event_date >= date_sub(current_date(), interval 5 day)
56
+ and event_date >= date_sub(current_date(), interval ${ASSERTION_LOOKBACK_DAYS} day)
78
57
  and event_name in (${ecommerceEvents})
79
58
  group by event_date, item.item_id
80
59
  ),
@@ -85,14 +64,12 @@ raw_revenue as (
85
64
  sum(item.item_revenue) as total_item_revenue,
86
65
  count(*) as item_count
87
66
  from
88
- ${mergedConfig.sourceTable},
67
+ ${dedupedRawSource},
89
68
  unnest(items) as item
90
69
  where
91
- (${dateFilter})
92
70
  ${excludedEventsSQL}
93
71
  and event_name in (${ecommerceEvents})
94
72
  and ${dataIsFinalCondition}
95
- and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)
96
73
  group by event_date, item.item_id
97
74
  )
98
75
  select
@@ -0,0 +1,83 @@
1
+ const helpers = require('../../../helpers/index.js');
2
+
3
+ /**
4
+ * Builds a _table_suffix date filter for the assertion's raw-side query.
5
+ *
6
+ * Uses the low-level ga4ExportDateFilter() helper per enabled export type
7
+ * over a caller-provided lookback window. Intentionally separate from the
8
+ * pipeline's ga4ExportDateFilters() which depends on incremental state
9
+ * and BigQuery pre-operation variables.
10
+ *
11
+ * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
12
+ * @param {number} lookbackDays - Number of days to look back from current_date().
13
+ * @returns {string} SQL fragment for a WHERE clause
14
+ */
15
+ const buildAssertionDateFilter = (includedExportTypes, lookbackDays) => {
16
+ const start = `date_sub(current_date(), interval ${lookbackDays} day)`;
17
+ const end = 'current_date()';
18
+
19
+ const filters = [
20
+ includedExportTypes.daily ? helpers.ga4ExportDateFilter('daily', start, end) : null,
21
+ includedExportTypes.fresh ? helpers.ga4ExportDateFilter('fresh', start, end) : null,
22
+ includedExportTypes.intraday ? helpers.ga4ExportDateFilter('intraday', start, end) : null,
23
+ ].filter(Boolean);
24
+
25
+ return filters.join(' or ');
26
+ };
27
+
28
+ /**
29
+ * Builds a deduplicated raw-source subquery for assertion use.
30
+ *
31
+ * Replicates what setPreOperations() does at pipeline time, without access
32
+ * to its BigQuery variables. Covers all seven combinations of
33
+ * includedExportTypes {daily, fresh, intraday}:
34
+ *
35
+ * - qualify dense_rank() over (partition by date, order by _table_suffix) = 1
36
+ * picks the highest-priority table per day. Alphabetical order gives
37
+ * daily ('20260115') < fresh ('fresh_20260115') < intraday ('intraday_20260115'),
38
+ * matching the pipeline's daily > fresh > intraday priority.
39
+ * - When fresh and intraday are both enabled, intraday rows with
40
+ * event_timestamp > max(fresh.event_timestamp) for the same date are
41
+ * additionally admitted — matching the FRESH_MAX_EVENT_TIMESTAMP boundary.
42
+ *
43
+ * @param {Object} mergedConfig - Merged table configuration.
44
+ * @param {number} lookbackDays - Number of days to look back from current_date().
45
+ * @returns {string} SQL fragment: a parenthesized subquery usable in a FROM clause.
46
+ */
47
+ const buildDedupedRawSource = (mergedConfig, lookbackDays) => {
48
+ const dateFilter = buildAssertionDateFilter(mergedConfig.includedExportTypes, lookbackDays);
49
+ const freshAndIntraday = mergedConfig.includedExportTypes.fresh && mergedConfig.includedExportTypes.intraday;
50
+
51
+ const intradayException = freshAndIntraday
52
+ ? `
53
+ or (
54
+ starts_with(_table_suffix, 'intraday_')
55
+ and dense_rank() over (
56
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
57
+ order by _table_suffix
58
+ ) = 2
59
+ and event_timestamp > max(if(starts_with(_table_suffix, 'fresh_'), event_timestamp, null)) over (
60
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
61
+ )
62
+ )`
63
+ : '';
64
+
65
+ // _table_suffix is a pseudo-column and not propagated by SELECT *; select it
66
+ // explicitly so downstream CTEs (e.g., isFinalData('EXPORT_TYPE')) can still reference it.
67
+ return `(
68
+ select
69
+ *,
70
+ _table_suffix
71
+ from
72
+ ${mergedConfig.sourceTable}
73
+ where
74
+ (${dateFilter})
75
+ qualify
76
+ dense_rank() over (
77
+ partition by regexp_extract(_table_suffix, r'[0-9]+')
78
+ order by _table_suffix
79
+ ) = 1${intradayException}
80
+ )`;
81
+ };
82
+
83
+ module.exports = { buildAssertionDateFilter, buildDedupedRawSource };