ga4-export-fixer 0.5.2-dev.3 → 0.5.2-dev.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.5.2-dev.3",
|
|
3
|
+
"version": "0.5.2-dev.5",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -17,13 +17,14 @@
|
|
|
17
17
|
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js",
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js",
|
|
21
21
|
"test:summary": "node tests/testRunner.js",
|
|
22
22
|
"test:docs": "node tests/documentation.test.js",
|
|
23
23
|
"test:preops": "node tests/preOperations.test.js",
|
|
24
24
|
"test:events": "node tests/ga4EventsEnhanced.test.js",
|
|
25
25
|
"test:merge": "node tests/mergeSQLConfigurations.test.js",
|
|
26
26
|
"test:validation": "node tests/inputValidation.test.js",
|
|
27
|
+
"test:assertions": "node tests/assertions.test.js",
|
|
27
28
|
"test:createTable": "node tests/createTable.test.js",
|
|
28
29
|
"test:integration": "node tests/integration/integration.test.js",
|
|
29
30
|
"release:dev": "./scripts/release-dev.sh",
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
const helpers = require('../../../helpers/index.js');
|
|
2
|
+
const utils = require('../../../utils.js');
|
|
3
|
+
const { ga4EventsEnhancedConfig } = require('../config.js');
|
|
4
|
+
const { validateEnhancedEventsConfig } = require('../validation.js');
|
|
5
|
+
|
|
6
|
+
const defaultConfig = { ...ga4EventsEnhancedConfig };
|
|
7
|
+
|
|
8
|
+
// Ecommerce events that carry item data, rendered as a quoted, comma-separated SQL list.
// The list is used unfiltered, so refund is NOT excluded here: refunds reverse revenue
// and are handled separately in some pipelines, but item_revenue on refund rows
// should still reconcile 1:1 between enhanced and raw.
|
|
11
|
+
const ecommerceEvents = helpers.ga4EcommerceEvents.map(e => `'${e}'`).join(', ');
|
|
12
|
+
|
|
13
|
+
/**
 * Assembles the date filter used by the raw-side query of the assertion
 * (a _table_suffix filter, one clause per enabled export type).
 *
 * Every enabled export type contributes one clause produced by the low-level
 * helpers.ga4ExportDateFilter() helper, always with the same fixed window:
 * from 5 days before the current date up to the current date. The clauses are
 * combined with " or ". This deliberately does not reuse the pipeline's
 * ga4ExportDateFilters(), which depends on incremental state and BigQuery
 * pre-operation variables.
 *
 * @param {Object} includedExportTypes - { daily: boolean, fresh: boolean, intraday: boolean }
 * @returns {string} SQL fragment for a WHERE clause; an empty string when no
 *   export type is enabled (or every clause comes back falsy)
 */
const buildAssertionDateFilter = (includedExportTypes) => {
    const windowStart = 'date_sub(current_date(), interval 5 day)';
    const windowEnd = 'current_date()';

    // Order matters for the generated SQL text: daily, then fresh, then intraday.
    const clauses = [];
    for (const exportType of ['daily', 'fresh', 'intraday']) {
        if (includedExportTypes[exportType]) {
            clauses.push(helpers.ga4ExportDateFilter(exportType, windowStart, windowEnd));
        }
    }

    // Drop any falsy clause the helper may have produced before joining.
    return clauses.filter(Boolean).join(' or ');
};
|
|
36
|
+
|
|
37
|
+
/**
 * Builds the item_revenue reconciliation query for an already merged and
 * validated configuration.
 *
 * Two CTEs aggregate sum(item_revenue) and the number of item rows per
 * (event_date, item_id):
 *  - enhanced_revenue reads the enhanced table and keeps data_is_final rows;
 *  - raw_revenue reads mergedConfig.sourceTable and applies the export date
 *    filter, the excluded-events filter and the data-is-final condition.
 * Both sides are limited to the ecommerce events and to the last 5 days.
 * The final select full-outer-joins the two CTEs and returns the rows whose
 * revenue (rounded to 2 decimals) or item count differ, or that exist on one
 * side only. Zero rows means the assertion passes.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table
 *   (e.g., ctx.ref('ga4_events_enhanced_123456789') in Dataform, or a backtick-quoted string).
 *   It is interpolated as-is into the FROM clause.
 * @param {Object} mergedConfig - Merged table configuration (after merge + validation).
 *   Reads excludedEvents, dataIsFinal.detectionMethod, dataIsFinal.dayThreshold,
 *   includedExportTypes and sourceTable.
 * @returns {string} SQL query returning violating rows
 */
const _generateItemRevenueAssertionSql = (tableRef, mergedConfig) => {
    const { excludedEvents, dataIsFinal, includedExportTypes, sourceTable } = mergedConfig;

    // Excluded events filter (same logic as the enhanced table pipeline).
    // Stays an empty string when nothing is excluded, which leaves a blank line in the SQL.
    let excludedEventsClause = '';
    if (excludedEvents.length > 0) {
        const quotedNames = excludedEvents.map((name) => `'${name}'`);
        excludedEventsClause = `and event_name not in (${quotedNames.join(', ')})`;
    }

    // data_is_final condition for the raw side.
    const rawFinalCondition = helpers.isFinalData(
        dataIsFinal.detectionMethod,
        dataIsFinal.dayThreshold
    );

    // Date filter for the raw side (per-export-type, fixed 5-day window).
    const rawDateFilter = buildAssertionDateFilter(includedExportTypes);

    const enhancedCte = [
        'with enhanced_revenue as (',
        'select',
        'event_date,',
        'item.item_id,',
        'sum(item.item_revenue) as total_item_revenue,',
        'count(*) as item_count',
        'from',
        `${tableRef},`,
        'unnest(items) as item',
        'where',
        'data_is_final = true',
        'and event_date >= date_sub(current_date(), interval 5 day)',
        `and event_name in (${ecommerceEvents})`,
        'group by event_date, item.item_id',
        '),',
    ];

    const rawCte = [
        'raw_revenue as (',
        'select',
        "cast(event_date as date format 'YYYYMMDD') as event_date,",
        'item.item_id,',
        'sum(item.item_revenue) as total_item_revenue,',
        'count(*) as item_count',
        'from',
        `${sourceTable},`,
        'unnest(items) as item',
        'where',
        `(${rawDateFilter})`,
        `${excludedEventsClause}`,
        `and event_name in (${ecommerceEvents})`,
        `and ${rawFinalCondition}`,
        "and cast(event_date as date format 'YYYYMMDD') >= date_sub(current_date(), interval 5 day)",
        'group by event_date, item.item_id',
        ')',
    ];

    // Rows present on one side only surface through the "is null" checks,
    // because comparisons against NULL never evaluate to true.
    const mismatchSelect = [
        'select',
        'coalesce(e.event_date, r.event_date) as event_date,',
        'coalesce(e.item_id, r.item_id) as item_id,',
        'e.total_item_revenue as enhanced_revenue,',
        'r.total_item_revenue as raw_revenue,',
        'e.item_count as enhanced_count,',
        'r.item_count as raw_count',
        'from',
        'enhanced_revenue e',
        'full outer join',
        'raw_revenue r using(event_date, item_id)',
        'where',
        'round(coalesce(e.total_item_revenue, 0), 2) != round(coalesce(r.total_item_revenue, 0), 2)',
        'or e.item_count != r.item_count',
        'or e.event_date is null',
        'or r.event_date is null',
    ];

    return [...enhancedCte, ...rawCte, ...mismatchSelect].join('\n');
};
|
|
115
|
+
|
|
116
|
+
/**
 * Public entry point for the item_revenue reconciliation assertion.
 *
 * Rejects an unusable tableRef up front, then merges the user configuration
 * over the module defaults, validates the merged result (with
 * skipDataformContextFields enabled) and delegates to the SQL generator.
 *
 * @param {string} tableRef - Fully qualified reference to the enhanced table.
 * @param {Object} config - User-provided table configuration.
 * @returns {string} SQL query returning violating rows (0 rows = pass)
 * @throws {Error} When tableRef is not a string or is empty/whitespace-only.
 */
const generateItemRevenueAssertionSql = (tableRef, config) => {
    const hasUsableTableRef = typeof tableRef === 'string' && tableRef.trim().length > 0;
    if (!hasUsableTableRef) {
        throw new Error(
            "assertions.itemRevenue: tableRef is required and must be a non-empty string (e.g., ctx.ref('table_name') or '`project.dataset.table`')."
        );
    }

    const mergedConfig = utils.mergeSQLConfigurations(defaultConfig, config);
    validateEnhancedEventsConfig(mergedConfig, { skipDataformContextFields: true });

    return _generateItemRevenueAssertionSql(tableRef, mergedConfig);
};
|
|
134
|
+
|
|
135
|
+
module.exports = { generateItemRevenueAssertionSql };
|
|
@@ -7,6 +7,7 @@ const { validateEnhancedEventsConfig } = require('./validation.js');
|
|
|
7
7
|
const documentation = require('../../documentation.js');
|
|
8
8
|
const { createTable } = require('../../createTable.js');
|
|
9
9
|
const { getTableDescriptionSections } = require('./tableDescription.js');
|
|
10
|
+
const assertions = require('./assertions/index.js');
|
|
10
11
|
|
|
11
12
|
// Column metadata for the GA4 Events Enhanced table
|
|
12
13
|
const columnMetadata = {
|
|
@@ -161,6 +162,9 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
161
162
|
|
|
162
163
|
// item list attribution config
|
|
163
164
|
const itemListAttribution = mergedConfig.itemListAttribution;
|
|
165
|
+
const ecommerceEventsFilter = itemListAttribution
|
|
166
|
+
? helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ')
|
|
167
|
+
: null;
|
|
164
168
|
|
|
165
169
|
// auto-adjust bufferDays for time-based item list attribution lookback
|
|
166
170
|
const effectiveBufferDays = (itemListAttribution && itemListAttribution.lookbackType === 'TIME')
|
|
@@ -225,11 +229,12 @@ const _generateEnhancedEventsSQL = (mergedConfig) => {
|
|
|
225
229
|
// ecommerce
|
|
226
230
|
ecommerce: helpers.fixEcommerceStruct('ecommerce'),
|
|
227
231
|
items: 'items',
|
|
228
|
-
// unique row id for item list attribution join.
|
|
229
|
-
// row_number()
|
|
232
|
+
// unique row id for item list attribution join. Only computed for ecommerce events.
|
|
233
|
+
// row_number() breaks hash collisions for batched events with identical data.
|
|
234
|
+
// partition by event_name avoids a single-partition bottleneck in the window function.
|
|
230
235
|
// Non-determinism is safe: colliding rows have identical items (to_json_string(items) is in the hash),
|
|
231
236
|
// so swapping row numbers between them produces the same final result.
|
|
232
|
-
|
|
237
|
+
_item_list_attribution_row_id: itemListAttribution ? `if(event_name in (${ecommerceEventsFilter}), farm_fingerprint(concat(user_pseudo_id, cast(event_timestamp as string), event_name, to_json_string(items), cast(row_number() over(partition by event_name, user_pseudo_id) as string))), null)` : undefined,
|
|
233
238
|
// flag if the data is "final" and is not expected to change anymore
|
|
234
239
|
data_is_final: helpers.isFinalData(mergedConfig.dataIsFinal.detectionMethod, mergedConfig.dataIsFinal.dayThreshold),
|
|
235
240
|
export_type: helpers.getGa4ExportType('_table_suffix'),
|
|
@@ -272,12 +277,11 @@ ${excludedEventsSQL}`,
|
|
|
272
277
|
itemListAttribution.lookbackTimeMs
|
|
273
278
|
);
|
|
274
279
|
const passthroughEvents = `event_name in ('view_item_list', 'select_item', 'view_promotion', 'select_promotion')`;
|
|
275
|
-
const ecommerceFilter = helpers.ga4EcommerceEvents.filter(e => e !== 'refund').map(e => `'${e}'`).join(', ');
|
|
276
280
|
|
|
277
281
|
return {
|
|
278
282
|
name: 'item_list_data',
|
|
279
283
|
columns: {
|
|
280
|
-
'
|
|
284
|
+
'_item_list_attribution_row_id': '_item_list_attribution_row_id',
|
|
281
285
|
'items': `array_agg(
|
|
282
286
|
(select as struct item.* replace(
|
|
283
287
|
coalesce(if(${passthroughEvents}, item.item_list_name, _item_list_attr.item_list_name), '(not set)') as item_list_name,
|
|
@@ -286,19 +290,19 @@ ${excludedEventsSQL}`,
|
|
|
286
290
|
))
|
|
287
291
|
)`,
|
|
288
292
|
},
|
|
289
|
-
from: `(select
|
|
290
|
-
groupBy: ['
|
|
293
|
+
from: `(select _item_list_attribution_row_id, event_name, item, ${attrExpr} as _item_list_attr from event_data, unnest(items) as item where event_name in (${ecommerceEventsFilter}))`,
|
|
294
|
+
groupBy: ['_item_list_attribution_row_id'],
|
|
291
295
|
};
|
|
292
296
|
})() : null;
|
|
293
297
|
|
|
294
298
|
const finalColumnOrder = getFinalColumnOrder(eventDataStep, sessionDataStep);
|
|
295
299
|
|
|
296
|
-
// When item list attribution is enabled, override the items column and exclude
|
|
300
|
+
// When item list attribution is enabled, override the items column and exclude _item_list_attribution_row_id
|
|
297
301
|
// COALESCE handles events without items (not in ecommerce filter) where the LEFT JOIN returns NULL
|
|
298
302
|
const itemListOverrides = itemListDataStep ? {
|
|
299
303
|
items: 'coalesce(item_list_data.items, event_data.items)',
|
|
300
304
|
} : {};
|
|
301
|
-
const itemListExcludedColumns = itemListDataStep ? ['
|
|
305
|
+
const itemListExcludedColumns = itemListDataStep ? ['_item_list_attribution_row_id'] : [];
|
|
302
306
|
|
|
303
307
|
// Join event_data and session_data, include additional logic
|
|
304
308
|
const finalStep = {
|
|
@@ -334,7 +338,7 @@ ${excludedEventsSQL}`,
|
|
|
334
338
|
leftJoin: [
|
|
335
339
|
...(itemListDataStep ? [{
|
|
336
340
|
table: 'item_list_data',
|
|
337
|
-
condition: 'using(
|
|
341
|
+
condition: 'using(_item_list_attribution_row_id)'
|
|
338
342
|
}] : []),
|
|
339
343
|
{
|
|
340
344
|
table: 'session_data',
|
|
@@ -418,5 +422,6 @@ module.exports = {
|
|
|
418
422
|
generateSql: generateEnhancedEventsSQL,
|
|
419
423
|
setPreOperations: setPreOperations,
|
|
420
424
|
getColumnDescriptions: getColumnDescriptions,
|
|
421
|
-
getTableDescription: getTableDescription
|
|
425
|
+
getTableDescription: getTableDescription,
|
|
426
|
+
assertions,
|
|
422
427
|
}
|