ga4-export-fixer 0.8.0 → 0.9.0-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -5
- package/documentation.js +272 -223
- package/helpers/ga4Transforms.js +263 -262
- package/package.json +6 -5
- package/tables/ga4EventsEnhanced/config.js +4 -0
- package/tables/ga4EventsEnhanced/index.js +91 -21
- package/tables/ga4EventsEnhanced/validation.js +95 -0
- package/utils.js +30 -8
package/helpers/ga4Transforms.js
CHANGED
|
@@ -1,262 +1,263 @@
|
|
|
1
|
-
const { unnestEventParam } = require('./params');
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* SQL expression that builds a session ID by concatenating `user_pseudo_id` with the `ga_session_id` event parameter.
|
|
5
|
-
*/
|
|
6
|
-
const sessionId = `concat(user_pseudo_id, (select value.int_value from unnest(event_params) where key = 'ga_session_id'))`;
|
|
7
|
-
|
|
8
|
-
/*
|
|
9
|
-
Ecommerce
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
/**
|
|
13
|
-
* Fixes and normalizes the ecommerce struct extracted from GA4 event data.
|
|
14
|
-
*
|
|
15
|
-
* This helper returns a SQL expression that:
|
|
16
|
-
* - Ensures `ecommerce.transaction_id` is set to NULL if it has the placeholder string '(not set)';
|
|
17
|
-
* - For 'purchase' events, normalizes `ecommerce.purchase_revenue` by:
|
|
18
|
-
* * Removing NaN values;
|
|
19
|
-
* * Filling missing purchase revenue (an old GA4 bug) with the event parameter 'value', safely cast as FLOAT64;
|
|
20
|
-
* - Leaves other fields in the ecommerce struct unchanged.
|
|
21
|
-
*
|
|
22
|
-
* The result is a new struct with the same shape as 'ecommerce' but with cleaned transaction_id and purchase_revenue.
|
|
23
|
-
*
|
|
24
|
-
* @returns {string} A SQL snippet for SELECT AS STRUCT ... REPLACE to normalize ecommerce fields.
|
|
25
|
-
*
|
|
26
|
-
* @example
|
|
27
|
-
* fixEcommerceStruct()
|
|
28
|
-
* // => SQL string that can be used in a SELECT list to normalize ecommerce columns
|
|
29
|
-
*/
|
|
30
|
-
const fixEcommerceStruct = () => {
|
|
31
|
-
return `(select as struct ecommerce.* replace(
|
|
32
|
-
if(ecommerce.transaction_id <> '(not set)', ecommerce.transaction_id, null) as transaction_id,
|
|
33
|
-
if(
|
|
34
|
-
event_name = 'purchase',
|
|
35
|
-
coalesce(
|
|
36
|
-
-- fix possible NaN values
|
|
37
|
-
if(is_nan(ecommerce.purchase_revenue), null, ecommerce.purchase_revenue),
|
|
38
|
-
-- fix an old ga4 bug where purchase_revenue was missing
|
|
39
|
-
safe_cast(${unnestEventParam('value')} as float64)
|
|
40
|
-
),
|
|
41
|
-
null
|
|
42
|
-
) as purchase_revenue
|
|
43
|
-
))`;
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
/*
|
|
47
|
-
Check if GA4 data is "final" and is not expected to change anymore
|
|
48
|
-
*/
|
|
49
|
-
|
|
50
|
-
/**
|
|
51
|
-
* Generates a SQL expression to determine whether GA4 export data can be considered "final" (not subject to further change).
|
|
52
|
-
*
|
|
53
|
-
* Two detection methods are supported:
|
|
54
|
-
* - 'EXPORT_TYPE': Checks the table suffix; returns FALSE for intraday or "fresh" tables, TRUE for finalized data.
|
|
55
|
-
* - 'DAY_THRESHOLD': Considers data final if a configurable number of days has passed since event_date.
|
|
56
|
-
*
|
|
57
|
-
* @param {'EXPORT_TYPE'|'DAY_THRESHOLD'} detectionMethod - The method to use for finality determination.
|
|
58
|
-
* 'EXPORT_TYPE': Uses patterns in _table_suffix (e.g., 'intraday_%', 'fresh_%').
|
|
59
|
-
* 'DAY_THRESHOLD': Uses date difference between the current date and event_date.
|
|
60
|
-
* @param {number} [dayThreshold] - (Only for 'DAY_THRESHOLD') Number of days after which data is considered final. Required when detectionMethod is 'DAY_THRESHOLD'.
|
|
61
|
-
* @returns {string} SQL expression that evaluates to TRUE if the data is final, otherwise FALSE.
|
|
62
|
-
*
|
|
63
|
-
* @throws {Error} If an unsupported detectionMethod is provided.
|
|
64
|
-
*
|
|
65
|
-
* @example
|
|
66
|
-
* // Checks based on export type
|
|
67
|
-
* isFinalData('EXPORT_TYPE')
|
|
68
|
-
* // => "if(_table_suffix like 'intraday_%' or _table_suffix like 'fresh_%', false, true)"
|
|
69
|
-
*
|
|
70
|
-
* // Checks using a custom day threshold
|
|
71
|
-
* isFinalData('DAY_THRESHOLD', 5)
|
|
72
|
-
* // => "if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > 5, true, false)"
|
|
73
|
-
*/
|
|
74
|
-
const isFinalData = (detectionMethod, dayThreshold) => {
|
|
75
|
-
if (detectionMethod !== 'EXPORT_TYPE' && detectionMethod !== 'DAY_THRESHOLD') {
|
|
76
|
-
throw new Error(`isFinalData: Unsupported detectionMethod '${detectionMethod}'. Supported values are 'EXPORT_TYPE' and 'DAY_THRESHOLD'.`);
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
80
|
-
if (typeof dayThreshold === 'undefined') {
|
|
81
|
-
throw new Error("isFinalData: 'dayThreshold' is required when using 'DAY_THRESHOLD' detectionMethod.");
|
|
82
|
-
}
|
|
83
|
-
if (!Number.isInteger(dayThreshold) || dayThreshold < 0) {
|
|
84
|
-
throw new Error("isFinalData: 'dayThreshold' must be an integer greater than or equal to 0 when using 'DAY_THRESHOLD' detectionMethod.");
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
if (detectionMethod === 'EXPORT_TYPE') {
|
|
89
|
-
return 'if(_table_suffix like \'intraday_%\' or _table_suffix like \'fresh_%\', false, true)';
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
if (detectionMethod === 'DAY_THRESHOLD') {
|
|
93
|
-
return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${dayThreshold}, true, false)`;
|
|
94
|
-
}
|
|
95
|
-
};
|
|
96
|
-
|
|
97
|
-
/**
|
|
98
|
-
* Checks whether a given column name is part of the standard/expected GA4 BigQuery export columns.
|
|
99
|
-
*
|
|
100
|
-
* The list of recognized GA4 export columns is based on the official schema as of 2026-02-18.
|
|
101
|
-
* This function can be used to filter or validate column names when processing GA4 data exports.
|
|
102
|
-
*
|
|
103
|
-
* @param {string} columnName - The name of the column to check.
|
|
104
|
-
* @returns {boolean} True if the column name is a GA4 export column, otherwise false.
|
|
105
|
-
*/
|
|
106
|
-
const isGa4ExportColumn = (columnName) => {
|
|
107
|
-
// list updated 2026-02-18
|
|
108
|
-
const ga4ExportColumns = [
|
|
109
|
-
"event_date",
|
|
110
|
-
"event_timestamp",
|
|
111
|
-
"event_name",
|
|
112
|
-
"event_params",
|
|
113
|
-
"event_previous_timestamp",
|
|
114
|
-
"event_value_in_usd",
|
|
115
|
-
"event_bundle_sequence_id",
|
|
116
|
-
"event_server_timestamp_offset",
|
|
117
|
-
"user_id",
|
|
118
|
-
"user_pseudo_id",
|
|
119
|
-
"privacy_info",
|
|
120
|
-
"user_properties",
|
|
121
|
-
"user_first_touch_timestamp",
|
|
122
|
-
"user_ltv",
|
|
123
|
-
"device",
|
|
124
|
-
"geo",
|
|
125
|
-
"app_info",
|
|
126
|
-
"traffic_source",
|
|
127
|
-
"stream_id",
|
|
128
|
-
"platform",
|
|
129
|
-
"event_dimensions",
|
|
130
|
-
"ecommerce",
|
|
131
|
-
"items",
|
|
132
|
-
"collected_traffic_source",
|
|
133
|
-
"is_active_user",
|
|
134
|
-
"batch_event_index",
|
|
135
|
-
"batch_page_id",
|
|
136
|
-
"batch_ordering_id",
|
|
137
|
-
"session_traffic_source_last_click",
|
|
138
|
-
"publisher"
|
|
139
|
-
];
|
|
140
|
-
return ga4ExportColumns.includes(columnName);
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
/**
|
|
144
|
-
* Generates a SQL CASE expression that determines the GA4 export type from a table suffix.
|
|
145
|
-
*
|
|
146
|
-
* Returns 'intraday' for suffixes like 'intraday_%', 'fresh' for 'fresh_%',
|
|
147
|
-
* and 'daily' for 8-digit date suffixes (YYYYMMDD).
|
|
148
|
-
*
|
|
149
|
-
* @param {string} tableSuffix - SQL expression or column reference for the table suffix (e.g., '_table_suffix').
|
|
150
|
-
* @returns {string} SQL CASE expression that evaluates to 'intraday', 'fresh', or 'daily'.
|
|
151
|
-
*/
|
|
152
|
-
const getGa4ExportType = (tableSuffix) => {
|
|
153
|
-
return `case
|
|
154
|
-
when ${tableSuffix} like 'intraday_%' then 'intraday'
|
|
155
|
-
when ${tableSuffix} like 'fresh_%' then 'fresh'
|
|
156
|
-
when regexp_contains(${tableSuffix}, r'^\\d{8}$') then 'daily'
|
|
157
|
-
end`;
|
|
158
|
-
};
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Generates a SQL LAST_VALUE window function that attributes item list fields
|
|
162
|
-
* (item_list_name, item_list_id, item_list_index) from select_item/select_promotion
|
|
163
|
-
* events to downstream ecommerce events using a lookback window.
|
|
164
|
-
*
|
|
165
|
-
* Returns a struct containing all three attributed fields via a single window sort.
|
|
166
|
-
*
|
|
167
|
-
* @param {'SESSION'|'TIME'} lookbackType - Window scope: session-based or time-based
|
|
168
|
-
* @param {string} timestampColumn - Column to order by ('event_timestamp' or 'event_custom_timestamp')
|
|
169
|
-
* @param {number} [lookbackTimeMs] - Lookback window in milliseconds (required when lookbackType is 'TIME')
|
|
170
|
-
* @returns {string} SQL expression that evaluates to a struct with item_list_name, item_list_id, item_list_index
|
|
171
|
-
*/
|
|
172
|
-
const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs) => {
|
|
173
|
-
const selectEvents = `event_name in ('select_item', 'select_promotion')`;
|
|
174
|
-
const structExpr = `struct(item.item_list_name, item.item_list_id, item.item_list_index)`;
|
|
175
|
-
|
|
176
|
-
let partitionBy;
|
|
177
|
-
let frameBounds;
|
|
178
|
-
|
|
179
|
-
if (lookbackType === 'SESSION') {
|
|
180
|
-
partitionBy = 'session_id, item.item_id';
|
|
181
|
-
frameBounds = 'rows between unbounded preceding and current row';
|
|
182
|
-
} else {
|
|
183
|
-
// TIME-based: range window in microseconds
|
|
184
|
-
const lookbackMicros = lookbackTimeMs * 1000;
|
|
185
|
-
partitionBy = 'user_pseudo_id, item.item_id';
|
|
186
|
-
frameBounds = `range between ${lookbackMicros} preceding and current row`;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
return `last_value(
|
|
190
|
-
if(${selectEvents}, ${structExpr}, null) ignore nulls
|
|
191
|
-
) over(
|
|
192
|
-
partition by ${partitionBy}
|
|
193
|
-
order by ${timestampColumn} asc
|
|
194
|
-
${frameBounds}
|
|
195
|
-
)`;
|
|
196
|
-
};
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* Generates a SQL expression for a deterministic hash-based row id used by the
|
|
200
|
-
*
|
|
201
|
-
*
|
|
202
|
-
*
|
|
203
|
-
*
|
|
204
|
-
*
|
|
205
|
-
*
|
|
206
|
-
*
|
|
207
|
-
*
|
|
208
|
-
*
|
|
209
|
-
*
|
|
210
|
-
*
|
|
211
|
-
*
|
|
212
|
-
*
|
|
213
|
-
*
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
*
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
'
|
|
239
|
-
'
|
|
240
|
-
'
|
|
241
|
-
'
|
|
242
|
-
'
|
|
243
|
-
'
|
|
244
|
-
'
|
|
245
|
-
'
|
|
246
|
-
'
|
|
247
|
-
'
|
|
248
|
-
'
|
|
249
|
-
'
|
|
250
|
-
'
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
1
|
+
const { unnestEventParam } = require('./params');
|
|
2
|
+
|
|
3
|
+
/**
 * BigQuery SQL expression for a session identifier.
 *
 * Concatenates `user_pseudo_id` with the integer value of the `ga_session_id`
 * event parameter, which a scalar subquery reads from `event_params`.
 */
const sessionId = [
    'concat(user_pseudo_id, ',
    "(select value.int_value from unnest(event_params) where key = 'ga_session_id')",
    ')',
].join('');
|
|
7
|
+
|
|
8
|
+
/*
|
|
9
|
+
Ecommerce
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Builds the SQL expression that rebuilds the GA4 `ecommerce` struct with two
 * fields cleaned up; every other field is passed through via `ecommerce.*`.
 *
 * Replaced fields:
 * - `transaction_id`: kept only when it differs from the placeholder
 *   '(not set)'; otherwise NULL.
 * - `purchase_revenue`: for `purchase` events, the first non-NULL of
 *   (a) the exported revenue with NaN turned into NULL and
 *   (b) the `value` event parameter safely cast to FLOAT64 (covers an old GA4
 *   bug where purchase_revenue was missing). NULL for all other events.
 *
 * @returns {string} A parenthesised `select as struct ... replace(...)` SQL snippet.
 *
 * @example
 * fixEcommerceStruct()
 * // => SQL string usable in a SELECT list in place of the raw ecommerce column
 */
const fixEcommerceStruct = () => {
    // Resolved once so the SQL line list below stays purely declarative.
    const valueParamSql = unnestEventParam('value');

    const sqlLines = [
        '(select as struct ecommerce.* replace(',
        "if(ecommerce.transaction_id <> '(not set)', ecommerce.transaction_id, null) as transaction_id,",
        'if(',
        "event_name = 'purchase',",
        'coalesce(',
        '-- fix possible NaN values',
        'if(is_nan(ecommerce.purchase_revenue), null, ecommerce.purchase_revenue),',
        '-- fix an old ga4 bug where purchase_revenue was missing',
        `safe_cast(${valueParamSql} as float64)`,
        '),',
        'null',
        ') as purchase_revenue',
        '))',
    ];

    // The SQL contains `--` line comments, so the line breaks are significant.
    return sqlLines.join('\n');
};
|
|
45
|
+
|
|
46
|
+
/*
|
|
47
|
+
Check if GA4 data is "final" and is not expected to change anymore
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
/**
 * Returns a SQL boolean expression telling whether a GA4 export row is "final",
 * i.e. no longer expected to change.
 *
 * Supported detection methods:
 * - 'EXPORT_TYPE': FALSE when `_table_suffix` matches 'intraday_%' or 'fresh_%',
 *   TRUE otherwise.
 * - 'DAY_THRESHOLD': TRUE when more than `dayThreshold` days lie between
 *   `current_date()` and `event_date` (parsed as 'YYYYMMDD').
 *
 * @param {'EXPORT_TYPE'|'DAY_THRESHOLD'} detectionMethod - How finality is decided.
 * @param {number} [dayThreshold] - Required for 'DAY_THRESHOLD': integer >= 0.
 *   Ignored (and not validated) for 'EXPORT_TYPE'.
 * @returns {string} SQL expression evaluating to TRUE for final data, else FALSE.
 *
 * @throws {Error} If `detectionMethod` is not one of the supported values.
 * @throws {Error} If 'DAY_THRESHOLD' is used and `dayThreshold` is undefined,
 *   not an integer, or negative.
 *
 * @example
 * isFinalData('EXPORT_TYPE')
 * // => "if(_table_suffix like 'intraday_%' or _table_suffix like 'fresh_%', false, true)"
 *
 * isFinalData('DAY_THRESHOLD', 5)
 * // => "if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > 5, true, false)"
 */
const isFinalData = (detectionMethod, dayThreshold) => {
    switch (detectionMethod) {
        case 'EXPORT_TYPE':
            return "if(_table_suffix like 'intraday_%' or _table_suffix like 'fresh_%', false, true)";

        case 'DAY_THRESHOLD': {
            // A missing threshold gets its own message, distinct from an invalid one.
            if (dayThreshold === undefined) {
                throw new Error("isFinalData: 'dayThreshold' is required when using 'DAY_THRESHOLD' detectionMethod.");
            }
            const isValidThreshold = Number.isInteger(dayThreshold) && dayThreshold >= 0;
            if (!isValidThreshold) {
                throw new Error("isFinalData: 'dayThreshold' must be an integer greater than or equal to 0 when using 'DAY_THRESHOLD' detectionMethod.");
            }
            return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${dayThreshold}, true, false)`;
        }

        default:
            throw new Error(`isFinalData: Unsupported detectionMethod '${detectionMethod}'. Supported values are 'EXPORT_TYPE' and 'DAY_THRESHOLD'.`);
    }
};
|
|
96
|
+
|
|
97
|
+
/**
 * Tells whether `columnName` is one of the top-level columns of the GA4
 * BigQuery export listed below.
 *
 * The comparison is exact (case-sensitive, no trimming). The list was last
 * updated on 2026-02-18 according to the original source comment.
 *
 * @param {string} columnName - Column name to look up.
 * @returns {boolean} True when the name is in the known GA4 export column list.
 */
const isGa4ExportColumn = (columnName) => {
    // list updated 2026-02-18
    const knownColumns = new Set([
        'event_date',
        'event_timestamp',
        'event_name',
        'event_params',
        'event_previous_timestamp',
        'event_value_in_usd',
        'event_bundle_sequence_id',
        'event_server_timestamp_offset',
        'user_id',
        'user_pseudo_id',
        'privacy_info',
        'user_properties',
        'user_first_touch_timestamp',
        'user_ltv',
        'device',
        'geo',
        'app_info',
        'traffic_source',
        'stream_id',
        'platform',
        'event_dimensions',
        'ecommerce',
        'items',
        'collected_traffic_source',
        'is_active_user',
        'batch_event_index',
        'batch_page_id',
        'batch_ordering_id',
        'session_traffic_source_last_click',
        'publisher',
    ]);

    return knownColumns.has(columnName);
};
|
|
142
|
+
|
|
143
|
+
/**
 * Builds a SQL CASE expression that classifies a GA4 export table by its suffix.
 *
 * Branches, evaluated in order:
 * - suffix like 'intraday_%'  -> 'intraday'
 * - suffix like 'fresh_%'     -> 'fresh'
 * - suffix of exactly 8 digits (YYYYMMDD) -> 'daily'
 *
 * The CASE has no ELSE branch, so any other suffix evaluates to NULL in SQL.
 *
 * @param {string} tableSuffix - SQL expression or column reference holding the
 *   table suffix (e.g. '_table_suffix'). It is interpolated into the SQL as-is.
 * @returns {string} SQL CASE expression yielding 'intraday', 'fresh' or 'daily'.
 */
const getGa4ExportType = (tableSuffix) => {
    const whenClauses = [
        `when ${tableSuffix} like 'intraday_%' then 'intraday'`,
        `when ${tableSuffix} like 'fresh_%' then 'fresh'`,
        // `\\d` becomes `\d` inside the emitted SQL raw string literal.
        `when regexp_contains(${tableSuffix}, r'^\\d{8}$') then 'daily'`,
    ];

    return ['case', ...whenClauses, 'end'].join('\n');
};
|
|
159
|
+
|
|
160
|
+
/**
 * Generates a SQL LAST_VALUE window expression that attributes item list fields
 * (item_list_name, item_list_id, item_list_index) from select_item /
 * select_promotion events to later rows of the same item within a lookback window.
 *
 * Only rows whose event_name is 'select_item' or 'select_promotion' contribute a
 * struct; all other rows contribute NULL and are skipped via IGNORE NULLS, so the
 * expression yields the most recent qualifying struct inside the window frame.
 *
 * Window definition:
 * - 'SESSION': partition by `session_id, item.item_id`, frame = all preceding rows.
 * - any other value (intended: 'TIME'): partition by `user_pseudo_id, item.item_id`,
 *   frame = RANGE of `lookbackTimeMs * 1000` preceding, i.e. the lookback is
 *   converted from milliseconds to microseconds to match the ordering column.
 *
 * @param {'SESSION'|'TIME'} lookbackType - Window scope: session-based or time-based.
 * @param {string} timestampColumn - Column to order by ('event_timestamp' or
 *   'event_custom_timestamp'); interpolated into the SQL as-is.
 * @param {number} [lookbackTimeMs] - Lookback window in milliseconds. Required
 *   (finite, >= 0) whenever lookbackType is not 'SESSION'.
 * @returns {string} SQL expression evaluating to a struct with item_list_name,
 *   item_list_id and item_list_index.
 *
 * @throws {Error} If the time-based window is requested and `lookbackTimeMs` is
 *   missing, not a number, not finite, or negative. Previously such input was
 *   silently rendered as e.g. `range between NaN preceding`, producing invalid SQL.
 */
const itemListAttributionExpr = (lookbackType, timestampColumn, lookbackTimeMs) => {
    const selectEvents = `event_name in ('select_item', 'select_promotion')`;
    const structExpr = `struct(item.item_list_name, item.item_list_id, item.item_list_index)`;

    let partitionBy;
    let frameBounds;

    if (lookbackType === 'SESSION') {
        partitionBy = 'session_id, item.item_id';
        frameBounds = 'rows between unbounded preceding and current row';
    } else {
        // Fail fast here: an invalid bound would otherwise only surface later as
        // a hard-to-trace SQL error.
        const isValidLookback =
            typeof lookbackTimeMs === 'number' &&
            Number.isFinite(lookbackTimeMs) &&
            lookbackTimeMs >= 0;
        if (!isValidLookback) {
            throw new Error("itemListAttributionExpr: 'lookbackTimeMs' must be a finite number greater than or equal to 0 when lookbackType is not 'SESSION'.");
        }

        // TIME-based: range window in microseconds
        const lookbackMicros = lookbackTimeMs * 1000;
        partitionBy = 'user_pseudo_id, item.item_id';
        frameBounds = `range between ${lookbackMicros} preceding and current row`;
    }

    return [
        'last_value(',
        `if(${selectEvents}, ${structExpr}, null) ignore nulls`,
        ') over(',
        `partition by ${partitionBy}`,
        `order by ${timestampColumn} asc`,
        `${frameBounds}`,
        ')',
    ].join('\n');
};
|
|
197
|
+
|
|
198
|
+
/**
 * Generates a SQL expression for a deterministic, hash-based row id.
 *
 * For rows whose `event_name` is in `ecommerceEventsFilter`, the id is
 * `farm_fingerprint` over the concatenation of user_pseudo_id, event_timestamp,
 * event_name, the JSON-serialised `items` array and a row_number() tiebreaker.
 * All other rows get NULL.
 *
 * The row_number() window partitions by `event_name, user_pseudo_id` and orders
 * by `event_timestamp, to_json_string(items)`. Per the original author's notes,
 * this ordering keeps the id stable if BigQuery re-evaluates the CTE per
 * reference, and rows that tie on both ordering keys are interchangeable, so
 * whichever row number each receives does not affect the result. The id is meant
 * for the shared items_unnested / items_rebuilt scaffold (not visible here).
 *
 * NOTE(review): BigQuery `concat` returns NULL when any argument is NULL, so a
 * NULL user_pseudo_id would produce a NULL id — confirm this is acceptable for
 * the downstream join.
 *
 * @param {string} ecommerceEventsFilter - Comma-separated, quoted list of event
 *   names (e.g., "'purchase', 'add_to_cart'"); interpolated into the SQL as-is.
 * @returns {string} SQL expression that evaluates to the row id or NULL.
 */
const itemRowId = (ecommerceEventsFilter) => {
    // Tiebreaker that disambiguates rows sharing user, timestamp, name and items.
    const rowNumberLines = [
        'cast(row_number() over(',
        'partition by event_name, user_pseudo_id',
        'order by event_timestamp, to_json_string(items)',
        ') as string)',
    ];

    const hashLines = [
        'farm_fingerprint(concat(',
        'user_pseudo_id,',
        'cast(event_timestamp as string),',
        'event_name,',
        'to_json_string(items),',
        ...rowNumberLines,
        ')),',
    ];

    return [
        'if(',
        `event_name in (${ecommerceEventsFilter}),`,
        ...hashLines,
        'null',
        ')',
    ].join('\n');
};
|
|
232
|
+
|
|
233
|
+
/**
 * Names of the official GA4 ecommerce events that carry item data.
 *
 * Source: https://developers.google.com/analytics/devguides/collection/ga4/ecommerce
 * The order of the entries is part of the exported value; keep it stable.
 */
const ga4EcommerceEvents = [
    'view_item_list', 'select_item',
    'view_promotion', 'select_promotion',
    'view_item',
    'add_to_wishlist', 'add_to_cart', 'remove_from_cart', 'view_cart',
    'begin_checkout', 'add_shipping_info', 'add_payment_info',
    'purchase', 'refund',
];
|
|
253
|
+
|
|
254
|
+
module.exports = {
|
|
255
|
+
sessionId,
|
|
256
|
+
fixEcommerceStruct,
|
|
257
|
+
isFinalData,
|
|
258
|
+
isGa4ExportColumn,
|
|
259
|
+
getGa4ExportType,
|
|
260
|
+
itemListAttributionExpr,
|
|
261
|
+
itemRowId,
|
|
262
|
+
ga4EcommerceEvents
|
|
263
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ga4-export-fixer",
|
|
3
|
-
"version": "0.8.0",
|
|
3
|
+
"version": "0.9.0-dev.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"createTable.js"
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js",
|
|
20
|
+
"test": "node tests/ga4EventsEnhanced.test.js && node tests/assertions.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js && node tests/inputValidation.test.js && node tests/createTable.test.js && node tests/queryBuilder.test.js && node tests/customSteps.test.js && node tests/enrichments.test.js",
|
|
21
21
|
"test:summary": "node tests/testRunner.js",
|
|
22
22
|
"test:docs": "node tests/documentation.test.js",
|
|
23
23
|
"test:preops": "node tests/preOperations.test.js",
|
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
"test:createTable": "node tests/createTable.test.js",
|
|
29
29
|
"test:queryBuilder": "node tests/queryBuilder.test.js",
|
|
30
30
|
"test:customSteps": "node tests/customSteps.test.js",
|
|
31
|
+
"test:enrichments": "node tests/enrichments.test.js",
|
|
31
32
|
"test:integration": "node tests/integration/integration.test.js",
|
|
32
33
|
"release:dev": "./scripts/release-dev.sh",
|
|
33
34
|
"readme": "node scripts/updateReadme.js",
|
|
@@ -44,8 +45,8 @@
|
|
|
44
45
|
},
|
|
45
46
|
"homepage": "https://github.com/tanelytics/ga4-export-fixer#readme",
|
|
46
47
|
"devDependencies": {
|
|
47
|
-
"@google-cloud/bigquery": "^8.
|
|
48
|
-
"@google-cloud/dataform": "^2.2.
|
|
49
|
-
"dotenv": "^17.
|
|
48
|
+
"@google-cloud/bigquery": "^8.3.0",
|
|
49
|
+
"@google-cloud/dataform": "^2.2.2",
|
|
50
|
+
"dotenv": "^17.4.2"
|
|
50
51
|
}
|
|
51
52
|
}
|
|
@@ -68,6 +68,10 @@ const ga4EventsEnhancedConfig = {
|
|
|
68
68
|
// user-defined CTEs appended to the pipeline after enhanced_events
|
|
69
69
|
// each entry is a queryBuilder step (raw {name, query} or structured {name, select, from, ...})
|
|
70
70
|
customSteps: [],
|
|
71
|
+
// declarative external-data enrichments joined into the pipeline
|
|
72
|
+
// each entry: { name, level: 'event' | 'item', source, joinKey, columns, dedupe? }
|
|
73
|
+
// 'item' level is accepted at config time but throws at SQL gen — not yet implemented
|
|
74
|
+
enrichments: [],
|
|
71
75
|
};
|
|
72
76
|
|
|
73
77
|
module.exports = { ga4EventsEnhancedConfig };
|