ga4-export-fixer 0.2.5 → 0.2.6-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -5
- package/constants.js +2 -0
- package/defaultConfig.js +1 -1
- package/helpers.js +57 -40
- package/inputValidation.js +7 -6
- package/package.json +1 -1
- package/preOperations.js +98 -6
package/README.md
CHANGED
|
@@ -46,6 +46,7 @@ The **ga4_events_enhanced** table comes with features such as these:
|
|
|
46
46
|
- **Batch processing** – Process large GA4 exports in smaller batches using the `numberOfDaysToProcess` configuration setting
|
|
47
47
|
- **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
|
|
48
48
|
- **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
|
|
49
|
+
- **Zero dependencies** – The package has no external dependencies, so installing it adds no other packages to your Dataform repository
|
|
49
50
|
|
|
50
51
|
### Planned, Upcoming Features
|
|
51
52
|
|
|
@@ -270,18 +271,28 @@ The `onSchemaChange: "EXTEND"` setting updates the result table schema on increm
|
|
|
270
271
|
| Field | Type | Default | Description |
|
|
271
272
|
| ------------------------------ | ------- | ------- | -------------------------------- |
|
|
272
273
|
| `includedExportTypes.daily` | boolean | `true` | Include daily (processed) export |
|
|
274
|
+
| `includedExportTypes.fresh` | boolean | `false` | Include fresh (hourly-updated) export |
|
|
273
275
|
| `includedExportTypes.intraday` | boolean | `true` | Include intraday export |
|
|
274
276
|
|
|
275
277
|
|
|
276
|
-
|
|
278
|
+
Export priority: **daily > fresh > intraday**. Each lower-priority export only provides data not already covered by a higher-priority one. All seven combinations in which at least one of the three export types is enabled are supported.
|
|
279
|
+
|
|
280
|
+
When all three exports are enabled, the package:
|
|
281
|
+
1. Gets all data from daily export tables
|
|
282
|
+
2. Gets fresh export data for days not yet covered by a daily table
|
|
283
|
+
3. Gets intraday export data for events after the latest fresh event timestamp
|
|
284
|
+
|
|
285
|
+
The boundary between fresh and intraday is timestamp-based because the fresh export is updated hourly: within the same day, events up to the latest fresh event timestamp come from the fresh export, and only later events come from intraday.
|
|
286
|
+
|
|
287
|
+
> **Without daily export:** When `daily` is `false`, `dataIsFinal.detectionMethod` must be set to `'DAY_THRESHOLD'`, because `EXPORT_TYPE` detection relies on daily tables to mark data as final.
|
|
277
288
|
|
|
278
289
|
**`dataIsFinal`** — how to determine whether data is final (not expected to change):
|
|
279
290
|
|
|
280
291
|
|
|
281
|
-
| Field | Type | Default | Description
|
|
282
|
-
| ----------------------------- | ------- | --------------- |
|
|
283
|
-
| `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses table suffix; all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses days since event). Must be `'DAY_THRESHOLD'` when
|
|
284
|
-
| `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'`
|
|
292
|
+
| Field | Type | Default | Description |
|
|
293
|
+
| ----------------------------- | ------- | --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
294
|
+
| `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses table suffix; all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses days since event). Must be `'DAY_THRESHOLD'` when daily export is not enabled |
|
|
295
|
+
| `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
|
|
285
296
|
|
|
286
297
|
|
|
287
298
|
**`testConfig`** — date range used when `test` is `true`:
|
package/constants.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
const constants = {
|
|
2
2
|
DATE_RANGE_START_VARIABLE: 'date_range_start',
|
|
3
3
|
INTRADAY_DATE_RANGE_START_VARIABLE: 'intraday_date_range_start',
|
|
4
|
+
FRESH_DATE_RANGE_START_VARIABLE: 'fresh_date_range_start',
|
|
5
|
+
FRESH_MAX_EVENT_TIMESTAMP_VARIABLE: 'fresh_max_event_timestamp',
|
|
4
6
|
DATE_RANGE_END_VARIABLE: 'date_range_end',
|
|
5
7
|
LAST_PARTITION_DATE_VARIABLE: 'last_partition_date',
|
|
6
8
|
DATE_COLUMN: 'event_date',
|
package/defaultConfig.js
CHANGED
package/helpers.js
CHANGED
|
@@ -105,48 +105,37 @@ const getEventDateTime = (config) => {
|
|
|
105
105
|
|
|
106
106
|
// Filter the export tables by date range
|
|
107
107
|
/**
|
|
108
|
-
* Generates a SQL filter condition for selecting GA4 export tables based on the export type
|
|
108
|
+
* Generates a SQL filter condition for selecting GA4 export tables based on the export type and a date range.
|
|
109
109
|
*
|
|
110
110
|
* This helper produces SQL snippets to be used in WHERE clauses, ensuring only tables within the provided date range and export type are included.
|
|
111
111
|
*
|
|
112
112
|
* - For 'daily' exports: Matches table suffixes formatted as YYYYMMDD (e.g., 20240101).
|
|
113
|
+
* - For 'fresh' exports: Matches table suffixes prefixed with 'fresh_' followed by the date (e.g., fresh_20240101).
|
|
113
114
|
* - For 'intraday' exports: Matches table suffixes prefixed with 'intraday_' followed by the date (e.g., intraday_20240101).
|
|
114
|
-
* - Throws an error for unsupported export types or if start/end dates are undefined.
|
|
115
115
|
*
|
|
116
|
-
* @param {'
|
|
116
|
+
* @param {'daily'|'fresh'|'intraday'} exportType - The type of export table.
|
|
117
117
|
* @param {string} start - The start date value as a SQL date expression (e.g. 'current_date()-1').
|
|
118
118
|
* @param {string} end - The end date value as a SQL date expression (e.g. 'current_date()').
|
|
119
119
|
* @returns {string} SQL condition to restrict tables by _table_suffix to the appropriate date range and export type.
|
|
120
120
|
*
|
|
121
|
-
* @throws {Error} If exportType is not
|
|
122
|
-
*
|
|
123
|
-
* @example
|
|
124
|
-
* ga4ExportDateFilter('daily', 'current_date()-1', 'current_date()')
|
|
125
|
-
* // => "(_table_suffix >= cast(current_date()-1 as string format \"YYYYMMDD\") and _table_suffix <= cast(current_date() as string format \"YYYYMMDD\"))"
|
|
126
|
-
*
|
|
127
|
-
* ga4ExportDateFilter('intraday', 'current_date()-1', 'current_date()')
|
|
128
|
-
* // => "(_table_suffix >= 'intraday_' || cast(current_date()-1 as string format \"YYYYMMDD\") and _table_suffix <= 'intraday_' || cast(current_date() as string format \"YYYYMMDD\"))"
|
|
121
|
+
* @throws {Error} If exportType is not supported, or if start/end are not defined.
|
|
129
122
|
*/
|
|
130
123
|
const ga4ExportDateFilter = (exportType, start, end) => {
|
|
131
|
-
if (exportType !== 'intraday' && exportType !== 'daily') {
|
|
124
|
+
if (exportType !== 'intraday' && exportType !== 'daily' && exportType !== 'fresh') {
|
|
132
125
|
throw new Error(
|
|
133
|
-
`ga4ExportDateFilter: Unsupported exportType '${exportType}'. Supported values are '
|
|
126
|
+
`ga4ExportDateFilter: Unsupported exportType '${exportType}'. Supported values are 'daily', 'fresh', and 'intraday'.`
|
|
134
127
|
);
|
|
135
128
|
}
|
|
136
129
|
if (typeof start === 'undefined' || typeof end === 'undefined') {
|
|
137
130
|
throw new Error("ga4ExportDateFilter: 'start' and 'end' parameters must be defined.");
|
|
138
131
|
}
|
|
139
132
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
if (exportType === 'daily') {
|
|
144
|
-
return `(_table_suffix >= cast(${start} as string format "YYYYMMDD") and _table_suffix <= cast(${end} as string format "YYYYMMDD"))`;
|
|
145
|
-
}
|
|
133
|
+
const prefix = exportType === 'daily' ? '' : `'${exportType}_' || `;
|
|
134
|
+
return `(_table_suffix >= ${prefix}cast(${start} as string format "YYYYMMDD") and _table_suffix <= ${prefix}cast(${end} as string format "YYYYMMDD"))`;
|
|
146
135
|
};
|
|
147
136
|
|
|
148
137
|
/**
|
|
149
|
-
* Builds a `_table_suffix` WHERE clause for GA4 BigQuery export tables (daily and/or intraday).
|
|
138
|
+
* Builds a `_table_suffix` WHERE clause for GA4 BigQuery export tables (daily, fresh, and/or intraday).
|
|
150
139
|
*
|
|
151
140
|
* Date boundaries are resolved differently depending on the mode:
|
|
152
141
|
* - **test** -- literal dates from `config.testConfig`
|
|
@@ -156,18 +145,24 @@ const ga4ExportDateFilter = (exportType, start, end) => {
|
|
|
156
145
|
* `bufferDays` is subtracted from the daily start date so sessions that span
|
|
157
146
|
* midnight are not partially excluded.
|
|
158
147
|
*
|
|
159
|
-
*
|
|
160
|
-
*
|
|
161
|
-
*
|
|
162
|
-
* When
|
|
163
|
-
*
|
|
148
|
+
* Export priority: daily > fresh > intraday. Each lower-priority export only
|
|
149
|
+
* provides data not already covered by a higher-priority one.
|
|
150
|
+
*
|
|
151
|
+
* When fresh and daily are both enabled, the fresh start date comes from
|
|
152
|
+
* `FRESH_DATE_RANGE_START_VARIABLE` (first day with fresh but no daily table).
|
|
153
|
+
*
|
|
154
|
+
* When fresh and intraday are both enabled, intraday rows are filtered by
|
|
155
|
+
* `event_timestamp > fresh_max_event_timestamp` to avoid duplicating fresh data.
|
|
156
|
+
*
|
|
157
|
+
* When only daily and intraday are enabled (no fresh), the existing
|
|
158
|
+
* `INTRADAY_DATE_RANGE_START_VARIABLE` checkpoint logic is preserved.
|
|
164
159
|
*
|
|
165
160
|
* @param {Object} config
|
|
166
161
|
* @param {boolean} config.test - Use literal test dates.
|
|
167
162
|
* @param {Object} config.testConfig - `{ dateRangeStart, dateRangeEnd }`.
|
|
168
163
|
* @param {boolean} config.incremental - Use BigQuery variable placeholders.
|
|
169
164
|
* @param {Object} config.preOperations - `{ dateRangeStartFullRefresh, dateRangeEnd }`.
|
|
170
|
-
* @param {Object} config.includedExportTypes - `{ daily: boolean, intraday: boolean }`.
|
|
165
|
+
* @param {Object} config.includedExportTypes - `{ daily: boolean, fresh: boolean, intraday: boolean }`.
|
|
171
166
|
* @param {number} [config.bufferDays=0] - Extra days subtracted from the start date.
|
|
172
167
|
* @returns {string} SQL fragment for a WHERE clause.
|
|
173
168
|
*/
|
|
@@ -175,59 +170,81 @@ const ga4ExportDateFilters = (config) => {
|
|
|
175
170
|
const bufferDays = config.bufferDays || 0;
|
|
176
171
|
|
|
177
172
|
const getStartDate = () => {
|
|
178
|
-
//test mode
|
|
179
173
|
if (config.test) {
|
|
180
174
|
return config.testConfig.dateRangeStart;
|
|
181
175
|
}
|
|
182
176
|
if (config.incremental) {
|
|
183
177
|
return constants.DATE_RANGE_START_VARIABLE;
|
|
184
178
|
}
|
|
185
|
-
// full refresh
|
|
186
179
|
return config.preOperations.dateRangeStartFullRefresh;
|
|
187
180
|
};
|
|
188
181
|
|
|
189
182
|
const getEndDate = () => {
|
|
190
|
-
// test mode, avoid using a BigQuery variable
|
|
191
183
|
if (config.test) {
|
|
192
184
|
return config.testConfig.dateRangeEnd;
|
|
193
185
|
}
|
|
194
|
-
// use checkpoint variable with incremental refresh -> allows pre processing any part of the table without having to do a full refresh
|
|
195
186
|
if (config.incremental) {
|
|
196
187
|
return constants.DATE_RANGE_END_VARIABLE;
|
|
197
188
|
}
|
|
198
|
-
// full refresh
|
|
199
189
|
if (config.preOperations.numberOfDaysToProcess !== undefined) {
|
|
200
190
|
return `least(${config.preOperations.dateRangeStartFullRefresh}+${config.preOperations.numberOfDaysToProcess}-1, current_date())`;
|
|
201
191
|
}
|
|
202
192
|
return config.preOperations.dateRangeEnd;
|
|
203
193
|
};
|
|
204
194
|
|
|
195
|
+
const getFreshStartDate = () => {
|
|
196
|
+
// Fresh tables persist alongside daily tables (unlike intraday which gets deleted),
|
|
197
|
+
// so the checkpoint variable is needed even in test mode to avoid duplicate data.
|
|
198
|
+
if (config.includedExportTypes.fresh && config.includedExportTypes.daily) {
|
|
199
|
+
return constants.FRESH_DATE_RANGE_START_VARIABLE;
|
|
200
|
+
}
|
|
201
|
+
if (config.includedExportTypes.fresh && !config.includedExportTypes.daily) {
|
|
202
|
+
return getStartDate();
|
|
203
|
+
}
|
|
204
|
+
};
|
|
205
|
+
|
|
205
206
|
const getIntradayStartDate = () => {
|
|
206
|
-
//
|
|
207
|
+
// When fresh is enabled: intraday starts from the same point as fresh.
|
|
208
|
+
// Fresh tables persist alongside intraday tables, so the checkpoint is
|
|
209
|
+
// needed even in test mode to avoid duplicate data.
|
|
210
|
+
if (config.includedExportTypes.fresh) {
|
|
211
|
+
return getFreshStartDate();
|
|
212
|
+
}
|
|
213
|
+
// For non-fresh paths, test mode skips pre-operation variables.
|
|
207
214
|
if (config.test) {
|
|
208
215
|
return config.testConfig.dateRangeStart;
|
|
209
216
|
}
|
|
210
|
-
//
|
|
217
|
+
// When daily+intraday without fresh: use the existing date-based checkpoint
|
|
211
218
|
if (config.includedExportTypes.intraday && config.includedExportTypes.daily) {
|
|
212
219
|
return constants.INTRADAY_DATE_RANGE_START_VARIABLE;
|
|
213
220
|
}
|
|
214
|
-
//
|
|
215
|
-
// (buffer is normally only applied to the daily start date).
|
|
221
|
+
// Intraday-only: reuse the daily start-date logic with bufferDays
|
|
216
222
|
if (config.includedExportTypes.intraday && !config.includedExportTypes.daily) {
|
|
217
|
-
// use the same start date as if daily export was in use
|
|
218
|
-
// include the buffer days as well (not included otherwise for intraday data)
|
|
219
223
|
return `${getStartDate()}-${bufferDays}`;
|
|
220
224
|
}
|
|
221
225
|
};
|
|
222
226
|
|
|
227
|
+
const getIntradayFilter = () => {
|
|
228
|
+
const intradayStart = getIntradayStartDate();
|
|
229
|
+
const suffixFilter = ga4ExportDateFilter('intraday', intradayStart, end);
|
|
230
|
+
|
|
231
|
+
// When fresh is also enabled, add timestamp condition to avoid duplicating fresh data.
|
|
232
|
+
// Applied even in test mode because fresh and intraday tables coexist for the same days.
|
|
233
|
+
if (config.includedExportTypes.fresh) {
|
|
234
|
+
return `(${suffixFilter} and event_timestamp > coalesce(${constants.FRESH_MAX_EVENT_TIMESTAMP_VARIABLE}, 0))`;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
return suffixFilter;
|
|
238
|
+
};
|
|
239
|
+
|
|
223
240
|
const dailyStart = `${getStartDate()}-${bufferDays}`;
|
|
224
|
-
const
|
|
241
|
+
const freshStart = getFreshStartDate();
|
|
225
242
|
const end = getEndDate();
|
|
226
|
-
|
|
227
243
|
|
|
228
244
|
const dateFilters = [
|
|
229
245
|
config.includedExportTypes.daily ? ga4ExportDateFilter('daily', dailyStart, end) : null,
|
|
230
|
-
config.includedExportTypes.
|
|
246
|
+
config.includedExportTypes.fresh ? ga4ExportDateFilter('fresh', freshStart, end) : null,
|
|
247
|
+
config.includedExportTypes.intraday ? getIntradayFilter() : null,
|
|
231
248
|
];
|
|
232
249
|
|
|
233
250
|
return `(
|
package/inputValidation.js
CHANGED
|
@@ -152,7 +152,7 @@ const validateEnhancedEventsConfig = (config) => {
|
|
|
152
152
|
if (!config.includedExportTypes || typeof config.includedExportTypes !== 'object' || Array.isArray(config.includedExportTypes)) {
|
|
153
153
|
throw new Error(`config.includedExportTypes must be an object. Received: ${JSON.stringify(config.includedExportTypes)}`);
|
|
154
154
|
}
|
|
155
|
-
for (const key of ['daily', 'intraday']) {
|
|
155
|
+
for (const key of ['daily', 'fresh', 'intraday']) {
|
|
156
156
|
if (!(key in config.includedExportTypes)) {
|
|
157
157
|
throw new Error(`config.includedExportTypes.${key} is required.`);
|
|
158
158
|
}
|
|
@@ -160,8 +160,8 @@ const validateEnhancedEventsConfig = (config) => {
|
|
|
160
160
|
throw new Error(`config.includedExportTypes.${key} must be a boolean. Received: ${JSON.stringify(config.includedExportTypes[key])}`);
|
|
161
161
|
}
|
|
162
162
|
}
|
|
163
|
-
if (!config.includedExportTypes.daily && !config.includedExportTypes.intraday) {
|
|
164
|
-
throw new Error("At least one of config.includedExportTypes.daily or config.includedExportTypes.intraday must be true.");
|
|
163
|
+
if (!config.includedExportTypes.daily && !config.includedExportTypes.fresh && !config.includedExportTypes.intraday) {
|
|
164
|
+
throw new Error("At least one of config.includedExportTypes.daily, config.includedExportTypes.fresh, or config.includedExportTypes.intraday must be true.");
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
// timezone - required
|
|
@@ -204,13 +204,14 @@ const validateEnhancedEventsConfig = (config) => {
|
|
|
204
204
|
) {
|
|
205
205
|
throw new Error(`config.dataIsFinal.dayThreshold must be a non-negative integer. Received: ${JSON.stringify(config.dataIsFinal.dayThreshold)}`);
|
|
206
206
|
}
|
|
207
|
-
// EXPORT_TYPE detection relies on daily export
|
|
207
|
+
// EXPORT_TYPE detection relies on daily export tables to mark data as final.
|
|
208
|
+
// When daily is not enabled, all data would be marked as not final under EXPORT_TYPE,
|
|
209
|
+
// so DAY_THRESHOLD must be used instead.
|
|
208
210
|
if (
|
|
209
|
-
config.includedExportTypes.intraday &&
|
|
210
211
|
!config.includedExportTypes.daily &&
|
|
211
212
|
config.dataIsFinal.detectionMethod !== 'DAY_THRESHOLD'
|
|
212
213
|
) {
|
|
213
|
-
throw new Error(`config.dataIsFinal.detectionMethod must be 'DAY_THRESHOLD' when
|
|
214
|
+
throw new Error(`config.dataIsFinal.detectionMethod must be 'DAY_THRESHOLD' when daily export is not enabled (config.includedExportTypes.daily is false). A dayThreshold of 1 is recommended for intraday only setups. With fresh export, the GA4 data is subject to possible changes for up to 72 hours. Received: ${JSON.stringify(config.dataIsFinal.detectionMethod)}`);
|
|
214
215
|
}
|
|
215
216
|
|
|
216
217
|
// bufferDays - required
|
package/package.json
CHANGED
package/preOperations.js
CHANGED
|
@@ -114,6 +114,79 @@ const getDateRangeStartIntraday = (config) => {
|
|
|
114
114
|
return undefined;
|
|
115
115
|
};
|
|
116
116
|
|
|
117
|
+
// Define the date range start for fresh export tables
|
|
118
|
+
// Uses INFORMATION_SCHEMA.TABLES to find the first day with a fresh table but no daily table
|
|
119
|
+
const getDateRangeStartFresh = (config) => {
|
|
120
|
+
const getStartDate = () => {
|
|
121
|
+
if (config.incremental) {
|
|
122
|
+
return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
|
|
123
|
+
}
|
|
124
|
+
return 'current_date()-5';
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
const startDate = getStartDate();
|
|
128
|
+
|
|
129
|
+
if (config.includedExportTypes.fresh) {
|
|
130
|
+
const informationSchemaPath = config.sourceTable.replace(
|
|
131
|
+
/`?([^`]+)\.([^`]+)\.[^`]+`?$/,
|
|
132
|
+
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
133
|
+
);
|
|
134
|
+
|
|
135
|
+
return `with export_statuses as (
|
|
136
|
+
select
|
|
137
|
+
safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
|
|
138
|
+
case
|
|
139
|
+
when table_name like 'events_fresh_%' then 'fresh'
|
|
140
|
+
else 'daily'
|
|
141
|
+
end as export_type
|
|
142
|
+
from
|
|
143
|
+
${informationSchemaPath}
|
|
144
|
+
where
|
|
145
|
+
regexp_contains(table_name, r'^events_(fresh_)?\\d{8}$')
|
|
146
|
+
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
|
|
147
|
+
between ${startDate} and current_date()
|
|
148
|
+
),
|
|
149
|
+
statuses_by_day as (
|
|
150
|
+
select
|
|
151
|
+
date,
|
|
152
|
+
max(if(export_type = 'daily', true, false)) as daily,
|
|
153
|
+
max(if(export_type = 'fresh', true, false)) as fresh
|
|
154
|
+
from
|
|
155
|
+
export_statuses
|
|
156
|
+
group by
|
|
157
|
+
date
|
|
158
|
+
)
|
|
159
|
+
select
|
|
160
|
+
min(
|
|
161
|
+
if(
|
|
162
|
+
fresh = true and daily = false,
|
|
163
|
+
date,
|
|
164
|
+
null
|
|
165
|
+
)
|
|
166
|
+
)
|
|
167
|
+
from
|
|
168
|
+
statuses_by_day`;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
return undefined;
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
// Get the maximum event_timestamp from fresh export tables
|
|
175
|
+
// Used as the boundary between fresh and intraday data
|
|
176
|
+
const getFreshMaxEventTimestamp = (config) => {
|
|
177
|
+
if (config.includedExportTypes.fresh && config.includedExportTypes.intraday) {
|
|
178
|
+
const freshStartRef = config.includedExportTypes.daily
|
|
179
|
+
? constants.FRESH_DATE_RANGE_START_VARIABLE
|
|
180
|
+
: (config.incremental ? constants.DATE_RANGE_START_VARIABLE : config.preOperations.dateRangeStartFullRefresh);
|
|
181
|
+
|
|
182
|
+
return `select max(event_timestamp) from ${config.sourceTable}
|
|
183
|
+
where _table_suffix >= 'fresh_' || cast(${freshStartRef} as string format 'YYYYMMDD')
|
|
184
|
+
and _table_suffix <= 'fresh_' || cast(current_date() as string format 'YYYYMMDD')`;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return undefined;
|
|
188
|
+
};
|
|
189
|
+
|
|
117
190
|
const getDateRangeEnd = (config) => {
|
|
118
191
|
// if an incremental end override is provided, use it
|
|
119
192
|
if (config.incremental && config.preOperations.incrementalEndOverride) {
|
|
@@ -147,8 +220,12 @@ const createSchemaLockTable = (config) => {
|
|
|
147
220
|
|
|
148
221
|
// Set the pre operations for the query
|
|
149
222
|
const setPreOperations = (config) => {
|
|
150
|
-
//
|
|
151
|
-
|
|
223
|
+
// In test mode, most BigQuery variables are skipped to keep dry-run estimation accurate.
|
|
224
|
+
// Fresh checkpoint variables are the exception: fresh tables persist alongside daily and
|
|
225
|
+
// intraday tables, so the checkpoints are needed even in test mode to avoid duplicate data.
|
|
226
|
+
const freshNeedsCheckpoint = config.includedExportTypes.fresh &&
|
|
227
|
+
(config.includedExportTypes.daily || config.includedExportTypes.intraday);
|
|
228
|
+
if (config.test && !freshNeedsCheckpoint) {
|
|
152
229
|
return '';
|
|
153
230
|
}
|
|
154
231
|
|
|
@@ -170,9 +247,24 @@ const setPreOperations = (config) => {
|
|
|
170
247
|
{
|
|
171
248
|
type: 'variable',
|
|
172
249
|
name: constants.INTRADAY_DATE_RANGE_START_VARIABLE,
|
|
173
|
-
//
|
|
174
|
-
|
|
175
|
-
|
|
250
|
+
// only needed when daily+intraday WITHOUT fresh (the two-way intraday checkpoint)
|
|
251
|
+
// when fresh is also enabled, intraday uses fresh_date_range_start instead
|
|
252
|
+
value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.intraday && config.includedExportTypes.daily && !config.includedExportTypes.fresh ? getDateRangeStartIntraday(config) : undefined,
|
|
253
|
+
comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data. Only needed if intraday and daily export tables are included without fresh.',
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
type: 'variable',
|
|
257
|
+
name: constants.FRESH_DATE_RANGE_START_VARIABLE,
|
|
258
|
+
// needed when fresh and daily are both enabled, to avoid fresh data overlapping with daily
|
|
259
|
+
value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.daily ? getDateRangeStartFresh(config) : undefined,
|
|
260
|
+
comment: 'Define the date range start for fresh export tables. Returns the first day with a fresh table but no daily table.',
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
type: 'variable',
|
|
264
|
+
name: constants.FRESH_MAX_EVENT_TIMESTAMP_VARIABLE,
|
|
265
|
+
// needed when fresh and intraday are both enabled, to set the timestamp boundary
|
|
266
|
+
value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.intraday ? getFreshMaxEventTimestamp(config) : undefined,
|
|
267
|
+
comment: 'Get the latest event timestamp from fresh export tables. Used as the boundary between fresh and intraday data.',
|
|
176
268
|
},
|
|
177
269
|
{
|
|
178
270
|
type: 'variable',
|
|
@@ -190,7 +282,7 @@ const setPreOperations = (config) => {
|
|
|
190
282
|
{
|
|
191
283
|
type: 'create',
|
|
192
284
|
// create table statement only needed with schema lock
|
|
193
|
-
value: config.sourceTableType === 'GA4_EXPORT' && config.schemaLock ? createSchemaLockTable(config) : undefined,
|
|
285
|
+
value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.schemaLock ? createSchemaLockTable(config) : undefined,
|
|
194
286
|
comment: 'Lock the schema to a specific version by creating a table copy from the selected day\'s export.'
|
|
195
287
|
},
|
|
196
288
|
];
|