ga4-export-fixer 0.2.6-dev.1 → 0.2.6-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/preOperations.js +29 -84
package/package.json
CHANGED
package/preOperations.js
CHANGED
|
@@ -57,100 +57,48 @@ where
|
|
|
57
57
|
|
|
58
58
|
};
|
|
59
59
|
|
|
60
|
-
//
|
|
61
|
-
// Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
60
|
+
// Find the first day where a target export type exists but no daily table does.
|
|
61
|
+
// Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data.
|
|
62
|
+
// The export_statuses CTE always classifies all three export types (daily, fresh, intraday).
|
|
63
|
+
// The 5-day lookback limit only applies to intraday rows; daily and fresh have no lower date bound.
|
|
64
|
+
const getExportDateRangeStart = (config, targetExportType) => {
|
|
65
|
+
const intradayStartDate = config.incremental
|
|
66
|
+
? `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`
|
|
67
|
+
: 'current_date()-5';
|
|
68
|
+
|
|
69
|
+
const informationSchemaPath = config.sourceTable.replace(
|
|
70
|
+
/`?([^`]+)\.([^`]+)\.[^`]+`?$/,
|
|
71
|
+
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
72
|
+
);
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
76
|
-
);
|
|
74
|
+
const finalCondition = targetExportType === 'intraday'
|
|
75
|
+
? 'intraday = true and daily = false'
|
|
76
|
+
: 'fresh = true and daily = false';
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
return `with export_statuses as (
|
|
79
79
|
select
|
|
80
80
|
safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
|
|
81
81
|
case
|
|
82
82
|
when table_name like 'events_intraday_%' then 'intraday'
|
|
83
|
-
else 'daily'
|
|
84
|
-
end as export_type
|
|
85
|
-
from
|
|
86
|
-
${informationSchemaPath}
|
|
87
|
-
where
|
|
88
|
-
regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
|
|
89
|
-
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
|
|
90
|
-
between ${startDate} and current_date()
|
|
91
|
-
),
|
|
92
|
-
statuses_by_day as (
|
|
93
|
-
select
|
|
94
|
-
date,
|
|
95
|
-
max(if(export_type = 'daily', true, false)) as daily,
|
|
96
|
-
max(if(export_type = 'intraday', true, false)) as intraday
|
|
97
|
-
from
|
|
98
|
-
export_statuses
|
|
99
|
-
group by
|
|
100
|
-
date
|
|
101
|
-
)
|
|
102
|
-
select
|
|
103
|
-
min(
|
|
104
|
-
if(
|
|
105
|
-
intraday = true and daily = false,
|
|
106
|
-
date,
|
|
107
|
-
null
|
|
108
|
-
)
|
|
109
|
-
)
|
|
110
|
-
from
|
|
111
|
-
statuses_by_day`;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
return undefined;
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
// Define the date range start for fresh export tables
|
|
118
|
-
// Uses INFORMATION_SCHEMA.TABLES to find the first day with a fresh table but no daily table
|
|
119
|
-
const getDateRangeStartFresh = (config) => {
|
|
120
|
-
const getStartDate = () => {
|
|
121
|
-
if (config.incremental) {
|
|
122
|
-
return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
|
|
123
|
-
}
|
|
124
|
-
return 'current_date()-5';
|
|
125
|
-
};
|
|
126
|
-
|
|
127
|
-
const startDate = getStartDate();
|
|
128
|
-
|
|
129
|
-
if (config.includedExportTypes.fresh) {
|
|
130
|
-
const informationSchemaPath = config.sourceTable.replace(
|
|
131
|
-
/`?([^`]+)\.([^`]+)\.[^`]+`?$/,
|
|
132
|
-
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
133
|
-
);
|
|
134
|
-
|
|
135
|
-
return `with export_statuses as (
|
|
136
|
-
select
|
|
137
|
-
safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
|
|
138
|
-
case
|
|
139
83
|
when table_name like 'events_fresh_%' then 'fresh'
|
|
140
|
-
|
|
84
|
+
when regexp_contains(table_name, r'^events_\\d{8}$') then 'daily'
|
|
141
85
|
end as export_type
|
|
142
86
|
from
|
|
143
87
|
${informationSchemaPath}
|
|
144
88
|
where
|
|
145
|
-
regexp_contains(table_name, r'^events_(fresh_)?\\d{8}$')
|
|
146
|
-
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
|
|
147
|
-
|
|
89
|
+
regexp_contains(table_name, r'^events_(intraday_|fresh_)?\\d{8}$')
|
|
90
|
+
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') <= current_date()
|
|
91
|
+
and (
|
|
92
|
+
table_name not like 'events_intraday_%'
|
|
93
|
+
or safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') >= ${intradayStartDate}
|
|
94
|
+
)
|
|
148
95
|
),
|
|
149
96
|
statuses_by_day as (
|
|
150
97
|
select
|
|
151
98
|
date,
|
|
152
99
|
max(if(export_type = 'daily', true, false)) as daily,
|
|
153
|
-
max(if(export_type = 'fresh', true, false)) as fresh
|
|
100
|
+
max(if(export_type = 'fresh', true, false)) as fresh,
|
|
101
|
+
max(if(export_type = 'intraday', true, false)) as intraday
|
|
154
102
|
from
|
|
155
103
|
export_statuses
|
|
156
104
|
group by
|
|
@@ -159,16 +107,13 @@ const getDateRangeStartFresh = (config) => {
|
|
|
159
107
|
select
|
|
160
108
|
min(
|
|
161
109
|
if(
|
|
162
|
-
|
|
110
|
+
${finalCondition},
|
|
163
111
|
date,
|
|
164
112
|
null
|
|
165
113
|
)
|
|
166
114
|
)
|
|
167
115
|
from
|
|
168
116
|
statuses_by_day`;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
return undefined;
|
|
172
117
|
};
|
|
173
118
|
|
|
174
119
|
// Get the maximum event_timestamp from fresh export tables
|
|
@@ -250,14 +195,14 @@ const setPreOperations = (config) => {
|
|
|
250
195
|
name: constants.INTRADAY_DATE_RANGE_START_VARIABLE,
|
|
251
196
|
// only needed when daily+intraday WITHOUT fresh (the two-way intraday checkpoint)
|
|
252
197
|
// when fresh is also enabled, intraday uses fresh_date_range_start instead
|
|
253
|
-
value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.intraday && config.includedExportTypes.daily && !config.includedExportTypes.fresh ?
|
|
198
|
+
value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.intraday && config.includedExportTypes.daily && !config.includedExportTypes.fresh ? getExportDateRangeStart(config, 'intraday') : undefined,
|
|
254
199
|
comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data. Only needed if intraday and daily export tables are included without fresh.',
|
|
255
200
|
},
|
|
256
201
|
{
|
|
257
202
|
type: 'variable',
|
|
258
203
|
name: constants.FRESH_DATE_RANGE_START_VARIABLE,
|
|
259
204
|
// needed when fresh and daily are both enabled, to avoid fresh data overlapping with daily
|
|
260
|
-
value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.daily ?
|
|
205
|
+
value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.daily ? getExportDateRangeStart(config, 'fresh') : undefined,
|
|
261
206
|
comment: 'Define the date range start for fresh export tables. Returns the first day with a fresh table but no daily table.',
|
|
262
207
|
},
|
|
263
208
|
{
|