ga4-export-fixer 0.2.4 → 0.2.5-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/constants.js +1 -0
- package/package.json +1 -1
- package/preOperations.js +36 -17
package/README.md
CHANGED
|
@@ -42,6 +42,7 @@ The **ga4_events_enhanced** table comes with features such as these:
|
|
|
42
42
|
- **Custom timestamp support** – Optionally use a custom event parameter as the primary timestamp, with automatic fallback to `event_timestamp`
|
|
43
43
|
- **Schema lock** – Lock the table schema to a specific GA4 export date to prevent schema drift
|
|
44
44
|
- **Data freshness tracking** – `data_is_final` flag and `export_type` label on every row
|
|
45
|
+
- **Selective date range re-processing** – Re-process a subset of data without a full table rebuild, using `incrementalStartOverride` and `incrementalEndOverride`
|
|
45
46
|
- **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
|
|
46
47
|
- **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
|
|
47
48
|
|
package/constants.js
CHANGED
|
@@ -2,6 +2,7 @@ const constants = {
|
|
|
2
2
|
DATE_RANGE_START_VARIABLE: 'date_range_start',
|
|
3
3
|
INTRADAY_DATE_RANGE_START_VARIABLE: 'intraday_date_range_start',
|
|
4
4
|
DATE_RANGE_END_VARIABLE: 'date_range_end',
|
|
5
|
+
LAST_PARTITION_DATE_VARIABLE: 'last_partition_date',
|
|
5
6
|
DATE_COLUMN: 'event_date',
|
|
6
7
|
DEFAULT_EVENTS_TABLE_NAME: 'ga4_events_enhanced',
|
|
7
8
|
TABLE_DESCRIPTION_SUFFIX: 'Created by the ga4-export-fixer package.',
|
package/package.json
CHANGED
package/preOperations.js
CHANGED
|
@@ -6,6 +6,22 @@ const declareVariable = (variable, value) => {
|
|
|
6
6
|
);`;
|
|
7
7
|
};
|
|
8
8
|
|
|
9
|
+
// Get the last partition date from the result table
|
|
10
|
+
/**
 * Build the BigQuery SQL that returns the latest date partition of the result table.
 *
 * The query reads partition metadata from the dataset-level
 * INFORMATION_SCHEMA.PARTITIONS view rather than selecting from the table itself.
 *
 * @param {Object} config - Configuration object.
 * @param {string} config.self - Result table reference; expected to be in
 *   `project.dataset.table` form, optionally wrapped in backticks. A value that
 *   does not match that shape is passed through unchanged as the "from" target.
 * @returns {string} A select statement producing a single DATE value
 *   (NULL when no partition id parses as a YYYYMMDD date).
 */
const getLastPartitionDate = (config) => {
    // Swap the trailing table segment for the dataset's partitions metadata view.
    const informationSchemaPath = config.self.replace(
        /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
        '`$1.$2.INFORMATION_SCHEMA.PARTITIONS`'
    );

    // The last dot-separated segment (backticks stripped) is the table name to filter on.
    const tableName = config.self.replace(/`/g, '').split('.').pop();

    // safe.parse_date returns NULL instead of raising for partition ids that are not
    // YYYYMMDD dates (e.g. __UNPARTITIONED__); max() ignores those NULLs, so one
    // non-date partition id can no longer fail the whole statement.
    return `select
        max(safe.parse_date('%Y%m%d', partition_id))
    from
        ${informationSchemaPath}
    where
        table_name = '${tableName}' and partition_id != '__NULL__'`;
};
|
|
24
|
+
|
|
9
25
|
// Define the date range start for incremental and full refresh
|
|
10
26
|
const getDateRangeStart = (config) => {
|
|
11
27
|
if (config.incremental) {
|
|
@@ -22,7 +38,7 @@ const getDateRangeStart = (config) => {
|
|
|
22
38
|
from
|
|
23
39
|
${config.self}
|
|
24
40
|
where
|
|
25
|
-
${constants.DATE_COLUMN} >
|
|
41
|
+
${constants.DATE_COLUMN} > ${constants.LAST_PARTITION_DATE_VARIABLE}-${config.preOperations.numberOfPreviousDaysToScan}
|
|
26
42
|
group by
|
|
27
43
|
${constants.DATE_COLUMN}
|
|
28
44
|
)
|
|
@@ -40,39 +56,36 @@ where
|
|
|
40
56
|
};
|
|
41
57
|
|
|
42
58
|
// Define the date range start for incremental refresh with intraday tables
|
|
59
|
+
// Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
|
|
43
60
|
const getDateRangeStartIntraday = (config) => {
|
|
44
61
|
const getStartDate = () => {
|
|
45
|
-
// with incremental refresh, use the checkpoint variable and check tables created starting from that date
|
|
46
62
|
if (config.incremental) {
|
|
47
63
|
return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
|
|
48
64
|
}
|
|
49
|
-
// otherwise, just scan the last 5 days
|
|
50
65
|
return 'current_date()-5';
|
|
51
66
|
};
|
|
52
67
|
|
|
53
68
|
const startDate = getStartDate();
|
|
54
69
|
|
|
55
70
|
if (config.includedExportTypes.intraday) {
|
|
71
|
+
const informationSchemaPath = config.sourceTable.replace(
|
|
72
|
+
/`?([^`]+)\.([^`]+)\.[^`]+`?$/,
|
|
73
|
+
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
74
|
+
);
|
|
75
|
+
|
|
56
76
|
return `with export_statuses as (
|
|
57
77
|
select
|
|
58
|
-
safe_cast(regexp_extract(
|
|
78
|
+
safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
|
|
59
79
|
case
|
|
60
|
-
when
|
|
80
|
+
when table_name like 'events_intraday_%' then 'intraday'
|
|
61
81
|
else 'daily'
|
|
62
|
-
end as export_type
|
|
82
|
+
end as export_type
|
|
63
83
|
from
|
|
64
|
-
${
|
|
84
|
+
${informationSchemaPath}
|
|
65
85
|
where
|
|
66
|
-
|
|
67
|
-
(
|
|
68
|
-
|
|
69
|
-
cast(${startDate} as string format 'YYYYMMDD')
|
|
70
|
-
and cast(current_date() as string format 'YYYYMMDD')
|
|
71
|
-
)
|
|
72
|
-
-- only include tables that are from the daily or intraday exports
|
|
73
|
-
and regexp_contains(_table_suffix, r'^(intraday_)?\\d{8}$')
|
|
74
|
-
group by
|
|
75
|
-
date, export_type
|
|
86
|
+
regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
|
|
87
|
+
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
|
|
88
|
+
between ${startDate} and current_date()
|
|
76
89
|
),
|
|
77
90
|
statuses_by_day as (
|
|
78
91
|
select
|
|
@@ -135,6 +148,12 @@ const setPreOperations = (config) => {
|
|
|
135
148
|
|
|
136
149
|
// define the pre operations
|
|
137
150
|
const preOperations = [
|
|
151
|
+
{
|
|
152
|
+
type: 'variable',
|
|
153
|
+
name: constants.LAST_PARTITION_DATE_VARIABLE,
|
|
154
|
+
value: config.incremental ? getLastPartitionDate(config) : undefined,
|
|
155
|
+
comment: 'Get the last partition date from the result table. Used to anchor the incremental date checkpoint scan window to the table\'s actual data.',
|
|
156
|
+
},
|
|
138
157
|
{
|
|
139
158
|
type: 'variable',
|
|
140
159
|
name: constants.DATE_RANGE_START_VARIABLE,
|