ga4-export-fixer 0.2.4-dev.0 → 0.2.5-dev.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/preOperations.js +13 -16
package/README.md
CHANGED
|
@@ -42,6 +42,7 @@ The **ga4_events_enhanced** table comes with features such as these:
|
|
|
42
42
|
- **Custom timestamp support** – Optionally use a custom event parameter as the primary timestamp, with automatic fallback to `event_timestamp`
|
|
43
43
|
- **Schema lock** – Lock the table schema to a specific GA4 export date to prevent schema drift
|
|
44
44
|
- **Data freshness tracking** – `data_is_final` flag and `export_type` label on every row
|
|
45
|
+
- **Selective date range re-processing** – Re-process a subset of data without a full table rebuild, using `incrementalStartOverride` and `incrementalEndOverride`
|
|
45
46
|
- **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
|
|
46
47
|
- **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
|
|
47
48
|
|
|
@@ -76,7 +77,7 @@ Include the package in the package.json file in your Dataform repository.
|
|
|
76
77
|
{
|
|
77
78
|
"dependencies": {
|
|
78
79
|
"@dataform/core": "3.0.42",
|
|
79
|
-
"ga4-export-fixer": "0.2.
|
|
80
|
+
"ga4-export-fixer": "0.2.4"
|
|
80
81
|
}
|
|
81
82
|
}
|
|
82
83
|
```
|
package/package.json
CHANGED
package/preOperations.js
CHANGED
|
@@ -40,39 +40,36 @@ where
|
|
|
40
40
|
};
|
|
41
41
|
|
|
42
42
|
// Define the date range start for incremental refresh with intraday tables
|
|
43
|
+
// Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
|
|
43
44
|
const getDateRangeStartIntraday = (config) => {
|
|
44
45
|
const getStartDate = () => {
|
|
45
|
-
// with incremental refresh, use the checkpoint variable and check tables created starting from that date
|
|
46
46
|
if (config.incremental) {
|
|
47
47
|
return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
|
|
48
48
|
}
|
|
49
|
-
// otherwise, just scan the last 5 days
|
|
50
49
|
return 'current_date()-5';
|
|
51
50
|
};
|
|
52
51
|
|
|
53
52
|
const startDate = getStartDate();
|
|
54
53
|
|
|
55
54
|
if (config.includedExportTypes.intraday) {
|
|
55
|
+
const informationSchemaPath = config.sourceTable.replace(
|
|
56
|
+
/`?([^`]+)\.([^`]+)\.[^`]+`?$/,
|
|
57
|
+
'`$1.$2.INFORMATION_SCHEMA.TABLES`'
|
|
58
|
+
);
|
|
59
|
+
|
|
56
60
|
return `with export_statuses as (
|
|
57
61
|
select
|
|
58
|
-
safe_cast(regexp_extract(
|
|
62
|
+
safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
|
|
59
63
|
case
|
|
60
|
-
when
|
|
64
|
+
when table_name like 'events_intraday_%' then 'intraday'
|
|
61
65
|
else 'daily'
|
|
62
|
-
end as export_type
|
|
66
|
+
end as export_type
|
|
63
67
|
from
|
|
64
|
-
${
|
|
68
|
+
${informationSchemaPath}
|
|
65
69
|
where
|
|
66
|
-
|
|
67
|
-
(
|
|
68
|
-
|
|
69
|
-
cast(${startDate} as string format 'YYYYMMDD')
|
|
70
|
-
and cast(current_date() as string format 'YYYYMMDD')
|
|
71
|
-
)
|
|
72
|
-
-- only include tables that are from the daily or intraday exports
|
|
73
|
-
and regexp_contains(_table_suffix, r'^(intraday_)?\\d{8}$')
|
|
74
|
-
group by
|
|
75
|
-
date, export_type
|
|
70
|
+
regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
|
|
71
|
+
and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
|
|
72
|
+
between ${startDate} and current_date()
|
|
76
73
|
),
|
|
77
74
|
statuses_by_day as (
|
|
78
75
|
select
|