ga4-export-fixer 0.2.4 → 0.2.5-dev.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,6 +42,7 @@ The **ga4_events_enhanced** table comes with features such as these:
42
42
  - **Custom timestamp support** – Optionally use a custom event parameter as the primary timestamp, with automatic fallback to `event_timestamp`
43
43
  - **Schema lock** – Lock the table schema to a specific GA4 export date to prevent schema drift
44
44
  - **Data freshness tracking** – `data_is_final` flag and `export_type` label on every row
45
+ - **Selective date range re-processing** – Re-process a subset of data without a full table rebuild, using `incrementalStartOverride` and `incrementalEndOverride`
45
46
  - **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
46
47
  - **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
47
48
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.2.4",
3
+ "version": "0.2.5-dev.0",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
package/preOperations.js CHANGED
@@ -40,39 +40,36 @@ where
40
40
  };
41
41
 
42
42
  // Define the date range start for incremental refresh with intraday tables
43
+ // Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
43
44
  const getDateRangeStartIntraday = (config) => {
44
45
  const getStartDate = () => {
45
- // with incremental refresh, use the checkpoint variable and check tables created starting from that date
46
46
  if (config.incremental) {
47
47
  return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
48
48
  }
49
- // otherwise, just scan the last 5 days
50
49
  return 'current_date()-5';
51
50
  };
52
51
 
53
52
  const startDate = getStartDate();
54
53
 
55
54
  if (config.includedExportTypes.intraday) {
55
+ const informationSchemaPath = config.sourceTable.replace(
56
+ /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
57
+ '`$1.$2.INFORMATION_SCHEMA.TABLES`'
58
+ );
59
+
56
60
  return `with export_statuses as (
57
61
  select
58
- safe_cast(regexp_extract(_table_suffix, r'\\d+') as date format 'YYYYMMDD') as date,
62
+ safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
59
63
  case
60
- when _table_suffix like 'intraday_%' then 'intraday'
64
+ when table_name like 'events_intraday_%' then 'intraday'
61
65
  else 'daily'
62
- end as export_type,
66
+ end as export_type
63
67
  from
64
- ${config.sourceTable}
68
+ ${informationSchemaPath}
65
69
  where
66
- -- check tables that are newer than the date range start or the last 5 days (if not incremental refresh)
67
- (
68
- regexp_extract(_table_suffix, r'\\d+') between
69
- cast(${startDate} as string format 'YYYYMMDD')
70
- and cast(current_date() as string format 'YYYYMMDD')
71
- )
72
- -- only include tables that are from the daily or intraday exports
73
- and regexp_contains(_table_suffix, r'^(intraday_)?\\d{8}$')
74
- group by
75
- date, export_type
70
+ regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
71
+ and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
72
+ between ${startDate} and current_date()
76
73
  ),
77
74
  statuses_by_day as (
78
75
  select