ga4-export-fixer 0.2.4 → 0.2.5-dev.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,6 +42,7 @@ The **ga4_events_enhanced** table comes with features such as these:
42
42
  - **Custom timestamp support** – Optionally use a custom event parameter as the primary timestamp, with automatic fallback to `event_timestamp`
43
43
  - **Schema lock** – Lock the table schema to a specific GA4 export date to prevent schema drift
44
44
  - **Data freshness tracking** – `data_is_final` flag and `export_type` label on every row
45
+ - **Selective date range re-processing** – Re-process a subset of data without a full table rebuild, using `incrementalStartOverride` and `incrementalEndOverride`
45
46
  - **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
46
47
  - **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
47
48
 
package/constants.js CHANGED
@@ -2,6 +2,7 @@ const constants = {
2
2
  DATE_RANGE_START_VARIABLE: 'date_range_start',
3
3
  INTRADAY_DATE_RANGE_START_VARIABLE: 'intraday_date_range_start',
4
4
  DATE_RANGE_END_VARIABLE: 'date_range_end',
5
+ LAST_PARTITION_DATE_VARIABLE: 'last_partition_date',
5
6
  DATE_COLUMN: 'event_date',
6
7
  DEFAULT_EVENTS_TABLE_NAME: 'ga4_events_enhanced',
7
8
  TABLE_DESCRIPTION_SUFFIX: 'Created by the ga4-export-fixer package.',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.2.4",
3
+ "version": "0.2.5-dev.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
package/preOperations.js CHANGED
@@ -6,6 +6,22 @@ const declareVariable = (variable, value) => {
6
6
  );`;
7
7
  };
8
8
 
9
+ // Get the last partition date from the result table
10
+ const getLastPartitionDate = (config) => {
11
+ const informationSchemaPath = config.self.replace(
12
+ /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
13
+ '`$1.$2.INFORMATION_SCHEMA.PARTITIONS`'
14
+ );
15
+ const tableName = config.self.replace(/`/g, '').split('.').pop();
16
+
17
+ return `select
18
+ max(parse_date('%Y%m%d', partition_id))
19
+ from
20
+ ${informationSchemaPath}
21
+ where
22
+ table_name = '${tableName}' and partition_id != '__NULL__'`;
23
+ };
24
+
9
25
  // Define the date range start for incremental and full refresh
10
26
  const getDateRangeStart = (config) => {
11
27
  if (config.incremental) {
@@ -22,7 +38,7 @@ const getDateRangeStart = (config) => {
22
38
  from
23
39
  ${config.self}
24
40
  where
25
- ${constants.DATE_COLUMN} > current_date()-${config.preOperations.numberOfPreviousDaysToScan}
41
+ ${constants.DATE_COLUMN} > ${constants.LAST_PARTITION_DATE_VARIABLE}-${config.preOperations.numberOfPreviousDaysToScan}
26
42
  group by
27
43
  ${constants.DATE_COLUMN}
28
44
  )
@@ -40,39 +56,36 @@ where
40
56
  };
41
57
 
42
58
  // Define the date range start for incremental refresh with intraday tables
59
+ // Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
43
60
  const getDateRangeStartIntraday = (config) => {
44
61
  const getStartDate = () => {
45
- // with incremental refresh, use the checkpoint variable and check tables created starting from that date
46
62
  if (config.incremental) {
47
63
  return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
48
64
  }
49
- // otherwise, just scan the last 5 days
50
65
  return 'current_date()-5';
51
66
  };
52
67
 
53
68
  const startDate = getStartDate();
54
69
 
55
70
  if (config.includedExportTypes.intraday) {
71
+ const informationSchemaPath = config.sourceTable.replace(
72
+ /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
73
+ '`$1.$2.INFORMATION_SCHEMA.TABLES`'
74
+ );
75
+
56
76
  return `with export_statuses as (
57
77
  select
58
- safe_cast(regexp_extract(_table_suffix, r'\\d+') as date format 'YYYYMMDD') as date,
78
+ safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
59
79
  case
60
- when _table_suffix like 'intraday_%' then 'intraday'
80
+ when table_name like 'events_intraday_%' then 'intraday'
61
81
  else 'daily'
62
- end as export_type,
82
+ end as export_type
63
83
  from
64
- ${config.sourceTable}
84
+ ${informationSchemaPath}
65
85
  where
66
- -- check tables that are newer than the date range start or the last 5 days (if not incremental refresh)
67
- (
68
- regexp_extract(_table_suffix, r'\\d+') between
69
- cast(${startDate} as string format 'YYYYMMDD')
70
- and cast(current_date() as string format 'YYYYMMDD')
71
- )
72
- -- only include tables that are from the daily or intraday exports
73
- and regexp_contains(_table_suffix, r'^(intraday_)?\\d{8}$')
74
- group by
75
- date, export_type
86
+ regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
87
+ and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
88
+ between ${startDate} and current_date()
76
89
  ),
77
90
  statuses_by_day as (
78
91
  select
@@ -135,6 +148,12 @@ const setPreOperations = (config) => {
135
148
 
136
149
  // define the pre operations
137
150
  const preOperations = [
151
+ {
152
+ type: 'variable',
153
+ name: constants.LAST_PARTITION_DATE_VARIABLE,
154
+ value: config.incremental ? getLastPartitionDate(config) : undefined,
155
+ comment: 'Get the last partition date from the result table. Used to anchor the incremental date checkpoint scan window to the table\'s actual data.',
156
+ },
138
157
  {
139
158
  type: 'variable',
140
159
  name: constants.DATE_RANGE_START_VARIABLE,