ga4-export-fixer 0.2.5-dev.1 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,6 +43,7 @@ The **ga4_events_enhanced** table comes with features such as these:
43
43
  - **Schema lock** – Lock the table schema to a specific GA4 export date to prevent schema drift
44
44
  - **Data freshness tracking** – `data_is_final` flag and `export_type` label on every row
45
45
  - **Selective date range re-processing** – Re-process a subset of data without a full table rebuild, using `incrementalStartOverride` and `incrementalEndOverride`
46
+ - **Batch processing** – Process large GA4 exports in smaller batches using the `numberOfDaysToProcess` configuration setting
46
47
  - **Timezone-aware datetime** – `event_datetime` converted to a configurable IANA timezone
47
48
  - **Column descriptions** – Full column-level documentation included in the Dataform table configuration, reflecting the specific configuration used to build the table
48
49
 
@@ -77,7 +78,7 @@ Include the package in the package.json file in your Dataform repository.
77
78
  {
78
79
  "dependencies": {
79
80
  "@dataform/core": "3.0.42",
80
- "ga4-export-fixer": "0.2.4"
81
+ "ga4-export-fixer": "0.2.5"
81
82
  }
82
83
  }
83
84
  ```
@@ -299,9 +300,10 @@ The `onSchemaChange: "EXTEND"` setting updates the result table schema on increm
299
300
  | ------------------------------------------ | ----------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
300
301
  | `preOperations.dateRangeStartFullRefresh` | string (SQL date) | `'date(2000, 1, 1)'` | Start date for full refresh |
301
302
  | `preOperations.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for queries |
302
- | `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of previous days to scan from the result table when determining the incremental refresh start checkpoint. A higher value is required if the table updates have fallen behind for some reason |
303
+ | `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of days to scan backwards from the result table's last partition when determining the incremental refresh start checkpoint. Needs to cover the number of days that can still contain non-final (`data_is_final = false`) data |
303
304
  | `preOperations.incrementalStartOverride` | string (SQL date) | `undefined` | Override the incremental start date to re-process a specific range |
304
305
  | `preOperations.incrementalEndOverride` | string (SQL date) | `undefined` | Override the incremental end date to re-process a specific range |
306
+ | `preOperations.numberOfDaysToProcess` | integer | `undefined` | Limit each run to N days of data. When set, the end date becomes `start + N - 1` (capped at `current_date()`). When `undefined`, `dateRangeEnd` is used as-is. `incrementalEndOverride` takes priority |
305
307
 
306
308
 
307
309
  **`eventParamsToColumns`** — each item in the array is an object:
package/defaultConfig.js CHANGED
@@ -24,6 +24,7 @@ const baseConfig = {
24
24
  incrementalStartOverride: undefined,
25
25
  incrementalEndOverride: undefined,
26
26
  numberOfPreviousDaysToScan: 10,
27
+ numberOfDaysToProcess: undefined,
27
28
  },
28
29
  };
29
30
 
package/helpers.js CHANGED
@@ -196,6 +196,9 @@ const ga4ExportDateFilters = (config) => {
196
196
  return constants.DATE_RANGE_END_VARIABLE;
197
197
  }
198
198
  // full refresh
199
+ if (config.preOperations.numberOfDaysToProcess !== undefined) {
200
+ return `least(${config.preOperations.dateRangeStartFullRefresh}+${config.preOperations.numberOfDaysToProcess}-1, current_date())`;
201
+ }
199
202
  return config.preOperations.dateRangeEnd;
200
203
  };
201
204
 
@@ -274,7 +277,9 @@ const incrementalDateFilter = (config) => {
274
277
 
275
278
  // full refresh mode
276
279
  const fullRefreshStart = config?.preOperations?.dateRangeStartFullRefresh || baseConfig.preOperations.dateRangeStartFullRefresh;
277
- const fullRefreshEnd = config?.preOperations?.dateRangeEnd || baseConfig.preOperations.dateRangeEnd;
280
+ const fullRefreshEnd = config?.preOperations?.numberOfDaysToProcess !== undefined
281
+ ? `least(${fullRefreshStart}+${config.preOperations.numberOfDaysToProcess}-1, current_date())`
282
+ : (config?.preOperations?.dateRangeEnd || baseConfig.preOperations.dateRangeEnd);
278
283
 
279
284
  return setDateRange(fullRefreshStart, fullRefreshEnd);
280
285
  };
@@ -69,6 +69,12 @@ const validateBaseConfig = (config) => {
69
69
  if (typeof config.preOperations.dateRangeEnd !== 'string' || !config.preOperations.dateRangeEnd.trim()) {
70
70
  throw new Error(`config.preOperations.dateRangeEnd must be a non-empty string (SQL date expression). Received: ${JSON.stringify(config.preOperations.dateRangeEnd)}`);
71
71
  }
72
+ if (config.preOperations.numberOfDaysToProcess !== undefined) {
73
+ const nd = config.preOperations.numberOfDaysToProcess;
74
+ if (typeof nd !== 'number' || isNaN(nd) || !Number.isInteger(nd) || nd < 1) {
75
+ throw new Error(`config.preOperations.numberOfDaysToProcess must be a positive integer when defined. Received: ${JSON.stringify(nd)}`);
76
+ }
77
+ }
72
78
  if (config.preOperations.incrementalStartOverride !== undefined && config.preOperations.incrementalStartOverride !== null && config.preOperations.incrementalStartOverride !== '') {
73
79
  if (typeof config.preOperations.incrementalStartOverride !== 'string' || !config.preOperations.incrementalStartOverride.trim()) {
74
80
  throw new Error(`config.preOperations.incrementalStartOverride must be a non-empty string when provided. Received: ${JSON.stringify(config.preOperations.incrementalStartOverride)}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.2.5-dev.1",
3
+ "version": "0.2.5",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
package/preOperations.js CHANGED
@@ -15,11 +15,11 @@ const getLastPartitionDate = (config) => {
15
15
  const tableName = config.self.replace(/`/g, '').split('.').pop();
16
16
 
17
17
  return `select
18
- max(parse_date('%Y%m%d', partition_id))
19
- from
20
- ${informationSchemaPath}
21
- where
22
- table_name = '${tableName}' and partition_id != '__NULL__'`;
18
+ max(parse_date('%Y%m%d', partition_id))
19
+ from
20
+ ${informationSchemaPath}
21
+ where
22
+ table_name = '${tableName}' and partition_id != '__NULL__'`;
23
23
  };
24
24
 
25
25
  // Define the date range start for incremental and full refresh
@@ -38,6 +38,8 @@ const getDateRangeStart = (config) => {
38
38
  from
39
39
  ${config.self}
40
40
  where
41
+ -- the scan is relative to the last partition date in the table
42
+ -- takes into account cases where table updates have fallen behind
41
43
  ${constants.DATE_COLUMN} > ${constants.LAST_PARTITION_DATE_VARIABLE}-${config.preOperations.numberOfPreviousDaysToScan}
42
44
  group by
43
45
  ${constants.DATE_COLUMN}
@@ -118,7 +120,11 @@ const getDateRangeEnd = (config) => {
118
120
  return `select ${config.preOperations.incrementalEndOverride}`;
119
121
  }
120
122
 
121
- // otherwise, use the default logic
123
+ // if the number of days to process is capped, adjust the end date accordingly
124
+ if (config.preOperations.numberOfDaysToProcess !== undefined) {
125
+ return `select least(${constants.DATE_RANGE_START_VARIABLE}+${config.preOperations.numberOfDaysToProcess}-1, current_date())`;
126
+ }
127
+
122
128
  return `select ${config.preOperations.dateRangeEnd}`;
123
129
  };
124
130
 
@@ -152,7 +158,7 @@ const setPreOperations = (config) => {
152
158
  type: 'variable',
153
159
  name: constants.LAST_PARTITION_DATE_VARIABLE,
154
160
  value: config.incremental ? getLastPartitionDate(config) : undefined,
155
- comment: 'Get the last partition date from the result table. Used to anchor the incremental date checkpoint scan window to the table\'s actual data.',
161
+ comment: `Get the last partition date from the result table. Reduces the number of rows scanned when defining the ${constants.DATE_RANGE_START_VARIABLE} variable.`,
156
162
  },
157
163
  {
158
164
  type: 'variable',