ga4-export-fixer 0.1.4 → 0.1.5-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  **ga4-export-fixer** is a **Dataform NPM package** that transforms raw GA4 BigQuery export data into a cleaner, more queryable incremental table. It combines daily and intraday exports so the best available version of each event is always in use, adds session-level fields like `session_id` and `landing_page`, promotes key event parameters to columns, and fixes known GA4 export issues — handling the boilerplate transformations that are otherwise tedious to include in every GA4 query.
4
4
 
5
+ The goal of the package is to **speed up development** when building data models and pipelines on top of GA4 export data, allowing you to focus on your use case instead of wrestling with the raw export format.
6
+
7
+ ### Planned Features
8
+
9
+ - Support for using only intraday export data
10
+ - Tools for building on top of the enhanced events table
11
+ - Column descriptions
12
+ - Default configurations for app and web properties
13
+ - Item list attribution
14
+ - Data enrichment (item-level, session-level, event-level)
15
+ - Support for fresh export (GA4 360)
16
+ - Custom processing steps (additional CTEs)
17
+ - Custom traffic source attribution
18
+
5
19
  ## Installation
6
20
 
7
21
  ### Bash
@@ -19,7 +33,7 @@ Include the package in the package.json file in your Dataform repository.
19
33
  {
20
34
  "dependencies": {
21
35
  "@dataform/core": "3.0.42",
22
- "ga4-export-fixer": "0.1.2"
36
+ "ga4-export-fixer": "0.1.4"
23
37
  }
24
38
  }
25
39
  ```
package/helpers.js CHANGED
@@ -139,66 +139,90 @@ const ga4ExportDateFilter = (exportType, start, end) => {
139
139
  }
140
140
  };
141
141
 
142
- // Filter the export tables by date range for both intraday and daily exports
143
142
  /**
144
- * Generates a SQL filter condition for GA4 export tables based on the provided configuration.
145
- *
146
- * This function produces a composite filter condition that constrains which BigQuery GA4 export tables
147
- * (daily and, optionally, intraday) are included based on start and end dates, the operational mode (test, incremental, or full refresh),
148
- * and any configured buffer days for session overlap.
149
- *
150
- * Logic:
151
- * - In test mode: Uses `testConfig.dateRangeStart` and `testConfig.dateRangeEnd` for the table date filters.
152
- * - In incremental refresh mode: Uses dynamic variable placeholders for efficient incremental logic (`constants.DATE_RANGE_START_VARIABLE`, etc).
153
- * - Otherwise (full refresh): Uses static date values from the configuration's preOperations.
154
- * - If `includedExportTypes.intraday` is true, includes a filter for intraday tables with their own start date variable.
155
- * - Applies `bufferDays` to the daily export's start boundary to ensure session completeness across date boundaries.
156
- *
157
- * Example output (SQL snippet):
158
- * (
159
- * (_table_suffix >= '20240101-1' and _table_suffix <= '20240105')
160
- * or (_table_suffix >= 'intraday_20240101' and _table_suffix <= 'intraday_20240105')
161
- * )
162
- *
163
- * @param {Object} config Configuration object governing the date filtering logic. Expected properties:
164
- * @param {boolean} [config.test] Whether to use test configuration dates.
165
- * @param {Object} [config.testConfig] Contains `dateRangeStart` and `dateRangeEnd` for tests.
166
- * @param {boolean} [config.incremental] Whether to use incremental variable placeholders.
167
- * @param {Object} [config.preOperations] Contains `dateRangeStartFullRefresh` and `dateRangeEnd` for full refresh.
168
- * @param {Object} [config.includedExportTypes] Should contain `intraday` (boolean).
169
- * @param {number} [config.bufferDays] Number of buffer days to extend date range for daily exports.
170
- * @returns {string} SQL condition as a string that can be injected into a WHERE clause.
143
+ * Builds a `_table_suffix` WHERE clause for GA4 BigQuery export tables (daily and/or intraday).
144
+ *
145
+ * Date boundaries are resolved differently depending on the mode:
146
+ * - **test** -- literal dates from `config.testConfig`
147
+ * - **incremental** -- BigQuery variable placeholders set by pre-operations
148
+ * - **full refresh** -- static dates from `config.preOperations`
149
+ *
150
+ * `bufferDays` is subtracted from the daily start date so sessions that span
151
+ * midnight are not partially excluded.
152
+ *
153
+ * Outside test mode, when both daily and intraday exports are enabled, the intraday start date
154
+ * comes from a dedicated variable (`INTRADAY_DATE_RANGE_START_VARIABLE`) so
155
+ * intraday tables that already have a corresponding daily table are excluded.
156
+ * When only intraday is enabled, the daily start-date logic (including buffer
157
+ * days) is reused instead. In test mode the intraday start is always `testConfig.dateRangeStart`, without buffer days.
158
+ *
159
+ * @param {Object} config
160
+ * @param {boolean} config.test - Use literal test dates.
161
+ * @param {Object} config.testConfig - `{ dateRangeStart, dateRangeEnd }`.
162
+ * @param {boolean} config.incremental - Use BigQuery variable placeholders.
163
+ * @param {Object} config.preOperations - `{ dateRangeStartFullRefresh, dateRangeEnd }`.
164
+ * @param {Object} config.includedExportTypes - `{ daily: boolean, intraday: boolean }`.
165
+ * @param {number} [config.bufferDays=0] - Extra days subtracted from the start date.
166
+ * @returns {string} SQL fragment for a WHERE clause.
171
167
  */
172
168
  const ga4ExportDateFilters = (config) => {
173
169
  const bufferDays = config.bufferDays || 0;
174
170
 
175
171
  const getStartDate = () => {
172
+ // test mode
176
173
  if (config.test) {
177
174
  return config.testConfig.dateRangeStart;
178
175
  }
179
176
  if (config.incremental) {
180
177
  return constants.DATE_RANGE_START_VARIABLE;
181
178
  }
179
+ // full refresh
182
180
  return config.preOperations.dateRangeStartFullRefresh;
183
181
  };
184
182
 
185
183
  const getEndDate = () => {
184
+ // test mode, avoid using a BigQuery variable
186
185
  if (config.test) {
187
186
  return config.testConfig.dateRangeEnd;
188
187
  }
188
+ // use checkpoint variable with incremental refresh -> allows pre processing any part of the table without having to do a full refresh
189
189
  if (config.incremental) {
190
190
  return constants.DATE_RANGE_END_VARIABLE;
191
191
  }
192
+ // full refresh
192
193
  return config.preOperations.dateRangeEnd;
193
194
  };
194
195
 
195
- const start = getStartDate();
196
+ const getIntradayStartDate = () => {
197
+ // In test mode, skip pre-operations even though intraday and daily tables may temporarily overlap.
198
+ if (config.test) {
199
+ return config.testConfig.dateRangeStart;
200
+ }
201
+ // Dedicated variable excludes intraday tables that overlap with already-processed daily tables.
202
+ if (config.includedExportTypes.intraday && config.includedExportTypes.daily) {
203
+ return constants.INTRADAY_DATE_RANGE_START_VARIABLE;
204
+ }
205
+ // Without daily export, reuse the daily start-date logic and apply bufferDays
206
+ // (buffer is normally only applied to the daily start date).
207
+ if (config.includedExportTypes.intraday && !config.includedExportTypes.daily) {
208
+ // use the same start date as if daily export was in use
209
+ // include the buffer days as well (not included otherwise for intraday data)
210
+ return `${getStartDate()}-${bufferDays}`;
211
+ }
212
+ };
213
+
214
+ const dailyStart = `${getStartDate()}-${bufferDays}`;
215
+ const intradayStart = getIntradayStartDate();
196
216
  const end = getEndDate();
197
- const intradayStart = config.test ? config.testConfig.dateRangeStart : constants.INTRADAY_DATE_RANGE_START_VARIABLE;
217
+
218
+
219
+ const dateFilters = [
220
+ config.includedExportTypes.daily ? ga4ExportDateFilter('daily', dailyStart, end) : null,
221
+ config.includedExportTypes.intraday ? ga4ExportDateFilter('intraday', intradayStart, end) : null,
222
+ ];
198
223
 
199
224
  return `(
200
- ${ga4ExportDateFilter('daily', `${start}-${bufferDays}`, end)}
201
- ${config.includedExportTypes.intraday ? `or ${ga4ExportDateFilter('intraday', intradayStart, end)}` : ''}
225
+ ${dateFilters.filter(filter => !!filter).join(' or ')}
202
226
  )`;
203
227
  };
204
228
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.1.4",
3
+ "version": "0.1.5-dev.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -14,11 +14,12 @@
14
14
  "scripts": {
15
15
  "test": "node tests/ga4EventsEnhanced.test.js",
16
16
  "test:events": "node tests/ga4EventsEnhanced.test.js",
17
+ "test:merge": "node tests/mergeSQLConfigurations.test.js",
17
18
  "prepublishOnly": "node scripts/updateReadme.js"
18
19
  },
19
20
  "repository": {
20
21
  "type": "git",
21
- "url": "git+https://github.com/tanelytics/ga4-export-fixer"
22
+ "url": "git+https://github.com/tanelytics/ga4-export-fixer.git"
22
23
  },
23
24
  "author": "Taneli Salonen",
24
25
  "license": "MIT",
package/preOperations.js CHANGED
@@ -140,9 +140,9 @@ const setPreOperations = (config) => {
140
140
  {
141
141
  type: 'variable',
142
142
  name: constants.INTRADAY_DATE_RANGE_START_VARIABLE,
143
- // variable only needed with incremental refresh and intraday export tables
144
- value: config.includedExportTypes.intraday ? getDateRangeStartIntraday(config) : undefined,
145
- comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data.',
143
+ // variable only needed if intraday export tables are included together with daily export tables
144
+ value: config.includedExportTypes.intraday && config.includedExportTypes.daily ? getDateRangeStartIntraday(config) : undefined,
145
+ comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data. Only needed if intraday export tables are included together with daily export tables.',
146
146
  },
147
147
  {
148
148
  type: 'variable',
@@ -52,11 +52,13 @@ const defaultConfig = {
52
52
  //{name: 'page_location', type: 'string', columnName: 'page_location2'},
53
53
  ],
54
54
  sessionParams: [],
55
- defaultExcludedEvents: [
55
+ defaultExcludedEvents: [],
56
+ // session_start and first_visit are excluded via the excludedEvents array
57
+ // this allows the user to include them if needed
58
+ excludedEvents: [
56
59
  'session_start',
57
60
  'first_visit'
58
61
  ],
59
- excludedEvents: [],
60
62
  defaultExcludedColumns: [
61
63
  'event_dimensions', // legacy column, not needed
62
64
  'traffic_source', // renamed to user_traffic_source
@@ -214,7 +216,7 @@ const generateEnhancedEventsSQL = (config) => {
214
216
  const mainTimestampColumn = mergedConfig.customTimestampParam ? 'event_custom_timestamp' : 'event_timestamp';
215
217
 
216
218
  // exclude these events from the table
217
- const excludedEvents = utils.mergeUniqueArrays(mergedConfig.defaultExcludedEvents, mergedConfig.excludedEvents);
219
+ const excludedEvents = mergedConfig.excludedEvents;
218
220
  const excludedEventsSQL = excludedEvents.length > 0 ? `and event_name not in (${excludedEvents.map(event => `'${event}'`).join(',')})` : '';
219
221
 
220
222
  // promote these event parameters to columns
@@ -228,7 +230,7 @@ const generateEnhancedEventsSQL = (config) => {
228
230
  };
229
231
 
230
232
  const getExcludedColumns = () => {
231
- const allExcludedColumns = utils.mergeUniqueArrays(mergedConfig.defaultExcludedColumns, mergedConfig.excludedColumns);
233
+ const allExcludedColumns = mergedConfig.excludedColumns;
232
234
  const excludedColumns = {};
233
235
  allExcludedColumns.forEach(c => {
234
236
  excludedColumns[c] = undefined;
@@ -259,10 +261,7 @@ const generateEnhancedEventsSQL = (config) => {
259
261
  page: helpers.extractPageDetails(),
260
262
  // event parameters and user properties
261
263
  ...promotedEventParameters(),
262
- event_params: helpers.filterEventParams(utils.mergeUniqueArrays(
263
- mergedConfig.defaultExcludedEventParams, mergedConfig.excludedEventParams),
264
- 'exclude'
265
- ),
264
+ event_params: helpers.filterEventParams(mergedConfig.excludedEventParams, 'exclude'),
266
265
  user_properties: 'user_properties',
267
266
  // traffic source
268
267
  collected_traffic_source: 'collected_traffic_source',
package/utils.js CHANGED
@@ -87,7 +87,10 @@ ${groupByClause}`;
87
87
  *
88
88
  * Rules:
89
89
  * - Nested objects are merged recursively key by key
90
- * - Arrays are concatenated and deduplicated
90
+ * - Arrays with a "default" counterpart (e.g. excludedEvents + defaultExcludedEvents)
91
+ * are merged with mergeUniqueArrays, with user values taking precedence
92
+ * - Arrays that are themselves a "default" version, or have no default counterpart,
93
+ * are overwritten by user input
91
94
  * - Default values are preserved unless explicitly overridden (including with undefined)
92
95
  * - Explicitly setting a value to undefined in inputConfig will override the default
93
96
  * - Date fields: after merging, specific date fields (listed in dateFields) are processed via processDate().
@@ -120,9 +123,16 @@ const mergeSQLConfigurations = (defaultConfig, inputConfig = {}) => {
120
123
  continue;
121
124
  }
122
125
 
123
- // Handle arrays: concatenate and remove duplicates
126
+ // Handle arrays: merge with default counterpart if one exists, otherwise overwrite
124
127
  if (Array.isArray(defaultValue) && Array.isArray(inputValue)) {
125
- result[key] = mergeUniqueArrays(defaultValue, inputValue);
128
+ // check if the array has a "default" counterpart
129
+ // for example, excludedEvents and defaultExcludedEvents
130
+ const defaultKey = 'default' + key.charAt(0).toUpperCase() + key.slice(1);
131
+ if (!key.startsWith('default') && defaultKey in result) {
132
+ result[key] = mergeUniqueArrays(inputValue, result[defaultKey]);
133
+ } else {
134
+ result[key] = inputValue;
135
+ }
126
136
  continue;
127
137
  }
128
138