ga4-export-fixer 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,7 +20,7 @@ Include the package in the package.json file in your Dataform repository.
20
20
  "name": "my_dataform_repo",
21
21
  "dependencies": {
22
22
  "@dataform/core": "3.0.39",
23
- "ga4-export-fixer": "0.1.0"
23
+ "ga4-export-fixer": "0.1.1"
24
24
  }
25
25
  }
26
26
  ```
@@ -46,6 +46,8 @@ The main features include:
46
46
 
47
47
  Create a new **ga4_events_enhanced** table using a **.js** file in your repository's **definitions** folder.
48
48
 
49
+ ##### Using Defaults
50
+
49
51
  **`definitions/ga4/ga4_events_enhanced.js`**
50
52
  ```javascript
51
53
  const { ga4EventsEnhanced } = require('ga4-export-fixer');
@@ -57,6 +59,48 @@ const config = {
57
59
  ga4EventsEnhanced.createTable(publish, config);
58
60
  ```
59
61
 
62
+ ##### With Custom Configuration
63
+
64
+ **`definitions/ga4/ga4_events_enhanced.js`**
65
+ ```javascript
66
+ const { ga4EventsEnhanced } = require('ga4-export-fixer');
67
+
68
+ const config = {
69
+ sourceTable: constants.GA4_TABLES.MY_GA4_EXPORT,
70
+ schemaLock: '20260101', // prevent possible issues from updates to the export schema
71
+ customTimestampParam: 'custom_event_timestamp', // custom timestamp collected as an event param
72
+ timezone: 'Europe/Helsinki',
73
+ // columns that are not needed
74
+ excludedColumns: [
75
+ 'app_info',
76
+ 'publisher'
77
+ ],
78
+ // events that are not needed
79
+ excludedEvents: [
80
+ 'user_engagement'
81
+ ],
82
+ // transform to session-level
83
+ sessionParams: [
84
+ 'user_agent'
85
+ ],
86
+ // promote as columns
87
+ eventParamsToColumns: [
88
+ {name: 'session_engaged'},
89
+ {name: 'ga_session_number', type: 'int'},
90
+ {name: 'page_type', type: 'string'},
91
+ ],
92
+ // not needed in the event_params array
93
+ excludedEventParams: [
94
+ 'session_engaged',
95
+ 'ga_session_number',
96
+ 'page_type',
97
+ 'user_agent'
98
+ ]
99
+ };
100
+
101
+ ga4EventsEnhanced.createTable(publish, config);
102
+ ```
103
+
60
104
  #### SQLX Deployment
61
105
 
62
106
  Alternatively, you can create the **ga4_events_enhanced** table using a .SQLX file.
@@ -91,19 +135,132 @@ pre_operations {
91
135
  }
92
136
  ```
93
137
 
138
+
139
+
140
+ #### Configuration Object
141
+
142
+ All fields are optional except `sourceTable` (and, for .SQLX deployment, `self` and `incremental`). Default values are applied automatically, so you only need to specify the fields you want to override.
143
+
144
+ | Field | Type | Default | Description |
145
+ |-------|------|---------|-------------|
146
+ | `sourceTable` | Dataform ref() / string | **required** | Source GA4 export table. Use `ref()` in Dataform or a string in format `` `project.dataset.table` `` |
147
+ | `self` | Dataform self() | **required for .SQLX deployment** | Reference to the table itself. Use `self()` in Dataform |
148
+ | `incremental` | Dataform incremental() | **required for .SQLX deployment** | Switch between incremental and full refresh logic. Use `incremental()` in Dataform |
149
+ | `schemaLock` | string (YYYYMMDD) | `undefined` | Lock the table schema to a specific date. Must be a valid date >= `"20241009"` |
150
+ | `timezone` | string | `'Etc/UTC'` | IANA timezone for event datetime (e.g. `'Europe/Helsinki'`) |
151
+ | `customTimestampParam` | string | `undefined` | Name of a custom event parameter containing a JS timestamp in milliseconds (e.g. collected via `Date.now()`) |
152
+ | `bufferDays` | integer | `1` | Extra days to include for sessions that span midnight |
153
+ | `test` | boolean | `false` | Enable test mode (uses `testConfig` date range instead of pre-operations) |
154
+ | `excludedEventParams` | string[] | `[]` | Event parameter names to exclude from the `event_params` array |
155
+ | `excludedEvents` | string[] | `[]` | Event names to exclude from the table |
156
+ | `excludedColumns` | string[] | `[]` | Default GA4 export columns to exclude from the final table, for example `'app_info'` or `'publisher'` |
157
+ | `sessionParams` | string[] | `[]` | Event parameter names to aggregate as session-level parameters |
158
+
159
+ **`includedExportTypes`** — which GA4 export types to include:
160
+
161
+ | Field | Type | Default | Description |
162
+ |-------|------|---------|-------------|
163
+ | `includedExportTypes.daily` | boolean | `true` | Include daily (processed) export |
164
+ | `includedExportTypes.intraday` | boolean | `true` | Include intraday export |
165
+
166
+ **`dataIsFinal`** — how to determine whether data is final (not expected to change):
167
+
168
+ | Field | Type | Default | Description |
169
+ |-------|------|---------|-------------|
170
+ | `dataIsFinal.detectionMethod` | string | `'EXPORT_TYPE'` | `'EXPORT_TYPE'` (uses the table suffix; all data from the daily export is considered final) or `'DAY_THRESHOLD'` (uses the number of days since the event) |
171
+ | `dataIsFinal.dayThreshold` | integer | `4` | Days after which data is considered final. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
172
+
173
+ **`testConfig`** — date range used when `test` is `true`:
174
+
175
+ | Field | Type | Default | Description |
176
+ |-------|------|---------|-------------|
177
+ | `testConfig.dateRangeStart` | string (SQL date) | `'current_date()-1'` | Start date for test queries |
178
+ | `testConfig.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for test queries |
179
+
180
+ **`preOperations`** — date range and incremental refresh configuration:
181
+
182
+ | Field | Type | Default | Description |
183
+ |-------|------|---------|-------------|
184
+ | `preOperations.dateRangeStartFullRefresh` | string (SQL date) | `'date(2000, 1, 1)'` | Start date for full refresh |
185
+ | `preOperations.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for queries |
186
+ | `preOperations.numberOfPreviousDaysToScan` | integer | `10` | Number of previous days to scan from the result table when determining the incremental refresh start checkpoint. A higher value is required if the table updates have fallen behind for some reason |
187
+ | `preOperations.incrementalStartOverride` | string (SQL date) | `undefined` | Override the incremental start date to re-process a specific range |
188
+ | `preOperations.incrementalEndOverride` | string (SQL date) | `undefined` | Override the incremental end date to re-process a specific range |
189
+
190
+ **`eventParamsToColumns`** — each item in the array is an object:
191
+
192
+ | Field | Type | Required | Description |
193
+ |-------|------|----------|-------------|
194
+ | `name` | string | Yes | Event parameter name |
195
+ | `type` | string | No | Data type: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, or `'float64'`. If omitted, returns the value converted to a string |
196
+ | `columnName` | string | No | Column name in the output. Defaults to the parameter `name` |
197
+
198
+ Date fields (`dateRangeStart`, `dateRangeEnd`, etc.) accept string dates in `YYYYMMDD` or `YYYY-MM-DD` format, or BigQuery SQL expressions (e.g. `'current_date()'`, `'date(2026, 1, 1)'`).
199
+
94
200
  ### Helpers
95
201
 
96
- The helpers contain templates for common SQL expression needed when working with GA4 data.
202
+ The helpers contain templates for common SQL expressions. The functions are referenced by **ga4EventsEnhanced** but can also be imported as utility functions for working with GA4 data.
97
203
 
98
204
  ```javascript
99
205
  const { helpers } = require('ga4-export-fixer');
100
-
101
- // Unnest event parameters, date filters, URL extraction, session aggregation, etc.
102
- helpers.unnestEventParam('page_location', 'string');
103
- helpers.ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()');
104
- helpers.extractPageDetails();
105
206
  ```
106
207
 
208
+ #### SQL Templates
209
+
210
+ | Name | Example | Description |
211
+ |------|---------|-------------|
212
+ | `eventDate` | `helpers.eventDate` | Casts `event_date` string to a DATE using YYYYMMDD format |
213
+ | `sessionId` | `helpers.sessionId` | Builds a session ID by concatenating `user_pseudo_id` and `ga_session_id` |
214
+
215
+ #### Functions
216
+
217
+ **Unnesting parameters**
218
+
219
+ | Function | Example | Description |
220
+ |----------|---------|-------------|
221
+ | `unnestEventParam` | `unnestEventParam('page_location', 'string')` | Extracts a value from the `event_params` array by key. Supported types: `'string'`, `'int'`, `'int64'`, `'double'`, `'float'`, `'float64'`. Omit the type to get the value converted to a string |
222
+
223
+ **Date and time**
224
+
225
+ | Function | Example | Description |
226
+ |----------|---------|-------------|
227
+ | `getEventTimestampMicros` | `getEventTimestampMicros('custom_ts')` | Returns SQL for event timestamp in microseconds. With a custom parameter, uses it (converted from ms) with fallback to `event_timestamp` |
228
+ | `getEventDateTime` | `getEventDateTime({ timezone: 'Europe/Helsinki' })` | Returns SQL for event datetime in the given timezone. Defaults to `'Etc/UTC'` |
229
+
230
+ **Date filters**
231
+
232
+ | Function | Example | Description |
233
+ |----------|---------|-------------|
234
+ | `ga4ExportDateFilter` | `ga4ExportDateFilter('daily', 'current_date()-7', 'current_date()')` | Generates a `_table_suffix` filter for a single export type (`'daily'` or `'intraday'`) and date range |
235
+
236
+ **Page details**
237
+
238
+ | Function | Example | Description |
239
+ |----------|---------|-------------|
240
+ | `extractUrlHostname` | `extractUrlHostname('page_location')` | Extracts hostname from a URL column |
241
+ | `extractUrlPath` | `extractUrlPath('page_location')` | Extracts the path component from a URL column |
242
+ | `extractUrlQuery` | `extractUrlQuery('page_location')` | Extracts the query string (including `?`) from a URL column |
243
+ | `extractUrlQueryParams` | `extractUrlQueryParams('page_location')` | Parses URL query parameters into `ARRAY<STRUCT<key STRING, value STRING>>` |
244
+ | `extractPageDetails` | `extractPageDetails()` | Returns a struct with `hostname`, `path`, `query`, and `query_params`. Defaults to `page_location` event parameter |
245
+
246
+ **Aggregation**
247
+
248
+ | Function | Example | Description |
249
+ |----------|---------|-------------|
250
+ | `aggregateValue` | `aggregateValue('user_id', 'last', 'event_timestamp')` | Aggregates a column using `'max'`, `'min'`, `'first'`, `'last'`, or `'any'`. `'first'` and `'last'` use the timestamp column for ordering |
251
+
252
+ **Ecommerce**
253
+
254
+ | Function | Example | Description |
255
+ |----------|---------|-------------|
256
+ | `fixEcommerceStruct` | `fixEcommerceStruct()` | Cleans the ecommerce struct: sets `transaction_id` to null when `'(not set)'`, and fixes missing/NaN `purchase_revenue` for purchase events |
257
+
258
+ **Data freshness**
259
+
260
+ | Function | Example | Description |
261
+ |----------|---------|-------------|
262
+ | `isFinalData` | `isFinalData('DAY_THRESHOLD', 4)` | Returns SQL that evaluates to `true` when data is final. `'EXPORT_TYPE'` checks table suffix; `'DAY_THRESHOLD'` uses days since event (`dayThreshold` is required and must be a non-negative integer) |
263
+
107
264
  ## License
108
265
 
109
266
  MIT
package/helpers.js CHANGED
@@ -634,19 +634,21 @@ const isFinalData = (detectionMethod, dayThreshold) => {
634
634
  throw new Error(`isFinalData: Unsupported detectionMethod '${detectionMethod}'. Supported values are 'EXPORT_TYPE' and 'DAY_THRESHOLD'.`);
635
635
  }
636
636
 
637
- if (typeof dayThreshold !== 'undefined' && (typeof dayThreshold !== 'number' || isNaN(dayThreshold))) {
638
- throw new Error("isFinalData: 'dayThreshold' must be a number if provided.");
637
+ if (detectionMethod === 'DAY_THRESHOLD') {
638
+ if (typeof dayThreshold === 'undefined') {
639
+ throw new Error("isFinalData: 'dayThreshold' is required when using 'DAY_THRESHOLD' detectionMethod.");
640
+ }
641
+ if (!Number.isInteger(dayThreshold) || dayThreshold < 0) {
642
+ throw new Error("isFinalData: 'dayThreshold' must be an integer greater than or equal to 0 when using 'DAY_THRESHOLD' detectionMethod.");
643
+ }
639
644
  }
640
645
 
641
- const defaultDayThreshold = 3;
642
- const threshold = typeof dayThreshold !== 'undefined' ? dayThreshold : defaultDayThreshold;
643
-
644
646
  if (detectionMethod === 'EXPORT_TYPE') {
645
647
  return 'if(_table_suffix like \'intraday_%\' or _table_suffix like \'fresh_%\', false, true)';
646
648
  }
647
649
 
648
650
  if (detectionMethod === 'DAY_THRESHOLD') {
649
- return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${threshold}, true, false)`;
651
+ return `if(date_diff(current_date(), cast(event_date as date format 'YYYYMMDD'), day) > ${dayThreshold}, true, false)`;
650
652
  }
651
653
  };
652
654
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -13,7 +13,8 @@
13
13
  ],
14
14
  "scripts": {
15
15
  "test": "node tests/ga4EventsEnhanced.test.js",
16
- "test:events": "node tests/ga4EventsEnhanced.test.js"
16
+ "test:events": "node tests/ga4EventsEnhanced.test.js",
17
+ "prepublishOnly": "node scripts/updateReadme.js"
17
18
  },
18
19
  "repository": {
19
20
  "type": "git",
@@ -39,13 +39,13 @@ const defaultConfig = {
39
39
  // this is useful if you want to re-process only a specific date range
40
40
  incrementalStartOverride: undefined,
41
41
  incrementalEndOverride: undefined,
42
- numberOfPreviousDaysToScan: 5,
42
+ numberOfPreviousDaysToScan: 10,
43
43
  },
44
44
  // these parameters are excluded by default because they've been made available in other columns
45
45
  defaultExcludedEventParams: [
46
46
  'page_location',
47
47
  'ga_session_id',
48
- //'custom_event_timestamp', // poistetaan, jos käytössä
48
+ //'custom_event_timestamp', // removed if customTimestampParam is used
49
49
  ],
50
50
  excludedEventParams: [],
51
51
  eventParamsToColumns: [
@@ -57,12 +57,13 @@ const defaultConfig = {
57
57
  'first_visit'
58
58
  ],
59
59
  excludedEvents: [],
60
- // exclude these columns when extracting raw data from the export tables
61
- excludedColumns: [
60
+ defaultExcludedColumns: [
62
61
  'event_dimensions', // legacy column, not needed
63
62
  'traffic_source', // renamed to user_traffic_source
64
63
  'session_id'
65
64
  ],
65
+ // exclude these columns when extracting raw data from the export tables
66
+ excludedColumns: [],
66
67
  };
67
68
 
68
69
  // List the columns in the order they should be in the final table
@@ -227,8 +228,9 @@ const generateEnhancedEventsSQL = (config) => {
227
228
  };
228
229
 
229
230
  const getExcludedColumns = () => {
231
+ const allExcludedColumns = utils.mergeUniqueArrays(mergedConfig.defaultExcludedColumns, mergedConfig.excludedColumns);
230
232
  const excludedColumns = {};
231
- mergedConfig.excludedColumns.forEach(c => {
233
+ allExcludedColumns.forEach(c => {
232
234
  excludedColumns[c] = undefined;
233
235
  });
234
236
  return excludedColumns;