ga4-export-fixer 0.3.1 → 0.3.2-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -228,6 +228,11 @@ All fields are optional except `sourceTable`. Default values are applied automat
228
228
  | `excludedEvents` | string[] | `['session_start', 'first_visit']` | Event names to exclude from the table. These events are excluded by default because they have no use for analysis purposes. Override this to include them if needed |
229
229
  | `excludedColumns` | string[] | `[]` | Default GA4 export columns to exclude from the final table, for example `'app_info'` or `'publisher'` |
230
230
  | `sessionParams` | string[] | `[]` | Event parameter names to aggregate as session-level parameters |
231
+ | `includedExportTypes` | object | [See details](#includedExportTypes) | Which GA4 export types to include (daily, fresh, intraday) |
232
+ | `dataIsFinal` | object | [See details](#dataIsFinal) | How to determine whether data is final (not expected to change) |
233
+ | `testConfig` | object | [See details](#testConfig) | Date range used when `test` is `true` |
234
+ | `preOperations` | object | [See details](#preOperations) | Date range and incremental refresh configuration |
235
+ | `eventParamsToColumns` | object[] | `[]` | Event parameters to promote to columns. [See item schema](#eventParamsToColumns) |
231
236
 
232
237
  <a id="default-dataformtableconfig"></a>
233
238
  <details>
@@ -263,7 +268,9 @@ The `onSchemaChange: "EXTEND"` setting updates the result table schema on increm
263
268
  </details>
264
269
  <br>
265
270
 
266
- `**includedExportTypes**` — which GA4 export types to include:
271
+ <a id="includedExportTypes"></a>
272
+
273
+ **`includedExportTypes`** — which GA4 export types to include:
267
274
 
268
275
 
269
276
  | Field | Type | Default | Description |
@@ -284,6 +291,8 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
284
291
 
285
292
  > **Without daily export:** When `daily` is `false`, `dataIsFinal.detectionMethod` must be set to `'DAY_THRESHOLD'`, because `EXPORT_TYPE` detection relies on daily tables to mark data as final.
286
293
 
294
+ <a id="dataIsFinal"></a>
295
+
287
296
  **`dataIsFinal`** — how to determine whether data is final (not expected to change):
288
297
 
289
298
 
@@ -293,6 +302,8 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
293
302
  | `dataIsFinal.dayThreshold` | integer | `3` | Days after which data is considered final. According to GA4 documentation, data up to 72 hours old is subject to possible changes. Required when `detectionMethod` is `'DAY_THRESHOLD'` |
294
303
 
295
304
 
305
+ <a id="testConfig"></a>
306
+
296
307
  **`testConfig`** — date range used when `test` is `true`:
297
308
 
298
309
 
@@ -302,6 +313,8 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
302
313
  | `testConfig.dateRangeEnd` | string (SQL date) | `'current_date()'` | End date for test queries |
303
314
 
304
315
 
316
+ <a id="preOperations"></a>
317
+
305
318
  **`preOperations`** — date range and incremental refresh configuration:
306
319
 
307
320
 
@@ -315,6 +328,8 @@ The boundary between fresh and intraday is timestamp-based because the fresh exp
315
328
  | `preOperations.numberOfDaysToProcess` | integer | `undefined` | Limit each run to N days of data. When set, the end date becomes `start + N - 1` (capped at `current_date()`). When `undefined`, `dateRangeEnd` is used as-is. `incrementalEndOverride` takes priority |
316
329
 
317
330
 
331
+ <a id="eventParamsToColumns"></a>
332
+
318
333
  **`eventParamsToColumns`** — each item in the array is an object:
319
334
 
320
335
 
@@ -0,0 +1,146 @@
1
+ {
2
+ "event_date": {
3
+ "source": "ga4_export_modified",
4
+ "note": "Cast to DATE from the original YYYYMMDD string format"
5
+ },
6
+ "event_datetime": {
7
+ "source": "derived",
8
+ "note": "Computed from event_timestamp (or custom timestamp parameter) with timezone conversion"
9
+ },
10
+ "event_timestamp": {
11
+ "source": "ga4_export"
12
+ },
13
+ "event_custom_timestamp": {
14
+ "source": "derived",
15
+ "note": "Derived from a custom event parameter (milliseconds converted to microseconds), falling back to event_timestamp"
16
+ },
17
+ "event_name": {
18
+ "source": "ga4_export"
19
+ },
20
+ "session_id": {
21
+ "source": "derived",
22
+ "note": "Concatenation of user_pseudo_id and the ga_session_id event parameter"
23
+ },
24
+ "user_pseudo_id": {
25
+ "source": "ga4_export"
26
+ },
27
+ "user_id": {
28
+ "source": "ga4_export_modified",
29
+ "note": "Session-level aggregation: last non-null user_id within the session, ordered by timestamp"
30
+ },
31
+ "merged_user_id": {
32
+ "source": "derived",
33
+ "note": "Coalesces session-level user_id with user_pseudo_id"
34
+ },
35
+ "page_location": {
36
+ "source": "derived",
37
+ "note": "Unnested from the page_location event parameter"
38
+ },
39
+ "page": {
40
+ "source": "derived",
41
+ "note": "Parsed from the page_location event parameter into hostname, path, query, and query_params"
42
+ },
43
+ "landing_page": {
44
+ "source": "derived",
45
+ "note": "Session-level aggregation: first page struct where entrances > 0, ordered by timestamp"
46
+ },
47
+ "event_params": {
48
+ "source": "ga4_export_modified",
49
+ "note": "Excluded and promoted parameters are filtered out from the original array"
50
+ },
51
+ "session_params": {
52
+ "source": "derived",
53
+ "note": "Aggregated from event_params at session level: last non-null value per configured parameter"
54
+ },
55
+ "user_properties": {
56
+ "source": "ga4_export"
57
+ },
58
+ "ecommerce": {
59
+ "source": "ga4_export_modified",
60
+ "note": "Fixes applied: transaction_id '(not set)' nullified, purchase_revenue NaN and missing-value corrections"
61
+ },
62
+ "items": {
63
+ "source": "ga4_export"
64
+ },
65
+ "user_ltv": {
66
+ "source": "ga4_export"
67
+ },
68
+ "collected_traffic_source": {
69
+ "source": "ga4_export"
70
+ },
71
+ "session_first_traffic_source": {
72
+ "source": "derived",
73
+ "note": "Session-level aggregation: collected_traffic_source from the first event in the session"
74
+ },
75
+ "session_traffic_source_last_click": {
76
+ "source": "ga4_export_modified",
77
+ "note": "Session-level aggregation: first non-null value in the session (the field is session-scoped in the GA4 export)"
78
+ },
79
+ "user_traffic_source": {
80
+ "source": "ga4_export_modified",
81
+ "note": "Renamed from the GA4 export traffic_source field"
82
+ },
83
+ "event_previous_timestamp": {
84
+ "source": "ga4_export"
85
+ },
86
+ "event_value_in_usd": {
87
+ "source": "ga4_export"
88
+ },
89
+ "event_bundle_sequence_id": {
90
+ "source": "ga4_export"
91
+ },
92
+ "event_server_timestamp_offset": {
93
+ "source": "ga4_export"
94
+ },
95
+ "privacy_info": {
96
+ "source": "ga4_export"
97
+ },
98
+ "user_first_touch_timestamp": {
99
+ "source": "ga4_export"
100
+ },
101
+ "device": {
102
+ "source": "ga4_export"
103
+ },
104
+ "geo": {
105
+ "source": "ga4_export"
106
+ },
107
+ "app_info": {
108
+ "source": "ga4_export"
109
+ },
110
+ "stream_id": {
111
+ "source": "ga4_export"
112
+ },
113
+ "platform": {
114
+ "source": "ga4_export"
115
+ },
116
+ "event_dimensions": {
117
+ "source": "ga4_export"
118
+ },
119
+ "is_active_user": {
120
+ "source": "ga4_export"
121
+ },
122
+ "batch_event_index": {
123
+ "source": "ga4_export"
124
+ },
125
+ "batch_page_id": {
126
+ "source": "ga4_export"
127
+ },
128
+ "batch_ordering_id": {
129
+ "source": "ga4_export"
130
+ },
131
+ "publisher": {
132
+ "source": "ga4_export"
133
+ },
134
+ "row_inserted_timestamp": {
135
+ "source": "derived",
136
+ "note": "Set to current_timestamp() when the row is inserted or refreshed by the incremental pipeline"
137
+ },
138
+ "data_is_final": {
139
+ "source": "derived",
140
+ "note": "Computed from export type or day threshold since event_date"
141
+ },
142
+ "export_type": {
143
+ "source": "derived",
144
+ "note": "Determined from the GA4 export table suffix pattern"
145
+ }
146
+ }
@@ -0,0 +1,23 @@
1
+ {
2
+ "event_date": "Partition column. Always include in WHERE clauses to limit scanned data and reduce query cost",
3
+ "event_datetime": "Human-readable timestamp in the configured timezone. Use for time-of-day analysis and reporting",
4
+ "event_timestamp": "Microsecond-precision UTC timestamp. Use for precise event ordering and time difference calculations",
5
+ "event_name": "Primary event filter. Use in WHERE clauses to select specific event types (e.g. WHERE event_name = 'purchase')",
6
+ "session_id": "Use for counting unique sessions (COUNT(DISTINCT session_id)) and as a GROUP BY key for session-level aggregations",
7
+ "user_pseudo_id": "Device-level user identifier. Use for counting unique devices or as a fallback when merged_user_id is not needed",
8
+ "user_id": "Authenticated user identifier. NULL when the user is not logged in. Prefer merged_user_id for user-level analysis",
9
+ "merged_user_id": "Preferred user identifier for user-level aggregations. Resolves authenticated and anonymous users into a single ID per session",
10
+ "page_location": "Full URL string. Use for exact-match filtering. For path-level or hostname-level analysis, use the page struct instead",
11
+ "page": "Use page.hostname, page.path, and page.query_params for structured URL analysis without manual string parsing",
12
+ "landing_page": "Session entry page. Use for landing page performance reports and campaign attribution analysis",
13
+ "event_params": "Nested array of event parameters. Unnest with CROSS JOIN UNNEST(event_params) to access individual parameter values",
14
+ "session_params": "Session-scoped parameters. Unnest with CROSS JOIN UNNEST(session_params) to access session-level parameter values",
15
+ "ecommerce": "Transaction and revenue data. Filter to purchase or refund events (e.g. WHERE event_name IN ('purchase', 'refund')) for ecommerce reporting",
16
+ "items": "Product-level data array. Unnest with CROSS JOIN UNNEST(items) for item-level analysis in ecommerce reports",
17
+ "collected_traffic_source": "Event-level UTM parameters and click identifiers. For session-level attribution, prefer session_first_traffic_source instead",
18
+ "session_first_traffic_source": "First-touch traffic source for the session. Use for session-level acquisition and campaign reporting",
19
+ "session_traffic_source_last_click": "Google-attributed session traffic source. Use for last-click attribution analysis across Google Ads and manual channels",
20
+ "data_is_final": "Data stability flag. Use WHERE data_is_final = true when you need only stable data that will not change in future refreshes",
21
+ "export_type": "Source export type (daily, intraday, fresh). Use for debugging data freshness or filtering by export source",
22
+ "row_inserted_timestamp": "Pipeline metadata. Use to identify when data was last refreshed or to debug incremental update issues"
23
+ }
package/documentation.js CHANGED
@@ -1,85 +1,177 @@
1
- const columnDescriptions = require('./columns/columnDescriptions.json');
2
-
3
- /**
4
- * Returns a deep copy of the default column descriptions, enriched with
5
- * configuration-specific context appended to the relevant descriptions.
6
- *
7
- * @param {Object} config - The merged configuration object from ga4EventsEnhanced.
8
- * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
9
- */
10
- const getColumnDescriptions = (config) => {
11
- const descriptions = JSON.parse(JSON.stringify(columnDescriptions));
12
-
13
- if (!config) return descriptions;
14
-
15
- const appendToDescription = (key, suffix) => {
16
- if (!descriptions[key]) return;
17
- if (typeof descriptions[key] === 'string') {
18
- descriptions[key] = `${descriptions[key]}. ${suffix}`;
19
- } else if (typeof descriptions[key] === 'object' && descriptions[key].description) {
20
- descriptions[key].description = `${descriptions[key].description}. ${suffix}`;
21
- }
22
- };
23
-
24
- // timezone
25
- if (config.timezone) {
26
- appendToDescription('event_datetime', `Timezone: ${config.timezone}`);
27
- }
28
-
29
- // customTimestampParam
30
- if (config.customTimestampParam) {
31
- appendToDescription('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
32
- appendToDescription('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
33
- } else {
34
- delete descriptions.event_custom_timestamp;
35
- }
36
-
37
- // data_is_final
38
- if (config.dataIsFinal) {
39
- const method = config.dataIsFinal.detectionMethod;
40
- if (method === 'DAY_THRESHOLD') {
41
- appendToDescription('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
42
- } else {
43
- appendToDescription('data_is_final', `Detection method: EXPORT_TYPE`);
44
- }
45
- }
46
-
47
- // excludedEvents
48
- if (config.excludedEvents && config.excludedEvents.length > 0) {
49
- appendToDescription('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
50
- }
51
-
52
- // excludedEventParams
53
- if (config.excludedEventParams && config.excludedEventParams.length > 0) {
54
- appendToDescription('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
55
- }
56
-
57
- // sessionParams
58
- if (config.sessionParams && config.sessionParams.length > 0) {
59
- appendToDescription('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
60
- }
61
-
62
- // eventParamsToColumns add descriptions for dynamically promoted columns
63
- if (config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
64
- config.eventParamsToColumns.forEach(p => {
65
- const columnName = p.columnName || p.name;
66
- const type = p.type ? ` (${p.type})` : ' (any data type)';
67
- descriptions[columnName] = `Promoted from event parameter '${p.name}'${type}`;
68
- });
69
- }
70
-
71
- // includedExportTypes
72
- if (config.includedExportTypes) {
73
- const types = Object.entries(config.includedExportTypes)
74
- .filter(([, enabled]) => enabled)
75
- .map(([type]) => type);
76
- appendToDescription('export_type', `Included export types: ${types.join(', ')}`);
77
- }
78
-
79
- return descriptions;
80
- };
81
-
82
- module.exports = {
83
- columnDescriptions,
84
- getColumnDescriptions
85
- };
1
+ const columnDescriptions = require('./columns/columnDescriptions.json');
2
+ const columnLineage = require('./columns/columnLineage.json');
3
+ const columnTypicalUse = require('./columns/columnTypicalUse.json');
4
+
5
+ /**
6
+ * Composes a multi-section column description string from individual sections.
7
+ * Sections with null/undefined/empty values are omitted.
8
+ * Sections are separated by blank lines (two line breaks) for readability.
9
+ *
10
+ * @param {Object} sections - { base, lineage, typicalUse, config }
11
+ * @returns {string} Composed description
12
+ */
13
+ const composeDescription = (sections) => {
14
+ const parts = [];
15
+
16
+ if (sections.base) {
17
+ parts.push(sections.base);
18
+ }
19
+
20
+ if (sections.lineage) {
21
+ parts.push(`Lineage: ${sections.lineage}`);
22
+ }
23
+
24
+ if (sections.typicalUse) {
25
+ parts.push(`Typical use: ${sections.typicalUse}`);
26
+ }
27
+
28
+ if (sections.config) {
29
+ parts.push(`Config: ${sections.config}`);
30
+ }
31
+
32
+ return parts.join('\n\n');
33
+ };
34
+
35
+ /**
36
+ * Returns a formatted lineage text string for a column, or null if no lineage data exists.
37
+ *
38
+ * @param {string} columnName - The column name to look up.
39
+ * @returns {string|null} Formatted lineage string, e.g. "Derived -- Concatenation of ..."
40
+ */
41
+ const getLineageText = (columnName) => {
42
+ const entry = columnLineage[columnName];
43
+ if (!entry) return null;
44
+
45
+ const sourceLabels = {
46
+ 'ga4_export': 'Standard GA4 export field',
47
+ 'ga4_export_modified': 'GA4 export field (modified)',
48
+ 'derived': 'Derived',
49
+ };
50
+
51
+ const label = sourceLabels[entry.source] || entry.source;
52
+ return entry.note ? `${label} -- ${entry.note}` : label;
53
+ };
54
+
55
+ /**
56
+ * Builds a map of config-specific notes for columns based on the provided configuration.
57
+ * Extracts the configuration-dependent description suffixes into a { columnName: "note" } map.
58
+ *
59
+ * @param {Object} config - The merged configuration object.
60
+ * @returns {Object} Map of column names to config note strings.
61
+ */
62
+ const buildConfigNotes = (config) => {
63
+ const notes = {};
64
+
65
+ if (!config) return notes;
66
+
67
+ const append = (key, text) => {
68
+ notes[key] = notes[key] ? `${notes[key]}. ${text}` : text;
69
+ };
70
+
71
+ // timezone
72
+ if (config.timezone) {
73
+ append('event_datetime', `Timezone: ${config.timezone}`);
74
+ }
75
+
76
+ // customTimestampParam
77
+ if (config.customTimestampParam) {
78
+ append('event_datetime', `Custom timestamp parameter: '${config.customTimestampParam}'`);
79
+ append('event_custom_timestamp', `Source parameter: '${config.customTimestampParam}'`);
80
+ }
81
+
82
+ // data_is_final
83
+ if (config.dataIsFinal) {
84
+ const method = config.dataIsFinal.detectionMethod;
85
+ if (method === 'DAY_THRESHOLD') {
86
+ append('data_is_final', `Detection method: DAY_THRESHOLD (${config.dataIsFinal.dayThreshold} days)`);
87
+ } else {
88
+ append('data_is_final', `Detection method: EXPORT_TYPE`);
89
+ }
90
+ }
91
+
92
+ // excludedEvents
93
+ if (config.excludedEvents && config.excludedEvents.length > 0) {
94
+ append('event_name', `Excluded events: ${config.excludedEvents.join(', ')}`);
95
+ }
96
+
97
+ // excludedEventParams
98
+ if (config.excludedEventParams && config.excludedEventParams.length > 0) {
99
+ append('event_params', `Excluded parameters: ${config.excludedEventParams.join(', ')}`);
100
+ }
101
+
102
+ // sessionParams
103
+ if (config.sessionParams && config.sessionParams.length > 0) {
104
+ append('session_params', `Configured parameters: ${config.sessionParams.join(', ')}`);
105
+ }
106
+
107
+ // includedExportTypes
108
+ if (config.includedExportTypes) {
109
+ const types = Object.entries(config.includedExportTypes)
110
+ .filter(([, enabled]) => enabled)
111
+ .map(([type]) => type);
112
+ if (types.length > 0) {
113
+ append('export_type', `Included export types: ${types.join(', ')}`);
114
+ }
115
+ }
116
+
117
+ return notes;
118
+ };
119
+
120
+ /**
121
+ * Returns a deep copy of the default column descriptions, enriched with
122
+ * lineage, typical use, and configuration-specific sections composed into
123
+ * multi-section descriptions.
124
+ *
125
+ * @param {Object} config - The merged configuration object from ga4EventsEnhanced.
126
+ * @returns {Object} Column descriptions object in Dataform ITableConfig columns format.
127
+ */
128
+ const getColumnDescriptions = (config) => {
129
+ const descriptions = JSON.parse(JSON.stringify(columnDescriptions));
130
+
131
+ const configNotes = buildConfigNotes(config);
132
+
133
+ // Compose multi-section descriptions for each top-level column
134
+ for (const key of Object.keys(descriptions)) {
135
+ const isStruct = typeof descriptions[key] === 'object' && descriptions[key].description;
136
+ const baseDesc = isStruct ? descriptions[key].description : (typeof descriptions[key] === 'string' ? descriptions[key] : null);
137
+
138
+ if (!baseDesc) continue;
139
+
140
+ const composed = composeDescription({
141
+ base: baseDesc,
142
+ lineage: getLineageText(key),
143
+ typicalUse: columnTypicalUse[key] || null,
144
+ config: configNotes[key] || null,
145
+ });
146
+
147
+ if (isStruct) {
148
+ descriptions[key].description = composed;
149
+ } else {
150
+ descriptions[key] = composed;
151
+ }
152
+ }
153
+
154
+ // Add descriptions for dynamically promoted event parameter columns
155
+ if (config && config.eventParamsToColumns && config.eventParamsToColumns.length > 0) {
156
+ config.eventParamsToColumns.forEach(p => {
157
+ const columnName = p.columnName || p.name;
158
+ const type = p.type ? ` (${p.type})` : ' (any data type)';
159
+ descriptions[columnName] = composeDescription({
160
+ base: `Promoted from event parameter '${p.name}'${type}`,
161
+ lineage: `Derived -- Promoted from the event_params array`,
162
+ typicalUse: 'Promoted event parameter available as a top-level column for direct filtering and aggregation',
163
+ config: null,
164
+ });
165
+ });
166
+ }
167
+
168
+ return descriptions;
169
+ };
170
+
171
+ module.exports = {
172
+ columnDescriptions,
173
+ getColumnDescriptions,
174
+ composeDescription,
175
+ getLineageText,
176
+ buildConfigNotes,
177
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.3.1",
3
+ "version": "0.3.2-dev.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,8 @@
17
17
  "documentation.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js && node tests/documentation.test.js",
21
+ "test:docs": "node tests/documentation.test.js",
21
22
  "test:preops": "node tests/preOperations.test.js",
22
23
  "test:events": "node tests/ga4EventsEnhanced.test.js",
23
24
  "test:merge": "node tests/mergeSQLConfigurations.test.js",
@@ -338,7 +338,8 @@ The last full table refresh was done using this configuration:
338
338
  ${JSON.stringify(
339
339
  Object.fromEntries(
340
340
  // don't display the default arrays here, their contents are included in the main arrays via the mergeSQLConfigurations function
341
- Object.entries(mergedConfig).filter(([key]) => !key.startsWith('default'))
341
+ // dataformTableConfig is also excluded since it's not relevant for the SQL generation and is more of a deployment detail
342
+ Object.entries(mergedConfig).filter(([key]) => !key.startsWith('default') && key !== 'dataformTableConfig')
342
343
  ),
343
344
  null,
344
345
  2