ga4-export-fixer 0.2.6-dev.1 → 0.2.6-dev.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +3 -2
  2. package/preOperations.js +31 -85
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ga4-export-fixer",
3
- "version": "0.2.6-dev.1",
3
+ "version": "0.2.6-dev.3",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -17,7 +17,8 @@
17
17
  "documentation.js"
18
18
  ],
19
19
  "scripts": {
20
- "test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js",
20
+ "test": "node tests/ga4EventsEnhanced.test.js && node tests/mergeSQLConfigurations.test.js && node tests/preOperations.test.js",
21
+ "test:preops": "node tests/preOperations.test.js",
21
22
  "test:events": "node tests/ga4EventsEnhanced.test.js",
22
23
  "test:merge": "node tests/mergeSQLConfigurations.test.js",
23
24
  "readme": "node scripts/updateReadme.js",
package/preOperations.js CHANGED
@@ -57,100 +57,48 @@ where
57
57
 
58
58
  };
59
59
 
60
- // Define the date range start for incremental refresh with intraday tables
61
- // Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data
62
- const getDateRangeStartIntraday = (config) => {
63
- const getStartDate = () => {
64
- if (config.incremental) {
65
- return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
66
- }
67
- return 'current_date()-5';
68
- };
69
-
70
- const startDate = getStartDate();
60
+ // Find the first day where a target export type exists but no daily table does.
61
+ // Uses INFORMATION_SCHEMA.TABLES to avoid scanning actual table data.
62
+ // The export_statuses CTE always classifies all three export types (daily, fresh, intraday).
63
+ // The 5-day lookback limit only applies to intraday rows; daily and fresh have no lower date bound.
64
+ const getExportDateRangeStart = (config, targetExportType) => {
65
+ const intradayStartDate = config.incremental
66
+ ? `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`
67
+ : 'current_date()-5';
68
+
69
+ const informationSchemaPath = config.sourceTable.replace(
70
+ /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
71
+ '`$1.$2.INFORMATION_SCHEMA.TABLES`'
72
+ );
71
73
 
72
- if (config.includedExportTypes.intraday) {
73
- const informationSchemaPath = config.sourceTable.replace(
74
- /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
75
- '`$1.$2.INFORMATION_SCHEMA.TABLES`'
76
- );
74
+ const finalCondition = targetExportType === 'intraday'
75
+ ? 'intraday = true and daily = false'
76
+ : 'fresh = true and daily = false';
77
77
 
78
- return `with export_statuses as (
78
+ return `with export_statuses as (
79
79
  select
80
80
  safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
81
81
  case
82
82
  when table_name like 'events_intraday_%' then 'intraday'
83
- else 'daily'
84
- end as export_type
85
- from
86
- ${informationSchemaPath}
87
- where
88
- regexp_contains(table_name, r'^events_(intraday_)?\\d{8}$')
89
- and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
90
- between ${startDate} and current_date()
91
- ),
92
- statuses_by_day as (
93
- select
94
- date,
95
- max(if(export_type = 'daily', true, false)) as daily,
96
- max(if(export_type = 'intraday', true, false)) as intraday
97
- from
98
- export_statuses
99
- group by
100
- date
101
- )
102
- select
103
- min(
104
- if(
105
- intraday = true and daily = false,
106
- date,
107
- null
108
- )
109
- )
110
- from
111
- statuses_by_day`;
112
- }
113
-
114
- return undefined;
115
- };
116
-
117
- // Define the date range start for fresh export tables
118
- // Uses INFORMATION_SCHEMA.TABLES to find the first day with a fresh table but no daily table
119
- const getDateRangeStartFresh = (config) => {
120
- const getStartDate = () => {
121
- if (config.incremental) {
122
- return `greatest(${constants.DATE_RANGE_START_VARIABLE}, current_date()-5)`;
123
- }
124
- return 'current_date()-5';
125
- };
126
-
127
- const startDate = getStartDate();
128
-
129
- if (config.includedExportTypes.fresh) {
130
- const informationSchemaPath = config.sourceTable.replace(
131
- /`?([^`]+)\.([^`]+)\.[^`]+`?$/,
132
- '`$1.$2.INFORMATION_SCHEMA.TABLES`'
133
- );
134
-
135
- return `with export_statuses as (
136
- select
137
- safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') as date,
138
- case
139
83
  when table_name like 'events_fresh_%' then 'fresh'
140
- else 'daily'
84
+ when regexp_contains(table_name, r'^events_\\d{8}$') then 'daily'
141
85
  end as export_type
142
86
  from
143
87
  ${informationSchemaPath}
144
88
  where
145
- regexp_contains(table_name, r'^events_(fresh_)?\\d{8}$')
146
- and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD')
147
- between ${startDate} and current_date()
89
+ regexp_contains(table_name, r'^events_(intraday_|fresh_)?\\d{8}$')
90
+ and safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') <= current_date()
91
+ and (
92
+ table_name not like 'events_intraday_%'
93
+ or safe_cast(regexp_extract(table_name, r'\\d+') as date format 'YYYYMMDD') >= ${intradayStartDate}
94
+ )
148
95
  ),
149
96
  statuses_by_day as (
150
97
  select
151
98
  date,
152
99
  max(if(export_type = 'daily', true, false)) as daily,
153
- max(if(export_type = 'fresh', true, false)) as fresh
100
+ max(if(export_type = 'fresh', true, false)) as fresh,
101
+ max(if(export_type = 'intraday', true, false)) as intraday
154
102
  from
155
103
  export_statuses
156
104
  group by
@@ -159,16 +107,13 @@ const getDateRangeStartFresh = (config) => {
159
107
  select
160
108
  min(
161
109
  if(
162
- fresh = true and daily = false,
110
+ ${finalCondition},
163
111
  date,
164
112
  null
165
113
  )
166
114
  )
167
115
  from
168
116
  statuses_by_day`;
169
- }
170
-
171
- return undefined;
172
117
  };
173
118
 
174
119
  // Get the maximum event_timestamp from fresh export tables
@@ -250,14 +195,14 @@ const setPreOperations = (config) => {
250
195
  name: constants.INTRADAY_DATE_RANGE_START_VARIABLE,
251
196
  // only needed when daily+intraday WITHOUT fresh (the two-way intraday checkpoint)
252
197
  // when fresh is also enabled, intraday uses fresh_date_range_start instead
253
- value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.intraday && config.includedExportTypes.daily && !config.includedExportTypes.fresh ? getDateRangeStartIntraday(config) : undefined,
198
+ value: !config.test && config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.intraday && config.includedExportTypes.daily && !config.includedExportTypes.fresh ? getExportDateRangeStart(config, 'intraday') : undefined,
254
199
  comment: 'Define the date range start for intraday export tables. Avoid returning intraday data if it overlaps with daily export data. Only needed if intraday and daily export tables are included without fresh.',
255
200
  },
256
201
  {
257
202
  type: 'variable',
258
203
  name: constants.FRESH_DATE_RANGE_START_VARIABLE,
259
204
  // needed when fresh and daily are both enabled, to avoid fresh data overlapping with daily
260
- value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.daily ? getDateRangeStartFresh(config) : undefined,
205
+ value: config.sourceTableType === 'GA4_EXPORT' && config.includedExportTypes.fresh && config.includedExportTypes.daily ? getExportDateRangeStart(config, 'fresh') : undefined,
261
206
  comment: 'Define the date range start for fresh export tables. Returns the first day with a fresh table but no daily table.',
262
207
  },
263
208
  {
@@ -313,5 +258,6 @@ ${preOperationsSQL}
313
258
  };
314
259
 
315
260
  module.exports = {
316
- setPreOperations
261
+ setPreOperations,
262
+ _internal: { getExportDateRangeStart },
317
263
  };