ipulse-shared-core-ftredge: 2.51 (py3-none-any.whl) → 2.53 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ipulse-shared-core-ftredge might be problematic.
- ipulse_shared_core_ftredge/__init__.py +4 -4
- ipulse_shared_core_ftredge/enums/__init__.py +2 -2
- ipulse_shared_core_ftredge/enums/enums_common_utils.py +19 -9
- ipulse_shared_core_ftredge/utils_common.py +419 -131
- ipulse_shared_core_ftredge/utils_gcp.py +76 -36
- ipulse_shared_core_ftredge/utils_templates_and_schemas.py +60 -57
- {ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/METADATA +1 -1
- {ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/RECORD +11 -11
- {ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/WHEEL +1 -1
- {ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/top_level.txt +0 -0
ipulse_shared_core_ftredge/__init__.py

@@ -2,12 +2,12 @@ from .models import (Organisation, UserAuth, UserProfile,
                      UserStatus, UserProfileUpdate, pulse_enums)
 from .utils_gcp import (setup_gcp_logger_and_error_report,
                         read_csv_from_gcs, read_json_from_gcs,
-                        write_csv_to_gcs,
+                        write_csv_to_gcs, write_json_to_gcs)
 from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
-
-from .utils_common import (
+                                          check_format_against_schema_template)
+from .utils_common import (SmartLog, Watcher)

-from .enums import (
+from .enums import (WatcherCategory, LogLevel, Unit, Frequency,
                     Module, SubModule, BaseDataCategory,
                     FinCoreCategory, FincCoreSubCategory,
                     FinCoreRecordsCategory, ExchangeOrPublisher,
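The net effect of this hunk: write_json_to_gcs, check_format_against_schema_template, SmartLog, Watcher, WatcherCategory, and LogLevel all become importable from the package root. A minimal consumer sketch, using only the exports reconstructed above:

    # Hypothetical consumer module; all names are package-root re-exports in 2.53.
    from ipulse_shared_core_ftredge import (SmartLog, Watcher, WatcherCategory,
                                            LogLevel, write_json_to_gcs,
                                            check_format_against_schema_template)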
ipulse_shared_core_ftredge/enums/enums_common_utils.py

@@ -6,11 +6,17 @@
 from enum import Enum


-class
-
-
-
-
+class WatcherCategory(Enum):
+    MIXED="watcher_mixed"
+    SUCCESSES = "watcher_successes"
+    NOTICES = "watcher_notices"
+    SUCCESSES_AND_NOTICES = "watcher_succs_n_notcs"
+    WARNINGS = "watcher_warnings"
+    WARNINGS_AND_ERRORS = "watcher_warns_n_errs"
+    ERRORS = "watcher_errors"
+
+
+class LogLevel(Enum):
     """
     Standardized notice levels for data engineering pipelines,
     designed for easy analysis and identification of manual
@@ -19,12 +25,16 @@ class NoticeLevel(Enum):
     DEBUG = 100 # Detailed debug information (for development/troubleshooting)

     INFO = 200
+    SUCCESS = 201

-
+    NOTICE = 300 # Maybe same file or data already fully or partially exists
+    NOTICE_ALREADY_EXISTS = 301 # Data already exists, no action required
+    NOTICE_PARTIAL_EXISTS = 302 # Partial data exists, no action required
+    NOTICE_CANCELLED = 303 # Data processing cancelled, no action required

     # Warnings indicate potential issues that might require attention:
     WARNING = 400 # General warning, no immediate action required
-    WARNING_NO_ACTION = 401 # Minor issue or Unexpected Behavior, no immediate action required (can be logged frequently)
+    # WARNING_NO_ACTION = 401 # Minor issue or Unexpected Behavior, no immediate action required (can be logged frequently)
     WARNING_REVIEW_RECOMMENDED = 402 # Action recommended to prevent potential future issues
     WARNING_FIX_RECOMMENDED = 403 # Action recommended to prevent potential future issues
     WARNING_FIX_REQUIRED = 404 # Action required, pipeline can likely continue
@@ -46,16 +56,16 @@ class NoticeLevel(Enum):
     UNKNOWN=1001 # Unknown error, should not be used in normal operation


-class
+class LogStatus(Enum):
     OPEN = "open"
     ACKNOWLEDGED = "acknowledged"
     IN_PROGRESS = "in_progress"
     RESOLVED = "resolved"
     IGNORED = "ignored"
+    CANCELLED = "cancelled"



-
 ### Exception during full exection, partially saved
 # Exception during ensemble pipeline; modifications collected in local object , nothing persisted
 # Exception during ensemble pipeline; modifications persisted , metadata failed
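The new codes partition into numeric bands (SUCCESS = 201, NOTICE = 300-303, WARNING = 400-404, errors above that), which is exactly what the Watcher counting logic later in this diff keys off. A minimal sketch of that band classification, assuming (not shown in this hunk) that LogLevel.ERROR.value is 500:

    # Hypothetical helper; band boundaries taken from the LogLevel codes above.
    ERROR_START_CODE = 500      # assumption: the ERROR member is not visible in this diff

    def classify(level_code: int) -> str:
        if level_code >= ERROR_START_CODE:
            return "error"
        if level_code >= 400:   # WARNING band
            return "warning"
        if level_code >= 300:   # NOTICE band (300-303 above)
            return "notice"
        if level_code >= 201:   # SUCCESS
            return "success"
        return "info_or_debug"  # INFO = 200, DEBUG = 100

    assert classify(301) == "notice"  # NOTICE_ALREADY_EXISTS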
ipulse_shared_core_ftredge/utils_common.py

@@ -10,18 +10,18 @@ from datetime import datetime, timezone
 from contextlib import contextmanager
 from typing import List
 from google.cloud import logging as cloudlogging
-from ipulse_shared_core_ftredge.enums.enums_common_utils import
-from ipulse_shared_core_ftredge.utils_gcp import
+from ipulse_shared_core_ftredge.enums.enums_common_utils import WatcherCategory, LogLevel, LogStatus
+from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs


 # ["data_import","data_quality", "data_processing","data_general","data_persistance","metadata_quality", "metadata_processing", "metadata_persistance","metadata_general"]

-class
+class SmartLog:
     MAX_TRACEBACK_LINES = 14 # Define the maximum number of traceback lines to include
-    def __init__(self, level:
+    def __init__(self, level: LogLevel, start_context: str = None, collector_id: str = None,
                  e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
                  subject: str = None, description: str = None, context: str = None,
-
+                 log_status: LogStatus = LogStatus.OPEN):
         if e is not None:
             e_type = type(e).__name__ if e_type is None else e_type
             e_message = str(e) if e_message is None else e_message
@@ -34,11 +34,11 @@ class Notice:
         self.description = description
         self._start_context = start_context
         self._context = context
-        self.
+        self.collector_id = collector_id
         self.exception_type = e_type
         self.exception_message = e_message
         self.exception_traceback = self._format_traceback(e_traceback,e_message)
-        self.
+        self.log_status = log_status
         self.timestamp = datetime.now(timezone.utc).isoformat()

     def _format_traceback(self, e_traceback, e_message):
@@ -72,7 +72,7 @@ class Notice:
         # Determine the number of lines to keep from the start and end
         keep_lines_start = min(self.MAX_TRACEBACK_LINES // 2, len(combined_lines))
         keep_lines_end = min(self.MAX_TRACEBACK_LINES // 2, len(combined_lines) - keep_lines_start)
-
+
         if len(combined_lines) > self.MAX_TRACEBACK_LINES:
             # Include the first few and last few lines, and an indicator of truncation
             formatted_traceback = '\n'.join(
@@ -82,8 +82,8 @@ class Notice:
             )
         else:
             formatted_traceback = '\n'.join(combined_lines)
-
-        return formatted_traceback
+
+        return formatted_traceback

     @property
     def start_context(self):
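The whitespace-only churn above sits in SmartLog._format_traceback, which caps output at MAX_TRACEBACK_LINES by keeping roughly half the lines from the head and half from the tail. A standalone sketch of that truncation under the same constant; the marker text is an assumption, since the diff does not show it:

    MAX_TRACEBACK_LINES = 14  # value defined earlier in this diff

    def truncate_traceback(combined_lines):
        # Mirrors the visible logic: head half + marker + tail half when too long.
        keep_start = min(MAX_TRACEBACK_LINES // 2, len(combined_lines))
        keep_end = min(MAX_TRACEBACK_LINES // 2, len(combined_lines) - keep_start)
        if len(combined_lines) > MAX_TRACEBACK_LINES:
            return '\n'.join(combined_lines[:keep_start]
                             + ['... (truncated) ...']  # assumed marker text
                             + combined_lines[-keep_end:])
        return '\n'.join(combined_lines)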
@@ -112,24 +112,26 @@ class Notice:
             "exception_type": self.exception_type,
             "exception_message": self.exception_message,
             "exception_traceback": self.exception_traceback,
-            "
-            "
+            "log_status": self.log_status.value,
+            "collector_id": self.collector_id,
             "timestamp": self.timestamp
         }

-class
-
-
-
+class Watcher:
+    ERROR_START_CODE = LogLevel.ERROR.value
+    WARNING_START_CODE = LogLevel.WARNING.value
+    NOTICE_START_CODE = LogLevel.NOTICE.value
+    SUCCESS_START_CODE = LogLevel.SUCCESS.value

-    def __init__(self, start_context: str, category:
-        self.
-        self.
+    def __init__(self, start_context: str, category: WatcherCategory = WatcherCategory.MIXED, logger_name=None):
+        self._id = str(uuid.uuid4())
+        self._logs = []
         self._early_stop = False
-        self.
-        self.
-        self.
-        self.
+        self._errors_count = 0
+        self._warnings_count = 0
+        self._notices_count = 0
+        self._successes_count = 0
+        self._level_counts = {level.name: 0 for level in LogLevel}
         self._start_context = start_context
         self._context_stack = []
         self._category = category.value
@@ -141,7 +143,6 @@ class NoticesManager:
             return logging_client.logger(logger_name)
         return None

-
     @contextmanager
     def context(self, context):
         self.push_context(context)
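The context manager above is the intended entry point for scoping logs. A hedged usage sketch, assuming the Watcher and SmartLog APIs reconstructed in this diff (pipeline and file names are hypothetical):

    watcher = Watcher(start_context="daily_prices_pipeline",
                      category=WatcherCategory.MIXED)

    with watcher.context("download"):
        with watcher.context("AAPL"):
            # current_context is now "download >> AAPL"
            watcher.add_log(SmartLog(level=LogLevel.NOTICE_ALREADY_EXISTS,
                                     subject="prices_2024.csv",
                                     description="File already present, skipping."))

    print(watcher.count_notices())  # 1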
@@ -157,141 +158,163 @@ class NoticesManager:
         if self._context_stack:
             self._context_stack.pop()

-
+    @property
+    def current_context(self):
         return " >> ".join(self._context_stack)

-
+    @property
+    def start_context(self):
         return self._start_context
-
-
-
-
+
+    @property
+    def id(self):
+        return self._id
+
+    @property
+    def early_stop(self):
+        return self._early_stop
+
+    def set_early_stop(self, max_errors_tolerance: int, create_error_log=True, pop_context=False):
         self._early_stop = True
-        if
+        if create_error_log:
             if pop_context:
                 self.pop_context()
-            self.
-
-
+            self.add_log(SmartLog(level=LogLevel.ERROR,
+                                  subject="EARLY_STOP",
+                                  description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
+
     def reset_early_stop(self):
         self._early_stop = False

     def get_early_stop(self):
         return self._early_stop

-    def
-        if (self._category ==
-            (self._category ==
-            raise ValueError(f"Invalid
-
-
-
-
-        self.
-        self._update_counts(
+    def add_log(self, log: SmartLog):
+        if (self._category == WatcherCategory.SUCCESSES and log.level >=self.NOTICE_START_CODE) or \
+           (self._category == WatcherCategory.WARNINGS_AND_ERRORS and log.level.value < self.WARNING_START_CODE):
+            raise ValueError(f"Invalid log level {log.level.name} for category {self._category}")
+        log.start_context = self.start_context
+        log.context = self.current_context
+        log.collector_id = self.id
+        log_dict = log.to_dict()
+        self._logs.append(log_dict)
+        self._update_counts(log_dict)

         if self._logger:
-
-
+            # We specifically want to avoid having an ERROR log level for this structured Watcher reporting, to ensure Errors are alerting on Critical Application Services.
+            # A single ERROR log level can be used for the entire pipeline, which shall be used at the end of the pipeline
+            if log.level.value >= self.WARNING_START_CODE:
+                self._logger.log_struct(log_dict, severity="WARNING")
+            elif log.level.value >= self.NOTICE_START_CODE:
+                self._logger.log_struct(log_dict, severity="NOTICE")
             else:
-                self._logger.log_struct(
+                self._logger.log_struct(log_dict, severity="INFO")

-    def
-        for
-            self.
+    def add_logs(self, logs: List[SmartLog]):
+        for log in logs:
+            self.add_log(log)

-    def
-        self.
-        self.
-        self.
-        self.
-        self.
+    def clear_logs_and_counts(self):
+        self._logs = []
+        self._errors_count = 0
+        self._warnings_count = 0
+        self._notices_count = 0
+        self._successes_count = 0
+        self._level_counts = {level.name: 0 for level in LogLevel}

-    def
-        self.
+    def clear_logs(self):
+        self._logs = []

-    def
-        return self.
+    def get_all_logs(self):
+        return self._logs

-    def
-        return [
+    def get_logs_for_level(self, level: LogLevel):
+        return [log for log in self._logs if log["level_code"] == level.value]

-    def
+    def get_logs_by_str_in_context(self, context_substring: str):
         return [
-
-            if context_substring in
+            log for log in self._logs
+            if context_substring in log["context"]
         ]
-
+
     def contains_errors(self):
-        return self.
+        return self._errors_count > 0

     def count_errors(self):
-        return self.
+        return self._errors_count

     def contains_warnings_or_errors(self):
-        return self.
+        return self._warnings_count > 0 or self._errors_count > 0

     def count_warnings_and_errors(self):
-        return self.
-
+        return self._warnings_count + self._errors_count
+
     def count_warnings(self):
-        return self.
+        return self._warnings_count
+
+    def count_notices(self):
+        return self._notices_count

     def count_successes(self):
-        return self.
+        return self._successes_count

-    def
-        return len(self.
+    def count_all_logs(self):
+        return len(self._logs)

-    def
+    def count_logs_by_level(self, level: LogLevel):
         return self._level_counts.get(level.name, 0)

-    def
+    def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
         return sum(
-            1 for
-            if (
-            (level_code_min is None or
-            (level_code_max is None or
+            1 for log in self._logs
+            if (log["context"] == context_substring if exact_match else context_substring in log["context"]) and
+            (level_code_min is None or log["level_code"] >= level_code_min) and
+            (level_code_max is None or log["level_code"] <= level_code_max)
         )

-    def
-        return self.
+    def count_logs_for_current_context(self):
+        return self._count_logs(self.current_context, exact_match=True)

-    def
-        return self.
+    def count_logs_for_current_and_nested_contexts(self):
+        return self._count_logs(self.current_context)

-    def
-        return self.
+    def count_logs_by_level_for_current_context(self, level: LogLevel):
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)

-    def
-        return self.
+    def count_logs_by_level_for_current_and_nested_contexts(self, level: LogLevel):
+        return self._count_logs(self.current_context, level_code_min=level.value, level_code_max=level.value)

     def count_errors_for_current_context(self):
-        return self.
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)

     def count_errors_for_current_and_nested_contexts(self):
-        return self.
+        return self._count_logs(self.current_context, level_code_min=self.ERROR_START_CODE)

     def count_warnings_and_errors_for_current_context(self):
-        return self.
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)

     def count_warnings_and_errors_for_current_and_nested_contexts(self):
-        return self.
+        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE)

     def count_warnings_for_current_context(self):
-        return self.
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)

     def count_warnings_for_current_and_nested_contexts(self):
-        return self.
+        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+    def count_notices_for_current_context(self):
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)
+
+    def count_notices_for_current_and_nested_contexts(self):
+        return self._count_logs(self.current_context, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)

     def count_successes_for_current_context(self):
-        return self.
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)

     def count_successes_for_current_and_nested_contexts(self):
-        return self.
+        return self._count_logs(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)

-    def
+    def export_logs_to_gcs_file(self, bucket_name, storage_client, file_prefix=None, file_name=None, top_level_context=None, save_locally=False, local_path=None, logger=None, max_retries=2):
         def log_message(message):
             if logger:
                 logger.info(message)
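All the per-context counters above funnel into _count_logs, which filters the stored log dicts by context (exact match or substring) and an optional [level_code_min, level_code_max] band. A small sketch of those filter semantics on hand-built dicts:

    logs = [
        {"context": "download >> AAPL", "level_code": 404},  # WARNING_FIX_REQUIRED
        {"context": "download", "level_code": 201},          # SUCCESS
    ]
    # exact_match=True counts only logs whose context equals the string;
    # the default substring match also picks up nested contexts.
    assert sum(1 for log in logs if "download" in log["context"]) == 2
    assert sum(1 for log in logs if log["context"] == "download") == 1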
@@ -305,30 +328,30 @@ class NoticesManager:
         if not file_name:
             timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
             if top_level_context:
-                file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self.
+                file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self._logs)}.json"
             else:
-                file_name = f"{file_prefix}_{timestamp}_len{len(self.
+                file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"

-
-        local_path = None
+        result = None
         try:
-
+            result = write_json_to_gcs(
                 bucket_name=bucket_name,
                 storage_client=storage_client,
-                data=self.
+                data=self._logs,
                 file_name=file_name,
                 save_locally=save_locally,
                 local_path=local_path,
                 logger=logger,
-                max_retries=max_retries
+                max_retries=max_retries,
+                overwrite_if_exists=False
             )
-            log_message(f"{file_prefix} successfully saved to GCS at {
+            log_message(f"{file_prefix} successfully saved (overwritten={result.get('gcs_file_overwritten')}) to GCS at {result.get('gcs_path')} and locally at {result.get('local_path')}.")
         except Exception as e:
-            log_error(f"Failed at
+            log_error(f"Failed at export_logs_to_gcs_file for {file_prefix} for file {file_name} to bucket {bucket_name}: {type(e).__name__} - {str(e)}")

-        return
+        return result

-    def
+    def import_logs_from_json(self, json_or_file, logger=None):
         def log_message(message):
             if logger:
                 logger.info(message)
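A hedged sketch of calling the reworked exporter; the bucket name is a placeholder, the client assumes ambient GCP credentials, and the result keys come from write_json_to_gcs as reconstructed later in this diff:

    from google.cloud import storage

    client = storage.Client()
    result = watcher.export_logs_to_gcs_file(
        bucket_name="my-pipeline-logs",   # hypothetical bucket
        storage_client=client,
        top_level_context="daily_prices",
    )
    if result and result.get("gcs_path"):
        print("exported to", result["gcs_path"])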
@@ -339,31 +362,296 @@ class NoticesManager:

         try:
             if isinstance(json_or_file, str): # Load from string
-
+                imported_logs = json.loads(json_or_file)
             elif hasattr(json_or_file, 'read'): # Load from file-like object
-
-            self.
-            log_message("Successfully imported
+                imported_logs = json.load(json_or_file)
+            self.add_logs(imported_logs)
+            log_message("Successfully imported logs from json.")
         except Exception as e:
-            log_warning(f"Failed to import
+            log_warning(f"Failed to import logs from json: {type(e).__name__} - {str(e)}", exc_info=True)

-    def _update_counts(self,
-        level_code =
-        level_name =
+    def _update_counts(self, log, remove=False):
+        level_code = log["level_code"]
+        level_name = log["level_name"]

         if remove:
-            if level_code >= self.
-                self.
-            elif level_code
-                self.
-            elif level_code
-                self.
+            if level_code >= self.ERROR_START_CODE:
+                self._errors_count -= 1
+            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
+                self._warnings_count -= 1
+            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
+                self._notices_count -= 1
+            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
+                self._successes_count -= 1
             self._level_counts[level_name] -= 1
         else:
-            if level_code >= self.
-                self.
-            elif level_code
-                self.
-            elif level_code
-                self.
-            self.
+            if level_code >= self.ERROR_START_CODE:
+                self._errors_count += 1
+            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
+                self._warnings_count += 1
+            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
+                self._notices_count += 1
+            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
+                self._successes_count += 1
+            self._level_counts[level_name] += 1
+
+
+# class Watcher:
+# ERROR_START_CODE = WatcherLogLevel.ERROR.value
+# WARNING_START_CODE = WatcherLogLevel.WARNING.value
+# NOTICE_START_CODE = WatcherLogLevel.NOTICE.value
+# SUCCESS_START_CODE = WatcherLogLevel.SUCCESS.value
+
+# def __init__(self, start_context: str, category: WatcherCategory = WatcherCategory.MIXED, logger_name=None):
+# self._id = str(uuid.uuid4())
+# self._logs = []
+# self._early_stop = False
+# self._errors_count = 0
+# self._warnings_count = 0
+# self._successes_count = 0
+# self._level_counts = {level.name: 0 for level in WatcherLogLevel}
+# self._start_context = start_context
+# self._context_stack = []
+# self._category = category.value
+# self._logger = self._initialize_logger(logger_name)
+
+# def _initialize_logger(self, logger_name):
+# if logger_name:
+# logging_client = cloudlogging.Client()
+# return logging_client.logger(logger_name)
+# return None
+
+
+# @contextmanager
+# def context(self, context):
+# self.push_context(context)
+# try:
+# yield
+# finally:
+# self.pop_context()
+
+# def push_context(self, context):
+# self._context_stack.append(context)
+
+# def pop_context(self):
+# if self._context_stack:
+# self._context_stack.pop()
+
+# @property
+# def current_context(self):
+# return " >> ".join(self._context_stack)
+
+# @property
+# def start_context(self):
+# return self._start_context
+
+# @property
+# def id(self):
+# return self._id
+
+# @property
+# def early_stop(self):
+# return self._early_stop
+
+# def set_early_stop(self, max_errors_tolerance:int, create_error_notice=True,pop_context=False):
+# self.early_stop = True
+# if create_error_notice:
+# if pop_context:
+# self.pop_context()
+# self.add_notice(WatcherLog(level=WatcherLogLevel.ERROR,
+# subject="EARLY_STOP",
+# description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
+
+# def reset_early_stop(self):
+# self._early_stop = False
+
+# def get_early_stop(self):
+# return self._early_stop
+
+# def add_notice(self, notice: WatcherLog):
+# if (self._category == WatcherCategory.SUCCESSES.value and notice.level != WatcherLogLevel.SUCCESS) or \
+# (self._category == WatcherCategory.WARNINGS_AND_ERRORS.value and notice.level.value < self.WARNING_START_CODE):
+# raise ValueError(f"Invalid notice level {notice.level.name} for category {self._category}")
+# notice.start_context = self.start_context
+# notice.context = self.current_context
+# notice.watcher_id = self.id
+# notice_dict = notice.to_dict()
+# self._logs.append(notice_dict)
+# self._update_counts(notice_dict)
+
+# if self._logger:
+# if notice.level.value >= self.WARNING_START_CODE:
+# self._logger.log_struct(notice_dict, severity="WARNING")
+# else:
+# self._logger.log_struct(notice_dict, severity="INFO")
+
+# def add_notices(self, notices: List[WatcherLog]):
+# for notice in notices:
+# self.add_notice(notice)
+
+# def clear_notices_and_counts(self):
+# self._logs = []
+# self._errors_count = 0
+# self._warnings_count = 0
+# self._successes_count = 0
+# self._level_counts = {level.name: 0 for level in WatcherLogLevel}
+
+# def clear_notices(self):
+# self._logs = []
+
+# def get_all_notices(self):
+# return self._logs
+
+# def get_notices_for_level(self, level: WatcherLogLevel):
+# return [notice for notice in self._logs if notice["level_code"] == level.value]
+
+# def get_notices_by_str_in_context(self, context_substring: str):
+# return [
+# notice for notice in self._logs
+# if context_substring in notice["context"]
+# ]
+
+# def contains_errors(self):
+# return self._errors_count > 0
+
+# def count_errors(self):
+# return self._errors_count
+
+# def contains_warnings_or_errors(self):
+# return self._warnings_count > 0 or self._errors_count > 0
+
+# def count_warnings_and_errors(self):
+# return self._warnings_count + self._errors_count
+
+# def count_warnings(self):
+# return self._warnings_count
+
+# def count_successes(self):
+# return self._successes_count
+
+# def count_all_notices(self):
+# return len(self._logs)
+
+# def count_notices_by_level(self, level: WatcherLogLevel):
+# return self._level_counts.get(level.name, 0)
+
+# def _count_notices(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
+# return sum(
+# 1 for notice in self._logs
+# if (notice["context"] == context_substring if exact_match else context_substring in notice["context"]) and
+# (level_code_min is None or notice["level_code"] >= level_code_min) and
+# (level_code_max is None or notice["level_code"] <= level_code_max)
+# )
+
+# def count_notices_for_current_context(self):
+# return self._count_notices(self.current_context, exact_match=True)
+
+# def count_notices_for_current_and_nested_contexts(self):
+# return self._count_notices(self.current_context)
+
+# def count_notices_by_level_for_current_context(self, level: WatcherLogLevel):
+# return self._count_notices(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)
+
+# def count_notices_by_level_for_current_and_nested_contexts(self, level: WatcherLogLevel):
+# return self._count_notices(self.current_context, level_code_min=level.value, level_code_max=level.value)
+
+# def count_errors_for_current_context(self):
+# return self._count_notices(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)
+
+# def count_errors_for_current_and_nested_contexts(self):
+# return self._count_notices(self.current_context, level_code_min=self.ERROR_START_CODE)
+
+# def count_warnings_and_errors_for_current_context(self):
+# return self._count_notices(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)
+
+# def count_warnings_and_errors_for_current_and_nested_contexts(self):
+# return self._count_notices(self.current_context, level_code_min=self.WARNING_START_CODE)
+
+# def count_warnings_for_current_context(self):
+# return self._count_notices(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+# def count_warnings_for_current_and_nested_contexts(self):
+# return self._count_notices(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+# def count_successes_for_current_context(self):
+# return self._count_notices(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.SUCCESS_START_CODE)
+
+# def count_successes_for_current_and_nested_contexts(self):
+# return self._count_notices(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.SUCCESS_START_CODE)
+
+# def export_notices_to_gcs_file(self, bucket_name, storage_client, file_prefix=None, file_name=None, top_level_context=None, save_locally=False, local_path=None, logger=None, max_retries=2):
+# def log_message(message):
+# if logger:
+# logger.info(message)
+
+# def log_error(message, exc_info=False):
+# if logger:
+# logger.error(message, exc_info=exc_info)
+
+# if not file_prefix:
+# file_prefix = self._category
+# if not file_name:
+# timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+# if top_level_context:
+# file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self._logs)}.json"
+# else:
+# file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"
+
+# result=None
+# try:
+# result= write_json_to_gcs(
+# bucket_name=bucket_name,
+# storage_client=storage_client,
+# data=self._logs,
+# file_name=file_name,
+# save_locally=save_locally,
+# local_path=local_path,
+# logger=logger,
+# max_retries=max_retries,
+# overwrite_gcs=False
+# )
+# log_message(f"{file_prefix} successfully saved (ovewritten={result.get("gcs_file_overwritten")}) to GCS at {result.get("gcs_path")} and locally at {result.get("local_path")}.")
+# except Exception as e:
+# log_error(f"Failed at export_notices_to_gcs_file for {file_prefix} for file {file_name} to bucket {bucket_name}: {type(e).__name__} - {str(e)}")
+
+# return result
+
+# def import_notices_from_json(self, json_or_file, logger=None):
+# def log_message(message):
+# if logger:
+# logger.info(message)
+
+# def log_warning(message, exc_info=False):
+# if logger:
+# logger.warning(message, exc_info=exc_info)
+
+# try:
+# if isinstance(json_or_file, str): # Load from string
+# imported_notices = json.loads(json_or_file)
+# elif hasattr(json_or_file, 'read'): # Load from file-like object
+# imported_notices = json.load(json_or_file)
+# self.add_notices(imported_notices)
+# log_message("Successfully imported notices from json.")
+# except Exception as e:
+# log_warning(f"Failed to import notices from json: {type(e).__name__} - {str(e)}", exc_info=True)
+
+# def _update_counts(self, notice, remove=False):
+# level_code = notice["level_code"]
+# level_name = notice["level_name"]
+
+# if remove:
+# if level_code >= self.ERROR_START_CODE:
+# self._errors_count -= 1
+# elif level_code >= self.WARNING_START_CODE:
+# self._warnings_count -= 1
+# elif level_code >= self.SUCCESS_START_CODE:
+# self._successes_count -= 1
+# self._level_counts[level_name] -= 1
+# else:
+# if level_code >= self.ERROR_START_CODE:
+# self._errors_count += 1
+# elif level_code >= self.WARNING_START_CODE:
+# self._warnings_count += 1
+# elif level_code == self.SUCCESS_START_CODE:
+# self._successes_count += 1
+# self._level_counts[level_name] += 1
ipulse_shared_core_ftredge/utils_gcp.py

@@ -122,33 +122,19 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):



-def
-                       save_locally=False, local_path=None, logger=None, max_retries=
+def write_json_to_gcs(bucket_name, storage_client, data, file_name,
+                      save_locally=False, local_path=None, logger=None, max_retries=2,
+                      overwrite_if_exists=False, increment_if_exists=False):
     """Saves data to Google Cloud Storage and optionally locally.

     This function attempts to upload data to GCS. If the upload fails after
     retries and `save_locally` is True or `local_path` is provided, it attempts
     to save the data locally.

-    Args:
-        bucket_name (str): Name of the GCS bucket.
-        storage_client (google.cloud.storage.Client): GCS client object.
-        data (list, dict, or str): Data to be saved.
-        file_name (str, optional): File name for GCS and local. Defaults to None.
-        save_locally (bool, optional): Save locally if GCS fails. Defaults to False.
-        local_path (str, optional): Local directory to save. Defaults to None.
-        logger (logging.Logger, optional): Logger for messages. Defaults to None.
-        max_retries (int, optional): Number of GCS upload retries. Defaults to 3.
-
     Returns:
-
-
-
-    Raises:
-        ValueError: If data is not a list, dict, or str.
-        Exception: If GCS upload fails after retries and local saving fails or
-                   is not requested. If GCS upload fails after retries and
-                   local saving is requested but unsuccessful.
+        dict: A dictionary containing the GCS path (or None if upload failed),
+              the local path (or None if not saved locally), a boolean indicating if the file was overwritten,
+              a boolean indicating if the file already existed, and a boolean indicating if the file was saved with an incremented name.
     """

     def log_message(message):
@@ -159,12 +145,23 @@ def write_data_to_gcs(bucket_name, storage_client, data, file_name=None,
         if logger:
             logger.error(message, exc_info=exc_info)

+    def log_warning(message):
+        if logger:
+            logger.warning(message)
+
     attempts = 0
     success = False
-
+    gcs_path = None
     local_path_final = None
+    gcs_file_overwritten = False
+    gcs_file_already_exists = False
+    gcs_file_saved_with_increment = False
     gcs_upload_exception = None # Store potential GCS exception

+    # Check for conflicting options
+    if overwrite_if_exists and increment_if_exists:
+        raise ValueError("When writing JSON to GCS, both overwrite and increment_if_exists cannot be True at the same time.")
+
     if isinstance(data, (list, dict)):
         data_str = json.dumps(data, indent=2)
     elif isinstance(data, str):
@@ -172,13 +169,36 @@ def write_data_to_gcs(bucket_name, storage_client, data, file_name=None,
     else:
         raise ValueError("Unsupported data type. It should be a list, dict, or str.")

+    bucket = storage_client.bucket(bucket_name)
+    base_file_name, ext = os.path.splitext(file_name)
+    increment = 0
+
     while attempts < max_retries and not success:
         try:
-
-
+            if increment_if_exists:
+                while bucket.blob(file_name).exists():
+                    gcs_file_already_exists = True
+                    increment += 1
+                    file_name = f"{base_file_name}_{increment}{ext}"
+                    gcs_file_saved_with_increment = True
+                    log_warning(f"File {file_name} already exists in bucket {bucket_name}. Writing with increment: {increment_if_exists}")
+            else:
+                blob = bucket.blob(file_name)
+
+                # Check if the file exists
+                if blob.exists():
+                    gcs_file_already_exists = True
+                    gcs_path = f"gs://{bucket_name}/{file_name}"
+                    log_message(f"File {file_name} already exists in bucket {bucket_name}. Overwriting: {overwrite_if_exists}")
+                    if not overwrite_if_exists:
+                        log_warning(f"File {file_name} already exists and overwrite is set to False. Skipping save to GCS.")
+                        break
+                    else:
+                        gcs_file_overwritten = True
+
             blob.upload_from_string(data_str, content_type='application/json')
-
-            log_message(f"Successfully saved file to GCS {
+            gcs_path = f"gs://{bucket_name}/{file_name}"
+            log_message(f"Successfully saved file to GCS {gcs_path}.")
             success = True
         except Exception as e:
             gcs_upload_exception = e
@@ -186,26 +206,46 @@ def write_data_to_gcs(bucket_name, storage_client, data, file_name=None,
             if attempts < max_retries:
                 time.sleep(2 ** attempts)
             else:
-                log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts
+                log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")

-    if not success
+    if not success or save_locally or local_path:
         try:
             if not local_path:
                 local_path_final = os.path.join("/tmp", file_name)
             else:
                 local_path_final = os.path.join(local_path, file_name)
-
-
-
+
+            if os.path.exists(local_path_final):
+                if increment_if_exists:
+                    increment = 0
+                    while os.path.exists(local_path_final):
+                        increment += 1
+                        local_path_final = os.path.join(local_path, f"{base_file_name}_{increment}{ext}")
+                    gcs_file_saved_with_increment = True
+                elif not overwrite_if_exists:
+                    log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+                    success = True
+                else:
+                    log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+
+            if not success:
+                with open(local_path_final, 'w', encoding='utf-8') as f:
+                    f.write(data_str)
+                log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+                success = True
         except Exception as local_e:
-            log_error(f"Failed to write {file_name} locally: {local_e}",exc_info=True)
-
-    # If GCS upload failed, raise a single exception here
+            log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)

     if gcs_upload_exception is not None:
-        raise gcs_upload_exception
-
-    return
+        raise gcs_upload_exception # Propagate without nesting
+
+    return {
+        "gcs_path": gcs_path,
+        "local_path": local_path_final,
+        "gcs_file_already_exists": gcs_file_already_exists,
+        "gcs_file_overwritten": gcs_file_overwritten,
+        "gcs_file_saved_with_increment": gcs_file_saved_with_increment
+    }


 def write_csv_to_gcs(bucket_name, file_name, data, storage_client, logger,log_info_verbose=True):
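A hedged usage sketch of the new write_json_to_gcs signature and its result dictionary; bucket name and payload are placeholders:

    from google.cloud import storage

    client = storage.Client()
    result = write_json_to_gcs(
        bucket_name="my-bucket",     # hypothetical
        storage_client=client,
        data={"status": "ok"},
        file_name="report.json",
        increment_if_exists=True,    # mutually exclusive with overwrite_if_exists
    )
    # Keys per the reconstructed return value above:
    print(result["gcs_path"], result["gcs_file_saved_with_increment"])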
ipulse_shared_core_ftredge/utils_templates_and_schemas.py

@@ -5,8 +5,8 @@

 import datetime
 from google.cloud import bigquery
-from ipulse_shared_core_ftredge.enums.enums_common_utils import
-from ipulse_shared_core_ftredge.utils_common import
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
+from ipulse_shared_core_ftredge.utils_common import SmartLog


 def create_bigquery_schema_from_json(json_schema):
@@ -19,55 +19,58 @@ def create_bigquery_schema_from_json(json_schema):
     return schema


-def
-
+def check_format_against_schema_template(data_to_check, schema, dt_ts_to_str=True, check_max_length=True):
     """Ensure Update dict corresponds to the config schema, ensuring proper formats and lengths."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    checked_data = {}
+    warnings_or_error = [] # Group warnings and errors for a given run
+
+    try:
+        # Process updates to conform to the schema
+        for field in schema:
+            field_name = field["name"]
+            field_type = field["type"]
+            mode = field["mode"]
+
+            # Initialize notice to None at the start of each field processing
+            warning = None
+
+            if field_name in data_to_check:
+                value = data_to_check[field_name]
+
+                # Handle date and timestamp formatting
+                if field_type == "DATE":
+                    value, warning = handle_date_fields(field_name, value, dt_ts_to_str)
+                elif field_type == "TIMESTAMP":
+                    value, warning = handle_timestamp_fields(field_name, value, dt_ts_to_str)
+                elif field_type in ["STRING", "INT64", "FLOAT64", "BOOL"]:
+                    value, warning = handle_type_conversion(field_type, field_name, value)
+
+                if warning:
+                    warnings_or_error.append(warning)
+
+                # Check and handle max length restriction
+                if check_max_length and "max_length" in field:
+                    value, warning = check_and_truncate_length(field_name, value, field["max_length"])
+                    if warning:
+                        warnings_or_error.append(warning)
+
+                # Only add to the dictionary if value is not None or the field is required
+                if value is not None or mode == "REQUIRED":
+                    checked_data[field_name] = value
+
+            elif mode == "REQUIRED":
+                warning = SmartLog(level=LogLevel.WARNING,
+                                   subject=field_name,
+                                   description=f"Required field '{field_name}' is missing in the updates.")
+                warnings_or_error.append(warning)

-
+    except Exception as e:
+        error_log = SmartLog(level=LogLevel.ERROR_EXCEPTION_REDO,
+                             subject=data_to_check,
+                             description=f"An error occurred during update check: {str(e)}")
+        warnings_or_error.append(error_log)

-    return
+    return checked_data, warnings_or_error

 def handle_date_fields(field_name, value, dt_ts_to_str):
     """Handles date fields, ensuring they are in the correct format and optionally converts them to string."""
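A hedged sketch of the renamed checker; the schema literal follows the field keys (name, type, mode, max_length) that the reconstructed body reads, and is otherwise hypothetical:

    schema = [
        {"name": "ticker", "type": "STRING", "mode": "REQUIRED", "max_length": 8},
        {"name": "close", "type": "FLOAT64", "mode": "NULLABLE"},
    ]
    data = {"ticker": "ALPHABETINC", "close": "181.5"}

    checked, issues = check_format_against_schema_template(data, schema)
    # checked -> {"ticker": "ALPHABET", "close": 181.5}
    # issues  -> one WARNING_FIX_RECOMMENDED SmartLog for the truncated ticker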
@@ -82,11 +85,11 @@ def handle_date_fields(field_name, value, dt_ts_to_str):
                 return value, None
             return parsed_date, None
         except ValueError:
-            return None,
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject=field_name,
                                   description=f"Expected a DATE in YYYY-MM-DD format but got {value}.")
     else:
-        return None,
+        return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                               subject=field_name,
                               description= f"Expected a DATE or YYYY-MM-DD str format but got {value} of type {type(value).__name__}.")

@@ -104,11 +107,11 @@ def handle_timestamp_fields(field_name, value, dt_ts_to_str):
                 return value, None
             return parsed_datetime, None
         except ValueError:
-            return None,
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject=field_name,
                                   description= f"Expected ISO format TIMESTAMP but got {value}.")
     else:
-        return None,
+        return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                               subject=field_name,
                               description= f"Expected ISO format TIMESTAMP but got {value} of type {type(value).__name__}.")

@@ -116,7 +119,7 @@ def handle_timestamp_fields(field_name, value, dt_ts_to_str):
 def check_and_truncate_length(field_name, value, max_length):
     """Checks and truncates the length of string fields if they exceed the max length."""
     if isinstance(value, str) and len(value) > max_length:
-        return value[:max_length],
+        return value[:max_length], SmartLog(level=LogLevel.WARNING_FIX_RECOMMENDED,
                                             subject= field_name,
                                             description= f"Field exceeds max length: {len(value)}/{max_length}. Truncating.")

@@ -126,7 +129,7 @@ def check_and_truncate_length(field_name, value, max_length):

 def handle_type_conversion(field_type, field_name, value):
     if field_type == "STRING" and not isinstance(value, str):
-        return str(value),
+        return str(value), SmartLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                            subject=field_name,
                            description= f"Expected STRING but got {value} of type {type(value).__name__}.")

@@ -134,18 +137,18 @@ def handle_type_conversion(field_type, field_name, value):
         try:
             return int(value), None
         except ValueError:
-            return None,
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                          subject= field_name,
                          description=f"Expected INTEGER, but got {value} of type {type(value).__name__}.")
     if field_type == "FLOAT64" and not isinstance(value, float):
         try:
             return float(value), None
         except ValueError:
-            return None,
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                          subject=field_name,
                          description=f"Expected FLOAT, but got {value} of type {type(value).__name__}.")
     if field_type == "BOOL" and not isinstance(value, bool):
-        return bool(value),
+        return bool(value), SmartLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                             subject=field_name,
                             description=f"Expected BOOL, but got {value}. Converting as {bool(value)}.")

{ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipulse_shared_core_ftredge
-Version: 2.51
+Version: 2.53
 Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
 Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
 Author: Russlan Ramdowar
{ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/RECORD
RENAMED

@@ -1,9 +1,9 @@
-ipulse_shared_core_ftredge/__init__.py,sha256=
-ipulse_shared_core_ftredge/utils_common.py,sha256=
-ipulse_shared_core_ftredge/utils_gcp.py,sha256=
-ipulse_shared_core_ftredge/utils_templates_and_schemas.py,sha256=
-ipulse_shared_core_ftredge/enums/__init__.py,sha256=
-ipulse_shared_core_ftredge/enums/enums_common_utils.py,sha256=
+ipulse_shared_core_ftredge/__init__.py,sha256=CcHx8XkC7YJ5pOxsOpZJrTuxweN1ya1WlQJZjOTwrBY,868
+ipulse_shared_core_ftredge/utils_common.py,sha256=GEo4Xilh9quDdUh_ppOVO6G7ustHWkSaxuILKC_FLNo,27406
+ipulse_shared_core_ftredge/utils_gcp.py,sha256=8KgsOPkLe1-1i3M_UX5niKg_CjjiNoUhZXiWFIHJdmY,11286
+ipulse_shared_core_ftredge/utils_templates_and_schemas.py,sha256=CHrFbhRVrXlqDzGdPe9nujn5uFQtIN2xW7RBTiHYFBc,7475
+ipulse_shared_core_ftredge/enums/__init__.py,sha256=PT8Ig7hcx_hhVlsfun24H0pFjbdfQb201ZtJplQ9uAE,844
+ipulse_shared_core_ftredge/enums/enums_common_utils.py,sha256=CB0IMW5aer-n50G3AM6Fz-NrN85mJkvZhSrnuUb7EMs,5702
 ipulse_shared_core_ftredge/enums/enums_data_eng.py,sha256=2i6Qo6Yi_j_O9xxnOD6QA-r0Cv7mWAUaKUx907XMRio,1825
 ipulse_shared_core_ftredge/enums/enums_module_fincore.py,sha256=MuqQg249clrWUOBb1S-iPsoOldN2_F3ohRQizbjhwG0,1374
 ipulse_shared_core_ftredge/enums/enums_modules.py,sha256=AyXUoNmR75DZLaEHi3snV6LngR25LeZRqzrLDaAupbY,1244
@@ -18,8 +18,8 @@ ipulse_shared_core_ftredge/models/user_profile_update.py,sha256=oKK0XsQDKkgDvjFP
 ipulse_shared_core_ftredge/models/user_status.py,sha256=8TyRd8tBK9_xb0MPKbI5pn9-lX7ovKbeiuWYYPtIOiw,3202
 ipulse_shared_core_ftredge/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ipulse_shared_core_ftredge/tests/test.py,sha256=0lS8HP5Quo_BqNoscU40qOH9aJRaa1Pfam5VUBmdld8,682
-ipulse_shared_core_ftredge-2.51.dist-info/LICENCE,sha256=
-ipulse_shared_core_ftredge-2.51.dist-info/METADATA,sha256=
-ipulse_shared_core_ftredge-2.51.dist-info/WHEEL,sha256=
-ipulse_shared_core_ftredge-2.51.dist-info/top_level.txt,sha256=
-ipulse_shared_core_ftredge-2.51.dist-info/RECORD,,
+ipulse_shared_core_ftredge-2.53.dist-info/LICENCE,sha256=YBtYAXNqCCOo9Mr2hfkbSPAM9CeAr2j1VZBSwQTrNwE,1060
+ipulse_shared_core_ftredge-2.53.dist-info/METADATA,sha256=9JixJKcqPsiCzRQR6ZpOiKwDIOcjEHFY6OG-VILJ_zg,561
+ipulse_shared_core_ftredge-2.53.dist-info/WHEEL,sha256=rWxmBtp7hEUqVLOnTaDOPpR-cZpCDkzhhcBce-Zyd5k,91
+ipulse_shared_core_ftredge-2.53.dist-info/top_level.txt,sha256=8sgYrptpexkA_6_HyGvho26cVFH9kmtGvaK8tHbsGHk,27
+ipulse_shared_core_ftredge-2.53.dist-info/RECORD,,
{ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/LICENCE
RENAMED
File without changes

{ipulse_shared_core_ftredge-2.51.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/top_level.txt
RENAMED
File without changes