ipulse-shared-core-ftredge 2.52-py3-none-any.whl → 2.53-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ipulse-shared-core-ftredge might be problematic.
- ipulse_shared_core_ftredge/__init__.py +3 -3
- ipulse_shared_core_ftredge/enums/__init__.py +2 -2
- ipulse_shared_core_ftredge/enums/enums_common_utils.py +19 -9
- ipulse_shared_core_ftredge/utils_common.py +381 -101
- ipulse_shared_core_ftredge/utils_gcp.py +64 -19
- ipulse_shared_core_ftredge/utils_templates_and_schemas.py +60 -57
- {ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/METADATA +1 -1
- {ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/RECORD +11 -11
- {ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/WHEEL +0 -0
- {ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/top_level.txt +0 -0
ipulse_shared_core_ftredge/__init__.py

@@ -4,10 +4,10 @@ from .utils_gcp import (setup_gcp_logger_and_error_report,
                         read_csv_from_gcs, read_json_from_gcs,
                         write_csv_to_gcs, write_json_to_gcs)
 from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
-…
-from .utils_common import (…
+                                          check_format_against_schema_template)
+from .utils_common import (SmartLog, Watcher)
 
-from .enums import (…
+from .enums import (WatcherCategory, LogLevel, Unit, Frequency,
                     Module, SubModule, BaseDataCategory,
                     FinCoreCategory, FincCoreSubCategory,
                     FinCoreRecordsCategory, ExchangeOrPublisher,
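With these re-exports in place, a consumer of 2.53 imports the renamed classes straight from the package root. A minimal sketch (assuming the 2.53 wheel is installed):

    from ipulse_shared_core_ftredge import SmartLog, Watcher, WatcherCategory, LogLevel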
ipulse_shared_core_ftredge/enums/enums_common_utils.py

@@ -6,11 +6,17 @@
 from enum import Enum
 
 
-class …
-… (4 removed lines, truncated by the diff viewer)
+class WatcherCategory(Enum):
+    MIXED="watcher_mixed"
+    SUCCESSES = "watcher_successes"
+    NOTICES = "watcher_notices"
+    SUCCESSES_AND_NOTICES = "watcher_succs_n_notcs"
+    WARNINGS = "watcher_warnings"
+    WARNINGS_AND_ERRORS = "watcher_warns_n_errs"
+    ERRORS = "watcher_errors"
+
+
+class LogLevel(Enum):
     """
     Standardized notice levels for data engineering pipelines,
     designed for easy analysis and identification of manual

@@ -19,12 +25,16 @@ class NoticeLevel(Enum):
     DEBUG = 100 # Detailed debug information (for development/troubleshooting)
 
     INFO = 200
+    SUCCESS = 201
 
-…
+    NOTICE = 300 # Maybe same file or data already fully or partially exists
+    NOTICE_ALREADY_EXISTS = 301 # Data already exists, no action required
+    NOTICE_PARTIAL_EXISTS = 302 # Partial data exists, no action required
+    NOTICE_CANCELLED = 303 # Data processing cancelled, no action required
 
     # Warnings indicate potential issues that might require attention:
     WARNING = 400 # General warning, no immediate action required
-    WARNING_NO_ACTION = 401 # Minor issue or Unexpected Behavior, no immediate action required (can be logged frequently)
+    # WARNING_NO_ACTION = 401 # Minor issue or Unexpected Behavior, no immediate action required (can be logged frequently)
     WARNING_REVIEW_RECOMMENDED = 402 # Action recommended to prevent potential future issues
     WARNING_FIX_RECOMMENDED = 403 # Action recommended to prevent potential future issues
     WARNING_FIX_REQUIRED = 404 # Action required, pipeline can likely continue

@@ -46,16 +56,16 @@ class NoticeLevel(Enum):
     UNKNOWN=1001 # Unknown error, should not be used in normal operation
 
 
-class …
+class LogStatus(Enum):
     OPEN = "open"
     ACKNOWLEDGED = "acknowledged"
     IN_PROGRESS = "in_progress"
     RESOLVED = "resolved"
     IGNORED = "ignored"
+    CANCELLED = "cancelled"
 
 
 
-…
 ### Exception during full exection, partially saved
 # Exception during ensemble pipeline; modifications collected in local object , nothing persisted
 # Exception during ensemble pipeline; modifications persisted , metadata failed
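The level codes are deliberately band-structured — 2xx successes, 3xx notices, 4xx warnings, higher codes errors — which is what lets the Watcher in utils_common.py classify logs with plain integer comparisons. A short illustrative sketch of that banding; the ERROR members fall outside the shown hunks, so the 500 value here is an assumption:

    from enum import Enum

    class LogLevel(Enum):
        DEBUG = 100
        INFO = 200
        SUCCESS = 201
        NOTICE = 300
        WARNING = 400
        ERROR = 500  # assumed value; the ERROR members are not shown in this diff

    def band(level: LogLevel) -> str:
        # Mirrors Watcher._update_counts: compare a code against the band start codes.
        code = level.value
        if code >= LogLevel.ERROR.value:
            return "errors"
        if code >= LogLevel.WARNING.value:
            return "warnings"
        if code >= LogLevel.NOTICE.value:
            return "notices"
        if code >= LogLevel.SUCCESS.value:
            return "successes"
        return "info"

    print(band(LogLevel.NOTICE))  # -> "notices"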
ipulse_shared_core_ftredge/utils_common.py

@@ -10,18 +10,18 @@ from datetime import datetime, timezone
 from contextlib import contextmanager
 from typing import List
 from google.cloud import logging as cloudlogging
-from ipulse_shared_core_ftredge.enums.enums_common_utils import …
+from ipulse_shared_core_ftredge.enums.enums_common_utils import WatcherCategory, LogLevel, LogStatus
 from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs
 
 
 # ["data_import","data_quality", "data_processing","data_general","data_persistance","metadata_quality", "metadata_processing", "metadata_persistance","metadata_general"]
 
-class …
+class SmartLog:
     MAX_TRACEBACK_LINES = 14 # Define the maximum number of traceback lines to include
-    def __init__(self, level: …
+    def __init__(self, level: LogLevel, start_context: str = None, collector_id: str = None,
                  e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
                  subject: str = None, description: str = None, context: str = None,
-…
+                 log_status: LogStatus = LogStatus.OPEN):
         if e is not None:
             e_type = type(e).__name__ if e_type is None else e_type
             e_message = str(e) if e_message is None else e_message

@@ -34,11 +34,11 @@ class Notice:
         self.description = description
         self._start_context = start_context
         self._context = context
-        self.…
+        self.collector_id = collector_id
         self.exception_type = e_type
         self.exception_message = e_message
         self.exception_traceback = self._format_traceback(e_traceback,e_message)
-        self.…
+        self.log_status = log_status
         self.timestamp = datetime.now(timezone.utc).isoformat()
 
     def _format_traceback(self, e_traceback, e_message):

@@ -83,7 +83,7 @@ class Notice:
         else:
             formatted_traceback = '\n'.join(combined_lines)
 
-        return formatted_traceback
+        return formatted_traceback
 
     @property
     def start_context(self):

@@ -112,24 +112,26 @@ class Notice:
             "exception_type": self.exception_type,
             "exception_message": self.exception_message,
             "exception_traceback": self.exception_traceback,
-            "…
-            "…
+            "log_status": self.log_status.value,
+            "collector_id": self.collector_id,
             "timestamp": self.timestamp
         }
 
-class …
-…
-…
-…
+class Watcher:
+    ERROR_START_CODE = LogLevel.ERROR.value
+    WARNING_START_CODE = LogLevel.WARNING.value
+    NOTICE_START_CODE = LogLevel.NOTICE.value
+    SUCCESS_START_CODE = LogLevel.SUCCESS.value
 
-    def __init__(self, start_context: str, category: …
+    def __init__(self, start_context: str, category: WatcherCategory = WatcherCategory.MIXED, logger_name=None):
         self._id = str(uuid.uuid4())
-        self.…
+        self._logs = []
         self._early_stop = False
         self._errors_count = 0
         self._warnings_count = 0
+        self._notices_count = 0
         self._successes_count = 0
-        self._level_counts = {level.name: 0 for level in …
+        self._level_counts = {level.name: 0 for level in LogLevel}
         self._start_context = start_context
         self._context_stack = []
         self._category = category.value

@@ -141,7 +143,6 @@ class NoticesManager:
             return logging_client.logger(logger_name)
         return None
 
-
     @contextmanager
     def context(self, context):
         self.push_context(context)

@@ -173,14 +174,14 @@ class NoticesManager:
     def early_stop(self):
         return self._early_stop
 
-    def set_early_stop(self, max_errors_tolerance:int, …
-        self.…
-        if …
+    def set_early_stop(self, max_errors_tolerance: int, create_error_log=True, pop_context=False):
+        self._early_stop = True
+        if create_error_log:
             if pop_context:
                 self.pop_context()
-            self.…
-…
-…
+            self.add_log(SmartLog(level=LogLevel.ERROR,
+                                  subject="EARLY_STOP",
+                                  description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
 
     def reset_early_stop(self):
         self._early_stop = False

@@ -188,47 +189,52 @@ class NoticesManager:
     def get_early_stop(self):
         return self._early_stop
 
-    def …
-        if (self._category == …
-        (self._category == …
-            raise ValueError(f"Invalid …
-… (4 removed lines, truncated by the diff viewer)
-        self.…
-        self._update_counts(…
+    def add_log(self, log: SmartLog):
+        if (self._category == WatcherCategory.SUCCESSES and log.level >=self.NOTICE_START_CODE) or \
+           (self._category == WatcherCategory.WARNINGS_AND_ERRORS and log.level.value < self.WARNING_START_CODE):
+            raise ValueError(f"Invalid log level {log.level.name} for category {self._category}")
+        log.start_context = self.start_context
+        log.context = self.current_context
+        log.collector_id = self.id
+        log_dict = log.to_dict()
+        self._logs.append(log_dict)
+        self._update_counts(log_dict)
 
         if self._logger:
-…
-…
+            # We specifically want to avoid having an ERROR log level for this structured Watcher reporting, to ensure Errors are alerting on Critical Application Services.
+            # A single ERROR log level can be used for the entire pipeline, which shall be used at the end of the pipeline
+            if log.level.value >= self.WARNING_START_CODE:
+                self._logger.log_struct(log_dict, severity="WARNING")
+            elif log.level.value >= self.NOTICE_START_CODE:
+                self._logger.log_struct(log_dict, severity="NOTICE")
             else:
-                self._logger.log_struct(…
+                self._logger.log_struct(log_dict, severity="INFO")
 
-    def …
-        for …
-            self.…
+    def add_logs(self, logs: List[SmartLog]):
+        for log in logs:
+            self.add_log(log)
 
-    def …
-        self.…
+    def clear_logs_and_counts(self):
+        self._logs = []
         self._errors_count = 0
         self._warnings_count = 0
+        self._notices_count = 0
         self._successes_count = 0
-        self._level_counts = {level.name: 0 for level in …
+        self._level_counts = {level.name: 0 for level in LogLevel}
 
-    def …
-        self.…
+    def clear_logs(self):
+        self._logs = []
 
-    def …
-        return self.…
+    def get_all_logs(self):
+        return self._logs
 
-    def …
-        return […
+    def get_logs_for_level(self, level: LogLevel):
+        return [log for log in self._logs if log["level_code"] == level.value]
 
-    def …
+    def get_logs_by_str_in_context(self, context_substring: str):
         return [
-…
-            if context_substring in …
+            log for log in self._logs
+            if context_substring in log["context"]
         ]
 
     def contains_errors(self):

@@ -242,64 +248,73 @@ class NoticesManager:
 
     def count_warnings_and_errors(self):
         return self._warnings_count + self._errors_count
-
+
     def count_warnings(self):
         return self._warnings_count
 
+    def count_notices(self):
+        return self._notices_count
+
     def count_successes(self):
         return self._successes_count
 
-    def …
-        return len(self.…
+    def count_all_logs(self):
+        return len(self._logs)
 
-    def …
+    def count_logs_by_level(self, level: LogLevel):
         return self._level_counts.get(level.name, 0)
 
-    def …
+    def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
         return sum(
-            1 for …
-            if (…
-            (level_code_min is None or …
-            (level_code_max is None or …
+            1 for log in self._logs
+            if (log["context"] == context_substring if exact_match else context_substring in log["context"]) and
+            (level_code_min is None or log["level_code"] >= level_code_min) and
+            (level_code_max is None or log["level_code"] <= level_code_max)
         )
 
-    def …
-        return self.…
+    def count_logs_for_current_context(self):
+        return self._count_logs(self.current_context, exact_match=True)
 
-    def …
-        return self.…
+    def count_logs_for_current_and_nested_contexts(self):
+        return self._count_logs(self.current_context)
 
-    def …
-        return self.…
+    def count_logs_by_level_for_current_context(self, level: LogLevel):
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)
 
-    def …
-        return self.…
+    def count_logs_by_level_for_current_and_nested_contexts(self, level: LogLevel):
+        return self._count_logs(self.current_context, level_code_min=level.value, level_code_max=level.value)
 
     def count_errors_for_current_context(self):
-        return self.…
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)
 
     def count_errors_for_current_and_nested_contexts(self):
-        return self.…
+        return self._count_logs(self.current_context, level_code_min=self.ERROR_START_CODE)
 
     def count_warnings_and_errors_for_current_context(self):
-        return self.…
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)
 
     def count_warnings_and_errors_for_current_and_nested_contexts(self):
-        return self.…
+        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE)
 
     def count_warnings_for_current_context(self):
-        return self.…
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
 
     def count_warnings_for_current_and_nested_contexts(self):
-        return self.…
+        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+    def count_notices_for_current_context(self):
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)
+
+    def count_notices_for_current_and_nested_contexts(self):
+        return self._count_logs(self.current_context, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)
 
     def count_successes_for_current_context(self):
-        return self.…
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)
 
     def count_successes_for_current_and_nested_contexts(self):
-        return self.…
+        return self._count_logs(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)
 
-    def …
+    def export_logs_to_gcs_file(self, bucket_name, storage_client, file_prefix=None, file_name=None, top_level_context=None, save_locally=False, local_path=None, logger=None, max_retries=2):
         def log_message(message):
             if logger:
                 logger.info(message)

@@ -313,30 +328,30 @@ class NoticesManager:
         if not file_name:
             timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
             if top_level_context:
-                file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self.…
+                file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self._logs)}.json"
             else:
-                file_name = f"{file_prefix}_{timestamp}_len{len(self.…
+                file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"
 
-        result=None
+        result = None
         try:
-            result= write_json_to_gcs(
+            result = write_json_to_gcs(
                 bucket_name=bucket_name,
                 storage_client=storage_client,
-                data=self.…
+                data=self._logs,
                 file_name=file_name,
                 save_locally=save_locally,
                 local_path=local_path,
                 logger=logger,
                 max_retries=max_retries,
-…
+                overwrite_if_exists=False
             )
-            log_message(f"{file_prefix} successfully saved (…
+            log_message(f"{file_prefix} successfully saved (overwritten={result.get('gcs_file_overwritten')}) to GCS at {result.get('gcs_path')} and locally at {result.get('local_path')}.")
         except Exception as e:
-            log_error(f"Failed at …
+            log_error(f"Failed at export_logs_to_gcs_file for {file_prefix} for file {file_name} to bucket {bucket_name}: {type(e).__name__} - {str(e)}")
 
         return result
 
-    def …
+    def import_logs_from_json(self, json_or_file, logger=None):
        def log_message(message):
             if logger:
                 logger.info(message)

@@ -347,31 +362,296 @@
 
         try:
             if isinstance(json_or_file, str): # Load from string
-…
+                imported_logs = json.loads(json_or_file)
             elif hasattr(json_or_file, 'read'): # Load from file-like object
-…
-            self.…
-            log_message("Successfully imported …
+                imported_logs = json.load(json_or_file)
+            self.add_logs(imported_logs)
+            log_message("Successfully imported logs from json.")
         except Exception as e:
-            log_warning(f"Failed to import …
+            log_warning(f"Failed to import logs from json: {type(e).__name__} - {str(e)}", exc_info=True)
 
-    def _update_counts(self, …
-        level_code = …
-        level_name = …
+    def _update_counts(self, log, remove=False):
+        level_code = log["level_code"]
+        level_name = log["level_name"]
 
         if remove:
-            if level_code >= self.…
+            if level_code >= self.ERROR_START_CODE:
                 self._errors_count -= 1
-            elif level_code …
+            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
                 self._warnings_count -= 1
-            elif level_code …
+            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
+                self._notices_count -= 1
+            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                 self._successes_count -= 1
             self._level_counts[level_name] -= 1
         else:
-            if level_code >= self.…
+            if level_code >= self.ERROR_START_CODE:
                 self._errors_count += 1
-            elif level_code …
+            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
                 self._warnings_count += 1
-            elif level_code …
+            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
+                self._notices_count += 1
+            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                 self._successes_count += 1
-            self._level_counts[level_name] += 1
+            self._level_counts[level_name] += 1
+
+
+# class Watcher:
+#     ERROR_START_CODE = WatcherLogLevel.ERROR.value
+#     WARNING_START_CODE = WatcherLogLevel.WARNING.value
+#     NOTICE_START_CODE = WatcherLogLevel.NOTICE.value
+#     SUCCESS_START_CODE = WatcherLogLevel.SUCCESS.value
+
+#     def __init__(self, start_context: str, category: WatcherCategory = WatcherCategory.MIXED, logger_name=None):
+#         self._id = str(uuid.uuid4())
+#         self._logs = []
+#         self._early_stop = False
+#         self._errors_count = 0
+#         self._warnings_count = 0
+#         self._successes_count = 0
+#         self._level_counts = {level.name: 0 for level in WatcherLogLevel}
+#         self._start_context = start_context
+#         self._context_stack = []
+#         self._category = category.value
+#         self._logger = self._initialize_logger(logger_name)
+
+#     def _initialize_logger(self, logger_name):
+#         if logger_name:
+#             logging_client = cloudlogging.Client()
+#             return logging_client.logger(logger_name)
+#         return None
+
+
+#     @contextmanager
+#     def context(self, context):
+#         self.push_context(context)
+#         try:
+#             yield
+#         finally:
+#             self.pop_context()
+
+#     def push_context(self, context):
+#         self._context_stack.append(context)
+
+#     def pop_context(self):
+#         if self._context_stack:
+#             self._context_stack.pop()
+
+#     @property
+#     def current_context(self):
+#         return " >> ".join(self._context_stack)
+
+#     @property
+#     def start_context(self):
+#         return self._start_context
+
+#     @property
+#     def id(self):
+#         return self._id
+
+#     @property
+#     def early_stop(self):
+#         return self._early_stop
+
+#     def set_early_stop(self, max_errors_tolerance:int, create_error_notice=True,pop_context=False):
+#         self.early_stop = True
+#         if create_error_notice:
+#             if pop_context:
+#                 self.pop_context()
+#             self.add_notice(WatcherLog(level=WatcherLogLevel.ERROR,
+#                                 subject="EARLY_STOP",
+#                                 description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
+
+#     def reset_early_stop(self):
+#         self._early_stop = False
+
+#     def get_early_stop(self):
+#         return self._early_stop
+
+#     def add_notice(self, notice: WatcherLog):
+#         if (self._category == WatcherCategory.SUCCESSES.value and notice.level != WatcherLogLevel.SUCCESS) or \
+#            (self._category == WatcherCategory.WARNINGS_AND_ERRORS.value and notice.level.value < self.WARNING_START_CODE):
+#             raise ValueError(f"Invalid notice level {notice.level.name} for category {self._category}")
+#         notice.start_context = self.start_context
+#         notice.context = self.current_context
+#         notice.watcher_id = self.id
+#         notice_dict = notice.to_dict()
+#         self._logs.append(notice_dict)
+#         self._update_counts(notice_dict)
+
+#         if self._logger:
+#             if notice.level.value >= self.WARNING_START_CODE:
+#                 self._logger.log_struct(notice_dict, severity="WARNING")
+#             else:
+#                 self._logger.log_struct(notice_dict, severity="INFO")
+
+#     def add_notices(self, notices: List[WatcherLog]):
+#         for notice in notices:
+#             self.add_notice(notice)
+
+#     def clear_notices_and_counts(self):
+#         self._logs = []
+#         self._errors_count = 0
+#         self._warnings_count = 0
+#         self._successes_count = 0
+#         self._level_counts = {level.name: 0 for level in WatcherLogLevel}
+
+#     def clear_notices(self):
+#         self._logs = []
+
+#     def get_all_notices(self):
+#         return self._logs
+
+#     def get_notices_for_level(self, level: WatcherLogLevel):
+#         return [notice for notice in self._logs if notice["level_code"] == level.value]
+
+#     def get_notices_by_str_in_context(self, context_substring: str):
+#         return [
+#             notice for notice in self._logs
+#             if context_substring in notice["context"]
+#         ]
+
+#     def contains_errors(self):
+#         return self._errors_count > 0
+
+#     def count_errors(self):
+#         return self._errors_count
+
+#     def contains_warnings_or_errors(self):
+#         return self._warnings_count > 0 or self._errors_count > 0
+
+#     def count_warnings_and_errors(self):
+#         return self._warnings_count + self._errors_count
+
+#     def count_warnings(self):
+#         return self._warnings_count
+
+#     def count_successes(self):
+#         return self._successes_count
+
+#     def count_all_notices(self):
+#         return len(self._logs)
+
+#     def count_notices_by_level(self, level: WatcherLogLevel):
+#         return self._level_counts.get(level.name, 0)
+
+#     def _count_notices(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
+#         return sum(
+#             1 for notice in self._logs
+#             if (notice["context"] == context_substring if exact_match else context_substring in notice["context"]) and
+#             (level_code_min is None or notice["level_code"] >= level_code_min) and
+#             (level_code_max is None or notice["level_code"] <= level_code_max)
+#         )
+
+#     def count_notices_for_current_context(self):
+#         return self._count_notices(self.current_context, exact_match=True)
+
+#     def count_notices_for_current_and_nested_contexts(self):
+#         return self._count_notices(self.current_context)
+
+#     def count_notices_by_level_for_current_context(self, level: WatcherLogLevel):
+#         return self._count_notices(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)
+
+#     def count_notices_by_level_for_current_and_nested_contexts(self, level: WatcherLogLevel):
+#         return self._count_notices(self.current_context, level_code_min=level.value, level_code_max=level.value)
+
+#     def count_errors_for_current_context(self):
+#         return self._count_notices(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)
+
+#     def count_errors_for_current_and_nested_contexts(self):
+#         return self._count_notices(self.current_context, level_code_min=self.ERROR_START_CODE)
+
+#     def count_warnings_and_errors_for_current_context(self):
+#         return self._count_notices(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)
+
+#     def count_warnings_and_errors_for_current_and_nested_contexts(self):
+#         return self._count_notices(self.current_context, level_code_min=self.WARNING_START_CODE)
+
+#     def count_warnings_for_current_context(self):
+#         return self._count_notices(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+#     def count_warnings_for_current_and_nested_contexts(self):
+#         return self._count_notices(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)
+
+#     def count_successes_for_current_context(self):
+#         return self._count_notices(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.SUCCESS_START_CODE)
+
+#     def count_successes_for_current_and_nested_contexts(self):
+#         return self._count_notices(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.SUCCESS_START_CODE)
+
+#     def export_notices_to_gcs_file(self, bucket_name, storage_client, file_prefix=None, file_name=None, top_level_context=None, save_locally=False, local_path=None, logger=None, max_retries=2):
+#         def log_message(message):
+#             if logger:
+#                 logger.info(message)
+
+#         def log_error(message, exc_info=False):
+#             if logger:
+#                 logger.error(message, exc_info=exc_info)
+
+#         if not file_prefix:
+#             file_prefix = self._category
+#         if not file_name:
+#             timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+#             if top_level_context:
+#                 file_name = f"{file_prefix}_{timestamp}_{top_level_context}_len{len(self._logs)}.json"
+#             else:
+#                 file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"
+
+#         result=None
+#         try:
+#             result= write_json_to_gcs(
+#                 bucket_name=bucket_name,
+#                 storage_client=storage_client,
+#                 data=self._logs,
+#                 file_name=file_name,
+#                 save_locally=save_locally,
+#                 local_path=local_path,
+#                 logger=logger,
+#                 max_retries=max_retries,
+#                 overwrite_gcs=False
+#             )
+#             log_message(f"{file_prefix} successfully saved (ovewritten={result.get("gcs_file_overwritten")}) to GCS at {result.get("gcs_path")} and locally at {result.get("local_path")}.")
+#         except Exception as e:
+#             log_error(f"Failed at export_notices_to_gcs_file for {file_prefix} for file {file_name} to bucket {bucket_name}: {type(e).__name__} - {str(e)}")
+
+#         return result
+
+#     def import_notices_from_json(self, json_or_file, logger=None):
+#         def log_message(message):
+#             if logger:
+#                 logger.info(message)
+
+#         def log_warning(message, exc_info=False):
+#             if logger:
+#                 logger.warning(message, exc_info=exc_info)
+
+#         try:
+#             if isinstance(json_or_file, str): # Load from string
+#                 imported_notices = json.loads(json_or_file)
+#             elif hasattr(json_or_file, 'read'): # Load from file-like object
+#                 imported_notices = json.load(json_or_file)
+#             self.add_notices(imported_notices)
+#             log_message("Successfully imported notices from json.")
+#         except Exception as e:
+#             log_warning(f"Failed to import notices from json: {type(e).__name__} - {str(e)}", exc_info=True)
+
+#     def _update_counts(self, notice, remove=False):
+#         level_code = notice["level_code"]
+#         level_name = notice["level_name"]
+
+#         if remove:
+#             if level_code >= self.ERROR_START_CODE:
+#                 self._errors_count -= 1
+#             elif level_code >= self.WARNING_START_CODE:
+#                 self._warnings_count -= 1
+#             elif level_code >= self.SUCCESS_START_CODE:
+#                 self._successes_count -= 1
+#             self._level_counts[level_name] -= 1
+#         else:
+#             if level_code >= self.ERROR_START_CODE:
+#                 self._errors_count += 1
+#             elif level_code >= self.WARNING_START_CODE:
+#                 self._warnings_count += 1
+#             elif level_code == self.SUCCESS_START_CODE:
+#                 self._successes_count += 1
+#             self._level_counts[level_name] += 1
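Net effect of the utils_common.py changes: Notice becomes SmartLog, NoticesManager becomes Watcher, watcher_id becomes collector_id, a NOTICE band is counted separately, and NOTICE/WARNING/INFO severities are mapped onto Cloud Logging log_struct calls. A hedged usage sketch of the new surface as it appears in this diff (no logger_name, so no Cloud Logging client is created; the context joining and the SmartLog property setters used by add_log sit outside the shown hunks, so their exact behavior is inferred from the commented legacy code):

    from ipulse_shared_core_ftredge.utils_common import SmartLog, Watcher
    from ipulse_shared_core_ftredge.enums.enums_common_utils import WatcherCategory, LogLevel

    watcher = Watcher(start_context="daily_prices_import", category=WatcherCategory.MIXED)
    with watcher.context("download"):
        with watcher.context("AAPL"):
            # Nested contexts appear joined (e.g. "download >> AAPL") on the stored log dict.
            watcher.add_log(SmartLog(level=LogLevel.NOTICE_ALREADY_EXISTS,
                                     subject="AAPL_2024.csv",
                                     description="File already present, skipping."))

    print(watcher.count_notices())              # 1  (301 falls in the 3xx notice band)
    print(watcher.count_warnings_and_errors())  # 0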
ipulse_shared_core_ftredge/utils_gcp.py

@@ -122,8 +122,9 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):
 
 
 
-def write_json_to_gcs(bucket_name, storage_client, data, file_name…
-                      save_locally=False, local_path=None, logger=None, max_retries=…
+def write_json_to_gcs(bucket_name, storage_client, data, file_name,
+                      save_locally=False, local_path=None, logger=None, max_retries=2,
+                      overwrite_if_exists=False, increment_if_exists=False):
     """Saves data to Google Cloud Storage and optionally locally.
 
     This function attempts to upload data to GCS. If the upload fails after

@@ -132,24 +133,35 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name=None,
 
     Returns:
         dict: A dictionary containing the GCS path (or None if upload failed),
-        the local path (or None if not saved locally),…
+        the local path (or None if not saved locally), a boolean indicating if the file was overwritten,
+        a boolean indicating if the file already existed, and a boolean indicating if the file was saved with an incremented name.
     """
 
     def log_message(message):
         if logger:
             logger.info(message)
 
-    def log_error(message,exc_info=False):
+    def log_error(message, exc_info=False):
         if logger:
             logger.error(message, exc_info=exc_info)
 
+    def log_warning(message):
+        if logger:
+            logger.warning(message)
+
     attempts = 0
     success = False
     gcs_path = None
     local_path_final = None
-…
+    gcs_file_overwritten = False
+    gcs_file_already_exists = False
+    gcs_file_saved_with_increment = False
     gcs_upload_exception = None # Store potential GCS exception
 
+    # Check for conflicting options
+    if overwrite_if_exists and increment_if_exists:
+        raise ValueError("When writing JSON to GCS, both overwrite and increment_if_exists cannot be True at the same time.")
+
     if isinstance(data, (list, dict)):
         data_str = json.dumps(data, indent=2)
     elif isinstance(data, str):

@@ -157,17 +169,32 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name=None,
     else:
         raise ValueError("Unsupported data type. It should be a list, dict, or str.")
 
+    bucket = storage_client.bucket(bucket_name)
+    base_file_name, ext = os.path.splitext(file_name)
+    increment = 0
+
     while attempts < max_retries and not success:
         try:
-… (9 removed lines, truncated by the diff viewer)
+            if increment_if_exists:
+                while bucket.blob(file_name).exists():
+                    gcs_file_already_exists = True
+                    increment += 1
+                    file_name = f"{base_file_name}_{increment}{ext}"
+                    gcs_file_saved_with_increment = True
+                    log_warning(f"File {file_name} already exists in bucket {bucket_name}. Writing with increment: {increment_if_exists}")
+            else:
+                blob = bucket.blob(file_name)
+
+                # Check if the file exists
+                if blob.exists():
+                    gcs_file_already_exists = True
+                    gcs_path = f"gs://{bucket_name}/{file_name}"
+                    log_message(f"File {file_name} already exists in bucket {bucket_name}. Overwriting: {overwrite_if_exists}")
+                    if not overwrite_if_exists:
+                        log_warning(f"File {file_name} already exists and overwrite is set to False. Skipping save to GCS.")
+                        break
+                    else:
+                        gcs_file_overwritten = True
 
             blob.upload_from_string(data_str, content_type='application/json')
             gcs_path = f"gs://{bucket_name}/{file_name}"

@@ -181,15 +208,31 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name=None,
         else:
             log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")
 
-    if not success…
+    if not success or save_locally or local_path:
         try:
             if not local_path:
                 local_path_final = os.path.join("/tmp", file_name)
             else:
                 local_path_final = os.path.join(local_path, file_name)
-…
-…
-…
+
+            if os.path.exists(local_path_final):
+                if increment_if_exists:
+                    increment = 0
+                    while os.path.exists(local_path_final):
+                        increment += 1
+                        local_path_final = os.path.join(local_path, f"{base_file_name}_{increment}{ext}")
+                    gcs_file_saved_with_increment = True
+                elif not overwrite_if_exists:
+                    log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+                    success = True
+                else:
+                    log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+
+            if not success:
+                with open(local_path_final, 'w', encoding='utf-8') as f:
+                    f.write(data_str)
+                log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+                success = True
         except Exception as local_e:
             log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)
 

@@ -199,7 +242,9 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name=None,
     return {
         "gcs_path": gcs_path,
         "local_path": local_path_final,
-        "…
+        "gcs_file_already_exists": gcs_file_already_exists,
+        "gcs_file_overwritten": gcs_file_overwritten,
+        "gcs_file_saved_with_increment": gcs_file_saved_with_increment
     }
 
 
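write_json_to_gcs now refuses overwrite_if_exists and increment_if_exists together, and reports what happened through three new booleans in its result dict. A sketch of a call under the new signature (the bucket name is a placeholder, and the expected result shown in the comment is an assumption based on the hunks above):

    from google.cloud import storage
    from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs

    client = storage.Client()
    result = write_json_to_gcs(
        bucket_name="my-pipeline-logs",  # hypothetical bucket
        storage_client=client,
        data={"status": "ok"},
        file_name="run_report.json",
        increment_if_exists=True,  # also setting overwrite_if_exists=True would raise ValueError
    )
    # If run_report.json already existed, expect something like:
    # {"gcs_path": "gs://my-pipeline-logs/run_report_1.json", "local_path": None,
    #  "gcs_file_already_exists": True, "gcs_file_overwritten": False,
    #  "gcs_file_saved_with_increment": True}
    print(result["gcs_file_saved_with_increment"])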
ipulse_shared_core_ftredge/utils_templates_and_schemas.py

@@ -5,8 +5,8 @@
 
 import datetime
 from google.cloud import bigquery
-from ipulse_shared_core_ftredge.enums.enums_common_utils import …
-from ipulse_shared_core_ftredge.utils_common import …
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
+from ipulse_shared_core_ftredge.utils_common import SmartLog
 
 
 def create_bigquery_schema_from_json(json_schema):

@@ -19,55 +19,58 @@ def create_bigquery_schema_from_json(json_schema):
     return schema
 
 
-def …
-…
+def check_format_against_schema_template(data_to_check, schema, dt_ts_to_str=True, check_max_length=True):
     """Ensure Update dict corresponds to the config schema, ensuring proper formats and lengths."""
-… (42 removed lines, truncated by the diff viewer)
+    checked_data = {}
+    warnings_or_error = [] # Group warnings and errors for a given run
+
+    try:
+        # Process updates to conform to the schema
+        for field in schema:
+            field_name = field["name"]
+            field_type = field["type"]
+            mode = field["mode"]
+
+            # Initialize notice to None at the start of each field processing
+            warning = None
+
+            if field_name in data_to_check:
+                value = data_to_check[field_name]
+
+                # Handle date and timestamp formatting
+                if field_type == "DATE":
+                    value, warning = handle_date_fields(field_name, value, dt_ts_to_str)
+                elif field_type == "TIMESTAMP":
+                    value, warning = handle_timestamp_fields(field_name, value, dt_ts_to_str)
+                elif field_type in ["STRING", "INT64", "FLOAT64", "BOOL"]:
+                    value, warning = handle_type_conversion(field_type, field_name, value)
+
+                if warning:
+                    warnings_or_error.append(warning)
+
+                # Check and handle max length restriction
+                if check_max_length and "max_length" in field:
+                    value, warning = check_and_truncate_length(field_name, value, field["max_length"])
+                    if warning:
+                        warnings_or_error.append(warning)
+
+                # Only add to the dictionary if value is not None or the field is required
+                if value is not None or mode == "REQUIRED":
+                    checked_data[field_name] = value
+
+            elif mode == "REQUIRED":
+                warning = SmartLog(level=LogLevel.WARNING,
+                                   subject=field_name,
+                                   description=f"Required field '{field_name}' is missing in the updates.")
+                warnings_or_error.append(warning)
 
-…
+    except Exception as e:
+        error_log = SmartLog(level=LogLevel.ERROR_EXCEPTION_REDO,
+                             subject=data_to_check,
+                             description=f"An error occurred during update check: {str(e)}")
+        warnings_or_error.append(error_log)
 
-    return …
+    return checked_data, warnings_or_error
 
 def handle_date_fields(field_name, value, dt_ts_to_str):
     """Handles date fields, ensuring they are in the correct format and optionally converts them to string."""

@@ -82,11 +85,11 @@ def handle_date_fields(field_name, value, dt_ts_to_str):
                 return value, None
             return parsed_date, None
         except ValueError:
-            return None, …
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject=field_name,
                                   description=f"Expected a DATE in YYYY-MM-DD format but got {value}.")
     else:
-        return None, …
+        return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                               subject=field_name,
                               description= f"Expected a DATE or YYYY-MM-DD str format but got {value} of type {type(value).__name__}.")
 

@@ -104,11 +107,11 @@ def handle_timestamp_fields(field_name, value, dt_ts_to_str):
                 return value, None
             return parsed_datetime, None
         except ValueError:
-            return None, …
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject=field_name,
                                   description= f"Expected ISO format TIMESTAMP but got {value}.")
     else:
-        return None, …
+        return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                               subject=field_name,
                               description= f"Expected ISO format TIMESTAMP but got {value} of type {type(value).__name__}.")
 

@@ -116,7 +119,7 @@ def handle_timestamp_fields(field_name, value, dt_ts_to_str):
 def check_and_truncate_length(field_name, value, max_length):
     """Checks and truncates the length of string fields if they exceed the max length."""
     if isinstance(value, str) and len(value) > max_length:
-        return value[:max_length], …
+        return value[:max_length], SmartLog(level=LogLevel.WARNING_FIX_RECOMMENDED,
                                             subject= field_name,
                                             description= f"Field exceeds max length: {len(value)}/{max_length}. Truncating.")
 

@@ -126,7 +129,7 @@ def check_and_truncate_length(field_name, value, max_length):
 
 def handle_type_conversion(field_type, field_name, value):
     if field_type == "STRING" and not isinstance(value, str):
-        return str(value), …
+        return str(value), SmartLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                                     subject=field_name,
                                     description= f"Expected STRING but got {value} of type {type(value).__name__}.")
 

@@ -134,18 +137,18 @@ def handle_type_conversion(field_type, field_name, value):
         try:
             return int(value), None
         except ValueError:
-            return None, …
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject= field_name,
                                   description=f"Expected INTEGER, but got {value} of type {type(value).__name__}.")
     if field_type == "FLOAT64" and not isinstance(value, float):
         try:
             return float(value), None
         except ValueError:
-            return None, …
+            return None, SmartLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                   subject=field_name,
                                   description=f"Expected FLOAT, but got {value} of type {type(value).__name__}.")
     if field_type == "BOOL" and not isinstance(value, bool):
-        return bool(value), …
+        return bool(value), SmartLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                                      subject=field_name,
                                      description=f"Expected BOOL, but got {value}. Converting as {bool(value)}.")
 
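check_format_against_schema_template now collects problems as SmartLog objects and returns them alongside the cleaned dict instead of failing outright. A sketch against a minimal schema (the field dicts use the same name/type/mode/max_length keys the function reads; the values are illustrative):

    from ipulse_shared_core_ftredge.utils_templates_and_schemas import (
        check_format_against_schema_template)

    schema = [
        {"name": "ticker", "type": "STRING", "mode": "REQUIRED", "max_length": 5},
        {"name": "close_date", "type": "DATE", "mode": "NULLABLE"},
    ]
    updates = {"ticker": "GOOGLE", "close_date": "2024-01-31"}  # ticker is one char too long

    checked, issues = check_format_against_schema_template(updates, schema)
    # checked["ticker"] == "GOOGL" (truncated); issues should hold one
    # WARNING_FIX_RECOMMENDED SmartLog describing the truncation.
    for log in issues:
        print(log.subject, "->", log.description)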
{ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/METADATA RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipulse_shared_core_ftredge
-Version: 2.52
+Version: 2.53
 Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
 Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
 Author: Russlan Ramdowar

{ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/RECORD RENAMED

@@ -1,9 +1,9 @@
-ipulse_shared_core_ftredge/__init__.py,sha256=…
-ipulse_shared_core_ftredge/utils_common.py,sha256=…
-ipulse_shared_core_ftredge/utils_gcp.py,sha256=…
-ipulse_shared_core_ftredge/utils_templates_and_schemas.py,sha256=…
-ipulse_shared_core_ftredge/enums/__init__.py,sha256=…
-ipulse_shared_core_ftredge/enums/enums_common_utils.py,sha256=…
+ipulse_shared_core_ftredge/__init__.py,sha256=CcHx8XkC7YJ5pOxsOpZJrTuxweN1ya1WlQJZjOTwrBY,868
+ipulse_shared_core_ftredge/utils_common.py,sha256=GEo4Xilh9quDdUh_ppOVO6G7ustHWkSaxuILKC_FLNo,27406
+ipulse_shared_core_ftredge/utils_gcp.py,sha256=8KgsOPkLe1-1i3M_UX5niKg_CjjiNoUhZXiWFIHJdmY,11286
+ipulse_shared_core_ftredge/utils_templates_and_schemas.py,sha256=CHrFbhRVrXlqDzGdPe9nujn5uFQtIN2xW7RBTiHYFBc,7475
+ipulse_shared_core_ftredge/enums/__init__.py,sha256=PT8Ig7hcx_hhVlsfun24H0pFjbdfQb201ZtJplQ9uAE,844
+ipulse_shared_core_ftredge/enums/enums_common_utils.py,sha256=CB0IMW5aer-n50G3AM6Fz-NrN85mJkvZhSrnuUb7EMs,5702
 ipulse_shared_core_ftredge/enums/enums_data_eng.py,sha256=2i6Qo6Yi_j_O9xxnOD6QA-r0Cv7mWAUaKUx907XMRio,1825
 ipulse_shared_core_ftredge/enums/enums_module_fincore.py,sha256=MuqQg249clrWUOBb1S-iPsoOldN2_F3ohRQizbjhwG0,1374
 ipulse_shared_core_ftredge/enums/enums_modules.py,sha256=AyXUoNmR75DZLaEHi3snV6LngR25LeZRqzrLDaAupbY,1244

@@ -18,8 +18,8 @@ ipulse_shared_core_ftredge/models/user_profile_update.py,sha256=oKK0XsQDKkgDvjFP
 ipulse_shared_core_ftredge/models/user_status.py,sha256=8TyRd8tBK9_xb0MPKbI5pn9-lX7ovKbeiuWYYPtIOiw,3202
 ipulse_shared_core_ftredge/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ipulse_shared_core_ftredge/tests/test.py,sha256=0lS8HP5Quo_BqNoscU40qOH9aJRaa1Pfam5VUBmdld8,682
-ipulse_shared_core_ftredge-2.…
-ipulse_shared_core_ftredge-2.…
-ipulse_shared_core_ftredge-2.…
-ipulse_shared_core_ftredge-2.…
-ipulse_shared_core_ftredge-2.…
+ipulse_shared_core_ftredge-2.53.dist-info/LICENCE,sha256=YBtYAXNqCCOo9Mr2hfkbSPAM9CeAr2j1VZBSwQTrNwE,1060
+ipulse_shared_core_ftredge-2.53.dist-info/METADATA,sha256=9JixJKcqPsiCzRQR6ZpOiKwDIOcjEHFY6OG-VILJ_zg,561
+ipulse_shared_core_ftredge-2.53.dist-info/WHEEL,sha256=rWxmBtp7hEUqVLOnTaDOPpR-cZpCDkzhhcBce-Zyd5k,91
+ipulse_shared_core_ftredge-2.53.dist-info/top_level.txt,sha256=8sgYrptpexkA_6_HyGvho26cVFH9kmtGvaK8tHbsGHk,27
+ipulse_shared_core_ftredge-2.53.dist-info/RECORD,,

{ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/LICENCE RENAMED
File without changes

{ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/WHEEL RENAMED
File without changes

{ipulse_shared_core_ftredge-2.52.dist-info → ipulse_shared_core_ftredge-2.53.dist-info}/top_level.txt RENAMED
File without changes