ipulse-shared-core-ftredge 2.6__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ipulse-shared-core-ftredge might be problematic.
- ipulse_shared_core_ftredge/__init__.py +21 -4
- ipulse_shared_core_ftredge/enums/__init__.py +32 -0
- ipulse_shared_core_ftredge/enums/enums_cloud.py +17 -0
- ipulse_shared_core_ftredge/enums/enums_common_utils.py +98 -0
- ipulse_shared_core_ftredge/enums/enums_data_eng.py +109 -0
- ipulse_shared_core_ftredge/enums/enums_logs.py +79 -0
- ipulse_shared_core_ftredge/enums/enums_module_fincore.py +58 -0
- ipulse_shared_core_ftredge/enums/enums_modules.py +25 -0
- ipulse_shared_core_ftredge/{models → enums}/pulse_enums.py +10 -46
- ipulse_shared_core_ftredge/models/__init__.py +0 -1
- ipulse_shared_core_ftredge/models/organisation.py +61 -55
- ipulse_shared_core_ftredge/models/resource_catalog_item.py +97 -171
- ipulse_shared_core_ftredge/models/user_profile.py +10 -9
- ipulse_shared_core_ftredge/models/user_profile_update.py +32 -14
- ipulse_shared_core_ftredge/models/user_status.py +21 -11
- ipulse_shared_core_ftredge/utils/__init__.py +19 -0
- ipulse_shared_core_ftredge/utils/logs/__init__.py +2 -0
- ipulse_shared_core_ftredge/{models → utils/logs}/audit_log_firestore.py +1 -1
- ipulse_shared_core_ftredge/utils/logs/context_log.py +211 -0
- ipulse_shared_core_ftredge/utils/logs/get_logger.py +76 -0
- ipulse_shared_core_ftredge/utils/utils_cloud.py +44 -0
- ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py +311 -0
- ipulse_shared_core_ftredge/utils/utils_cloud_gcp_with_collectors.py +169 -0
- ipulse_shared_core_ftredge/utils/utils_cloud_with_collectors.py +26 -0
- ipulse_shared_core_ftredge/utils/utils_collector_pipelinemon.py +356 -0
- ipulse_shared_core_ftredge/utils/utils_common.py +145 -0
- ipulse_shared_core_ftredge/utils/utils_templates_and_schemas.py +151 -0
- ipulse_shared_core_ftredge-2.6.1.dist-info/METADATA +14 -0
- ipulse_shared_core_ftredge-2.6.1.dist-info/RECORD +33 -0
- {ipulse_shared_core_ftredge-2.6.dist-info → ipulse_shared_core_ftredge-2.6.1.dist-info}/WHEEL +1 -1
- ipulse_shared_core_ftredge/tests/__init__.py +0 -0
- ipulse_shared_core_ftredge/tests/test.py +0 -17
- ipulse_shared_core_ftredge-2.6.dist-info/METADATA +0 -11
- ipulse_shared_core_ftredge-2.6.dist-info/RECORD +0 -17
- {ipulse_shared_core_ftredge-2.6.dist-info → ipulse_shared_core_ftredge-2.6.1.dist-info}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.6.dist-info → ipulse_shared_core_ftredge-2.6.1.dist-info}/top_level.txt +0 -0
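Most of the churn in this release is a reorganisation: the enums move out of models into a dedicated enums package, and the audit log plus the new logging helpers live under utils/logs. A hedged sketch of the import paths implied by the file moves above (whether the package __init__.py files also re-export these names at a shorter path is not shown in this diff):

# Hedged sketch of post-2.6.1 import paths, inferred from the file moves listed above.
from ipulse_shared_core_ftredge.enums.enums_cloud import CloudProvider          # new enums package
from ipulse_shared_core_ftredge.utils.logs.context_log import ContextLog        # new logging utils
from ipulse_shared_core_ftredge.utils.logs.get_logger import get_logger
from ipulse_shared_core_ftredge.utils.utils_cloud import write_json_to_cloud_storage_extended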
ipulse_shared_core_ftredge/utils/logs/context_log.py
@@ -0,0 +1,211 @@

# pylint: disable=missing-module-docstring
# pylint: disable=missing-function-docstring
# pylint: disable=logging-fstring-interpolation
# pylint: disable=line-too-long
# pylint: disable=missing-class-docstring
# pylint: disable=broad-exception-caught
# pylint: disable=unused-variable
import traceback
import json
from datetime import datetime, timezone
from typing import List
from ipulse_shared_core_ftredge.enums.enums_common_utils import Status
from ipulse_shared_core_ftredge.enums.enums_logs import LogLevel

############################################################################
##################### SETTING UP custom LOGGING format = DICT #############
### Cloud agnostic: can be used with any cloud provider; just use the to_dict() method to get the log as a dict
class ContextLog:

    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
                 context: str = None, description: str = None,
                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
                 log_status: Status = Status.OPEN, subject: str = None, systems_impacted: List[str] = None,
                 ):

        if e is not None:
            e_type = type(e).__name__ if e_type is None else e_type
            e_message = str(e) if e_message is None else e_message
            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
        elif e_traceback is None and (e_type or e_message):
            e_traceback = traceback.format_exc()

        self.level = level
        self.subject = subject
        self.description = description
        self._base_context = base_context
        self._context = context
        self._systems_impacted = systems_impacted if systems_impacted else []
        self.collector_id = collector_id
        self.exception_type = e_type
        self.exception_message = e_message
        self.exception_traceback = e_traceback
        self.log_status = log_status
        self.timestamp = datetime.now(timezone.utc).isoformat()

    @property
    def base_context(self):
        return self._base_context

    @base_context.setter
    def base_context(self, value):
        self._base_context = value

    @property
    def context(self):
        return self._context

    @context.setter
    def context(self, value):
        self._context = value

    @property
    def systems_impacted(self):
        return self._systems_impacted

    @systems_impacted.setter
    def systems_impacted(self, list_of_si: List[str]):
        self._systems_impacted = list_of_si

    def add_system_impacted(self, system_impacted: str):
        if self._systems_impacted is None:
            self._systems_impacted = []
        self._systems_impacted.append(system_impacted)

    def remove_system_impacted(self, system_impacted: str):
        if self._systems_impacted is not None:
            self._systems_impacted.remove(system_impacted)

    def clear_systems_impacted(self):
        self._systems_impacted = []

    def _format_traceback(self, e_traceback, e_message, max_field_len: int, max_traceback_lines: int):
        if not e_traceback or e_traceback == 'None\n':
            return None

        traceback_lines = e_traceback.splitlines()

        # Check if the traceback is within the limits
        if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
            return e_traceback

        # Remove lines that are part of the exception message if they are present in traceback
        message_lines = e_message.splitlines() if e_message else []
        if message_lines:
            for message_line in message_lines:
                if message_line in traceback_lines:
                    traceback_lines.remove(message_line)

        # Filter out lines from third-party libraries (like site-packages)
        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]

        # If filtering results in too few lines, revert to original traceback
        if len(filtered_lines) < 2:
            filtered_lines = traceback_lines

        # Combine standalone bracket lines with previous or next lines
        combined_lines = []
        for line in filtered_lines:
            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
                combined_lines[-1] += " " + line.strip()
            else:
                combined_lines.append(line)

        # Ensure the number of lines doesn't exceed max_traceback_lines
        if len(combined_lines) > max_traceback_lines:
            keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
            keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
            combined_lines = (
                combined_lines[:keep_lines_start] +
                ['... (truncated) ...'] +
                combined_lines[-keep_lines_end:]
            )

        formatted_traceback = '\n'.join(combined_lines)

        # Ensure the total length doesn't exceed max_field_len
        if len(formatted_traceback) > max_field_len:
            truncated_length = max_field_len - len('... (truncated) ...')
            half_truncated_length = truncated_length // 2
            formatted_traceback = (
                formatted_traceback[:half_truncated_length] +
                '\n... (truncated) ...\n' +
                formatted_traceback[-half_truncated_length:]
            )
        return formatted_traceback

    def to_dict(self, max_field_len: int = 10000, size_limit: float = 256 * 1024 * 0.80, max_traceback_lines: int = 30):
        size_limit = int(size_limit)  # Ensure size_limit is an integer

        # Unified list of all fields
        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
        fields = [
            ("log_status", str(self.log_status.name)),
            ("level_code", self.level.value),
            ("level_name", str(self.level.name)),
            ("base_context", str(self.base_context)),
            ("timestamp", str(self.timestamp)),
            ("collector_id", str(self.collector_id)),
            ("systems_impacted", systems_impacted_str),
            ("context", str(self.context)),  # special sizing rules apply to it
            ("subject", str(self.subject)),
            ("description", str(self.description)),
            ("exception_type", str(self.exception_type)),
            ("exception_message", str(self.exception_message)),
            ("exception_traceback", str(self._format_traceback(self.exception_traceback, self.exception_message, max_field_len, max_traceback_lines)))
        ]

        # Function to calculate the byte size of a JSON-encoded field
        def field_size(key, value):
            return len(json.dumps({key: value}).encode('utf-8'))

        # Function to truncate a value based on its type
        def truncate_value(value, max_size):
            if isinstance(value, str):
                half_size = max_size // 2
                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
            return value

        # Ensure no field exceeds max_field_len
        for i, (key, value) in enumerate(fields):
            if isinstance(value, str) and len(value) > max_field_len:
                fields[i] = (key, truncate_value(value, max_field_len))

        # Ensure total size of the dict doesn't exceed size_limit
        total_size = sum(field_size(key, value) for key, value in fields)
        log_dict = {}
        truncated = False

        if total_size > size_limit:
            truncated = True
            remaining_size = size_limit
            remaining_fields = len(fields)

            for key, value in fields:
                if remaining_fields > 0:
                    max_size_per_field = remaining_size // remaining_fields
                else:
                    max_size_per_field = 0

                field_sz = field_size(key, value)
                if field_sz > max_size_per_field:
                    value = truncate_value(value, max_size_per_field)
                    field_sz = field_size(key, value)

                log_dict[key] = value
                remaining_size -= field_sz
                remaining_fields -= 1
        else:
            log_dict = dict(fields)

        log_dict['trunc'] = truncated

        return log_dict

    def __str__(self):
        return json.dumps(self.to_dict(), indent=4)

    def __repr__(self):
        return self.__str__()
ipulse_shared_core_ftredge/utils/logs/get_logger.py
@@ -0,0 +1,76 @@
# pylint: disable=missing-module-docstring
# pylint: disable=missing-function-docstring
# pylint: disable=missing-class-docstring
# pylint: disable=broad-exception-caught
# pylint: disable=line-too-long
# pylint: disable=unused-variable
# pylint: disable=broad-exception-raised
import logging
import os
import json
import traceback
from ipulse_shared_core_ftredge.enums.enums_cloud import CloudProvider
from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import setup_gcp_logging

###################################################################################################
###################################################################################################
##################################### SETTING UP LOGGER ##########################################

class CloudLogFormatter(logging.Formatter):
    """Formats log records as structured JSON."""

    def format(self, record):
        log_entry = {
            'timestamp': self.formatTime(record, self.datefmt),
            'name': record.name,
            'severity': record.levelname,
            'message': record.msg,
            'pathname': record.pathname,
            'lineno': record.lineno,
        }
        if record.exc_info:
            log_entry['exception_traceback'] = ''.join(traceback.format_exception(*record.exc_info))
        if isinstance(record.msg, dict):
            log_entry.update(record.msg)
        return json.dumps(log_entry)


class LocalLogFormatter(logging.Formatter):
    """Formats log records for local output to the console."""

    def format(self, record):
        path_parts = record.pathname.split(os.sep)

        # Get the last two parts of the path if they exist
        if len(path_parts) >= 2:
            short_path = os.path.join(path_parts[-2], path_parts[-1])
        else:
            short_path = record.pathname

        log_message = f"{record.levelname} ::: {record.name} ::: {short_path} ::: lineno: {record.lineno} ::: {self.formatTime(record, self.datefmt)} ::: message: {record.msg}"
        if record.exc_info:
            log_message += "\n" + ''.join(
                traceback.format_exception(*record.exc_info)
            )
        return log_message


def get_logger(logger_name: str, level=logging.INFO, enable_local_streamer=False, cloud_provider: CloudProvider = CloudProvider.NO_CLOUD, enable_error_reporting=True):

    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    cloud_formatter = CloudLogFormatter()

    without_cloud_logging_handler = [CloudProvider.NO_CLOUD, CloudProvider.CLOUD_AGNOSTIC, CloudProvider.UNKNWON, CloudProvider.OTHER]

    if cloud_provider in without_cloud_logging_handler or enable_local_streamer:
        handler = logging.StreamHandler()
        handler.setFormatter(LocalLogFormatter())
        logger.addHandler(handler)

    if cloud_provider == CloudProvider.GCP:
        setup_gcp_logging(logger=logger, formatter=cloud_formatter, enable_error_reporting=enable_error_reporting)
    elif cloud_provider not in without_cloud_logging_handler:
        raise ValueError(f"Unsupported cloud provider: {cloud_provider}. Supported cloud providers: {CloudProvider.GCP.value}")

    return logger
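get_logger wires an optional local stream handler and, for GCP, the cloud handlers from utils_cloud_gcp onto a standard logging.Logger. A minimal sketch of how a service might obtain one; with NO_CLOUD no GCP credentials are needed (the logger name and message values are illustrative):

# Hedged sketch: NO_CLOUD keeps everything on a local StreamHandler.
import logging
from ipulse_shared_core_ftredge.enums.enums_cloud import CloudProvider
from ipulse_shared_core_ftredge.utils.logs.get_logger import get_logger

logger = get_logger("prices-ingestion",
                    level=logging.DEBUG,
                    enable_local_streamer=True,
                    cloud_provider=CloudProvider.NO_CLOUD)
# Dict messages are accepted; when GCP logging is enabled, CloudLogFormatter merges them into the JSON entry.
logger.info({"event": "ingestion_started", "rows_expected": 1200})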
ipulse_shared_core_ftredge/utils/utils_cloud.py
@@ -0,0 +1,44 @@
# pylint: disable=missing-module-docstring
# pylint: disable=missing-function-docstring
# pylint: disable=missing-class-docstring
# pylint: disable=broad-exception-caught
# pylint: disable=line-too-long
# pylint: disable=unused-variable
from typing import Optional
from ipulse_shared_core_ftredge.enums.enums_cloud import CloudProvider
from .utils_cloud_gcp import (write_json_to_gcs_extended, read_json_from_gcs)

#######################################################################################################################
#######################################################################################################################
################################################# cloud IO functions #################################################

# Define the central functions that route to the relevant cloud-specific function
def write_json_to_cloud_storage_extended(cloud_provider: CloudProvider, storage_client, data: dict | list | str, bucket_name: str, file_name: str,
                                         file_exists_if_starts_with_prefix: Optional[str] = None, overwrite_if_exists: bool = False, increment_if_exists: bool = False,
                                         max_retries: int = 2, max_deletable_files: int = 1, logger=None, print_out=False):

    if cloud_provider == CloudProvider.GCP:
        return write_json_to_gcs_extended(
            storage_client=storage_client,
            data=data,
            bucket_name=bucket_name,
            file_name=file_name,
            file_exists_if_starts_with_prefix=file_exists_if_starts_with_prefix,
            overwrite_if_exists=overwrite_if_exists,
            increment_if_exists=increment_if_exists,
            max_retries=max_retries,
            max_deletable_files=max_deletable_files,
            logger=logger,
            print_out=print_out
        )

    raise ValueError(f"Unsupported cloud provider: {cloud_provider}. Supported cloud providers: gcp")


def read_json_from_cloud_storage(cloud_provider: CloudProvider, storage_client, bucket_name, file_name, logger=None, print_out=False):
    if cloud_provider == CloudProvider.GCP:
        return read_json_from_gcs(storage_client=storage_client, bucket_name=bucket_name, file_name=file_name, logger=logger, print_out=print_out)

    raise ValueError(f"Unsupported cloud provider: {cloud_provider}. Supported cloud providers: gcp")
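The router above only adds a provider switch around the GCP helpers, so a caller supplies the provider enum plus an already-constructed storage client. A hedged usage sketch; the bucket and object names are made up for illustration, and running it requires google-cloud-storage with ambient GCP credentials:

# Hedged sketch: provider routing with GCS as the backing store.
from google.cloud import storage
from ipulse_shared_core_ftredge.enums.enums_cloud import CloudProvider
from ipulse_shared_core_ftredge.utils.utils_cloud import write_json_to_cloud_storage_extended

client = storage.Client()
result = write_json_to_cloud_storage_extended(
    cloud_provider=CloudProvider.GCP,
    storage_client=client,
    data={"symbol": "EXAMPLE", "close": 101.5},
    bucket_name="example-bucket",                      # illustrative name
    file_name="prices/EXAMPLE_2024-01-01.json",        # illustrative name
    increment_if_exists=True,                          # never overwrite; save as _v1, _v2, ... instead
)
print(result["cloud_storage_path"], result["cloud_storage_file_saved_with_increment"])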
ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py
@@ -0,0 +1,311 @@
# pylint: disable=missing-module-docstring
# pylint: disable=missing-function-docstring
# pylint: disable=missing-class-docstring
# pylint: disable=broad-exception-caught
# pylint: disable=line-too-long
# pylint: disable=unused-variable
# pylint: disable=broad-exception-raised
import json
import csv
from io import StringIO
import os
import time
import logging
from typing import Optional
import traceback
from google.api_core.exceptions import NotFound
from google.cloud import error_reporting
from google.cloud import logging as cloud_logging
from google.cloud.storage import Client as GCSClient
from google.cloud import bigquery

############################################################################
##################### GOOGLE CLOUD UTILS ##################################
############################################################################

def log_error(message, logger=None, print_out=False, exc_info=False):
    if logger:
        logger.error(message, exc_info=exc_info)
    elif print_out:
        print(message)

def log_warning(message, logger=None, print_out=False):
    if logger:
        logger.warning(message)
    elif print_out:
        print(message)

def log_info(message, logger=None, print_out=False):
    if logger:
        logger.info(message)
    elif print_out:
        print(message)


############################################################################
##################### LOGGING and ERROR reporting ##########################
#### DEPRECATED: THIS APPROACH WAS GOOD, BUT ERRORS WERE NOT REPORTED TO ERROR REPORTING
# logging.basicConfig(level=logging.INFO)
# logging_client = google.cloud.logging.Client()
# logging_client.setup_logging()
###################################
def setup_gcp_logging(logger, formatter, enable_error_reporting=True):

    class CustomGCPErrorReportingHandler(logging.Handler):
        def __init__(self, level=logging.ERROR):
            super().__init__(level)
            self.error_client = error_reporting.Client()
            self.propagate = True

        def emit(self, record):
            try:
                if record.levelno >= logging.ERROR:
                    log_struct = {
                        'message': self.format(record),
                        'severity': record.levelname,
                        'pathname': getattr(record, 'pathname', None),
                        'lineno': getattr(record, 'lineno', None)
                    }
                    if record.exc_info:
                        log_struct['exception'] = ''.join(
                            traceback.format_exception(*record.exc_info)
                        )
                    self.error_client.report(str(log_struct))
            except Exception as e:
                self.handleError(record)

    class CustomGCPLoggingHandler(cloud_logging.handlers.CloudLoggingHandler):
        """Custom handler for Google Cloud Logging with a dynamic logName."""
        def __init__(self, client, name, resource=None, labels=None):
            super().__init__(client=client, name=name, resource=resource, labels=labels)

        def emit(self, record):
            # 1. Create the basic log entry dictionary
            log_entry = {
                'message': record.msg,
                'severity': record.levelname,
                'name': record.name,
                'pathname': record.filename,
                'lineno': record.lineno,
            }
            if record.exc_info:
                log_entry['exception_traceback'] = ''.join(
                    traceback.format_exception(*record.exc_info)
                )

            # 2. Apply the formatter to the 'message' field if it's a dictionary
            if isinstance(record.msg, dict):
                formatted_message = self.formatter.format(record)
                try:
                    log_entry['message'] = json.loads(formatted_message)
                except json.JSONDecodeError:
                    log_entry['message'] = formatted_message
            else:
                log_entry['message'] = record.msg

            # 3. Set the custom logName
            log_entry['logName'] = f"projects/{self.client.project}/logs/{record.name}"

            # 4. Send to Google Cloud Logging
            super().emit(record)

    # Create Google Cloud Logging handler
    cloud_logging_client = cloud_logging.Client()
    cloud_logging_handler = CustomGCPLoggingHandler(cloud_logging_client, logger.name)  # No prefix needed
    cloud_logging_handler.setFormatter(formatter)
    logger.addHandler(cloud_logging_handler)

    if enable_error_reporting:
        # Create and add Error Reporting handler
        error_reporting_handler = CustomGCPErrorReportingHandler()
        logger.addHandler(error_reporting_handler)


def create_bigquery_schema_from_json(json_schema: list) -> list:
    schema = []
    for field in json_schema:
        if "max_length" in field:
            schema.append(bigquery.SchemaField(field["name"], field["type"], mode=field["mode"], max_length=field["max_length"]))
        else:
            schema.append(bigquery.SchemaField(field["name"], field["type"], mode=field["mode"]))
    return schema


def read_json_from_gcs(storage_client: GCSClient, bucket_name: str, file_name: str, logger=None, print_out=False):
    """ Helper function to read a JSON file from Google Cloud Storage """
    try:
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(file_name)
        data_string = blob.download_as_text()
        data = json.loads(data_string)
        return data
    except NotFound:
        log_error(message=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
        return None
    except json.JSONDecodeError:
        log_error(message=f"Error: The file {file_name} could not be decoded as JSON.", logger=logger, print_out=print_out)
        return None
    except Exception as e:
        log_error(message=f"An unexpected error occurred: {e}", exc_info=True, logger=logger, print_out=print_out)
        return None

def read_csv_from_gcs(bucket_name: str, file_name: str, storage_client: GCSClient, logger=None, print_out=False):
    """ Helper function to read a CSV file from Google Cloud Storage """

    try:
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(file_name)
        data_string = blob.download_as_text()
        data_file = StringIO(data_string)
        reader = csv.DictReader(data_file)
        return list(reader)
    except NotFound:
        log_error(message=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
        return None
    except csv.Error:
        log_error(message=f"Error: The file {file_name} could not be read as CSV.", logger=logger, print_out=print_out)
        return None
    except Exception as e:
        log_error(message=f"An unexpected error occurred: {e}", logger=logger, print_out=print_out, exc_info=True)
        return None


def write_json_to_gcs_extended(storage_client: GCSClient, data: dict | list | str, bucket_name: str, file_name: str,
                               file_exists_if_starts_with_prefix: Optional[str] = None, overwrite_if_exists: bool = False, increment_if_exists: bool = False,
                               max_retries: int = 2, max_deletable_files: int = 1, logger=None, print_out=False):

    """Saves JSON data to Google Cloud Storage.

    This function attempts to upload data to GCS, retrying up to `max_retries` times.
    It handles file name conflicts based on these rules:
    - If `overwrite_if_exists` is True:
        - If `file_exists_if_starts_with_prefix` is provided, ANY existing file whose name starts with that prefix is deleted (up to `max_deletable_files`), and the new file is saved with the provided `file_name`.
        - If `file_exists_if_starts_with_prefix` is None and a file with the exact `file_name` exists, it is overwritten.
    - If `increment_if_exists` is True, a new file with an incremented version suffix is created whenever a file with the exact `file_name` exists.
    - If neither flag is set and a conflicting file is found (by prefix or exact name), the upload is skipped and the conflict is reported in the returned metadata.

    If both overwrite_if_exists and increment_if_exists are provided as True, an exception will be raised.
    """
    # Input validation
    if overwrite_if_exists and increment_if_exists:
        raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
    if not isinstance(data, (list, dict, str)):
        raise ValueError("Data should be a list, dict, or string.")
    if max_deletable_files > 10:
        raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")

    # Prepare data
    if isinstance(data, (list, dict)):
        data_str = json.dumps(data, indent=2)
    else:
        data_str = data

    bucket = storage_client.bucket(bucket_name)
    base_file_name, ext = os.path.splitext(file_name)
    increment = 0
    attempts = 0
    success = False

    # GCS-related metadata
    cloud_storage_path = None
    cloud_storage_file_overwritten = False
    cloud_storage_file_already_exists = False
    cloud_storage_file_saved_with_increment = False
    cloud_storage_file_exists_checked_on_name = file_name
    cloud_storage_deleted_files = []

    upload_allowed = True
    # --- Overwrite Logic ---
    if overwrite_if_exists:
        if file_exists_if_starts_with_prefix:
            cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
            blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
            if len(blobs_to_delete) > max_deletable_files:
                raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
            if blobs_to_delete:
                for blob in blobs_to_delete:
                    cloud_storage_path_del = f"gs://{bucket_name}/{blob.name}"
                    blob.delete()
                    cloud_storage_deleted_files.append(cloud_storage_path_del)
                cloud_storage_file_overwritten = True
        elif bucket.blob(file_name).exists():
            cloud_storage_file_already_exists = True
            cloud_storage_path_del = f"gs://{bucket_name}/{file_name}"
            bucket.blob(file_name).delete()  # Delete the existing blob
            cloud_storage_deleted_files.append(cloud_storage_path_del)
            cloud_storage_file_overwritten = True
    # --- Increment Logic ---
    elif increment_if_exists:
        cloud_storage_file_exists_checked_on_name = file_name  # We only increment if the exact name exists
        while bucket.blob(file_name).exists():
            cloud_storage_file_already_exists = True
            increment += 1
            file_name = f"{base_file_name}_v{increment}{ext}"
            cloud_storage_file_saved_with_increment = True
        if increment > 0:
            cloud_storage_path = f"gs://{bucket_name}/{file_name}"
    # --- Check for Conflicts (Including Prefix) ---
    else:
        if file_exists_if_starts_with_prefix:
            blobs_matched = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
            cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
            if blobs_matched:
                upload_allowed = False
                cloud_storage_file_already_exists = True
        elif bucket.blob(file_name).exists():
            upload_allowed = False
            cloud_storage_file_already_exists = True

    # --- GCS Upload ---
    cloud_storage_path = f"gs://{bucket_name}/{file_name}"
    if overwrite_if_exists or increment_if_exists or upload_allowed:
        while attempts < max_retries and not success:
            try:
                blob = bucket.blob(file_name)  # Use the potentially updated file_name
                blob.upload_from_string(data_str, content_type='application/json')
                success = True
            except Exception as e:
                attempts += 1
                if attempts < max_retries:
                    time.sleep(2 ** attempts)
                else:
                    raise e

    # --- Return Metadata ---
    return {
        "cloud_storage_path": cloud_storage_path if (success or not upload_allowed) else None,
        "cloud_storage_file_already_exists": cloud_storage_file_already_exists,
        "cloud_storage_file_exists_checked_on_name": cloud_storage_file_exists_checked_on_name,
        "cloud_storage_file_overwritten": cloud_storage_file_overwritten,
        "cloud_storage_deleted_file_names": ",,,".join(cloud_storage_deleted_files) if cloud_storage_deleted_files else None,
        "cloud_storage_file_saved_with_increment": cloud_storage_file_saved_with_increment
    }


def write_csv_to_gcs(bucket_name: str, file_name: str, data: dict | list | str, storage_client: GCSClient, logger, log_info_verbose=True):
    """ Helper function to write a CSV file to Google Cloud Storage """
    try:
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(file_name)
        data_file = StringIO()
        if data and isinstance(data, list) and isinstance(data[0], dict):
            fieldnames = data[0].keys()
            writer = csv.DictWriter(data_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        else:
            raise ValueError("Data should be a list of dictionaries")
        blob.upload_from_string(data_file.getvalue(), content_type='text/csv')
        if log_info_verbose:
            logger.info(f"Successfully wrote CSV to {file_name} in bucket {bucket_name}.")
    except ValueError as e:
        logger.error(f"ValueError: {e}")
    except Exception as e:
        logger.error(f"An unexpected error occurred while writing CSV to GCS: {e}", exc_info=True)
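create_bigquery_schema_from_json maps a plain list of field dicts onto bigquery.SchemaField objects, which keeps table schemas versionable as JSON files (for example, ones read back with read_json_from_gcs). A hedged sketch of the expected input shape; the field names and table id below are invented for illustration:

# Hedged sketch: requires google-cloud-bigquery; the dict keys mirror the ones the helper reads.
from google.cloud import bigquery
from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import create_bigquery_schema_from_json

json_schema = [
    {"name": "symbol", "type": "STRING", "mode": "REQUIRED", "max_length": 12},
    {"name": "close", "type": "FLOAT", "mode": "NULLABLE"},
]
schema = create_bigquery_schema_from_json(json_schema)
table = bigquery.Table("project.dataset.prices_example", schema=schema)  # illustrative table id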