ipulse-shared-core-ftredge 2.56__py3-none-any.whl → 2.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ipulse-shared-core-ftredge might be problematic.
- ipulse_shared_core_ftredge/__init__.py +10 -5
- ipulse_shared_core_ftredge/enums/__init__.py +1 -0
- ipulse_shared_core_ftredge/enums/enums_common_utils.py +9 -3
- ipulse_shared_core_ftredge/models/user_profile.py +3 -3
- ipulse_shared_core_ftredge/utils_custom_logs.py +201 -0
- ipulse_shared_core_ftredge/utils_gcp.py +112 -65
- ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py +201 -0
- ipulse_shared_core_ftredge/{utils_common.py → utils_pipelinemon.py} +25 -206
- ipulse_shared_core_ftredge/utils_templates_and_schemas.py +7 -9
- {ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/METADATA +1 -1
- {ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/RECORD +14 -14
- {ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/WHEEL +1 -1
- ipulse_shared_core_ftredge/tests/__init__.py +0 -0
- ipulse_shared_core_ftredge/tests/test.py +0 -17
- {ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/top_level.txt +0 -0
ipulse_shared_core_ftredge/__init__.py

@@ -1,13 +1,18 @@
 from .models import (Organisation, UserAuth, UserProfile,
                      UserStatus, UserProfileUpdate, pulse_enums)
-
-
-
+
+
+from .utils_gcp import (setup_gcp_logger_and_error_report,
+                        read_csv_from_gcs, read_json_from_gcs,
+                        write_csv_to_gcs,write_json_to_gcs)
+from .utils_custom_logs import (ContextLog)
+from .utils_pipelinemon import ( Pipelinemon)
+from .utils_gcp_for_pipelines import (write_json_to_gcs_in_pipeline )
+
 from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
                                           check_format_against_schema_template)
-from .utils_common import (ContextLog, Pipelinemon)

-from .enums import (TargetLogs, LogLevel, Unit, Frequency,
+from .enums import (TargetLogs, LogStatus, LogLevel, Unit, Frequency,
                     Module, SubModule, BaseDataCategory,
                     FinCoreCategory, FincCoreSubCategory,
                     FinCoreRecordsCategory, ExchangeOrPublisher,
ipulse_shared_core_ftredge/enums/enums_common_utils.py

@@ -42,9 +42,15 @@ class LogLevel(Enum):
     DEBUG = 10 # Detailed debug information (for development/troubleshooting)

     INFO = 100
-
-
-
+    INFO_REMOTE_PERSISTNACE_COMPLETE= 101
+    INFO_REMOTE_UPDATE_COMPLETE = 102
+    INFO_REMOTE_DELETE_COMPLETE = 103
+
+    INFO_REMOTE_BULK_PERSISTNACE_COMPLETE= 111
+    INFO_REMOTE_BULK_UPDATE_COMPLETE = 112
+    INFO_REMOTE_BULK_DELETE_COMPLETE = 113
+
+    INFO_LOCAL_PERSISTNACE_COMPLETE = 121

     SUCCESS = 201
     SUCCESS_WITH_NOTICES = 211
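Note, for orientation only: the new INFO_* members sit in the numeric gap between INFO (100) and SUCCESS (201), so the range-based counters in Pipelinemon (see the utils_pipelinemon.py changes further below) still bucket them as info-level events. A minimal sketch, assuming the 2.57 wheel is installed:

from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel

# 103 sits between INFO (100) and SUCCESS (201), so it still counts as an info event.
print(LogLevel.INFO_REMOTE_DELETE_COMPLETE.value)  # 103
print(LogLevel.INFO.value <= LogLevel.INFO_REMOTE_BULK_UPDATE_COMPLETE.value < LogLevel.SUCCESS.value)  # True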
ipulse_shared_core_ftredge/models/user_profile.py

@@ -33,9 +33,9 @@ class UserProfile(BaseModel):
     provider_id: str #User can Read only

     username: Optional[str] = None #User can Read and Edit
-    dob: Optional[date] = None #User can Read and Edit
-    first_name: Optional[str] = None #User can Read and Edit
-    last_name: Optional[str] = None #User can Read and Edit
+    dob: Optional[date] = None #User can Read and Edit
+    first_name: Optional[str] = None #User can Read and Edit
+    last_name: Optional[str] = None #User can Read and Edit
     mobile: Optional[str] = None #User can Read and Edit
     class Config:
         extra = "forbid"
ipulse_shared_core_ftredge/utils_custom_logs.py

@@ -0,0 +1,201 @@
+
+# pylint: disable=missing-module-docstring
+# pylint: disable=missing-function-docstring
+# pylint: disable=logging-fstring-interpolation
+# pylint: disable=line-too-long
+# pylint: disable=missing-class-docstring
+# pylint: disable=broad-exception-caught
+import traceback
+import json
+from datetime import datetime, timezone
+from typing import List
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel, LogStatus
+
+
+class ContextLog:
+
+    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
+                 context: str = None, description: str = None,
+                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
+                 log_status: LogStatus = LogStatus.OPEN, subject: str = None, systems_impacted: List[str] = None,
+                 ):
+
+        if e is not None:
+            e_type = type(e).__name__ if e_type is None else e_type
+            e_message = str(e) if e_message is None else e_message
+            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
+        elif e_traceback is None and (e_type or e_message):
+            e_traceback = traceback.format_exc()
+
+        self.level = level
+        self.subject = subject
+        self.description = description
+        self._base_context = base_context
+        self._context = context
+        self._systems_impacted = systems_impacted if systems_impacted else []
+        self.collector_id = collector_id
+        self.exception_type = e_type
+        self.exception_message = e_message
+        self.exception_traceback = e_traceback
+        self.log_status = log_status
+        self.timestamp = datetime.now(timezone.utc).isoformat()
+
+    @property
+    def base_context(self):
+        return self._base_context
+
+    @base_context.setter
+    def base_context(self, value):
+        self._base_context = value
+
+    @property
+    def context(self):
+        return self._context
+
+    @context.setter
+    def context(self, value):
+        self._context = value
+
+    @property
+    def systems_impacted(self):
+        return self._systems_impacted
+
+    @systems_impacted.setter
+    def systems_impacted(self, list_of_si: List[str]):
+        self._systems_impacted = list_of_si
+
+    def add_system_impacted(self, system_impacted: str):
+        if self._systems_impacted is None:
+            self._systems_impacted = []
+        self._systems_impacted.append(system_impacted)
+
+    def remove_system_impacted(self, system_impacted: str):
+        if self._systems_impacted is not None:
+            self._systems_impacted.remove(system_impacted)
+
+    def clear_systems_impacted(self):
+        self._systems_impacted = []
+
+    def _format_traceback(self, e_traceback, e_message, max_field_len:int, max_traceback_lines:int):
+        if not e_traceback or e_traceback == 'None\n':
+            return None
+
+        traceback_lines = e_traceback.splitlines()
+
+        # Check if the traceback is within the limits
+        if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
+            return e_traceback
+
+        # Remove lines that are part of the exception message if they are present in traceback
+        message_lines = e_message.splitlines() if e_message else []
+        if message_lines:
+            for message_line in message_lines:
+                if message_line in traceback_lines:
+                    traceback_lines.remove(message_line)
+
+        # Filter out lines from third-party libraries (like site-packages)
+        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
+
+        # If filtering results in too few lines, revert to original traceback
+        if len(filtered_lines) < 2:
+            filtered_lines = traceback_lines
+
+        # Combine standalone bracket lines with previous or next lines
+        combined_lines = []
+        for line in filtered_lines:
+            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
+                combined_lines[-1] += " " + line.strip()
+            else:
+                combined_lines.append(line)
+
+        # Ensure the number of lines doesn't exceed MAX_TRACEBACK_LINES
+        if len(combined_lines) > max_traceback_lines:
+            keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
+            keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
+            combined_lines = (
+                combined_lines[:keep_lines_start] +
+                ['... (truncated) ...'] +
+                combined_lines[-keep_lines_end:]
+            )
+
+        formatted_traceback = '\n'.join(combined_lines)
+
+        # Ensure the total length doesn't exceed MAX_TRACEBACK_LENGTH
+        if len(formatted_traceback) > max_field_len:
+            truncated_length = max_field_len - len('... (truncated) ...')
+            half_truncated_length = truncated_length // 2
+            formatted_traceback = (
+                formatted_traceback[:half_truncated_length] +
+                '\n... (truncated) ...\n' +
+                formatted_traceback[-half_truncated_length:]
+            )
+        return formatted_traceback
+
+    def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80,max_traceback_lines:int = 30):
+        size_limit = int(size_limit) # Ensure size_limit is an integer
+
+        # Unified list of all fields
+        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
+        fields = [
+            ("log_status", str(self.log_status.name)),
+            ("level_code", self.level.value),
+            ("level_name", str(self.level.name)),
+            ("base_context", str(self.base_context)),
+            ("timestamp", str(self.timestamp)),
+            ("collector_id", str(self.collector_id)),
+            ("systems_impacted", systems_impacted_str),
+            ("context", str(self.context)), # special sizing rules apply to it
+            ("subject", str(self.subject)),
+            ("description", str(self.description)),
+            ("exception_type", str(self.exception_type)),
+            ("exception_message", str(self.exception_message)),
+            ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message, max_field_len, max_traceback_lines)))
+        ]
+
+        # Function to calculate the byte size of a JSON-encoded field
+        def field_size(key, value):
+            return len(json.dumps({key: value}).encode('utf-8'))
+
+        # Function to truncate a value based on its type
+        # Function to truncate a value based on its type
+        def truncate_value(value, max_size):
+            if isinstance(value, str):
+                half_size = max_size // 2
+                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
+            return value
+
+        # Ensure no field exceeds max_field_len
+        for i, (key, value) in enumerate(fields):
+            if isinstance(value, str) and len(value) > max_field_len:
+                fields[i] = (key, truncate_value(value, max_field_len))
+
+        # Ensure total size of the dict doesn't exceed size_limit
+        total_size = sum(field_size(key, value) for key, value in fields)
+        log_dict = {}
+        truncated = False
+
+        if total_size > size_limit:
+            truncated = True
+            remaining_size = size_limit
+            remaining_fields = len(fields)
+
+            for key, value in fields:
+                if remaining_fields > 0:
+                    max_size_per_field = remaining_size // remaining_fields
+                else:
+                    max_size_per_field = 0
+
+                field_sz = field_size(key, value)
+                if field_sz > max_size_per_field:
+                    value = truncate_value(value, max_size_per_field)
+                    field_sz = field_size(key, value)
+
+                log_dict[key] = value
+                remaining_size -= field_sz
+                remaining_fields -= 1
+        else:
+            log_dict = dict(fields)
+
+        log_dict['trunc'] = truncated
+
+        return log_dict
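A hedged usage sketch of the new ContextLog class, based only on the constructor and to_dict() shown above (the pipeline name and subject strings are illustrative, not from the package):

from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog

try:
    1 / 0
except Exception as exc:
    # exception_type / exception_message / exception_traceback are derived from `e` when it is passed in.
    log = ContextLog(LogLevel.ERROR_EXCEPTION,
                     base_context="demo-pipeline",   # illustrative value
                     subject="division step",        # illustrative value
                     e=exc)
    # to_dict() truncates oversized fields: defaults are 10000 chars per field,
    # ~80% of 256 KiB for the whole dict, and 30 traceback lines.
    entry = log.to_dict()
    print(entry["level_name"], entry["trunc"])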
ipulse_shared_core_ftredge/utils_gcp.py

@@ -14,7 +14,6 @@ import traceback
 from google.cloud import error_reporting, logging as cloud_logging
 from google.api_core.exceptions import NotFound

-
 ############################################################################
 ##################### SETTING UP LOGGER ##########################


@@ -124,15 +123,22 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):



-def write_json_to_gcs(
-
-
+def write_json_to_gcs( storage_client, data, bucket_name, file_name,
+                      file_exists_if_starts_with_prefix=None, overwrite_if_exists=False, increment_if_exists=False,
+                      save_locally=False, local_path=None, max_retries=2, max_deletable_files=1, logger=None):
     """Saves data to Google Cloud Storage and optionally locally.
-
-    This function attempts to upload data to GCS.
-    retries and `save_locally` is True or `local_path` is provided, it attempts
-
-
+
+    This function attempts to upload data to GCS.
+    - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+    - It handles file name conflicts based on these rules:
+        - If `overwrite_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
+            - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
+        - If `increment_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+            - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+    -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
     """

     def log_message(message):

@@ -147,100 +153,141 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name,
         if logger:
             logger.warning(message)

-
-    success = False
-    gcs_path = None
-    local_path_final = None
-    gcs_file_overwritten = False
-    gcs_file_already_exists = False
-    gcs_file_saved_with_increment = False
-    gcs_upload_exception = None # Store potential GCS exception
-
-    # Check for conflicting options
+    # Input validation
     if overwrite_if_exists and increment_if_exists:
-        raise ValueError("
+        raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
+    if not isinstance(data, (list, dict, str)):
+        raise ValueError("Unsupported data type. Data must be a list, dict, or str.")
+    if max_deletable_files > 10:
+        raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")

+    # Prepare data
     if isinstance(data, (list, dict)):
         data_str = json.dumps(data, indent=2)
-    elif isinstance(data, str):
-        data_str = data
     else:
-
+        data_str = data

     bucket = storage_client.bucket(bucket_name)
     base_file_name, ext = os.path.splitext(file_name)
     increment = 0
+    attempts = 0
+    success = False

-
-
-
-
-
-
-
-
-
+    # GCS-related metadata
+    gcs_path = None
+    gcs_file_overwritten = False
+    gcs_file_already_exists = False
+    gcs_file_saved_with_increment = False
+    gcs_file_exists_checked_on_name = file_name
+    gcs_deleted_files=[]
+
+    # GCS upload exception
+    gcs_upload_exception = None
+
+    # Local file path
+    local_path_final = None
+
+    try:
+        # --- Overwrite Logic ---
+        if overwrite_if_exists:
+            if file_exists_if_starts_with_prefix:
+                gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+                blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                if len(blobs_to_delete) > max_deletable_files:
+                    raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+                if blobs_to_delete:
+                    log_message(f"Deleting files containing '{file_exists_if_starts_with_prefix}' for overwrite.")
+                    for blob in blobs_to_delete:
+                        blob.delete()
+                        gcs_deleted_files.append(blob.name)
+                        log_message(f"Deleted: gs://{bucket_name}/{blob.name}")
+                    gcs_file_overwritten = True
             else:
                 blob = bucket.blob(file_name)
-
-                # Check if the file exists
                 if blob.exists():
                     gcs_file_already_exists = True
                     gcs_path = f"gs://{bucket_name}/{file_name}"
-                    log_message(f"File {file_name} already exists
-
-
-
+                    log_message(f"File '{file_name}' already exists. Overwriting.")
+                    blob.delete() # Delete the existing blob
+                    gcs_deleted_files.append(blob.name)
+                    gcs_file_overwritten = True
+
+        # --- Increment Logic ---
+        elif increment_if_exists:
+            gcs_file_exists_checked_on_name = file_name # We only increment if the exact name exists
+            while bucket.blob(file_name).exists():
+                gcs_file_already_exists = True
+                increment += 1
+                file_name = f"{base_file_name}_v{increment}{ext}"
+                gcs_file_saved_with_increment = True
+                log_warning(f"File already exists. Using incremented name: {file_name}")
+
+        # --- GCS Upload ---
+        if overwrite_if_exists or increment_if_exists: # Only upload if either overwrite or increment is True
+            while attempts < max_retries and not success:
+                try:
+                    blob = bucket.blob(file_name) # Use the potentially updated file_name
+                    blob.upload_from_string(data_str, content_type='application/json')
+                    gcs_path = f"gs://{bucket_name}/{file_name}"
+                    log_message(f"Successfully saved file to GCS: {gcs_path}")
+                    success = True
+                except Exception as e:
+                    gcs_upload_exception=e
+                    attempts += 1
+                    if attempts < max_retries:
+                        log_warning(f"Attempt {attempts} to upload to GCS failed. Retrying...")
+                        time.sleep(2 ** attempts)
                     else:
-
-
-
-
-
-
-            except Exception as e:
-                gcs_upload_exception = e
-                attempts += 1
-                if attempts < max_retries:
-                    time.sleep(2 ** attempts)
-                else:
-                    log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")
+                        log_error(f"Failed to write '{file_name}' to GCS bucket '{bucket_name}' after {max_retries} attempts: {e}", exc_info=True)
+                        if save_locally or local_path:
+                            log_message(f"Attempting to save '{file_name}' locally due to GCS upload failure.")
+    except Exception as e:
+        log_error(f"Error during GCS operations: {e}", exc_info=True)
+        gcs_upload_exception = e

+    # --- Save Locally ---
+    write_out=False
     if not success or save_locally or local_path:
         try:
-            if
-
-            else:
-                local_path_final = os.path.join(local_path, file_name)
+            local_path=local_path if local_path else "/tmp"
+            local_path_final = os.path.join(local_path, file_name)

             if os.path.exists(local_path_final):
                 if increment_if_exists:
                     increment = 0
                     while os.path.exists(local_path_final):
                         increment += 1
-                        local_path_final = os.path.join(local_path, f"{base_file_name}
-
-
-
-
+                        local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+                    log_warning(f"Local file already exists. Using incremented name: {local_path_final}")
+                    write_out=True
+                elif overwrite_if_exists:
+                    write_out=True
                     log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+                else:
+                    log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+                    write_out=False
+            else:
+                write_out=True

-            if
+            if write_out:
                 with open(local_path_final, 'w', encoding='utf-8') as f:
                     f.write(data_str)
-
-
+                log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+
         except Exception as local_e:
             log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)

     if gcs_upload_exception is not None:
         raise gcs_upload_exception # Propagate without nesting

+    # --- Return Metadata ---
     return {
-        "gcs_path": gcs_path,
-        "local_path": local_path_final,
+        "gcs_path": gcs_path if success else None, # Only set gcs_path if upload succeeded
+        "local_path": local_path_final if write_out else None, # Only set local_path if saved locally
         "gcs_file_already_exists": gcs_file_already_exists,
+        "gcs_file_exists_checked_on_name":gcs_file_exists_checked_on_name ,
         "gcs_file_overwritten": gcs_file_overwritten,
+        "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
         "gcs_file_saved_with_increment": gcs_file_saved_with_increment
     }

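A hedged usage sketch of the reworked write_json_to_gcs() signature; bucket and file names are placeholders, and only arguments visible in the diff above are used:

from google.cloud import storage
from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs

client = storage.Client()
result = write_json_to_gcs(
    storage_client=client,
    data={"status": "ok"},            # list, dict or str are accepted
    bucket_name="my-bucket",          # placeholder
    file_name="reports/daily.json",   # placeholder
    overwrite_if_exists=True,         # must not be combined with increment_if_exists
)
print(result["gcs_path"], result["gcs_file_overwritten"])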
ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py

@@ -0,0 +1,201 @@
+# pylint: disable=missing-module-docstring
+# pylint: disable=missing-function-docstring
+# pylint: disable=missing-class-docstring
+# pylint: disable=broad-exception-caught
+# pylint: disable=line-too-long
+# pylint: disable=unused-variable
+import json
+import os
+import time
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
+from .utils_custom_logs import ContextLog
+from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
+
+############################################################################
+##################### SETTING UP LOGGER ##########################
+
+####DEPCREACATED: THIS APPROACH WAS GOOD, BUT ERRORS WERE NOT REPORTED TO ERROR REPORTING
+# logging.basicConfig(level=logging.INFO)
+# logging_client = google.cloud.logging.Client()
+# logging_client.setup_logging()
+###################################
+
+
+##### THIS APPROACH IS USED NOW ########
+ENV = os.getenv('ENV', 'LOCAL').strip("'")
+
+
+def write_json_to_gcs_in_pipeline( pipelinemon:Pipelinemon, storage_client, data, bucket_name, file_name,
+                                  file_exists_if_starts_with_prefix:str=None, overwrite_if_exists:bool=False, increment_if_exists:bool=False,
+                                  save_locally:bool=False, local_path=None, max_retries:int=2, max_deletable_files:int=1):
+    """Saves data to Google Cloud Storage and optionally locally.
+
+    This function attempts to upload data to GCS.
+    - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+    - It handles file name conflicts based on these rules:
+        - If `overwrite_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
+            - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
+        - If `increment_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+            - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+    -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
+    """
+
+    with pipelinemon.context("write_json_to_gcs_in_pipeline"):
+        # GCS upload exception
+        gcs_upload_error = False
+        # Input validation
+        if overwrite_if_exists and increment_if_exists:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="Param validation", description="Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously."))
+            gcs_upload_error=True
+        if not isinstance(data, (list, dict, str)):
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM,subject="Data validation", description="Unsupported data type. Data must be a list, dict, or str."))
+            gcs_upload_error=True
+        if max_deletable_files > 10:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM,subject="max_deletable_files", description="max_deletable_files should be less than 10 for safety. For more use another method."))
+            gcs_upload_error=True
+
+        # Prepare data
+        if isinstance(data, (list, dict)):
+            data_str = json.dumps(data, indent=2)
+        else:
+            data_str = data
+
+        bucket = storage_client.bucket(bucket_name)
+        base_file_name, ext = os.path.splitext(file_name)
+        increment = 0
+        attempts = 0
+        success = False
+
+        # GCS-related metadata
+        gcs_path = None
+        gcs_file_overwritten = False
+        gcs_file_already_exists = False
+        gcs_file_saved_with_increment = False
+        gcs_file_exists_checked_on_name = file_name
+        gcs_deleted_files=[]
+
+        # Local file path
+        local_path_final = None
+
+        try:
+            upload_allowed = True
+            # --- Overwrite Logic ---
+            if overwrite_if_exists:
+                with pipelinemon.context("overwriting"):
+                    if file_exists_if_starts_with_prefix:
+                        gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+                        blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                        if len(blobs_to_delete) > max_deletable_files:
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
+                            #### Ensuring to quit the operation if too many files are found
+                            raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+                        if blobs_to_delete:
+                            gcs_file_already_exists = True
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
+                            for blob in blobs_to_delete:
+                                gcs_path_del = f"gs://{bucket_name}/{blob.name}"
+                                pipelinemon.add_system_impacted(f"delete: gcs_bucket_file: {gcs_path_del}")
+                                blob.delete()
+                                pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject= gcs_path_del, description="file deleted from GCS as part of overwrite, matched with prefix"))
+                                gcs_deleted_files.append(blob.name)
+                            gcs_file_overwritten = True
+                    else:
+                        blob = bucket.blob(file_name)
+                        if blob.exists():
+                            gcs_file_already_exists = True
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}"))
+                            gcs_path_del = f"gs://{bucket_name}/{file_name}"
+                            pipelinemon.add_system_impacted(f"delete: gcs_bucket_file: {gcs_path_del}")
+                            blob.delete() # Delete the existing blob
+                            pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject= gcs_path_del, description="file deleted from GCS as part of overwrite, matched with exact name"))
+                            gcs_deleted_files.append(blob.name)
+                            gcs_file_overwritten = True
+
+            # --- Increment Logic ---
+            elif increment_if_exists:
+                with pipelinemon.context("incrementing"):
+                    gcs_file_exists_checked_on_name = file_name # We only increment if the exact name exists
+                    while bucket.blob(file_name).exists():
+                        gcs_file_already_exists = True
+                        increment += 1
+                        file_name = f"{base_file_name}_v{increment}{ext}"
+                        gcs_file_saved_with_increment = True
+                    if increment>0:
+                        pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"File saved with incremented version in {bucket_name}"))
+
+            # --- Check for Conflicts (Including Prefix) ---
+            else:
+                if file_exists_if_starts_with_prefix:
+                    blobs_matched = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                    if blobs_matched:
+                        upload_allowed = False
+                        pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_matched)} existing files in bucket {bucket_name}."))
+                elif bucket.blob(file_name).exists():
+                    pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}."))
+                    upload_allowed = False
+
+            # --- GCS Upload ---
+            if overwrite_if_exists or increment_if_exists or upload_allowed:
+                with pipelinemon.context("uploading"):
+                    while attempts < max_retries and not success:
+                        try:
+                            gcs_path = f"gs://{bucket_name}/{file_name}"
+                            blob = bucket.blob(file_name) # Use the potentially updated file_name
+                            pipelinemon.add_system_impacted(f"upload: gcs_bucket_file: {gcs_path}")
+                            blob.upload_from_string(data_str, content_type='application/json')
+                            pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE, subject= gcs_path, description="file uploaded to GCS"))
+                            success = True
+                        except Exception as e:
+                            attempts += 1
+                            if attempts < max_retries:
+                                time.sleep(2 ** attempts)
+                            else:
+                                pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e))
+                                gcs_upload_error = True
+
+        except Exception as e:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e))
+            gcs_upload_error = True
+
+        # --- Save Locally ---
+        ###### Not logging the local save operation in pipeline, as it is not a critical operation
+        write_out=False
+        if not success or gcs_upload_error or save_locally or local_path:
+            try:
+                local_path=local_path if local_path else "/tmp"
+                local_path_final = os.path.join(local_path, file_name)
+
+                if os.path.exists(local_path_final):
+                    if increment_if_exists:
+                        increment = 0
+                        while os.path.exists(local_path_final):
+                            increment += 1
+                            local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+                        write_out=True
+                    elif overwrite_if_exists:
+                        write_out=True
+                    else:
+                        write_out=False
+                else:
+                    write_out=True
+
+                if write_out:
+                    with open(local_path_final, 'w', encoding='utf-8') as f:
+                        f.write(data_str)
+
+            except Exception as local_e:
+                pipelinemon.add_log(ContextLog(LogLevel.WARNING_FIX_RECOMMENDED, e=local_e, description="Failed to write to local file"))
+
+        # --- Return Metadata ---
+        return {
+            "gcs_path": gcs_path if success else None, # Only set gcs_path if upload succeeded
+            "local_path": local_path_final if write_out else None, # Only set local_path if saved locally
+            "gcs_file_already_exists": gcs_file_already_exists,
+            "gcs_file_exists_checked_on_name":gcs_file_exists_checked_on_name ,
+            "gcs_file_overwritten": gcs_file_overwritten,
+            "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
+            "gcs_file_saved_with_increment": gcs_file_saved_with_increment
+        }
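A hedged sketch of the pipeline-aware variant; unlike write_json_to_gcs(), failures are recorded on the Pipelinemon monitor (gcs_upload_error, ContextLog entries) rather than re-raised. Bucket, file and context names are placeholders:

from google.cloud import storage
from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
from ipulse_shared_core_ftredge.utils_gcp_for_pipelines import write_json_to_gcs_in_pipeline

pipelinemon = Pipelinemon(base_context="daily-import")   # target_logs defaults to TargetLogs.MIXED
client = storage.Client()

result = write_json_to_gcs_in_pipeline(
    pipelinemon,
    storage_client=client,
    data=[{"ticker": "ABC", "close": 1.23}],   # placeholder payload
    bucket_name="my-bucket",                   # placeholder
    file_name="prices/2024-07-01.json",        # placeholder
    increment_if_exists=True,
)

print(result["gcs_path"])
print(pipelinemon.count_logs_by_level(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE))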
ipulse_shared_core_ftredge/{utils_common.py → utils_pipelinemon.py}
RENAMED

@@ -4,208 +4,17 @@
 # pylint: disable=line-too-long
 # pylint: disable=missing-class-docstring
 # pylint: disable=broad-exception-caught
-import traceback
 import json
 import uuid
 from datetime import datetime, timezone
 from contextlib import contextmanager
 from typing import List
 from google.cloud import logging as cloudlogging
-from ipulse_shared_core_ftredge.enums.enums_common_utils import TargetLogs, LogLevel
+from ipulse_shared_core_ftredge.enums.enums_common_utils import TargetLogs, LogLevel
 from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs
+from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog


-# ["data_import","data_quality", "data_processing","data_general","data_persistance","metadata_quality", "metadata_processing", "metadata_persistance","metadata_general"]
-
-class ContextLog:
-    MAX_FIELD_LINES = 26 # Define the maximum number of traceback lines to include
-    MAX_FIELD_LENGTH=10000
-
-    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
-                 context: str = None, description: str = None,
-                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
-                 log_status: LogStatus = LogStatus.OPEN, subject: str = None, systems_impacted: List[str] = None
-                 ):
-        if e is not None:
-            e_type = type(e).__name__ if e_type is None else e_type
-            e_message = str(e) if e_message is None else e_message
-            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
-        elif e_traceback is None and (e_type or e_message):
-            e_traceback = traceback.format_exc()
-
-        self.level = level
-        self.subject = subject
-        self.description = description
-        self._base_context = base_context
-        self._context = context
-        self._systems_impacted = systems_impacted if systems_impacted else []
-        self.collector_id = collector_id
-        self.exception_type = e_type
-        self.exception_message = e_message
-        self.exception_traceback = e_traceback
-        self.log_status = log_status
-        self.timestamp = datetime.now(timezone.utc).isoformat()
-
-    @property
-    def base_context(self):
-        return self._base_context
-
-    @base_context.setter
-    def base_context(self, value):
-        self._base_context = value
-
-    @property
-    def context(self):
-        return self._context
-
-    @context.setter
-    def context(self, value):
-        self._context = value
-
-    @property
-    def systems_impacted(self):
-        return self._systems_impacted
-
-    @systems_impacted.setter
-    def systems_impacted(self, list_of_si: List[str]):
-        self._systems_impacted = list_of_si
-
-    def add_system_impacted(self, system_impacted: str):
-        if self._systems_impacted is None:
-            self._systems_impacted = []
-        self._systems_impacted.append(system_impacted)
-
-    def remove_system_impacted(self, system_impacted: str):
-        if self._systems_impacted is not None:
-            self._systems_impacted.remove(system_impacted)
-
-    def clear_systems_impacted(self):
-        self._systems_impacted = []
-
-    def _format_traceback(self, e_traceback, e_message):
-        if not e_traceback or e_traceback == 'None\n':
-            return None
-
-        traceback_lines = e_traceback.splitlines()
-
-        # Check if the traceback is within the limits
-        if len(traceback_lines) <= self.MAX_FIELD_LINES and len(e_traceback) <= self.MAX_FIELD_LENGTH:
-            return e_traceback
-
-        # Remove lines that are part of the exception message if they are present in traceback
-        message_lines = e_message.splitlines() if e_message else []
-        if message_lines:
-            for message_line in message_lines:
-                if message_line in traceback_lines:
-                    traceback_lines.remove(message_line)
-
-        # Filter out lines from third-party libraries (like site-packages)
-        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
-
-        # If filtering results in too few lines, revert to original traceback
-        if len(filtered_lines) < 2:
-            filtered_lines = traceback_lines
-
-        # Combine standalone bracket lines with previous or next lines
-        combined_lines = []
-        for line in filtered_lines:
-            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
-                combined_lines[-1] += " " + line.strip()
-            else:
-                combined_lines.append(line)
-
-        # Ensure the number of lines doesn't exceed MAX_TRACEBACK_LINES
-        if len(combined_lines) > self.MAX_FIELD_LINES:
-            keep_lines_start = min(self.MAX_FIELD_LINES // 2, len(combined_lines))
-            keep_lines_end = min(self.MAX_FIELD_LINES // 2, len(combined_lines) - keep_lines_start)
-            combined_lines = (
-                combined_lines[:keep_lines_start] +
-                ['... (truncated) ...'] +
-                combined_lines[-keep_lines_end:]
-            )
-
-        formatted_traceback = '\n'.join(combined_lines)
-
-        # Ensure the total length doesn't exceed MAX_TRACEBACK_LENGTH
-        if len(formatted_traceback) > self.MAX_FIELD_LENGTH:
-            truncated_length = self.MAX_FIELD_LENGTH - len('... (truncated) ...')
-            half_truncated_length = truncated_length // 2
-            formatted_traceback = (
-                formatted_traceback[:half_truncated_length] +
-                '\n... (truncated) ...\n' +
-                formatted_traceback[-half_truncated_length:]
-            )
-        return formatted_traceback
-
-    def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80):
-        size_limit = int(size_limit) # Ensure size_limit is an integer
-
-        # Unified list of all fields
-        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
-        fields = [
-            ("log_status", str(self.log_status.name)),
-            ("level_code", self.level.value),
-            ("level_name", str(self.level.name)),
-            ("base_context", str(self.base_context)),
-            ("timestamp", str(self.timestamp)),
-            ("collector_id", str(self.collector_id)),
-            ("systems_impacted", systems_impacted_str),
-            ("context", str(self.context)), # special sizing rules apply to it
-            ("subject", str(self.subject)),
-            ("description", str(self.description)),
-            ("exception_type", str(self.exception_type)),
-            ("exception_message", str(self.exception_message)),
-            ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message)))
-        ]
-
-        # Function to calculate the byte size of a JSON-encoded field
-        def field_size(key, value):
-            return len(json.dumps({key: value}).encode('utf-8'))
-
-        # Function to truncate a value based on its type
-        # Function to truncate a value based on its type
-        def truncate_value(value, max_size):
-            if isinstance(value, str):
-                half_size = max_size // 2
-                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
-            return value
-
-        # Ensure no field exceeds max_field_len
-        for i, (key, value) in enumerate(fields):
-            if isinstance(value, str) and len(value) > max_field_len:
-                fields[i] = (key, truncate_value(value, max_field_len))
-
-        # Ensure total size of the dict doesn't exceed size_limit
-        total_size = sum(field_size(key, value) for key, value in fields)
-        log_dict = {}
-        truncated = False
-
-        if total_size > size_limit:
-            truncated = True
-            remaining_size = size_limit
-            remaining_fields = len(fields)
-
-            for key, value in fields:
-                if remaining_fields > 0:
-                    max_size_per_field = remaining_size // remaining_fields
-                else:
-                    max_size_per_field = 0
-
-                field_sz = field_size(key, value)
-                if field_sz > max_size_per_field:
-                    value = truncate_value(value, max_size_per_field)
-                    field_sz = field_size(key, value)
-
-                log_dict[key] = value
-                remaining_size -= field_sz
-                remaining_fields -= 1
-        else:
-            log_dict = dict(fields)
-
-        log_dict['trunc'] = truncated
-
-        return log_dict
-
 class Pipelinemon:
     ERROR_START_CODE = LogLevel.ERROR.value
     WARNING_START_CODE = LogLevel.WARNING.value

@@ -213,7 +22,7 @@ class Pipelinemon:
     SUCCESS_START_CODE = LogLevel.SUCCESS.value
     INFO_START_CODE = LogLevel.INFO.value

-    def __init__(self, base_context: str, target_logs: TargetLogs = TargetLogs.MIXED, logger_name=None, max_log_field_size:int =10000, max_log_dict_size:float=256 * 1024 * 0.80):
+    def __init__(self, base_context: str, target_logs: TargetLogs = TargetLogs.MIXED, logger_name=None, max_log_field_size:int =10000, max_log_dict_size:float=256 * 1024 * 0.80, max_log_traceback_lines:int = 30):
         self._id = str(uuid.uuid4())
         self._logs = []
         self._early_stop = False

@@ -223,13 +32,14 @@ class Pipelinemon:
         self._successes_count = 0
         self._infos_count = 0
         self._systems_impacted = []
-        self.
+        self._by_level_counts = {level.name: 0 for level in LogLevel}
         self._base_context = base_context
         self._context_stack = []
         self._target_logs = target_logs.value
         self._logger = self._initialize_logger(logger_name)
         self._max_log_field_size = max_log_field_size
         self._max_log_dict_size = max_log_dict_size
+        self._max_log_traceback_lines = max_log_traceback_lines

     def _initialize_logger(self, logger_name):
         if logger_name:

@@ -280,6 +90,14 @@ class Pipelinemon:
     def clear_systems_impacted(self):
         self._systems_impacted = []

+    @property
+    def max_log_dict_size(self):
+        return self._max_log_dict_size
+
+    @max_log_dict_size.setter
+    def max_log_dict_size(self, value):
+        self._max_log_dict_size = value
+
     @property
     def max_log_field_size(self):
         return self._max_log_field_size

@@ -289,12 +107,12 @@ class Pipelinemon:
         self._max_log_field_size = value

     @property
-    def
-        return self.
+    def max_log_traceback_lines(self):
+        return self._max_log_traceback_lines

-    @
-    def
-        self.
+    @max_log_traceback_lines.setter
+    def max_log_traceback_lines(self, value):
+        self._max_log_traceback_lines = value

     @property
     def early_stop(self):

@@ -313,7 +131,7 @@ class Pipelinemon:
         self._early_stop = False


-    def add_log(self, log: ContextLog
+    def add_log(self, log: ContextLog ):
         if (self._target_logs == TargetLogs.SUCCESSES and log.level >=self.NOTICE_START_CODE) or \
            (self._target_logs == TargetLogs.WARNINGS_AND_ERRORS and log.level.value < self.WARNING_START_CODE):
             raise ValueError(f"Invalid log level {log.level.name} for Pipelinemon target logs setup: {self._target_logs}")

@@ -321,7 +139,7 @@ class Pipelinemon:
         log.context = self.current_context
         log.collector_id = self.id
         log.systems_impacted = self.systems_impacted
-        log_dict = log.to_dict(max_field_len=self.max_log_field_size, size_limit=self.max_log_dict_size)
+        log_dict = log.to_dict(max_field_len=self.max_log_field_size, size_limit=self.max_log_dict_size, max_traceback_lines=self.max_log_traceback_lines)
         self._logs.append(log_dict)
         self._update_counts(log_dict)

@@ -346,7 +164,7 @@ class Pipelinemon:
         self._notices_count = 0
         self._successes_count = 0
         self._infos_count = 0
-        self.
+        self._by_level_counts = {level.name: 0 for level in LogLevel}

     def clear_logs(self):
         self._logs = []

@@ -397,7 +215,7 @@ class Pipelinemon:
         return len(self._logs)

     def count_logs_by_level(self, level: LogLevel):
-        return self.
+        return self._by_level_counts.get(level.name, 0)

     def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
         return sum(

@@ -528,7 +346,7 @@ class Pipelinemon:
                 self._successes_count -= 1
             elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
                 self._infos_count -= 1
-            self.
+            self._by_level_counts[level_name] -= 1
         else:
             if level_code >= self.ERROR_START_CODE:
                 self._errors_count += 1

@@ -540,4 +358,5 @@ class Pipelinemon:
                 self._successes_count += 1
             elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
                 self._infos_count += 1
-            self.
+            self._by_level_counts[level_name] += 1
+
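The renamed module keeps Pipelinemon but moves ContextLog out to utils_custom_logs.py, and the new _by_level_counts dict (keyed by LogLevel.name) backs count_logs_by_level(). A hedged sketch, with illustrative context and subject strings:

from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon

pipelinemon = Pipelinemon(base_context="demo", max_log_traceback_lines=10)

with pipelinemon.context("validation"):   # same context manager used throughout utils_gcp_for_pipelines.py
    pipelinemon.add_log(ContextLog(LogLevel.INFO_LOCAL_PERSISTNACE_COMPLETE,
                                   subject="checkpoint.json",       # illustrative
                                   description="written to /tmp"))  # illustrative

# The per-level counter is keyed by LogLevel.name.
print(pipelinemon.count_logs_by_level(LogLevel.INFO_LOCAL_PERSISTNACE_COMPLETE))  # 1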
ipulse_shared_core_ftredge/utils_templates_and_schemas.py

@@ -6,7 +6,7 @@
 import datetime
 from google.cloud import bigquery
 from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
-from ipulse_shared_core_ftredge.
+from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog


 def create_bigquery_schema_from_json(json_schema):

@@ -59,16 +59,14 @@ def check_format_against_schema_template(data_to_check, schema, dt_ts_to_str=Tru
                 checked_data[field_name] = value

             elif mode == "REQUIRED":
-
+                warnings_or_error.append(ContextLog(level=LogLevel.WARNING,
                                         subject=field_name,
-                                        description=f"Required field '{field_name}' is missing in the updates.")
-                warnings_or_error.append(warning)
+                                        description=f"Required field '{field_name}' is missing in the updates."))

     except Exception as e:
-
+        warnings_or_error.append(ContextLog(level=LogLevel.ERROR_EXCEPTION,
                                 subject=data_to_check,
-                                description=f"An error occurred during update check: {str(e)}")
-        warnings_or_error.append(error_log)
+                                description=f"An error occurred during update check: {str(e)}"))

     return checked_data, warnings_or_error


@@ -144,11 +142,11 @@ def handle_type_conversion(field_type, field_name, value):
         try:
             return float(value), None
         except ValueError:
-            return None, ContextLog(level=LogLevel.WARNING_FIX_REQUIRED,
+            return None, ContextLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                     subject=field_name,
                                     description=f"Expected FLOAT, but got {value} of type {type(value).__name__}.")
     if field_type == "BOOL" and not isinstance(value, bool):
-        return bool(value), ContextLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
+        return bool(value), ContextLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                                        subject=field_name,
                                        description=f"Expected BOOL, but got {value}. Converting as {bool(value)}.")

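check_format_against_schema_template() now appends ContextLog objects directly to its warnings list. A hedged consumption sketch; the schema and payload shapes below are assumptions for illustration only, not taken from the package:

from ipulse_shared_core_ftredge.utils_templates_and_schemas import check_format_against_schema_template

schema = [{"name": "price", "type": "FLOAT", "mode": "REQUIRED"}]   # assumed schema-template shape
checked_data, warnings_or_error = check_format_against_schema_template(
    data_to_check={"price": "12.5"},    # illustrative payload
    schema=schema,
)
for issue in warnings_or_error:          # each entry is a ContextLog
    print(issue.level.name, issue.subject, issue.description)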
{ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipulse_shared_core_ftredge
-Version: 2.56
+Version: 2.57
 Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
 Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
 Author: Russlan Ramdowar
{ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/RECORD
RENAMED

@@ -1,9 +1,11 @@
-ipulse_shared_core_ftredge/__init__.py,sha256=
-ipulse_shared_core_ftredge/
-ipulse_shared_core_ftredge/utils_gcp.py,sha256=
-ipulse_shared_core_ftredge/
-ipulse_shared_core_ftredge/
-ipulse_shared_core_ftredge/
+ipulse_shared_core_ftredge/__init__.py,sha256=QHWbeWYQgs314BKxQDzG7_P2mdnryC1Zqc3xvw8OfnM,987
+ipulse_shared_core_ftredge/utils_custom_logs.py,sha256=OJ6STEL7GER-s3x03cKcsYtZ4jKhndPOAJXT1oiAY2M,8183
+ipulse_shared_core_ftredge/utils_gcp.py,sha256=xI0iOxSjGQ75ECFlmPirOB8Cz8qjqYNabfRLEg0cE8o,14142
+ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py,sha256=rO7KD8JUmit0NwKoepKmBX8MlcQ8GzDhUfAyp6OWAw0,11816
+ipulse_shared_core_ftredge/utils_pipelinemon.py,sha256=KRDJW0fqF6sfqsxL8YKH378qpXnOzpZ3C53VAkMoT10,15011
+ipulse_shared_core_ftredge/utils_templates_and_schemas.py,sha256=vi8hBU95_N-Znfs-FClBNCFMKvJYBOdBwZm2pgBZ7BQ,7433
+ipulse_shared_core_ftredge/enums/__init__.py,sha256=MRmvcrFhbGZbww759KCledFe6BpOArmnCQ18mb_F8Fg,932
+ipulse_shared_core_ftredge/enums/enums_common_utils.py,sha256=VdOipu5YNVO4TcQMXy7UkCjSnBfUzutEkgfFFsnsp7k,6922
 ipulse_shared_core_ftredge/enums/enums_data_eng.py,sha256=7w3Jjmw84Wq22Bb5Qs09Z82Bdf-j8nhRiQJfw60_g80,1903
 ipulse_shared_core_ftredge/enums/enums_module_fincore.py,sha256=W1TkSLu3ryLf_aif2VcKsFznWz0igeMUR_buoGEG6w8,1406
 ipulse_shared_core_ftredge/enums/enums_modules.py,sha256=AyXUoNmR75DZLaEHi3snV6LngR25LeZRqzrLDaAupbY,1244

@@ -13,13 +15,11 @@ ipulse_shared_core_ftredge/models/organisation.py,sha256=4f1ATEWh5WT-CDJBLEZUhUw
 ipulse_shared_core_ftredge/models/pulse_enums.py,sha256=YJhtvoX6Dk3_SyJUD8vVDSRIzWy5n0I0AOwe19fmDT8,4851
 ipulse_shared_core_ftredge/models/resource_catalog_item.py,sha256=PxeRvI8fe8KOiHr6NW2Jz_yocyLId09PW8QyTZxjHAA,9809
 ipulse_shared_core_ftredge/models/user_auth.py,sha256=35HNN7ZW4ZELCqaJrAtoSsVLFAZ1KL2S_VmuzbcEMm4,119
-ipulse_shared_core_ftredge/models/user_profile.py,sha256=
+ipulse_shared_core_ftredge/models/user_profile.py,sha256=D3BB9D6XEv7IVZgsURgf0hWmUZW5rms3uiBXS0ZGLeE,1927
 ipulse_shared_core_ftredge/models/user_profile_update.py,sha256=oKK0XsQDKkgDvjFPhX2XlqEqlKLBQ4AkvPHXEuZbFMY,1712
 ipulse_shared_core_ftredge/models/user_status.py,sha256=8TyRd8tBK9_xb0MPKbI5pn9-lX7ovKbeiuWYYPtIOiw,3202
-ipulse_shared_core_ftredge/
-ipulse_shared_core_ftredge/
-ipulse_shared_core_ftredge-2.
-ipulse_shared_core_ftredge-2.
-ipulse_shared_core_ftredge-2.
-ipulse_shared_core_ftredge-2.56.dist-info/top_level.txt,sha256=8sgYrptpexkA_6_HyGvho26cVFH9kmtGvaK8tHbsGHk,27
-ipulse_shared_core_ftredge-2.56.dist-info/RECORD,,
+ipulse_shared_core_ftredge-2.57.dist-info/LICENCE,sha256=YBtYAXNqCCOo9Mr2hfkbSPAM9CeAr2j1VZBSwQTrNwE,1060
+ipulse_shared_core_ftredge-2.57.dist-info/METADATA,sha256=OFCR8U5m0OwnbR9hyIqSwOWW40QmwDfz1WqnWSVnBxg,561
+ipulse_shared_core_ftredge-2.57.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ipulse_shared_core_ftredge-2.57.dist-info/top_level.txt,sha256=8sgYrptpexkA_6_HyGvho26cVFH9kmtGvaK8tHbsGHk,27
+ipulse_shared_core_ftredge-2.57.dist-info/RECORD,,
File without changes
ipulse_shared_core_ftredge/tests/test.py

@@ -1,17 +0,0 @@
-from ipulse_shared_core_ftredge import UserProfile, UserAuth
-import datetime
-import logging
-logging.basicConfig(level=logging.INFO)
-ex=UserProfile(uid="uid",
-               organizations_uids={"20231220retailcustomer_coreorgn"},
-               email="email@gmail.com",
-               creat_date= datetime.datetime.now(datetime.UTC),
-               creat_by_user='creat_by_user',
-               updt_date=datetime.datetime.now(datetime.UTC),
-               updt_by_user="subscriber_cf_persistUserAuthToUserProfile",
-               approved=True,
-               provider_id='provider_id',
-               username='username')
-
-
-logging.info(ex.model_dump(exclude_unset=True))
{ipulse_shared_core_ftredge-2.56.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/LICENCE
RENAMED
File without changes

File without changes