ipulse-shared-core-ftredge 2.55-py3-none-any.whl → 2.57-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ipulse-shared-core-ftredge has been flagged as potentially problematic.
- ipulse_shared_core_ftredge/__init__.py +10 -5
- ipulse_shared_core_ftredge/enums/__init__.py +2 -0
- ipulse_shared_core_ftredge/enums/enums_common_utils.py +48 -17
- ipulse_shared_core_ftredge/models/user_profile.py +3 -3
- ipulse_shared_core_ftredge/utils_custom_logs.py +201 -0
- ipulse_shared_core_ftredge/utils_gcp.py +117 -73
- ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py +201 -0
- ipulse_shared_core_ftredge/{utils_common.py → utils_pipelinemon.py} +85 -205
- ipulse_shared_core_ftredge/utils_templates_and_schemas.py +7 -9
- {ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/METADATA +1 -1
- {ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/RECORD +14 -14
- {ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/WHEEL +1 -1
- ipulse_shared_core_ftredge/tests/__init__.py +0 -0
- ipulse_shared_core_ftredge/tests/test.py +0 -17
- {ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/top_level.txt +0 -0
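Note on the refactor: utils_common.py has been renamed to utils_pipelinemon.py, the ContextLog class has been moved out into the new utils_custom_logs.py module, and a new pipeline-aware GCS writer has been added. As a rough orientation for downstream callers, the import paths implied by the diffs below would look like the following sketch, assembled from the import statements visible in this diff (whether ipulse_shared_core_ftredge/__init__.py also re-exports these names is not shown here):

# Sketch of the 2.57 import paths, taken from the import lines visible in this diff.
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon          # previously defined in utils_common.py
from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog           # ContextLog now lives in its own module
from ipulse_shared_core_ftredge.utils_gcp_for_pipelines import write_json_to_gcs_in_pipeline  # new in 2.57 (module name inferred from the file list)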
ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py
ADDED
@@ -0,0 +1,201 @@
+# pylint: disable=missing-module-docstring
+# pylint: disable=missing-function-docstring
+# pylint: disable=missing-class-docstring
+# pylint: disable=broad-exception-caught
+# pylint: disable=line-too-long
+# pylint: disable=unused-variable
+import json
+import os
+import time
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
+from .utils_custom_logs import ContextLog
+from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
+
+############################################################################
+##################### SETTING UP LOGGER ##########################
+
+#### DEPRECATED: THIS APPROACH WAS GOOD, BUT ERRORS WERE NOT REPORTED TO ERROR REPORTING
+# logging.basicConfig(level=logging.INFO)
+# logging_client = google.cloud.logging.Client()
+# logging_client.setup_logging()
+###################################
+
+
+##### THIS APPROACH IS USED NOW ########
+ENV = os.getenv('ENV', 'LOCAL').strip("'")
+
+
+def write_json_to_gcs_in_pipeline(pipelinemon: Pipelinemon, storage_client, data, bucket_name, file_name,
+                                  file_exists_if_starts_with_prefix: str = None, overwrite_if_exists: bool = False, increment_if_exists: bool = False,
+                                  save_locally: bool = False, local_path=None, max_retries: int = 2, max_deletable_files: int = 1):
+    """Saves data to Google Cloud Storage and optionally locally.
+
+    This function attempts to upload data to GCS.
+    - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+    - It handles file name conflicts based on these rules:
+        - If `overwrite_if_exists` is True:
+            - If `file_exists_if_starts_with_prefix` is provided, ANY existing file matching the prefix is deleted, and the new file is saved with the provided `file_name`.
+            - If `file_exists_if_starts_with_prefix` is None, and a file with the exact `file_name` exists, it's overwritten.
+        - If `increment_if_exists` is True:
+            - If `file_exists_if_starts_with_prefix` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+            - If `file_exists_if_starts_with_prefix` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+    - If both `overwrite_if_exists` and `increment_if_exists` are provided as True, an exception will be raised.
+    """
+
+    with pipelinemon.context("write_json_to_gcs_in_pipeline"):
+        # GCS upload exception
+        gcs_upload_error = False
+        # Input validation
+        if overwrite_if_exists and increment_if_exists:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="Param validation", description="Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously."))
+            gcs_upload_error = True
+        if not isinstance(data, (list, dict, str)):
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="Data validation", description="Unsupported data type. Data must be a list, dict, or str."))
+            gcs_upload_error = True
+        if max_deletable_files > 10:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="max_deletable_files", description="max_deletable_files should be less than 10 for safety. For more use another method."))
+            gcs_upload_error = True
+
+        # Prepare data
+        if isinstance(data, (list, dict)):
+            data_str = json.dumps(data, indent=2)
+        else:
+            data_str = data
+
+        bucket = storage_client.bucket(bucket_name)
+        base_file_name, ext = os.path.splitext(file_name)
+        increment = 0
+        attempts = 0
+        success = False
+
+        # GCS-related metadata
+        gcs_path = None
+        gcs_file_overwritten = False
+        gcs_file_already_exists = False
+        gcs_file_saved_with_increment = False
+        gcs_file_exists_checked_on_name = file_name
+        gcs_deleted_files = []
+
+        # Local file path
+        local_path_final = None
+
+        try:
+            upload_allowed = True
+            # --- Overwrite Logic ---
+            if overwrite_if_exists:
+                with pipelinemon.context("overwriting"):
+                    if file_exists_if_starts_with_prefix:
+                        gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+                        blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                        if len(blobs_to_delete) > max_deletable_files:
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
+                            #### Ensuring to quit the operation if too many files are found
+                            raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+                        if blobs_to_delete:
+                            gcs_file_already_exists = True
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
+                            for blob in blobs_to_delete:
+                                gcs_path_del = f"gs://{bucket_name}/{blob.name}"
+                                pipelinemon.add_system_impacted(f"delete: gcs_bucket_file: {gcs_path_del}")
+                                blob.delete()
+                                pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject=gcs_path_del, description="file deleted from GCS as part of overwrite, matched with prefix"))
+                                gcs_deleted_files.append(blob.name)
+                            gcs_file_overwritten = True
+                    else:
+                        blob = bucket.blob(file_name)
+                        if blob.exists():
+                            gcs_file_already_exists = True
+                            pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}"))
+                            gcs_path_del = f"gs://{bucket_name}/{file_name}"
+                            pipelinemon.add_system_impacted(f"delete: gcs_bucket_file: {gcs_path_del}")
+                            blob.delete()  # Delete the existing blob
+                            pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject=gcs_path_del, description="file deleted from GCS as part of overwrite, matched with exact name"))
+                            gcs_deleted_files.append(blob.name)
+                            gcs_file_overwritten = True
+
+            # --- Increment Logic ---
+            elif increment_if_exists:
+                with pipelinemon.context("incrementing"):
+                    gcs_file_exists_checked_on_name = file_name  # We only increment if the exact name exists
+                    while bucket.blob(file_name).exists():
+                        gcs_file_already_exists = True
+                        increment += 1
+                        file_name = f"{base_file_name}_v{increment}{ext}"
+                        gcs_file_saved_with_increment = True
+                    if increment > 0:
+                        pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"File saved with incremented version in {bucket_name}"))
+
+            # --- Check for Conflicts (Including Prefix) ---
+            else:
+                if file_exists_if_starts_with_prefix:
+                    blobs_matched = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                    if blobs_matched:
+                        upload_allowed = False
+                        pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_matched)} existing files in bucket {bucket_name}."))
+                elif bucket.blob(file_name).exists():
+                    pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}."))
+                    upload_allowed = False
+
+            # --- GCS Upload ---
+            if overwrite_if_exists or increment_if_exists or upload_allowed:
+                with pipelinemon.context("uploading"):
+                    while attempts < max_retries and not success:
+                        try:
+                            gcs_path = f"gs://{bucket_name}/{file_name}"
+                            blob = bucket.blob(file_name)  # Use the potentially updated file_name
+                            pipelinemon.add_system_impacted(f"upload: gcs_bucket_file: {gcs_path}")
+                            blob.upload_from_string(data_str, content_type='application/json')
+                            pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE, subject=gcs_path, description="file uploaded to GCS"))
+                            success = True
+                        except Exception as e:
+                            attempts += 1
+                            if attempts < max_retries:
+                                time.sleep(2 ** attempts)
+                            else:
+                                pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e))
+                                gcs_upload_error = True
+
+        except Exception as e:
+            pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e))
+            gcs_upload_error = True
+
+        # --- Save Locally ---
+        ###### Not logging the local save operation in pipeline, as it is not a critical operation
+        write_out = False
+        if not success or gcs_upload_error or save_locally or local_path:
+            try:
+                local_path = local_path if local_path else "/tmp"
+                local_path_final = os.path.join(local_path, file_name)
+
+                if os.path.exists(local_path_final):
+                    if increment_if_exists:
+                        increment = 0
+                        while os.path.exists(local_path_final):
+                            increment += 1
+                            local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+                        write_out = True
+                    elif overwrite_if_exists:
+                        write_out = True
+                    else:
+                        write_out = False
+                else:
+                    write_out = True
+
+                if write_out:
+                    with open(local_path_final, 'w', encoding='utf-8') as f:
+                        f.write(data_str)
+
+            except Exception as local_e:
+                pipelinemon.add_log(ContextLog(LogLevel.WARNING_FIX_RECOMMENDED, e=local_e, description="Failed to write to local file"))
+
+        # --- Return Metadata ---
+        return {
+            "gcs_path": gcs_path if success else None,  # Only set gcs_path if upload succeeded
+            "local_path": local_path_final if write_out else None,  # Only set local_path if saved locally
+            "gcs_file_already_exists": gcs_file_already_exists,
+            "gcs_file_exists_checked_on_name": gcs_file_exists_checked_on_name,
+            "gcs_file_overwritten": gcs_file_overwritten,
+            "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
+            "gcs_file_saved_with_increment": gcs_file_saved_with_increment
+        }
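To make the overwrite/increment rules in the docstring above concrete, here is a minimal, hypothetical usage sketch. The bucket name, file name, pipeline context, and payload are placeholders; only the function signature, the Pipelinemon and ContextLog types, and the returned keys come from the diff above.

# Hypothetical usage sketch; bucket/file names and the payload are invented placeholders.
from google.cloud import storage
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
from ipulse_shared_core_ftredge.utils_gcp_for_pipelines import write_json_to_gcs_in_pipeline  # module name inferred from the file list

storage_client = storage.Client()
pipelinemon = Pipelinemon(base_context="daily_prices_import")

result = write_json_to_gcs_in_pipeline(
    pipelinemon=pipelinemon,
    storage_client=storage_client,
    data={"symbol": "EXAMPLE", "close": 123.45},
    bucket_name="example-bucket",
    file_name="prices_2024-06-01.json",
    file_exists_if_starts_with_prefix="prices_2024-06-01",  # with overwrite, anything matching this prefix is deleted first
    overwrite_if_exists=True,                               # mutually exclusive with increment_if_exists
    max_deletable_files=3,                                  # safety cap; the function raises if more files match the prefix
)

if result["gcs_path"] is None:
    # Upload failed after retries; the function falls back to writing a local copy (default /tmp).
    print("GCS upload failed; local copy at:", result["local_path"])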
ipulse_shared_core_ftredge/{utils_common.py → utils_pipelinemon.py}
RENAMED
@@ -3,213 +3,26 @@
 # pylint: disable=logging-fstring-interpolation
 # pylint: disable=line-too-long
 # pylint: disable=missing-class-docstring
-
+# pylint: disable=broad-exception-caught
 import json
 import uuid
 from datetime import datetime, timezone
 from contextlib import contextmanager
 from typing import List
 from google.cloud import logging as cloudlogging
-from ipulse_shared_core_ftredge.enums.enums_common_utils import TargetLogs, LogLevel
+from ipulse_shared_core_ftredge.enums.enums_common_utils import TargetLogs, LogLevel
 from ipulse_shared_core_ftredge.utils_gcp import write_json_to_gcs
+from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog
 
 
-# ["data_import","data_quality", "data_processing","data_general","data_persistance","metadata_quality", "metadata_processing", "metadata_persistance","metadata_general"]
-
-class ContextLog:
-    MAX_TRACEBACK_LINES = 24  # Define the maximum number of traceback lines to include
-    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
-                 context: str = None, description: str = None,
-                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
-                 log_status: LogStatus = LogStatus.OPEN, subject: str = None
-                 ):
-        if e is not None:
-            e_type = type(e).__name__ if e_type is None else e_type
-            e_message = str(e) if e_message is None else e_message
-            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
-        elif e_traceback is None and (e_type or e_message):
-            e_traceback = traceback.format_exc()
-
-        self.level = level
-        self.subject = subject
-        self.description = description
-        self._base_context = base_context
-        self._context = context
-        self.collector_id = collector_id
-        self.exception_type = e_type
-        self.exception_message = e_message
-        self.exception_traceback = self._format_traceback(e_traceback,e_message)
-        self.log_status = log_status
-        self.timestamp = datetime.now(timezone.utc).isoformat()
-
-    def _format_traceback(self, e_traceback, e_message):
-        if not e_traceback or e_traceback == 'None\n':
-            return None
-
-        traceback_lines = e_traceback.splitlines()
-
-        # Remove lines that are part of the exception message if they are present in traceback
-        message_lines = e_message.splitlines() if e_message else []
-        if message_lines:
-            for message_line in message_lines:
-                if message_line in traceback_lines:
-                    traceback_lines.remove(message_line)
-
-        # Filter out lines from third-party libraries (like site-packages)
-        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
-
-        # If filtering results in too few lines, revert to original traceback
-        if len(filtered_lines) < 2:
-            filtered_lines = traceback_lines
-
-        # Combine standalone bracket lines with previous or next lines
-        combined_lines = []
-        for line in filtered_lines:
-            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
-                combined_lines[-1] += " " + line.strip()
-            else:
-                combined_lines.append(line)
-
-        # Determine the number of lines to keep from the start and end
-        keep_lines_start = min(self.MAX_TRACEBACK_LINES // 2, len(combined_lines))
-        keep_lines_end = min(self.MAX_TRACEBACK_LINES // 2, len(combined_lines) - keep_lines_start)
-
-        if len(combined_lines) > self.MAX_TRACEBACK_LINES:
-            # Include the first few and last few lines, and an indicator of truncation
-            formatted_traceback = '\n'.join(
-                combined_lines[:keep_lines_start] +
-                ['... (truncated) ...'] +
-                combined_lines[-keep_lines_end:]
-            )
-        else:
-            formatted_traceback = '\n'.join(combined_lines)
-
-        return formatted_traceback
-
-    @property
-    def base_context(self):
-        return self._base_context
-
-    @base_context.setter
-    def base_context(self, value):
-        self._base_context = value
-
-    @property
-    def context(self):
-        return self._context
-
-    @context.setter
-    def context(self, value):
-        self._context = value
-
-
-    def to_dict(self, size_limit=256 * 1024 * 0.80):
-        size_limit = int(size_limit)  # Ensure size_limit is an integer
-
-        # Define the priority order of the fields
-        priority_fields = [
-            ("base_context", self.base_context),
-            ("level_code", self.level.value),
-            ("level_name", self.level.name),
-            ("log_status", self.log_status.value),
-            ("collector_id", self.collector_id),
-            ("timestamp", self.timestamp),
-        ]
-
-        # Additional fields to be truncated if necessary. Shorter fields are truncated first so that remaining size can increase for longer fields.
-        additional_fields = [
-            ("subject", self.subject),
-            ("description", self.description),
-            ("exception_type", self.exception_type),
-            ("exception_message", self.exception_message),
-            ("context", self.context),  # special sizing rules apply to it
-            ("exception_traceback", self.exception_traceback)
-        ]
-
-        all_fields = priority_fields + additional_fields
-        non_zero_fields = [(key, value) for key, value in all_fields if value is not None]
-
-        total_size = 0
-        truncated = False  # Flag to indicate if truncation happened
-
-        # Function to calculate the byte size of a JSON-encoded field
-        def field_size(key, value):
-            return len(json.dumps({key: value}).encode('utf-8'))
-
-        # Function to truncate a value based on its type
-        def truncate_value(value, max_size):
-            if isinstance(value, str):
-                half_size = max_size // 2
-                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
-            elif isinstance(value, (list, tuple)):
-                half_size = max_size // 2
-                return list(value[:half_size]) + ['...'] + list(value[-(max_size - half_size - 1):])
-            elif isinstance(value, set):
-                truncated_set = set(list(value)[:max_size // 2]) | set(list(value)[-(max_size // 2):])
-                return truncated_set
-            elif isinstance(value, dict):
-                truncated_dict = {k: truncate_value(v, max_size // len(value)) for k, v in list(value.items())}
-                return truncated_dict
-            else:
-                return value
-
-        # Calculate the initial total size
-        for key, value in non_zero_fields:
-            total_size += field_size(key, value)
-
-        log_dict = {}
-        # Check if total size exceeds the size limit
-        if total_size > size_limit:
-            truncated = True  # Set the truncation flag
-            # Calculate max size per field based on all non-zero fields
-            max_size_per_field = size_limit // len(non_zero_fields)
-
-            # Reset total_size to recompute with truncation
-            total_size = 0
-
-            # Add priority fields first with possible truncation
-            for key, value in priority_fields:
-                if value is not None:
-                    truncated_value = value
-                    if isinstance(value, (str, list, tuple, set, dict)) and field_size(key, value) > max_size_per_field:
-                        truncated_value = truncate_value(value, max_size_per_field)
-                    log_dict[key] = truncated_value
-                    total_size += field_size(key, truncated_value)
-                else:
-                    log_dict[key] = value
-
-            # Calculate remaining size for additional fields
-            remaining_size = size_limit - total_size
-
-            # Handle remaining additional fields
-            non_zero_additional_fields = [field for field in additional_fields[1:] if field[1]]
-            remaining_field_size = remaining_size // len(non_zero_additional_fields) if non_zero_additional_fields else 0
-
-            for key, value in additional_fields[1:]:
-                if value is not None:
-                    if field_size(key, value) > remaining_field_size:
-                        truncated_value = truncate_value(value, remaining_field_size)
-                    else:
-                        truncated_value = value
-                    log_dict[key] = truncated_value
-                    remaining_size -= field_size(key, truncated_value)
-                else:
-                    log_dict[key] = value
-        else:
-            log_dict = dict(all_fields)
-
-        # Add trunc flag to the log dictionary
-        log_dict['trunc'] = truncated
-
-        return log_dict
-
 class Pipelinemon:
     ERROR_START_CODE = LogLevel.ERROR.value
     WARNING_START_CODE = LogLevel.WARNING.value
     NOTICE_START_CODE = LogLevel.NOTICE.value
     SUCCESS_START_CODE = LogLevel.SUCCESS.value
+    INFO_START_CODE = LogLevel.INFO.value
 
-    def __init__(self, base_context: str, target_logs: TargetLogs = TargetLogs.MIXED, logger_name=None):
+    def __init__(self, base_context: str, target_logs: TargetLogs = TargetLogs.MIXED, logger_name=None, max_log_field_size:int =10000, max_log_dict_size:float=256 * 1024 * 0.80, max_log_traceback_lines:int = 30):
         self._id = str(uuid.uuid4())
         self._logs = []
         self._early_stop = False
@@ -217,11 +30,16 @@ class Pipelinemon:
         self._warnings_count = 0
         self._notices_count = 0
         self._successes_count = 0
-        self.
+        self._infos_count = 0
+        self._systems_impacted = []
+        self._by_level_counts = {level.name: 0 for level in LogLevel}
         self._base_context = base_context
         self._context_stack = []
         self._target_logs = target_logs.value
         self._logger = self._initialize_logger(logger_name)
+        self._max_log_field_size = max_log_field_size
+        self._max_log_dict_size = max_log_dict_size
+        self._max_log_traceback_lines = max_log_traceback_lines
 
     def _initialize_logger(self, logger_name):
         if logger_name:
@@ -256,6 +74,46 @@ class Pipelinemon:
     def id(self):
         return self._id
 
+    @property
+    def systems_impacted(self):
+        return self._systems_impacted
+
+    @systems_impacted.setter
+    def systems_impacted(self, list_of_si: List[str]):
+        self._systems_impacted = list_of_si
+
+    def add_system_impacted(self, system_impacted: str):
+        if self._systems_impacted is None:
+            self._systems_impacted = []
+        self._systems_impacted.append(system_impacted)
+
+    def clear_systems_impacted(self):
+        self._systems_impacted = []
+
+    @property
+    def max_log_dict_size(self):
+        return self._max_log_dict_size
+
+    @max_log_dict_size.setter
+    def max_log_dict_size(self, value):
+        self._max_log_dict_size = value
+
+    @property
+    def max_log_field_size(self):
+        return self._max_log_field_size
+
+    @max_log_field_size.setter
+    def max_log_field_size(self, value):
+        self._max_log_field_size = value
+
+    @property
+    def max_log_traceback_lines(self):
+        return self._max_log_traceback_lines
+
+    @max_log_traceback_lines.setter
+    def max_log_traceback_lines(self, value):
+        self._max_log_traceback_lines = value
+
     @property
     def early_stop(self):
         return self._early_stop
@@ -265,24 +123,23 @@ class Pipelinemon:
         if create_error_log:
             if pop_context:
                 self.pop_context()
-            self.add_log(ContextLog(level=LogLevel.
+            self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                     subject="EARLY_STOP",
                                     description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
 
     def reset_early_stop(self):
         self._early_stop = False
 
-    def get_early_stop(self):
-        return self._early_stop
 
-    def add_log(self, log: ContextLog):
+    def add_log(self, log: ContextLog ):
         if (self._target_logs == TargetLogs.SUCCESSES and log.level >=self.NOTICE_START_CODE) or \
            (self._target_logs == TargetLogs.WARNINGS_AND_ERRORS and log.level.value < self.WARNING_START_CODE):
             raise ValueError(f"Invalid log level {log.level.name} for Pipelinemon target logs setup: {self._target_logs}")
         log.base_context = self.base_context
         log.context = self.current_context
         log.collector_id = self.id
-
+        log.systems_impacted = self.systems_impacted
+        log_dict = log.to_dict(max_field_len=self.max_log_field_size, size_limit=self.max_log_dict_size, max_traceback_lines=self.max_log_traceback_lines)
         self._logs.append(log_dict)
         self._update_counts(log_dict)
 
@@ -306,7 +163,8 @@ class Pipelinemon:
         self._warnings_count = 0
         self._notices_count = 0
         self._successes_count = 0
-        self.
+        self._infos_count = 0
+        self._by_level_counts = {level.name: 0 for level in LogLevel}
 
     def clear_logs(self):
         self._logs = []
@@ -344,11 +202,20 @@ class Pipelinemon:
     def count_successes(self):
         return self._successes_count
 
+    def count_successes_with_notice(self):
+        return self.count_logs_by_level(LogLevel.SUCCESS_WITH_NOTICES)
+
+    def count_successes_no_notice(self):
+        return self.count_logs_by_level(LogLevel.SUCCESS)
+
+    def count_infos(self):
+        return self._infos_count
+
     def count_all_logs(self):
         return len(self._logs)
 
     def count_logs_by_level(self, level: LogLevel):
-        return self.
+        return self._by_level_counts.get(level.name, 0)
 
     def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
         return sum(
@@ -400,7 +267,13 @@ class Pipelinemon:
     def count_successes_for_current_and_nested_contexts(self):
         return self._count_logs(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)
 
-    def
+    def count_infos_for_current_context(self):
+        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE-1)
+
+    def count_infos_for_current_and_nested_contexts(self):
+        return self._count_logs(self.current_context, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE-1)
+
+    def export_logs_to_gcs_file(self, bucket_name, storage_client, file_prefix=None, file_name=None, top_level_context=None, save_locally=False, overwrite_if_exists=False, increment_if_exists=True, local_path=None, logger=None, max_retries=2):
         def log_message(message):
             if logger:
                 logger.info(message)
@@ -429,9 +302,11 @@ class Pipelinemon:
                 local_path=local_path,
                 logger=logger,
                 max_retries=max_retries,
-                overwrite_if_exists=
+                overwrite_if_exists=overwrite_if_exists,
+                increment_if_exists=increment_if_exists
+
             )
-            log_message(f"{file_prefix} successfully saved (overwritten={result.get('gcs_file_overwritten')}) to GCS at {result.get('gcs_path')} and locally at {result.get('local_path')}.")
+            log_message(f"{file_prefix} successfully saved (overwritten={result.get('gcs_file_overwritten')}, incremented={result.get('gcs_file_saved_with_increment')}) to GCS at {result.get('gcs_path')} and locally at {result.get('local_path')}.")
         except Exception as e:
             log_error(f"Failed at export_logs_to_gcs_file for {file_prefix} for file {file_name} to bucket {bucket_name}: {type(e).__name__} - {str(e)}")
 
@@ -469,7 +344,9 @@ class Pipelinemon:
                 self._notices_count -= 1
             elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                 self._successes_count -= 1
-            self.
+            elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
+                self._infos_count -= 1
+            self._by_level_counts[level_name] -= 1
         else:
             if level_code >= self.ERROR_START_CODE:
                 self._errors_count += 1
@@ -479,4 +356,7 @@ class Pipelinemon:
                 self._notices_count += 1
             elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                 self._successes_count += 1
-            self.
+            elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
+                self._infos_count += 1
+            self._by_level_counts[level_name] += 1
+
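Taken together, the Pipelinemon changes above add INFO-level counting, a per-level _by_level_counts map, a systems_impacted list, and configurable size limits passed through to ContextLog.to_dict(). A rough sketch of how these pieces appear to fit together, using only methods and LogLevel members visible in this diff (context names, subjects, and the GCS path are placeholders):

# Hypothetical monitoring sketch; only methods shown in the diff above are used.
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog
from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel

mon = Pipelinemon(base_context="prices_pipeline", max_log_traceback_lines=30)

with mon.context("persist_results"):
    mon.add_system_impacted("upload: gcs_bucket_file: gs://example-bucket/prices.json")
    mon.add_log(ContextLog(LogLevel.SUCCESS, subject="prices.json", description="batch persisted"))

print(mon.count_successes())                      # aggregate counter maintained by _update_counts
print(mon.count_infos())                          # new in 2.57
print(mon.count_logs_by_level(LogLevel.SUCCESS))  # backed by the new _by_level_counts map
print(mon.systems_impacted)                       # populated via add_system_impacted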
ipulse_shared_core_ftredge/utils_templates_and_schemas.py
CHANGED
@@ -6,7 +6,7 @@
 import datetime
 from google.cloud import bigquery
 from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel
-from ipulse_shared_core_ftredge.
+from ipulse_shared_core_ftredge.utils_custom_logs import ContextLog
 
 
 def create_bigquery_schema_from_json(json_schema):
@@ -59,16 +59,14 @@ def check_format_against_schema_template(data_to_check, schema, dt_ts_to_str=Tru
                 checked_data[field_name] = value
 
             elif mode == "REQUIRED":
-
+                warnings_or_error.append(ContextLog(level=LogLevel.WARNING,
                                         subject=field_name,
-                                        description=f"Required field '{field_name}' is missing in the updates.")
-                warnings_or_error.append(warning)
+                                        description=f"Required field '{field_name}' is missing in the updates."))
 
     except Exception as e:
-
+        warnings_or_error.append(ContextLog(level=LogLevel.ERROR_EXCEPTION,
                                 subject=data_to_check,
-                                description=f"An error occurred during update check: {str(e)}")
-        warnings_or_error.append(error_log)
+                                description=f"An error occurred during update check: {str(e)}"))
 
     return checked_data, warnings_or_error
 
@@ -144,11 +142,11 @@ def handle_type_conversion(field_type, field_name, value):
         try:
             return float(value), None
         except ValueError:
-            return None, ContextLog(level=LogLevel.WARNING_FIX_REQUIRED,
+            return None, ContextLog(level=LogLevel.WARNING_FIX_REQUIRED,
                                     subject=field_name,
                                     description=f"Expected FLOAT, but got {value} of type {type(value).__name__}.")
     if field_type == "BOOL" and not isinstance(value, bool):
-        return bool(value), ContextLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
+        return bool(value), ContextLog(level=LogLevel.WARNING_REVIEW_RECOMMENDED,
                                        subject=field_name,
                                        description=f"Expected BOOL, but got {value}. Converting as {bool(value)}.")
 
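With the utils_templates_and_schemas.py change, check_format_against_schema_template and handle_type_conversion now hand back ContextLog objects directly instead of locally built warning variables. A hypothetical caller-side sketch (the schema and payload shapes are assumed for illustration; only the returned (checked_data, warnings_or_error) pair and the ContextLog type come from the diff):

# Hypothetical caller-side handling; the schema/payload structure is assumed, not taken from the diff.
from ipulse_shared_core_ftredge.utils_pipelinemon import Pipelinemon
from ipulse_shared_core_ftredge.utils_templates_and_schemas import check_format_against_schema_template  # module path inferred from the file list

mon = Pipelinemon(base_context="schema_checks")
checked_data, issues = check_format_against_schema_template(
    data_to_check={"price": "101.5"},
    schema=[{"name": "price", "type": "FLOAT", "mode": "REQUIRED"}],
)
for log in issues:      # each entry is now a ContextLog, per the diff above
    mon.add_log(log)    # so validation issues can flow straight into Pipelinemon counters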
{ipulse_shared_core_ftredge-2.55.dist-info → ipulse_shared_core_ftredge-2.57.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipulse_shared_core_ftredge
-Version: 2.55
+Version: 2.57
 Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
 Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
 Author: Russlan Ramdowar