ipulse-shared-data-eng-ftredge 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipulse_shared_data_eng_ftredge-2.1.1/LICENCE +19 -0
- ipulse_shared_data_eng_ftredge-2.1.1/PKG-INFO +15 -0
- ipulse_shared_data_eng_ftredge-2.1.1/README.md +21 -0
- ipulse_shared_data_eng_ftredge-2.1.1/pyproject.toml +3 -0
- ipulse_shared_data_eng_ftredge-2.1.1/setup.cfg +4 -0
- ipulse_shared_data_eng_ftredge-2.1.1/setup.py +24 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/__init__.py +10 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/__init__.py +2 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/context_log.py +210 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/pipelinemon.py +354 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/__init__.py +8 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_check_data_schema.py +151 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_cloud.py +53 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_cloud_gcp.py +359 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_local_files.py +157 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/PKG-INFO +15 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/SOURCES.txt +18 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/dependency_links.txt +1 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/requires.txt +8 -0
- ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2023 Future Edge Group
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: ipulse_shared_data_eng_ftredge
|
|
3
|
+
Version: 2.1.1
|
|
4
|
+
Summary: Shared Data Engineering functions for the Pulse platform project. Using AI for financial advisory and investment management.
|
|
5
|
+
Home-page: https://github.com/TheFutureEdge/ipulse_shared_data_eng
|
|
6
|
+
Author: Russlan Ramdowar
|
|
7
|
+
License-File: LICENCE
|
|
8
|
+
Requires-Dist: python-dateutil~=2.8
|
|
9
|
+
Requires-Dist: pytest~=7.1
|
|
10
|
+
Requires-Dist: ipulse_shared_core_ftredge>=1.1.1
|
|
11
|
+
Requires-Dist: ipulse_shared_enums>=1.1.1
|
|
12
|
+
Requires-Dist: google-cloud-bigquery~=3.24.0
|
|
13
|
+
Requires-Dist: google-cloud-storage~=1.41.1
|
|
14
|
+
Requires-Dist: google-cloud-pubsub~=2.7.0
|
|
15
|
+
Requires-Dist: google-cloud-secret-manager~=2.7.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# ipulse_shared_data_eng
|
|
2
|
+
Shared Data Engineering Code for ipulse platform, especially for Oracle module
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
### Collectors i.e. Pipelinemon
|
|
6
|
+
|
|
7
|
+
Collectors are smart Objects which are added to long running functions or pipelines for which we want to collect an overall number of successes, notices, warnings or errors.
|
|
8
|
+
|
|
9
|
+
We can wait until the full pipeline is finished in order to write off a single Summary file from a Collector, or we can attach a logger to it, which will report major statuses along the way — often the better option. If a function crashes midway through, all in-memory logs would otherwise be lost, making it hard to investigate what has been persisted and has to be rolled back; recollecting that information requires a lot of manual effort.
|
|
10
|
+
|
|
11
|
+
Pipelinemon, short for Pipeline Monitoring system, is a very powerful type of Collector which Russlan created specifically for Pulse Data Engineering pipelines.
|
|
12
|
+
|
|
13
|
+
Pipelinemon writes all observation logs to Google Cloud Logging, and you have to set up a Log Sink (Router) which will send Pipelinemon's observation logs to BigQuery.
|
|
14
|
+
|
|
15
|
+
A great thing about Pipelinemon is its "context" keeping feature.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
### Utils : Schema Checkers , Cloud Utils ( save file to cloud storage etc. for GCP, AWS, Azure etc.) , local files utils etc.
|
|
20
|
+
|
|
21
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# pylint: disable=import-error
|
|
2
|
+
from setuptools import setup, find_packages
|
|
3
|
+
|
|
4
|
+
# Distribution metadata for the ipulse_shared_data_eng_ftredge package.
# Runtime dependencies are collected in one place so they are easy to audit
# against the generated PKG-INFO / requires.txt.
_RUNTIME_DEPENDENCIES = [
    'python-dateutil~=2.8',
    'pytest~=7.1',
    'ipulse_shared_core_ftredge>=1.1.1',
    'ipulse_shared_enums>=1.1.1',
    'google-cloud-bigquery~=3.24.0',
    'google-cloud-storage~=1.41.1',
    'google-cloud-pubsub~=2.7.0',
    'google-cloud-secret-manager~=2.7.0',
]

setup(
    name='ipulse_shared_data_eng_ftredge',
    version='2.1.1',
    # Sources live under src/ (the "src layout"), so point setuptools there.
    package_dir={'': 'src'},
    packages=find_packages(where='src'),
    install_requires=_RUNTIME_DEPENDENCIES,
    author='Russlan Ramdowar',
    description='Shared Data Engineering functions for the Pulse platform project. Using AI for financial advisory and investment management.',
    url='https://github.com/TheFutureEdge/ipulse_shared_data_eng',
)
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from .collectors import ContextLog, Pipelinemon
|
|
2
|
+
from .utils import (check_format_against_schema_template,
|
|
3
|
+
write_json_to_cloud_storage_extended,
|
|
4
|
+
read_json_from_cloud_storage,
|
|
5
|
+
write_json_to_gcs_extended,
|
|
6
|
+
read_json_from_gcs,
|
|
7
|
+
save_json_locally_extended,
|
|
8
|
+
prepare_full_file_path
|
|
9
|
+
)
|
|
10
|
+
|
ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/context_log.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
|
|
2
|
+
# pylint: disable=missing-module-docstring
|
|
3
|
+
# pylint: disable=missing-function-docstring
|
|
4
|
+
# pylint: disable=logging-fstring-interpolation
|
|
5
|
+
# pylint: disable=line-too-long
|
|
6
|
+
# pylint: disable=missing-class-docstring
|
|
7
|
+
# pylint: disable=broad-exception-caught
|
|
8
|
+
# pylint: disable=unused-variable
|
|
9
|
+
import traceback
|
|
10
|
+
import json
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from typing import List
|
|
13
|
+
from ipulse_shared_enums import Status, LogLevel
|
|
14
|
+
|
|
15
|
+
############################################################################
|
|
16
|
+
##################### SETTING UP custom LOGGING format= DICT ##########################
|
|
17
|
+
### Cloud Agnostic, can be used with any cloud provider , jsut use to_dict() method to get the log in dict format
|
|
18
|
+
class ContextLog:
    """A single structured, cloud-agnostic log record.

    Captures a severity level, pipeline context, optional exception details,
    and a UTC timestamp. Use ``to_dict()`` to serialize the record for any
    cloud logging backend; it enforces per-field and total-size limits so the
    payload fits provider constraints (e.g. Cloud Logging entry size caps).
    """

    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
                 context: str = None, description: str = None,
                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
                 log_status: Status = Status.OPEN, subject: str = None, systems_impacted: List[str] = None,
                 ):
        # If a live exception object is given, derive type/message/traceback
        # from it unless the caller supplied explicit overrides.
        if e is not None:
            e_type = type(e).__name__ if e_type is None else e_type
            e_message = str(e) if e_message is None else e_message
            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
        # If only type/message were given, still try to grab the current
        # traceback from the active exception context (if any).
        elif not e_traceback and (e_type or e_message):
            e_traceback = traceback.format_exc()

        self.level = level
        self.subject = subject
        self.description = description
        self._base_context = base_context
        self._context = context
        self._systems_impacted = systems_impacted if systems_impacted else []
        self.collector_id = collector_id
        self.exception_type = e_type
        self.exception_message = e_message
        self.exception_traceback = e_traceback
        self.log_status = log_status
        # Timestamp is fixed at construction time, in UTC ISO-8601 form.
        self.timestamp = datetime.now(timezone.utc).isoformat()

    @property
    def base_context(self):
        """Top-level pipeline context (typically set by the collector)."""
        return self._base_context

    @base_context.setter
    def base_context(self, value):
        self._base_context = value

    @property
    def context(self):
        """Nested context string at the time the log was recorded."""
        return self._context

    @context.setter
    def context(self, value):
        self._context = value

    @property
    def systems_impacted(self):
        """List of system identifiers this log event touched."""
        return self._systems_impacted

    @systems_impacted.setter
    def systems_impacted(self, list_of_si: List[str]):
        self._systems_impacted = list_of_si

    def add_system_impacted(self, system_impacted: str):
        if self._systems_impacted is None:
            self._systems_impacted = []
        self._systems_impacted.append(system_impacted)

    def remove_system_impacted(self, system_impacted: str):
        # NOTE(review): list.remove raises ValueError if the item is absent —
        # callers are expected to only remove previously added systems.
        if self._systems_impacted is not None:
            self._systems_impacted.remove(system_impacted)

    def clear_systems_impacted(self):
        self._systems_impacted = []

    def _format_traceback(self, e_traceback, e_message, max_field_len:int, max_traceback_lines:int):
        """Shrink a traceback string to fit line-count and length budgets.

        Strategy: drop message lines duplicated in the traceback, filter
        third-party (site-packages) frames, merge stray bracket-only lines,
        then truncate from the middle if still over budget.
        """
        if not e_traceback or e_traceback == 'None\n':
            return None

        traceback_lines = e_traceback.splitlines()

        # Check if the traceback already fits within the limits.
        if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
            return e_traceback

        # Remove lines that are part of the exception message if they are present in traceback
        message_lines = e_message.splitlines() if e_message else []
        if message_lines:
            for message_line in message_lines:
                if message_line in traceback_lines:
                    traceback_lines.remove(message_line)

        # Filter out lines from third-party libraries (like site-packages)
        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]

        # If filtering results in too few lines, revert to original traceback
        if len(filtered_lines) < 2:
            filtered_lines = traceback_lines

        # Combine standalone bracket lines with previous or next lines
        combined_lines = []
        for line in filtered_lines:
            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
                combined_lines[-1] += " " + line.strip()
            else:
                combined_lines.append(line)

        # Ensure the number of lines doesn't exceed max_traceback_lines:
        # keep the head and tail, drop the middle.
        if len(combined_lines) > max_traceback_lines:
            keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
            keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
            combined_lines = (
                combined_lines[:keep_lines_start] +
                ['... (truncated) ...'] +
                combined_lines[-keep_lines_end:]
            )

        formatted_traceback = '\n'.join(combined_lines)

        # Ensure the total length doesn't exceed max_field_len (middle-truncate).
        if len(formatted_traceback) > max_field_len:
            truncated_length = max_field_len - len('... (truncated) ...')
            half_truncated_length = truncated_length // 2
            formatted_traceback = (
                formatted_traceback[:half_truncated_length] +
                '\n... (truncated) ...\n' +
                formatted_traceback[-half_truncated_length:]
            )
        return formatted_traceback

    def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80,max_traceback_lines:int = 30):
        """Serialize this log to a dict, enforcing size budgets.

        ``size_limit`` defaults to ~80% of 256 KiB (Cloud Logging entry cap,
        presumably — confirm against the target backend). A ``trunc`` key
        reports whether any truncation was applied.
        """
        size_limit = int(size_limit)  # Ensure size_limit is an integer

        # Unified list of all fields, in serialization order.
        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
        fields = [
            ("log_status", str(self.log_status.name)),
            ("level_code", self.level.value),
            ("level_name", str(self.level.name)),
            ("base_context", str(self.base_context)),
            ("timestamp", str(self.timestamp)),
            ("collector_id", str(self.collector_id)),
            ("systems_impacted", systems_impacted_str),
            ("context", str(self.context)),  # special sizing rules apply to it
            ("subject", str(self.subject)),
            ("description", str(self.description)),
            ("exception_type", str(self.exception_type)),
            ("exception_message", str(self.exception_message)),
            ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message, max_field_len, max_traceback_lines)))
        ]

        # Byte size of a single field as it would appear JSON-encoded.
        def field_size(key, value):
            return len(json.dumps({key: value}).encode('utf-8'))

        # Middle-truncate string values; non-strings pass through unchanged.
        def truncate_value(value, max_size):
            if isinstance(value, str):
                half_size = max_size // 2
                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
            return value

        # Ensure no field exceeds max_field_len
        for i, (key, value) in enumerate(fields):
            if isinstance(value, str) and len(value) > max_field_len:
                fields[i] = (key, truncate_value(value, max_field_len))

        # Ensure total size of the dict doesn't exceed size_limit
        total_size = sum(field_size(key, value) for key, value in fields)
        log_dict = {}
        truncated = False

        if total_size > size_limit:
            truncated = True
            # Distribute the remaining byte budget evenly over remaining fields.
            remaining_size = size_limit
            remaining_fields = len(fields)

            for key, value in fields:
                if remaining_fields > 0:
                    max_size_per_field = remaining_size // remaining_fields
                else:
                    max_size_per_field = 0

                field_sz = field_size(key, value)
                if field_sz > max_size_per_field:
                    value = truncate_value(value, max_size_per_field)
                    field_sz = field_size(key, value)

                log_dict[key] = value
                remaining_size -= field_sz
                remaining_fields -= 1
        else:
            log_dict = dict(fields)

        log_dict['trunc'] = truncated

        return log_dict

    def __str__(self):
        return json.dumps(self.to_dict(), indent=4)

    def __repr__(self):
        return self.__str__()
ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/pipelinemon.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
# pylint: disable=missing-module-docstring
|
|
2
|
+
# pylint: disable=missing-function-docstring
|
|
3
|
+
# pylint: disable=logging-fstring-interpolation
|
|
4
|
+
# pylint: disable=line-too-long
|
|
5
|
+
# pylint: disable=missing-class-docstring
|
|
6
|
+
# pylint: disable=broad-exception-caught
|
|
7
|
+
import json
|
|
8
|
+
import uuid
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
from typing import List
|
|
12
|
+
from ipulse_shared_enums import LogLevel
|
|
13
|
+
from .context_log import ContextLog
|
|
14
|
+
############################################################################
|
|
15
|
+
##### PIPINEMON Collector for Logs and Statuses of running pipelines #######
|
|
16
|
+
class Pipelinemon:
    """Collector for logs and statuses of running data pipelines.

    Accumulates ``ContextLog`` records with nested context tracking, keeps
    per-severity counters, and (optionally) forwards each record to a logger.
    Severity band boundaries are taken from the project ``LogLevel`` enum.
    """

    # Band start codes: a level_code >= ERROR_START_CODE is an error,
    # [WARNING_START_CODE, ERROR_START_CODE) is a warning, and so on down.
    ERROR_START_CODE = LogLevel.ERROR.value
    WARNING_START_CODE = LogLevel.WARNING.value
    NOTICE_START_CODE = LogLevel.NOTICE.value
    SUCCESS_START_CODE = LogLevel.SUCCESS.value
    INFO_START_CODE = LogLevel.INFO.value

    def __init__(self, base_context: str, logger,
                 max_log_field_size:int =10000,
                 max_log_dict_size:float=256 * 1024 * 0.80,
                 max_log_traceback_lines:int = 30):
        # Unique id for this collector run; stamped onto every log it records.
        self._id = str(uuid.uuid4())
        self._logs = []
        self._early_stop = False
        self._errors_count = 0
        self._warnings_count = 0
        self._notices_count = 0
        self._successes_count = 0
        self._infos_count = 0
        self._systems_impacted = []
        self._by_level_counts = {level.name: 0 for level in LogLevel}
        self._base_context = base_context
        self._context_stack = []
        self._logger = logger
        # Size budgets forwarded to ContextLog.to_dict() for every record.
        self._max_log_field_size = max_log_field_size
        self._max_log_dict_size = max_log_dict_size
        self._max_log_traceback_lines = max_log_traceback_lines

    @contextmanager
    def context(self, context):
        """Push a nested context for the duration of a ``with`` block."""
        self.push_context(context)
        try:
            yield
        finally:
            self.pop_context()

    def push_context(self, context):
        self._context_stack.append(context)

    def pop_context(self):
        if self._context_stack:
            self._context_stack.pop()

    @property
    def current_context(self):
        """Joined context stack, outermost first (empty string at root)."""
        return " >> ".join(self._context_stack)

    @property
    def base_context(self):
        return self._base_context

    @property
    def id(self):
        return self._id

    @property
    def systems_impacted(self):
        return self._systems_impacted

    @systems_impacted.setter
    def systems_impacted(self, list_of_si: List[str]):
        self._systems_impacted = list_of_si

    def add_system_impacted(self, system_impacted: str):
        if self._systems_impacted is None:
            self._systems_impacted = []
        self._systems_impacted.append(system_impacted)

    def clear_systems_impacted(self):
        self._systems_impacted = []

    @property
    def max_log_dict_size(self):
        return self._max_log_dict_size

    @max_log_dict_size.setter
    def max_log_dict_size(self, value):
        self._max_log_dict_size = value

    @property
    def max_log_field_size(self):
        return self._max_log_field_size

    @max_log_field_size.setter
    def max_log_field_size(self, value):
        self._max_log_field_size = value

    @property
    def max_log_traceback_lines(self):
        return self._max_log_traceback_lines

    @max_log_traceback_lines.setter
    def max_log_traceback_lines(self, value):
        self._max_log_traceback_lines = value

    @property
    def early_stop(self):
        return self._early_stop

    def set_early_stop(self, max_errors_tolerance:int=0, max_warnings_tolerance:int=0, create_error_log=True, pop_context=False):
        """Mark the pipeline for early stop, optionally recording a threshold log.

        The tolerance arguments only select the wording of the generated log;
        the caller is responsible for having checked the thresholds.
        """
        self._early_stop = True
        if create_error_log:
            if pop_context:
                self.pop_context()
            if max_errors_tolerance > 0:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
            elif max_warnings_tolerance > 0:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description=f"Total MAX_WARNINGS_TOLERANCE of {max_warnings_tolerance} has been reached."))
            else:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description="Early stop has been triggered."))

    def reset_early_stop(self):
        self._early_stop = False

    def add_log(self, log: ContextLog ):
        """Record a log: stamp collector metadata, store the dict, update counts.

        Note: logs are stored as already-serialized dicts (post size limits),
        not as ContextLog objects.
        """
        log.base_context = self.base_context
        log.context = self.current_context if self.current_context else "root"
        log.collector_id = self.id
        log.systems_impacted = self.systems_impacted
        log_dict = log.to_dict(max_field_len=self.max_log_field_size, size_limit=self.max_log_dict_size, max_traceback_lines=self.max_log_traceback_lines)
        self._logs.append(log_dict)
        self._update_counts(log_dict)

        if self._logger:
            # We specifically want to avoid having an ERROR log level for this structured Pipelinemon reporting, to ensure Errors are alerting on Critical Application Services.
            # A single ERROR log level is usually added at the end of the entire pipeline
            if log.level.value >= self.WARNING_START_CODE:
                self._logger.warning(log_dict)
            else:
                self._logger.info(log_dict)

    def add_logs(self, logs: List[ContextLog]):
        for log in logs:
            self.add_log(log)

    def clear_logs_and_counts(self):
        self._logs = []
        self._errors_count = 0
        self._warnings_count = 0
        self._notices_count = 0
        self._successes_count = 0
        self._infos_count = 0
        self._by_level_counts = {level.name: 0 for level in LogLevel}

    def clear_logs(self):
        # NOTE(review): clears stored logs but NOT the counters — use
        # clear_logs_and_counts() to reset both.
        self._logs = []

    def get_all_logs(self,in_json_format=False):
        if in_json_format:
            return json.dumps(self._logs)
        return self._logs

    def get_logs_for_level(self, level: LogLevel):
        return [log for log in self._logs if log["level_code"] == level.value]

    def get_logs_by_str_in_context(self, context_substring: str):
        return [
            log for log in self._logs
            if context_substring in log["context"]
        ]

    def contains_errors(self):
        return self._errors_count > 0

    def count_errors(self):
        return self._errors_count

    def contains_warnings_or_errors(self):
        return self._warnings_count > 0 or self._errors_count > 0

    def count_warnings_and_errors(self):
        return self._warnings_count + self._errors_count

    def count_warnings(self):
        return self._warnings_count

    def count_notices(self):
        return self._notices_count

    def count_successes(self):
        return self._successes_count

    def count_successes_with_notice(self):
        return self.count_logs_by_level(LogLevel.SUCCESS_WITH_NOTICES)

    def count_successes_no_notice(self):
        return self.count_logs_by_level(LogLevel.SUCCESS)

    def count_infos(self):
        return self._infos_count

    def count_all_logs(self):
        return len(self._logs)

    def count_logs_by_level(self, level: LogLevel):
        return self._by_level_counts.get(level.name, 0)

    def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
        """Count stored logs matching a context (exact or substring) and an
        optional inclusive level_code range."""
        return sum(
            1 for log in self._logs
            if (log["context"] == context_substring if exact_match else context_substring in log["context"]) and
            (level_code_min is None or log["level_code"] >= level_code_min) and
            (level_code_max is None or log["level_code"] <= level_code_max)
        )

    def count_logs_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True)

    def count_logs_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context)

    def count_logs_by_level_for_current_context(self, level: LogLevel):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)

    def count_logs_by_level_for_current_and_nested_contexts(self, level: LogLevel):
        return self._count_logs(self.current_context, level_code_min=level.value, level_code_max=level.value)

    def count_errors_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)

    def count_errors_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.ERROR_START_CODE)

    def count_warnings_and_errors_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)

    def count_warnings_and_errors_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE)

    def count_warnings_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)

    def count_warnings_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)

    def count_notices_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)

    def count_notices_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE-1)

    def count_successes_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)

    def count_successes_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE-1)

    def count_infos_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE-1)

    def count_infos_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE-1)

    def _update_counts(self, log, remove=False):
        """Adjust aggregate and per-level counters for one log dict.

        ``remove=True`` reverses a previous addition (counters only; it does
        not touch self._logs).
        """
        level_code = log["level_code"]
        level_name = log["level_name"]

        if remove:
            if level_code >= self.ERROR_START_CODE:
                self._errors_count -= 1
            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
                self._warnings_count -= 1
            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
                self._notices_count -= 1
            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                self._successes_count -= 1
            elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
                self._infos_count -= 1
            self._by_level_counts[level_name] -= 1
        else:
            if level_code >= self.ERROR_START_CODE:
                self._errors_count += 1
            elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
                self._warnings_count += 1
            elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
                self._notices_count += 1
            elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
                self._successes_count += 1
            elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
                self._infos_count += 1
            self._by_level_counts[level_name] += 1

    def generate_file_name(self, file_prefix=None, include_base_context=True):
        """Build a timestamped JSON file name for exporting collected logs."""
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        if not file_prefix:
            file_prefix = "pipelinelogs"
        if include_base_context:
            file_name = f"{file_prefix}_{timestamp}_{self.base_context}_len{len(self._logs)}.json"
        else:
            file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"

        return file_name

    def import_logs_from_json(self, json_or_file, logger=None):
        """Load logs from a JSON string or file-like object via add_logs().

        Failures are logged as warnings (best-effort), never raised.
        NOTE(review): the imported items are dicts, while add_logs() expects
        ContextLog objects — verify round-tripping against the export format.
        """
        def log_message(message):
            if logger:
                logger.info(message)

        def log_warning(message, exc_info=False):
            if logger:
                logger.warning(message, exc_info=exc_info)

        try:
            if isinstance(json_or_file, str):  # Load from string
                imported_logs = json.loads(json_or_file)
            elif hasattr(json_or_file, 'read'):  # Load from file-like object
                imported_logs = json.load(json_or_file)
            self.add_logs(imported_logs)
            log_message("Successfully imported logs from json.")
        except Exception as e:
            log_warning(f"Failed to import logs from json: {type(e).__name__} - {str(e)}", exc_info=True)

    def generate_final_log_message(self, subjectref: str, total_subjs: int) -> str:
        """Compose the human-readable end-of-pipeline summary string."""
        return f"""
Pipeline for {self.base_context} with pipelinemon.id: {self.id},
SUMMARY: Early_Stop= {self.early_stop} ;
A)SUCCESSES: {self.count_successes()}/{total_subjs} {subjectref}(s) ; out of which SUCCESSES_WITH_NOTICES: {self.count_successes_with_notice()}/{total_subjs} {subjectref}(s) ;
B)NOTICES: {self.count_notices()} ;
C)WARNINGS: {self.count_warnings()} ;
D)ERRORS: {self.count_errors()} ;
E)INFOS: {self.count_infos()} ;
"""

    def log_final_message(self, subjectref: str, total_subjs: int, generallogger):
        """Emit the summary: ERROR level if any warnings/errors, else INFO."""
        final_log_message = self.generate_final_log_message(subjectref=subjectref, total_subjs=total_subjs)
        if self.count_warnings_and_errors() > 0:
            generallogger.error(final_log_message)
        else:
            generallogger.info(final_log_message)
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .utils_check_data_schema import check_format_against_schema_template
|
|
2
|
+
from .utils_cloud import (write_json_to_cloud_storage_extended,
|
|
3
|
+
read_json_from_cloud_storage)
|
|
4
|
+
from .utils_cloud_gcp import (write_json_to_gcs_extended,
|
|
5
|
+
read_json_from_gcs
|
|
6
|
+
)
|
|
7
|
+
from .utils_local_files import (save_json_locally_extended,
|
|
8
|
+
prepare_full_file_path)
|