ipulse-shared-data-eng-ftredge 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. ipulse_shared_data_eng_ftredge-2.1.1/LICENCE +19 -0
  2. ipulse_shared_data_eng_ftredge-2.1.1/PKG-INFO +15 -0
  3. ipulse_shared_data_eng_ftredge-2.1.1/README.md +21 -0
  4. ipulse_shared_data_eng_ftredge-2.1.1/pyproject.toml +3 -0
  5. ipulse_shared_data_eng_ftredge-2.1.1/setup.cfg +4 -0
  6. ipulse_shared_data_eng_ftredge-2.1.1/setup.py +24 -0
  7. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/__init__.py +10 -0
  8. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/__init__.py +2 -0
  9. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/context_log.py +210 -0
  10. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/collectors/pipelinemon.py +354 -0
  11. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/__init__.py +8 -0
  12. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_check_data_schema.py +151 -0
  13. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_cloud.py +53 -0
  14. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_cloud_gcp.py +359 -0
  15. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge/utils/utils_local_files.py +157 -0
  16. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/PKG-INFO +15 -0
  17. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/SOURCES.txt +18 -0
  18. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/dependency_links.txt +1 -0
  19. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/requires.txt +8 -0
  20. ipulse_shared_data_eng_ftredge-2.1.1/src/ipulse_shared_data_eng_ftredge.egg-info/top_level.txt +1 -0
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2023 Future Edge Group
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.1
2
+ Name: ipulse_shared_data_eng_ftredge
3
+ Version: 2.1.1
4
+ Summary: Shared Data Engineering functions for the Pulse platform project. Using AI for financial advisory and investment management.
5
+ Home-page: https://github.com/TheFutureEdge/ipulse_shared_data_eng
6
+ Author: Russlan Ramdowar
7
+ License-File: LICENCE
8
+ Requires-Dist: python-dateutil~=2.8
9
+ Requires-Dist: pytest~=7.1
10
+ Requires-Dist: ipulse_shared_core_ftredge>=1.1.1
11
+ Requires-Dist: ipulse_shared_enums>=1.1.1
12
+ Requires-Dist: google-cloud-bigquery~=3.24.0
13
+ Requires-Dist: google-cloud-storage~=1.41.1
14
+ Requires-Dist: google-cloud-pubsub~=2.7.0
15
+ Requires-Dist: google-cloud-secret-manager~=2.7.0
@@ -0,0 +1,21 @@
1
+ # ipulse_shared_data_eng
2
+ Shared Data Engineering Code for ipulse platform, especially for Oracle module
3
+
4
+
5
+ ### Collectors i.e. Pipelinemon
6
+
7
+ Collectors are smart Objects which are added to long running functions or pipelines for which we want to collect an overall number of successes, notices, warnings or errors.
8
+
9
+ We can wait until the full pipeline is finished in order to write off a single Summary file from a Collector, or we can attach a logger to it, which will report major statuses along the journey — often the better option. Because if a function crashes midway through, all logs will be lost, and it would be hard to investigate if anything has been persisted and has to be rolled back. That would require a lot of manual effort to recollect.
10
+
11
+ Pipelinemon, short for Pipeline Monitoring system, is a very powerful type of Collector which Russlan created specifically for Pulse Data Engineering pipelines.
12
+
13
+ Pipelinemon writes all observation logs to Google Cloud Logging, and you have to set up a Log Sink (Router) which will send the Pipelinemon's observation logs to BigQuery.
14
+
15
+ A great thing about Pipelinemon is its "context"-keeping feature.
16
+
17
+
18
+
19
+ ### Utils : Schema Checkers , Cloud Utils ( save file to cloud storage etc. for GCP, AWS, Azure etc.) , local files utils etc.
20
+
21
+
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,24 @@
1
# pylint: disable=import-error
from setuptools import setup, find_packages

# Runtime dependencies of the package, kept in one named list so they are
# easy to diff against PKG-INFO / requires.txt.
DEPENDENCIES = [
    'python-dateutil~=2.8',
    'pytest~=7.1',
    'ipulse_shared_core_ftredge>=1.1.1',
    'ipulse_shared_enums>=1.1.1',
    'google-cloud-bigquery~=3.24.0',
    'google-cloud-storage~=1.41.1',
    'google-cloud-pubsub~=2.7.0',
    'google-cloud-secret-manager~=2.7.0',
]

setup(
    name='ipulse_shared_data_eng_ftredge',
    version='2.1.1',
    package_dir={'': 'src'},              # packages live under src/
    packages=find_packages(where='src'),  # discover them there
    install_requires=DEPENDENCIES,
    author='Russlan Ramdowar',
    description='Shared Data Engineering functions for the Pulse platform project. Using AI for financial advisory and investment management.',
    url='https://github.com/TheFutureEdge/ipulse_shared_data_eng',
)
@@ -0,0 +1,10 @@
1
+ from .collectors import ContextLog, Pipelinemon
2
+ from .utils import (check_format_against_schema_template,
3
+ write_json_to_cloud_storage_extended,
4
+ read_json_from_cloud_storage,
5
+ write_json_to_gcs_extended,
6
+ read_json_from_gcs,
7
+ save_json_locally_extended,
8
+ prepare_full_file_path
9
+ )
10
+
@@ -0,0 +1,2 @@
1
+ from .context_log import ContextLog
2
+ from .pipelinemon import Pipelinemon
@@ -0,0 +1,210 @@
1
+
2
+ # pylint: disable=missing-module-docstring
3
+ # pylint: disable=missing-function-docstring
4
+ # pylint: disable=logging-fstring-interpolation
5
+ # pylint: disable=line-too-long
6
+ # pylint: disable=missing-class-docstring
7
+ # pylint: disable=broad-exception-caught
8
+ # pylint: disable=unused-variable
9
+ import traceback
10
+ import json
11
+ from datetime import datetime, timezone
12
+ from typing import List
13
+ from ipulse_shared_enums import Status, LogLevel
14
+
15
+ ############################################################################
16
+ ##################### SETTING UP custom LOGGING format= DICT ##########################
17
+ ### Cloud Agnostic, can be used with any cloud provider , jsut use to_dict() method to get the log in dict format
18
class ContextLog:
    """Cloud-agnostic structured log record used by pipeline collectors.

    Holds a severity level, contextual metadata and optional exception
    details, and serializes itself through to_dict() into a dict whose
    individual fields and total JSON size are bounded, so a single record
    never exceeds a cloud-logging payload limit.
    """

    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
                 context: str = None, description: str = None,
                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
                 log_status: Status = Status.OPEN, subject: str = None, systems_impacted: List[str] = None,
                 ):
        """Create a log record.

        Args:
            level: Severity of the record.
            base_context: Pipeline-wide context, normally set by the collector.
            collector_id: Id of the owning collector (e.g. a Pipelinemon).
            context: Nested context path at the time of logging.
            description: Free-form human-readable message.
            e: Exception instance; when provided, e_type/e_message/e_traceback
                default to values derived from it.
            e_type: Explicit exception class name (overrides derivation from e).
            e_message: Explicit exception message (overrides derivation from e).
            e_traceback: Explicit traceback text (overrides derivation from e).
            log_status: Lifecycle status of the record.
            subject: Short subject/category for the record.
            systems_impacted: External systems touched when this was logged.
        """
        if e is not None:
            e_type = type(e).__name__ if e_type is None else e_type
            e_message = str(e) if e_message is None else e_message
            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
        elif not e_traceback and (e_type or e_message):
            # Pieces of an exception were supplied without a traceback; capture
            # the active one (yields 'None\n' outside an except block, which
            # _format_traceback treats as "no traceback").
            e_traceback = traceback.format_exc()

        self.level = level
        self.subject = subject
        self.description = description
        self._base_context = base_context
        self._context = context
        self._systems_impacted = systems_impacted if systems_impacted else []
        self.collector_id = collector_id
        self.exception_type = e_type
        self.exception_message = e_message
        self.exception_traceback = e_traceback
        self.log_status = log_status
        # Timestamp is fixed at creation time, UTC, ISO-8601.
        self.timestamp = datetime.now(timezone.utc).isoformat()

    @property
    def base_context(self):
        return self._base_context

    @base_context.setter
    def base_context(self, value):
        self._base_context = value

    @property
    def context(self):
        return self._context

    @context.setter
    def context(self, value):
        self._context = value

    @property
    def systems_impacted(self):
        return self._systems_impacted

    @systems_impacted.setter
    def systems_impacted(self, list_of_si: List[str]):
        self._systems_impacted = list_of_si

    def add_system_impacted(self, system_impacted: str):
        """Append one system to the impacted list (creating it if needed)."""
        if self._systems_impacted is None:
            self._systems_impacted = []
        self._systems_impacted.append(system_impacted)

    def remove_system_impacted(self, system_impacted: str):
        """Remove one system from the impacted list; raises ValueError if absent."""
        if self._systems_impacted is not None:
            self._systems_impacted.remove(system_impacted)

    def clear_systems_impacted(self):
        """Reset the impacted-systems list to empty."""
        self._systems_impacted = []

    def _format_traceback(self, e_traceback, e_message, max_field_len: int, max_traceback_lines: int):
        """Compact a traceback to fit the line and length limits.

        Strips exception-message lines duplicated inside the traceback, drops
        third-party (site-packages) frames, merges stray bracket-only lines,
        then truncates symmetrically (head + '... (truncated) ...' + tail).
        Returns None when there is no usable traceback.
        """
        if not e_traceback or e_traceback == 'None\n':
            return None

        traceback_lines = e_traceback.splitlines()

        # Fast path: already within both limits.
        if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
            return e_traceback

        # Remove lines that are part of the exception message if they are
        # present in the traceback (they are reported in their own field).
        message_lines = e_message.splitlines() if e_message else []
        if message_lines:
            for message_line in message_lines:
                if message_line in traceback_lines:
                    traceback_lines.remove(message_line)

        # Filter out lines from third-party libraries (like site-packages).
        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]

        # If filtering leaves too little context, revert to the full traceback.
        if len(filtered_lines) < 2:
            filtered_lines = traceback_lines

        # Combine standalone bracket lines with the previous line.
        combined_lines = []
        for line in filtered_lines:
            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
                combined_lines[-1] += " " + line.strip()
            else:
                combined_lines.append(line)

        # Keep at most max_traceback_lines, half from each end.
        if len(combined_lines) > max_traceback_lines:
            keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
            keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
            combined_lines = (
                combined_lines[:keep_lines_start] +
                ['... (truncated) ...'] +
                combined_lines[-keep_lines_end:]
            )

        formatted_traceback = '\n'.join(combined_lines)

        # Enforce the character-length limit the same symmetric way.
        if len(formatted_traceback) > max_field_len:
            truncated_length = max_field_len - len('... (truncated) ...')
            half_truncated_length = truncated_length // 2
            formatted_traceback = (
                formatted_traceback[:half_truncated_length] +
                '\n... (truncated) ...\n' +
                formatted_traceback[-half_truncated_length:]
            )
        return formatted_traceback

    def to_dict(self, max_field_len: int = 10000, size_limit: float = 256 * 1024 * 0.80, max_traceback_lines: int = 30):
        """Serialize the record to a dict, enforcing size limits.

        Args:
            max_field_len: Max character length of any single field value.
            size_limit: Max total JSON-encoded byte size of the dict.
            max_traceback_lines: Max traceback lines kept by the formatter.

        Returns:
            Dict of log fields plus a 'trunc' flag signalling whether
            total-size truncation was applied.
        """
        size_limit = int(size_limit)  # Ensure size_limit is an integer

        # Unified list of all fields
        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
        fields = [
            ("log_status", str(self.log_status.name)),
            ("level_code", self.level.value),
            ("level_name", str(self.level.name)),
            ("base_context", str(self.base_context)),
            ("timestamp", str(self.timestamp)),
            ("collector_id", str(self.collector_id)),
            ("systems_impacted", systems_impacted_str),
            ("context", str(self.context)),  # special sizing rules apply to it
            ("subject", str(self.subject)),
            ("description", str(self.description)),
            ("exception_type", str(self.exception_type)),
            ("exception_message", str(self.exception_message)),
            ("exception_traceback", str(self._format_traceback(self.exception_traceback, self.exception_message, max_field_len, max_traceback_lines)))
        ]

        # Byte size of a single field once JSON-encoded (key included).
        def field_size(key, value):
            return len(json.dumps({key: value}).encode('utf-8'))

        # Truncate an oversized string, keeping head and tail around '...'.
        def truncate_value(value, max_size):
            if not isinstance(value, str) or len(value) <= max_size:
                return value
            if max_size <= 3:
                # Not enough room for any ellipsis marker.
                return value[:max(max_size, 0)]
            if max_size <= 6:
                # Room for the marker but not for a tail slice.
                return value[:max_size - 3] + '...'
            half_size = max_size // 2
            # BUGFIX: the original computed value[-(max_size - half_size - 3):]
            # unconditionally; when that tail length was 0 the slice value[-0:]
            # returned the WHOLE string, so "truncation" produced oversized
            # output. The guards above keep the tail length strictly positive.
            return value[:half_size] + '...' + value[-(max_size - half_size - 3):]

        # Ensure no field exceeds max_field_len
        for i, (key, value) in enumerate(fields):
            if isinstance(value, str) and len(value) > max_field_len:
                fields[i] = (key, truncate_value(value, max_field_len))

        # Ensure total size of the dict doesn't exceed size_limit
        total_size = sum(field_size(key, value) for key, value in fields)
        log_dict = {}
        truncated = False

        if total_size > size_limit:
            truncated = True
            remaining_size = size_limit
            remaining_fields = len(fields)

            # Redistribute the budget evenly across the not-yet-emitted fields;
            # fields smaller than their share donate the surplus to later ones.
            for key, value in fields:
                if remaining_fields > 0:
                    max_size_per_field = remaining_size // remaining_fields
                else:
                    max_size_per_field = 0

                field_sz = field_size(key, value)
                if field_sz > max_size_per_field:
                    value = truncate_value(value, max_size_per_field)
                    field_sz = field_size(key, value)

                log_dict[key] = value
                remaining_size -= field_sz
                remaining_fields -= 1
        else:
            log_dict = dict(fields)

        log_dict['trunc'] = truncated

        return log_dict

    def __str__(self):
        return json.dumps(self.to_dict(), indent=4)

    def __repr__(self):
        return self.__str__()
@@ -0,0 +1,354 @@
1
+ # pylint: disable=missing-module-docstring
2
+ # pylint: disable=missing-function-docstring
3
+ # pylint: disable=logging-fstring-interpolation
4
+ # pylint: disable=line-too-long
5
+ # pylint: disable=missing-class-docstring
6
+ # pylint: disable=broad-exception-caught
7
+ import json
8
+ import uuid
9
+ from datetime import datetime, timezone
10
+ from contextlib import contextmanager
11
+ from typing import List
12
+ from ipulse_shared_enums import LogLevel
13
+ from .context_log import ContextLog
14
+ ############################################################################
15
+ ##### PIPINEMON Collector for Logs and Statuses of running pipelines #######
16
class Pipelinemon:
    """Pipeline Monitoring collector for logs and statuses of running pipelines.

    Accumulates ContextLog records (stored as size-bounded dicts), keeps
    per-severity counters, tracks a stack of nested contexts, and optionally
    mirrors every record to an attached logger.
    """

    # Band boundaries derived from the shared LogLevel enum: a log whose
    # level_code falls in [X_START_CODE, next band start) is counted as X.
    ERROR_START_CODE = LogLevel.ERROR.value
    WARNING_START_CODE = LogLevel.WARNING.value
    NOTICE_START_CODE = LogLevel.NOTICE.value
    SUCCESS_START_CODE = LogLevel.SUCCESS.value
    INFO_START_CODE = LogLevel.INFO.value

    def __init__(self, base_context: str, logger,
                 max_log_field_size: int = 10000,
                 max_log_dict_size: float = 256 * 1024 * 0.80,
                 max_log_traceback_lines: int = 30):
        """Create a collector.

        Args:
            base_context: Pipeline-wide context stamped onto every log.
            logger: Logger mirroring each record (warning for WARNING+,
                info otherwise); may be None to disable mirroring.
            max_log_field_size: Per-field character limit passed to
                ContextLog.to_dict().
            max_log_dict_size: Total JSON byte-size limit per log dict.
            max_log_traceback_lines: Traceback line limit per log dict.
        """
        self._id = str(uuid.uuid4())
        self._logs = []
        self._early_stop = False
        self._errors_count = 0
        self._warnings_count = 0
        self._notices_count = 0
        self._successes_count = 0
        self._infos_count = 0
        self._systems_impacted = []
        self._by_level_counts = {level.name: 0 for level in LogLevel}
        self._base_context = base_context
        self._context_stack = []
        self._logger = logger
        self._max_log_field_size = max_log_field_size
        self._max_log_dict_size = max_log_dict_size
        self._max_log_traceback_lines = max_log_traceback_lines

    @contextmanager
    def context(self, context):
        """Push a nested context for the duration of the with-block."""
        self.push_context(context)
        try:
            yield
        finally:
            self.pop_context()

    def push_context(self, context):
        self._context_stack.append(context)

    def pop_context(self):
        if self._context_stack:
            self._context_stack.pop()

    @property
    def current_context(self):
        # Nested contexts rendered as "outer >> inner".
        return " >> ".join(self._context_stack)

    @property
    def base_context(self):
        return self._base_context

    @property
    def id(self):
        return self._id

    @property
    def systems_impacted(self):
        return self._systems_impacted

    @systems_impacted.setter
    def systems_impacted(self, list_of_si: List[str]):
        self._systems_impacted = list_of_si

    def add_system_impacted(self, system_impacted: str):
        if self._systems_impacted is None:
            self._systems_impacted = []
        self._systems_impacted.append(system_impacted)

    def clear_systems_impacted(self):
        self._systems_impacted = []

    @property
    def max_log_dict_size(self):
        return self._max_log_dict_size

    @max_log_dict_size.setter
    def max_log_dict_size(self, value):
        self._max_log_dict_size = value

    @property
    def max_log_field_size(self):
        return self._max_log_field_size

    @max_log_field_size.setter
    def max_log_field_size(self, value):
        self._max_log_field_size = value

    @property
    def max_log_traceback_lines(self):
        return self._max_log_traceback_lines

    @max_log_traceback_lines.setter
    def max_log_traceback_lines(self, value):
        self._max_log_traceback_lines = value

    @property
    def early_stop(self):
        return self._early_stop

    def set_early_stop(self, max_errors_tolerance: int = 0, max_warnings_tolerance: int = 0, create_error_log=True, pop_context=False):
        """Flag the pipeline for early stop, optionally logging the reason.

        Args:
            max_errors_tolerance: When > 0, log that the error threshold hit.
            max_warnings_tolerance: When > 0 (and no error threshold given),
                log that the warning threshold hit.
            create_error_log: Whether to record an EARLY_STOP log at all.
            pop_context: Pop the current context before logging, so the
                EARLY_STOP record is attributed to the enclosing context.
        """
        self._early_stop = True
        if create_error_log:
            if pop_context:
                self.pop_context()
            if max_errors_tolerance > 0:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description=f"Total MAX_ERRORS_TOLERANCE of {max_errors_tolerance} has been reached."))
            elif max_warnings_tolerance > 0:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description=f"Total MAX_WARNINGS_TOLERANCE of {max_warnings_tolerance} has been reached."))
            else:
                self.add_log(ContextLog(level=LogLevel.ERROR_PIPELINE_THRESHOLD_REACHED,
                                        subject="EARLY_STOP",
                                        description="Early stop has been triggered."))

    def reset_early_stop(self):
        self._early_stop = False

    def add_log(self, log: ContextLog):
        """Stamp collector metadata onto a ContextLog, store and count it."""
        log.base_context = self.base_context
        log.context = self.current_context if self.current_context else "root"
        log.collector_id = self.id
        log.systems_impacted = self.systems_impacted
        log_dict = log.to_dict(max_field_len=self.max_log_field_size, size_limit=self.max_log_dict_size, max_traceback_lines=self.max_log_traceback_lines)
        self._logs.append(log_dict)
        self._update_counts(log_dict)

        if self._logger:
            # We specifically want to avoid having an ERROR log level for this structured Pipelinemon reporting, to ensure Errors are alerting on Critical Application Services.
            # A single ERROR log level is usually added at the end of the entire pipeline
            if log.level.value >= self.WARNING_START_CODE:
                self._logger.warning(log_dict)
            else:
                self._logger.info(log_dict)

    def add_logs(self, logs: List[ContextLog]):
        """Add several ContextLog records in order."""
        for log in logs:
            self.add_log(log)

    def clear_logs_and_counts(self):
        """Drop all stored logs and reset every counter."""
        self._logs = []
        self._errors_count = 0
        self._warnings_count = 0
        self._notices_count = 0
        self._successes_count = 0
        self._infos_count = 0
        self._by_level_counts = {level.name: 0 for level in LogLevel}

    def clear_logs(self):
        """Drop stored logs only; counters are intentionally kept."""
        self._logs = []

    def get_all_logs(self, in_json_format=False):
        if in_json_format:
            return json.dumps(self._logs)
        return self._logs

    def get_logs_for_level(self, level: LogLevel):
        return [log for log in self._logs if log["level_code"] == level.value]

    def get_logs_by_str_in_context(self, context_substring: str):
        return [
            log for log in self._logs
            if context_substring in log["context"]
        ]

    def contains_errors(self):
        return self._errors_count > 0

    def count_errors(self):
        return self._errors_count

    def contains_warnings_or_errors(self):
        return self._warnings_count > 0 or self._errors_count > 0

    def count_warnings_and_errors(self):
        return self._warnings_count + self._errors_count

    def count_warnings(self):
        return self._warnings_count

    def count_notices(self):
        return self._notices_count

    def count_successes(self):
        return self._successes_count

    def count_successes_with_notice(self):
        return self.count_logs_by_level(LogLevel.SUCCESS_WITH_NOTICES)

    def count_successes_no_notice(self):
        return self.count_logs_by_level(LogLevel.SUCCESS)

    def count_infos(self):
        return self._infos_count

    def count_all_logs(self):
        return len(self._logs)

    def count_logs_by_level(self, level: LogLevel):
        return self._by_level_counts.get(level.name, 0)

    def _count_logs(self, context_substring: str, exact_match=False, level_code_min=None, level_code_max=None):
        """Count stored logs matching a context and an inclusive level range."""
        return sum(
            1 for log in self._logs
            if (log["context"] == context_substring if exact_match else context_substring in log["context"]) and
            (level_code_min is None or log["level_code"] >= level_code_min) and
            (level_code_max is None or log["level_code"] <= level_code_max)
        )

    def count_logs_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True)

    def count_logs_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context)

    def count_logs_by_level_for_current_context(self, level: LogLevel):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=level.value, level_code_max=level.value)

    def count_logs_by_level_for_current_and_nested_contexts(self, level: LogLevel):
        return self._count_logs(self.current_context, level_code_min=level.value, level_code_max=level.value)

    def count_errors_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.ERROR_START_CODE)

    def count_errors_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.ERROR_START_CODE)

    def count_warnings_and_errors_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE)

    def count_warnings_and_errors_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE)

    def count_warnings_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)

    def count_warnings_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.WARNING_START_CODE, level_code_max=self.ERROR_START_CODE - 1)

    def count_notices_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE - 1)

    def count_notices_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.NOTICE_START_CODE, level_code_max=self.WARNING_START_CODE - 1)

    def count_successes_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE - 1)

    def count_successes_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.SUCCESS_START_CODE, level_code_max=self.NOTICE_START_CODE - 1)

    def count_infos_for_current_context(self):
        return self._count_logs(self.current_context, exact_match=True, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE - 1)

    def count_infos_for_current_and_nested_contexts(self):
        return self._count_logs(self.current_context, level_code_min=self.INFO_START_CODE, level_code_max=self.SUCCESS_START_CODE - 1)

    def _update_counts(self, log, remove=False):
        """Adjust the band counters and per-level counter for one log dict.

        Args:
            log: A log dict (ContextLog.to_dict output) with 'level_code'
                and 'level_name' keys.
            remove: Decrement instead of increment.
        """
        level_code = log["level_code"]
        level_name = log["level_name"]
        delta = -1 if remove else 1

        if level_code >= self.ERROR_START_CODE:
            self._errors_count += delta
        elif self.WARNING_START_CODE <= level_code < self.ERROR_START_CODE:
            self._warnings_count += delta
        elif self.NOTICE_START_CODE <= level_code < self.WARNING_START_CODE:
            self._notices_count += delta
        elif self.SUCCESS_START_CODE <= level_code < self.NOTICE_START_CODE:
            self._successes_count += delta
        elif self.INFO_START_CODE <= level_code < self.SUCCESS_START_CODE:
            self._infos_count += delta
        self._by_level_counts[level_name] += delta

    def generate_file_name(self, file_prefix=None, include_base_context=True):
        """Build a timestamped JSON file name for exporting the stored logs."""
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        if not file_prefix:
            file_prefix = "pipelinelogs"
        if include_base_context:
            file_name = f"{file_prefix}_{timestamp}_{self.base_context}_len{len(self._logs)}.json"
        else:
            file_name = f"{file_prefix}_{timestamp}_len{len(self._logs)}.json"

        return file_name

    def import_logs_from_json(self, json_or_file, logger=None):
        """Import previously exported log dicts from a JSON string or file.

        Args:
            json_or_file: JSON text, or a file-like object with .read(),
                containing a list of log dicts (ContextLog.to_dict output).
            logger: Optional logger for success/failure reporting.
        """
        def log_message(message):
            if logger:
                logger.info(message)

        def log_warning(message, exc_info=False):
            if logger:
                logger.warning(message, exc_info=exc_info)

        try:
            if isinstance(json_or_file, str):  # Load from string
                imported_logs = json.loads(json_or_file)
            elif hasattr(json_or_file, 'read'):  # Load from file-like object
                imported_logs = json.load(json_or_file)
            else:
                # BUGFIX: previously an unsupported type left 'imported_logs'
                # unbound, surfacing as a confusing NameError.
                raise TypeError(f"Unsupported source type: {type(json_or_file).__name__}")
            # BUGFIX: parsed JSON yields plain dicts, but these were routed
            # through add_logs()/add_log(), which sets attributes on ContextLog
            # instances and therefore raised AttributeError on every import.
            # Append the dicts directly and keep the counters consistent.
            for log_dict in imported_logs:
                self._logs.append(log_dict)
                self._update_counts(log_dict)
            log_message("Successfully imported logs from json.")
        except Exception as e:
            log_warning(f"Failed to import logs from json: {type(e).__name__} - {str(e)}", exc_info=True)

    def generate_final_log_message(self, subjectref: str, total_subjs: int) -> str:
        """Render the end-of-pipeline summary message."""
        return f"""
        Pipeline for {self.base_context} with pipelinemon.id: {self.id},
        SUMMARY: Early_Stop= {self.early_stop} ;
        A)SUCCESSES: {self.count_successes()}/{total_subjs} {subjectref}(s) ; out of which SUCCESSES_WITH_NOTICES: {self.count_successes_with_notice()}/{total_subjs} {subjectref}(s) ;
        B)NOTICES: {self.count_notices()} ;
        C)WARNINGS: {self.count_warnings()} ;
        D)ERRORS: {self.count_errors()} ;
        E)INFOS: {self.count_infos()} ;
        """

    def log_final_message(self, subjectref: str, total_subjs: int, generallogger):
        """Emit the summary: error level when any warnings/errors occurred."""
        final_log_message = self.generate_final_log_message(subjectref=subjectref, total_subjs=total_subjs)
        if self.count_warnings_and_errors() > 0:
            generallogger.error(final_log_message)
        else:
            generallogger.info(final_log_message)
@@ -0,0 +1,8 @@
1
+ from .utils_check_data_schema import check_format_against_schema_template
2
+ from .utils_cloud import (write_json_to_cloud_storage_extended,
3
+ read_json_from_cloud_storage)
4
+ from .utils_cloud_gcp import (write_json_to_gcs_extended,
5
+ read_json_from_gcs
6
+ )
7
+ from .utils_local_files import (save_json_locally_extended,
8
+ prepare_full_file_path)