ipulse-shared-core-ftredge 2.7.1-py3-none-any.whl → 2.8.1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they were published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release. This version of ipulse-shared-core-ftredge might be problematic.

Files changed (30)
  1. ipulse_shared_core_ftredge/__init__.py +7 -12
  2. ipulse_shared_core_ftredge/logging/__init__.py +1 -0
  3. ipulse_shared_core_ftredge/logging/logging_handlers_and_formatters.py +144 -0
  4. ipulse_shared_core_ftredge/logging/utils_logging.py +72 -0
  5. ipulse_shared_core_ftredge/utils/__init__.py +1 -21
  6. ipulse_shared_core_ftredge/utils/utils_common.py +3 -173
  7. {ipulse_shared_core_ftredge-2.7.1.dist-info → ipulse_shared_core_ftredge-2.8.1.dist-info}/METADATA +1 -2
  8. ipulse_shared_core_ftredge-2.8.1.dist-info/RECORD +19 -0
  9. ipulse_shared_core_ftredge/enums/__init__.py +0 -37
  10. ipulse_shared_core_ftredge/enums/enums_common_utils.py +0 -107
  11. ipulse_shared_core_ftredge/enums/enums_data_eng.py +0 -313
  12. ipulse_shared_core_ftredge/enums/enums_logging.py +0 -108
  13. ipulse_shared_core_ftredge/enums/enums_module_fincore.py +0 -72
  14. ipulse_shared_core_ftredge/enums/enums_modules.py +0 -31
  15. ipulse_shared_core_ftredge/enums/enums_solution_providers.py +0 -24
  16. ipulse_shared_core_ftredge/enums/pulse_enums.py +0 -182
  17. ipulse_shared_core_ftredge/utils/logs/__init__.py +0 -2
  18. ipulse_shared_core_ftredge/utils/logs/context_log.py +0 -210
  19. ipulse_shared_core_ftredge/utils/logs/get_logger.py +0 -103
  20. ipulse_shared_core_ftredge/utils/utils_cloud.py +0 -53
  21. ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py +0 -442
  22. ipulse_shared_core_ftredge/utils/utils_cloud_gcp_with_collectors.py +0 -166
  23. ipulse_shared_core_ftredge/utils/utils_cloud_with_collectors.py +0 -27
  24. ipulse_shared_core_ftredge/utils/utils_collector_pipelinemon.py +0 -356
  25. ipulse_shared_core_ftredge/utils/utils_templates_and_schemas.py +0 -151
  26. ipulse_shared_core_ftredge-2.7.1.dist-info/RECORD +0 -33
  27. /ipulse_shared_core_ftredge/{utils/logs → logging}/audit_log_firestore.py +0 -0
  28. {ipulse_shared_core_ftredge-2.7.1.dist-info → ipulse_shared_core_ftredge-2.8.1.dist-info}/LICENCE +0 -0
  29. {ipulse_shared_core_ftredge-2.7.1.dist-info → ipulse_shared_core_ftredge-2.8.1.dist-info}/WHEEL +0 -0
  30. {ipulse_shared_core_ftredge-2.7.1.dist-info → ipulse_shared_core_ftredge-2.8.1.dist-info}/top_level.txt +0 -0
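
Note: entries 2-4 add a new top-level logging package, entries 17-19 remove the old utils/logs package, and entry 27 moves audit_log_firestore.py between them. A minimal, hedged sketch of the import change a consumer of 2.7.1 would likely need, based only on the moved module path in entry 27 (whatever else the new logging package re-exports is not visible in this diff):

# Illustrative sketch only; audit_log_firestore is the one module entry 27 shows moving.
try:
    from ipulse_shared_core_ftredge.logging import audit_log_firestore    # 2.8.1 layout
except ImportError:
    from ipulse_shared_core_ftredge.utils.logs import audit_log_firestore  # 2.7.1 layout
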
ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py (deleted)
@@ -1,442 +0,0 @@
- # pylint: disable=missing-module-docstring
- # pylint: disable=missing-function-docstring
- # pylint: disable=missing-class-docstring
- # pylint: disable=broad-exception-caught
- # pylint: disable=line-too-long
- # pylint: disable=unused-variable
- # pylint: disable=broad-exception-raised
- import json
- import csv
- from io import StringIO
- import os
- import time
- import logging
- import traceback
- from google.api_core.exceptions import NotFound
- from google.cloud import error_reporting
- from google.cloud import logging as cloud_logging
- from google.cloud.storage import Client as GCSClient
- from google.cloud import bigquery
- from ipulse_shared_core_ftredge.enums import DuplicationHandling, DuplicationHandlingStatus, MatchConditionType,DataSourceType, LogLevel
- from ipulse_shared_core_ftredge.utils.utils_common import log_error, log_warning, log_info
- from ipulse_shared_core_ftredge.utils.logs import ContextLog
- from ipulse_shared_core_ftredge.utils.utils_collector_pipelinemon import Pipelinemon
-
- ############################################################################
- ##################### GOOGLE CLOUD PLATFORM UTILS ##################################
- ############################################################################
-
- class CustomGCPLoggingHandler(cloud_logging.handlers.CloudLoggingHandler):
-     """Custom handler for Google Cloud Logging with a dynamic logName."""
-     def __init__(self, client, name, resource=None, labels=None):
-         super().__init__(client=client, name=name, resource=resource, labels=labels)
-         self.client = client # Ensure client is consistently used
-
-     def emit(self, record):
-         try:
-             # 1. Create the basic log entry dictionary
-             log_entry = {
-                 'message': record.msg,
-                 'severity': record.levelname,
-                 'name': record.name,
-                 'pathname': record.filename,
-                 'lineno': record.lineno,
-             }
-             if record.exc_info:
-                 log_entry['exception_traceback'] = ''.join(
-                     traceback.format_exception(*record.exc_info)
-                 )
-
-             # 2. Apply the formatter to the 'message' field if it's a dictionary
-             if isinstance(record.msg, dict):
-                 formatted_message = self.formatter.format(record)
-                 try:
-                     log_entry['message'] = json.loads(formatted_message)
-                 except json.JSONDecodeError:
-                     log_entry['message'] = formatted_message
-             else:
-                 log_entry['message'] = record.msg
-
-             # 3. Set the custom logName
-             log_entry['logName'] = f"projects/{self.client.project}/logs/{record.name}"
-
-             # 4. Send to Google Cloud Logging
-             super().emit(record)
-         except Exception as e:
-             self.handleError(record)
-
- class CustomGCPErrorReportingHandler(logging.Handler):
-     def __init__(self, client=None, level=logging.ERROR):
-         super().__init__(level)
-         self.error_client = error_reporting.Client() if client is None else client
-         self.propagate = True
-
-     def emit(self, record):
-         try:
-             if record.levelno >= logging.ERROR:
-                 log_struct = {
-                     'message': self.format(record),
-                     'severity': record.levelname,
-                     'pathname': getattr(record, 'pathname', None),
-                     'lineno': getattr(record, 'lineno', None)
-                 }
-                 if record.exc_info:
-                     log_struct['exception'] = ''.join(
-                         traceback.format_exception(*record.exc_info)
-                     )
-                 self.error_client.report(str(log_struct))
-         except Exception as e:
-             self.handleError(record)
-
-
- def add_gcp_cloud_logging(logger, formatter, client=None):
-     """Sets up Google Cloud Logging for the logger."""
-     client = client or cloud_logging.Client()
-     handler = CustomGCPLoggingHandler(client, logger.name)
-     handler.setFormatter(formatter)
-     logger.addHandler(handler)
-
- def add_gcp_error_reporting(logger, client=None):
-     """Sets up Google Cloud Error Reporting for the logger."""
-     client = client or error_reporting.Client()
-     handler = CustomGCPErrorReportingHandler(client=client)
-     logger.addHandler(handler)
-
-
-
- def create_bigquery_schema_from_json(json_schema: list) -> list:
-     schema = []
-     for field in json_schema:
-         if "max_length" in field:
-             schema.append(bigquery.SchemaField(field["name"], field["type"], mode=field["mode"], max_length=field["max_length"]))
-         else:
-             schema.append(bigquery.SchemaField(field["name"], field["type"], mode=field["mode"]))
-     return schema
-
-
- def read_json_from_gcs(storage_client:GCSClient, bucket_name:str, file_name:str, logger=None,print_out=False):
-     """ Helper function to read a JSON file from Google Cloud Storage """
-     try:
-         bucket = storage_client.bucket(bucket_name)
-         blob = bucket.blob(file_name)
-         data_string = blob.download_as_text()
-         data = json.loads(data_string)
-         return data
-     except NotFound:
-         log_warning(msg=f"Warning: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
-         return None
-     except json.JSONDecodeError:
-         log_error(msg=f"Error: The file {file_name} could not be decoded as JSON.", logger=logger, print_out=print_out)
-         return None
-     except Exception as e:
-         log_error(msg=f"An unexpected error occurred: {e}", exc_info=True, logger=logger, print_out=print_out)
-         return None
-
- def read_csv_from_gcs(bucket_name:str, file_name:str, storage_client:GCSClient, logger=None, print_out=False):
-     """ Helper function to read a CSV file from Google Cloud Storage """
-
-     try:
-         bucket = storage_client.bucket(bucket_name)
-         blob = bucket.blob(file_name)
-         data_string = blob.download_as_text()
-         data_file = StringIO(data_string)
-         reader = csv.DictReader(data_file)
-         return list(reader)
-     except NotFound:
-         log_warning(msg=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
-         return None
-     except csv.Error:
-         log_error(msg=f"Error: The file {file_name} could not be read as CSV.", logger=logger, print_out=print_out)
-         return None
-     except Exception as e:
-         log_error(msg=f"An unexpected error occurred: {e}", logger=logger, print_out=print_out, exc_info=True)
-         return None
-
- def write_json_to_gcs_extended(storage_client: GCSClient, data: dict | list | str, bucket_name: str, file_name: str,
-                                duplication_handling_enum: DuplicationHandling, duplication_match_condition_type_enum: MatchConditionType,
-                                duplication_match_condition: str = "", max_retries: int = 2, max_deletable_files: int = 1,
-                                logger=None, print_out=False, raise_e=False, pipelinemon: Pipelinemon = None):
-
-     """Saves data to Google Cloud Storage with optional Pipelinemon monitoring.
-
-     Handles duplication with strategies: OVERWRITE, INCREMENT, SKIP, or RAISE_ERROR.
-     """
-
-     max_deletable_files_allowed = 3
-     cloud_storage_ref=DataSourceType.GCS.value
-
-     # GCS-related metadata
-     saved_to_path = None
-     matched_duplicates_count = 0
-     matched_duplicates_deleted = []
-     duplication_handling_status = None
-     error_during_operation = None
-
-     response = {
-         "saved_to_path": saved_to_path,
-         "matched_duplicates_count": matched_duplicates_count,
-         "matched_duplicates_deleted": matched_duplicates_deleted,
-         "duplication_handling_status": duplication_handling_status,
-         "duplication_match_condition_type": duplication_match_condition_type_enum.value,
-         "duplication_match_condition": duplication_match_condition,
-         "error_during_operation": error_during_operation
-     }
-
-     supported_match_condition_types = [MatchConditionType.EXACT, MatchConditionType.PREFIX]
-     supported_duplication_handling = [DuplicationHandling.RAISE_ERROR, DuplicationHandling.OVERWRITE, DuplicationHandling.INCREMENT, DuplicationHandling.SKIP]
-
-     try:
-         if max_deletable_files > max_deletable_files_allowed:
-             raise ValueError(f"max_deletable_files should be less than or equal to {max_deletable_files_allowed} for safety.")
-         if duplication_handling_enum not in supported_duplication_handling:
-             msg = f"Error: Duplication handling not supported. Supported types: {[dh.value for dh in supported_duplication_handling]}"
-             raise ValueError(msg)
-         if duplication_match_condition_type_enum not in supported_match_condition_types:
-             msg = f"Error: Match condition type not supported. Supported types: {[mct.value for mct in supported_match_condition_types]}"
-             raise ValueError(msg)
-         elif duplication_match_condition_type_enum != MatchConditionType.EXACT and not duplication_match_condition:
-             msg = f"Error: Match condition is required for match condition type: {duplication_match_condition_type_enum.value}"
-             raise ValueError(msg)
-
-         # Prepare data
-         if isinstance(data, (list, dict)):
-             data_str = json.dumps(data, indent=2)
-         else:
-             data_str = data
-
-         increment = 0
-         attempts = 0
-         success = False
-
-         # Check for existing files based on duplication_match_condition_type
-         files_matched_on_condition = []
-         bucket = storage_client.bucket(bucket_name)
-         base_file_name, ext = os.path.splitext(file_name)
-         if duplication_match_condition_type_enum == MatchConditionType.PREFIX:
-             files_matched_on_condition = list(bucket.list_blobs(prefix=duplication_match_condition))
-         elif duplication_match_condition_type_enum == MatchConditionType.EXACT:
-             duplication_match_condition = file_name if not duplication_match_condition else duplication_match_condition
-             if bucket.blob(duplication_match_condition).exists():
-                 files_matched_on_condition = [bucket.blob(file_name)]
-
-         matched_duplicates_count = len(files_matched_on_condition)
-         response["matched_duplicates_count"] = matched_duplicates_count
-
-         # Handle duplication based on duplication_handling
-         if matched_duplicates_count:
-             log_msg = f"Duplicate FOUND, matched_duplicates_count: {matched_duplicates_count}"
-             if pipelinemon:
-                 pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject="duplicate_found", description=log_msg))
-
-             if duplication_handling_enum == DuplicationHandling.RAISE_ERROR:
-                 raise FileExistsError("File(s) matching the condition already exist.")
-
-             if duplication_handling_enum == DuplicationHandling.SKIP:
-                 response["duplication_handling_status"] = DuplicationHandlingStatus.SKIPPED.value
-                 log_msg = f"SKIPPING, response: {response}"
-                 log_info(log_msg, logger=logger, print_out=print_out) ## only logsor prints if logger is provided and print_out is True
-                 return response
-
-             if duplication_handling_enum == DuplicationHandling.OVERWRITE:
-                 if matched_duplicates_count > max_deletable_files:
-                     raise ValueError(f"Error: Attempt to delete {matched_duplicates_count} matched files, but limit is {max_deletable_files}. Operation Cancelled.")
-
-                 for blob in files_matched_on_condition:
-                     cloud_storage_path_to_delete = f"gs://{bucket_name}/{blob.name}"
-                     blob.delete()
-                     matched_duplicates_deleted.append(cloud_storage_path_to_delete)
-                     log_msg = f"File deleted as part of overwrite: {cloud_storage_path_to_delete}"
-                     if pipelinemon:
-                         pipelinemon.add_system_impacted(f"delete: {cloud_storage_ref}_bucket_file: {cloud_storage_path_to_delete}")
-                         pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject="delete_duplicate", description=log_msg))
-                     log_info(log_msg, logger=logger, print_out=print_out)
-
-                 response["matched_duplicates_deleted"] = matched_duplicates_deleted
-                 response["duplication_handling_status"] = DuplicationHandlingStatus.OVERWRITTEN.value
-
-             elif duplication_handling_enum == DuplicationHandling.INCREMENT:
-                 while bucket.blob(file_name).exists():
-                     increment += 1
-                     file_name = f"{base_file_name}_v{increment}{ext}"
-                 saved_to_path = f"gs://{bucket_name}/{file_name}"
-                 response["duplication_handling_status"] = DuplicationHandlingStatus.INCREMENTED.value
-                 log_msg = "INCREMENTING as Duplicate FOUND "
-                 log_info(log_msg, logger=logger, print_out=print_out) ## only logsor prints if logger is provided and print_out is True
-
-         # GCS Upload
-         saved_to_path = f"gs://{bucket_name}/{file_name}"
-         while attempts < max_retries and not success:
-             try:
-                 blob = bucket.blob(file_name)
-                 blob.upload_from_string(data_str, content_type='application/json')
-                 log_msg = f"File uploaded to GCS: {saved_to_path}"
-                 if pipelinemon:
-                     pipelinemon.add_system_impacted(f"upload: {cloud_storage_ref}_bucket_file: {saved_to_path}")
-                     pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE, subject="file_upload", description=log_msg))
-                 log_info(log_msg, logger=logger, print_out=print_out)
-                 success = True
-             except Exception as e:
-                 attempts += 1
-                 if attempts < max_retries:
-                     time.sleep(2 ** attempts)
-                 else:
-                     raise e
-
-     except Exception as e:
-         error_during_operation = f"Error occurred while writing JSON to GCS path: {saved_to_path} ; Error details: {type(e).__name__} - {str(e)}"
-         response["error_during_operation"] = error_during_operation
-         if pipelinemon:
-             pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e, description="response: {response}"))
-         log_error(response, logger=logger, print_out=print_out)
-         if raise_e:
-             raise e
-
-     response["saved_to_path"] = saved_to_path if success else None
-     return response
-
- # def write_json_to_gcs_extended(storage_client: GCSClient, data: dict | list | str, bucket_name: str, file_name: str,
- #                                duplication_handling: DuplicationHandling, duplication_match_condition_type: MatchConditionType,
- #                                duplication_match_condition: str | List[str] = "", max_retries: int = 2, max_deletable_files: int = 1,
- #                                logger=None, print_out=False, raise_e=False):
-
- #     """Saves data to Google Cloud Storage.
-
- #     Handles duplication with strategies: OVERWRITE, INCREMENT, SKIP, or RAISE_ERROR.
- #     """
-
- #     max_deletable_files_allowed = 3
-
- #     # GCS-related metadata
- #     saved_to_path = None
- #     matched_duplicates_count = 0
- #     matched_duplicates_deleted = []
- #     duplication_handling_status = None
- #     error_during_operation = None
-
- #     response = {
- #         "saved_to_path": saved_to_path,
- #         "matched_duplicates_count": matched_duplicates_count,
- #         "matched_duplicates_deleted": matched_duplicates_deleted,
- #         "duplication_handling_status": duplication_handling_status,
- #         "duplication_match_condition_type": duplication_match_condition_type,
- #         "duplication_match_condition": duplication_match_condition,
- #         "error_during_operation": error_during_operation
- #     }
-
- #     supported_match_condition_types = [MatchConditionType.EXACT, MatchConditionType.PREFIX]
- #     supported_duplication_handling = [DuplicationHandling.RAISE_ERROR, DuplicationHandling.OVERWRITE, DuplicationHandling.INCREMENT, DuplicationHandling.SKIP]
-
- #     try:
- #         if max_deletable_files > max_deletable_files_allowed:
- #             raise ValueError(f"max_deletable_files should be less than or equal to {max_deletable_files_allowed} for safety.")
- #         if duplication_handling not in supported_duplication_handling:
- #             msg = f"Error: Duplication handling not supported. Supported types: {supported_duplication_handling}"
- #             raise ValueError(msg)
- #         if duplication_match_condition_type not in supported_match_condition_types:
- #             msg = f"Error: Match condition type not supported. Supported types: {supported_match_condition_types}"
- #             raise ValueError(msg)
- #         elif duplication_match_condition_type!=MatchConditionType.EXACT and not duplication_match_condition:
- #             msg = f"Error: Match condition is required for match condition type: {duplication_match_condition_type}"
- #             raise ValueError(msg)
-
- #         # Prepare data
- #         if isinstance(data, (list, dict)):
- #             data_str = json.dumps(data, indent=2)
- #         else:
- #             data_str = data
-
- #         increment = 0
- #         attempts = 0
- #         success = False
-
- #         # Check for existing files based on duplication_match_condition_type
- #         files_matched_on_condition = []
- #         bucket = storage_client.bucket(bucket_name)
- #         base_file_name, ext = os.path.splitext(file_name)
- #         if duplication_match_condition_type == MatchConditionType.PREFIX:
- #             files_matched_on_condition = list(bucket.list_blobs(prefix=duplication_match_condition))
- #         elif duplication_match_condition_type == MatchConditionType.EXACT:
- #             if bucket.blob(file_name).exists():
- #                 files_matched_on_condition = [bucket.blob(file_name)]
-
- #         matched_duplicates_count = len(files_matched_on_condition)
- #         response["matched_duplicates_count"] = matched_duplicates_count
-
- #         # Handle duplication based on duplication_handling
- #         if matched_duplicates_count:
- #             if duplication_handling == DuplicationHandling.RAISE_ERROR:
- #                 raise FileExistsError("File(s) matching the condition already exist.")
-
- #             if duplication_handling == DuplicationHandling.SKIP:
- #                 log_warning("Skipping saving to GCS: file(s) matching the condition already exist.", logger=logger, print_out=print_out)
- #                 response["duplication_handling_status"] = DuplicationHandlingStatus.SKIPPED.value
- #                 return response
-
- #             if duplication_handling == DuplicationHandling.OVERWRITE:
- #                 if matched_duplicates_count > max_deletable_files:
- #                     raise ValueError(f"Error: Attempt to delete {matched_duplicates_count} matched files, but limit is {max_deletable_files}. Operation Cancelled.")
-
- #                 for blob in files_matched_on_condition:
- #                     cloud_storage_path_to_delete = f"gs://{bucket_name}/{blob.name}"
- #                     blob.delete()
- #                     matched_duplicates_deleted.append(cloud_storage_path_to_delete)
-
- #                 response["matched_duplicates_deleted"] = matched_duplicates_deleted
- #                 response["duplication_handling_status"] = DuplicationHandlingStatus.OVERWRITTEN.value
-
- #             elif duplication_handling == DuplicationHandling.INCREMENT:
- #                 while bucket.blob(file_name).exists():
- #                     increment += 1
- #                     file_name = f"{base_file_name}_v{increment}{ext}"
- #                 saved_to_path = f"gs://{bucket_name}/{file_name}"
- #                 response["duplication_handling_status"] = DuplicationHandlingStatus.INCREMENTED.value
-
- #         # GCS Upload
- #         saved_to_path = f"gs://{bucket_name}/{file_name}"
- #         while attempts < max_retries and not success:
- #             try:
- #                 blob = bucket.blob(file_name)
- #                 blob.upload_from_string(data_str, content_type='application/json')
- #                 success = True
- #             except Exception as e:
- #                 attempts += 1
- #                 if attempts < max_retries:
- #                     time.sleep(2 ** attempts)
- #                 else:
- #                     if raise_e:
- #                         raise e
-
- #     except Exception as e:
- #         error_message = f"Error occurred while writing JSON to GCS path: {saved_to_path} : {type(e).__name__} - {str(e)}"
- #         log_error(error_message, logger=logger, print_out=print_out)
- #         response["error_during_operation"] = error_message
- #         if raise_e:
- #             raise e
-
- #     response["saved_to_path"] = saved_to_path if success else None
- #     return response
-
-
- def write_csv_to_gcs(bucket_name:str, file_name:str, data:dict | list | str, storage_client:GCSClient, logger=None, print_out=False, raise_e=False):
-     """ Helper function to write a CSV file to Google Cloud Storage """
-     try:
-         bucket = storage_client.bucket(bucket_name)
-         blob = bucket.blob(file_name)
-         data_file = StringIO()
-         if data and isinstance(data, list) and isinstance(data[0], dict):
-             fieldnames = data[0].keys()
-             writer = csv.DictWriter(data_file, fieldnames=fieldnames)
-             writer.writeheader()
-             writer.writerows(data)
-         else:
-             raise ValueError("Data should be a list of dictionaries")
-         blob.upload_from_string(data_file.getvalue(), content_type='text/csv')
-         log_info(msg=f"Successfully wrote CSV to {file_name} in bucket {bucket_name}.", logger=logger, print_out=print_out)
-     except ValueError as e:
-         log_error(msg=f"ValueError: {e}",logger=logger, print_out=print_out)
-         if raise_e:
-             raise e
-     except Exception as e:
-         log_error(msg=f"An unexpected error occurred while writing CSV to GCS: {e}", logger=logger, print_out=print_out, exc_info=True)
-         if raise_e:
-             raise e
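
For context, a hedged sketch of how the write_json_to_gcs_extended helper removed above was typically called from 2.7.1. The parameter names and enum members come directly from the deleted code; the bucket and object names below are hypothetical placeholders:

# Illustrative only: this helper no longer exists in 2.8.1.
from google.cloud.storage import Client as GCSClient
from ipulse_shared_core_ftredge.enums import DuplicationHandling, MatchConditionType
from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import write_json_to_gcs_extended

storage_client = GCSClient()
response = write_json_to_gcs_extended(
    storage_client=storage_client,
    data={"status": "ok"},
    bucket_name="example-bucket",            # placeholder
    file_name="reports/example.json",        # placeholder
    duplication_handling_enum=DuplicationHandling.INCREMENT,
    duplication_match_condition_type_enum=MatchConditionType.EXACT,
)
print(response["saved_to_path"], response["duplication_handling_status"])
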
ipulse_shared_core_ftredge/utils/utils_cloud_gcp_with_collectors.py (deleted)
@@ -1,166 +0,0 @@
- # pylint: disable=missing-module-docstring
- # pylint: disable=missing-function-docstring
- # pylint: disable=missing-class-docstring
- # pylint: disable=broad-exception-caught
- # pylint: disable=line-too-long
- # pylint: disable=unused-variable
- # pylint: disable=broad-exception-raised
-
- # import json
- # import os
- # import time
- # from google.cloud.storage import Client as GCSClient
- # from ipulse_shared_core_ftredge.enums import LogLevel, DuplicationHandling, DuplicationHandlingStatus, MatchConditionType, DataSourceType
- # from ipulse_shared_core_ftredge.utils import log_error, log_info
- # from .utils_collector_pipelinemon import Pipelinemon
- # from .logs.context_log import ContextLog
-
-
-
-
-
- # def write_json_to_gcs_with_pipelinemon_extended( pipelinemon:Pipelinemon, storage_client:GCSClient, data:dict | list | str, bucket_name: str, file_name: str,
- #                                                  file_exists_if_starts_with_prefix:Optional[str] =None, overwrite_if_exists:bool=False, increment_if_exists:bool=False,
- #                                                  max_retries:int=2, max_deletable_files:int=1):
- #     """Saves data to Google Cloud Storage and optionally locally.
-
- #     This function attempts to upload data to GCS.
- #     - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
- #     - It handles file name conflicts based on these rules:
- #         - If `overwrite_if_exists` is True:
- #             - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
- #             - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
- #         - If `increment_if_exists` is True:
- #             - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
- #             - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
-
- #     -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
- #     """
-
- #     cloud_storage_ref="GCP_GCS"
-
- #     with pipelinemon.context(f"write_json_to_{cloud_storage_ref}_with_pipelinemon"):
- #         cloud_storage_upload_error = None
- #         # Input validation
- #         if overwrite_if_exists and increment_if_exists:
- #             err_msg="Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously."
- #             pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="Param validation", description=err_msg))
- #             return {"cloud_storage_upload_error": err_msg}
- #         if max_deletable_files > 10:
- #             err_msg="max_deletable_files should be less than 10 for safety. For more use another method."
- #             pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM,subject="max_deletable_files", description=err_msg))
- #             return {"cloud_storage_upload_error": err_msg}
-
- #         # Prepare data
- #         if isinstance(data, (list, dict)):
- #             data_str = json.dumps(data, indent=2)
- #         else:
- #             data_str = data
-
- #         bucket = storage_client.bucket(bucket_name)
- #         base_file_name, ext = os.path.splitext(file_name)
- #         increment = 0
- #         attempts = 0
- #         success = False
-
- #         # GCS-related metadata
- #         cloud_storage_path = None
- #         cloud_storage_file_overwritten = False
- #         cloud_storage_file_already_exists = False
- #         cloud_storage_file_saved_with_increment = False
- #         cloud_storage_file_exists_checked_on_name = file_name
- #         cloud_storage_deleted_files=[]
-
- #         try:
- #             upload_allowed = True
- #             # --- Overwrite Logic ---
- #             if overwrite_if_exists:
- #                 with pipelinemon.context("overwriting"):
- #                     if file_exists_if_starts_with_prefix:
- #                         cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
- #                         blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
- #                         if len(blobs_to_delete) > max_deletable_files:
- #                             err_msg=f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}."
- #                             pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
- #                             pipelinemon.add_log(ContextLog(LogLevel.ERROR_CUSTOM, subject="Too many files", description=err_msg))
- #                             #### Ensuring to quit the operation if too many files are found, it will be catched below
- #                             return {"cloud_storage_upload_error": err_msg}
- #                         if blobs_to_delete:
- #                             cloud_storage_file_already_exists = True
- #                             pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_to_delete)} files in bucket {bucket_name}"))
- #                             for blob in blobs_to_delete:
- #                                 cloud_storage_path_del = f"gs://{bucket_name}/{blob.name}"
- #                                 pipelinemon.add_system_impacted(f"delete: {cloud_storage_ref}_bucket_file: {cloud_storage_path_del}")
- #                                 blob.delete()
- #                                 pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject= cloud_storage_path_del, description=f"file deleted from {cloud_storage_ref} as part of overwrite, matched with prefix"))
- #                                 cloud_storage_deleted_files.append(cloud_storage_path_del)
- #                             cloud_storage_file_overwritten = True
- #                     elif bucket.blob(file_name).exists():
- #                         cloud_storage_file_already_exists = True
- #                         pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}"))
- #                         cloud_storage_path_del = f"gs://{bucket_name}/{file_name}"
- #                         pipelinemon.add_system_impacted(f"delete: {cloud_storage_ref}_bucket_file: {cloud_storage_path_del}")
- #                         blob.delete() # Delete the existing blob
- #                         pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject= cloud_storage_path_del, description=f"file deleted from {cloud_storage_ref} as part of overwrite, matched with exact name"))
- #                         cloud_storage_deleted_files.append(cloud_storage_path_del)
- #                         cloud_storage_file_overwritten = True
- #             # --- Increment Logic ---
- #             elif increment_if_exists:
- #                 with pipelinemon.context("incrementing"):
- #                     cloud_storage_file_exists_checked_on_name = file_name # We only increment if the exact name exists
- #                     while bucket.blob(file_name).exists():
- #                         cloud_storage_file_already_exists = True
- #                         increment += 1
- #                         file_name = f"{base_file_name}_v{increment}{ext}"
- #                         cloud_storage_file_saved_with_increment = True
- #                     if increment>0:
- #                         cloud_storage_path = f"gs://{bucket_name}/{file_name}"
- #                         pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Attempting to save file with incremented version in {bucket_name}"))
- #             # --- Check for Conflicts (Including Prefix) ---
- #             else:
- #                 if file_exists_if_starts_with_prefix:
- #                     blobs_matched = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
- #                     cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
- #                     if blobs_matched:
- #                         upload_allowed = False
- #                         cloud_storage_file_already_exists = True
- #                         pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_exists_if_starts_with_prefix, description=f"Prefix matched with {len(blobs_matched)} existing files in bucket {bucket_name}."))
- #                 elif bucket.blob(file_name).exists():
- #                     pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject=file_name, description=f"Exact name matched with existing file in bucket {bucket_name}."))
- #                     upload_allowed = False
- #                     cloud_storage_file_already_exists = True
-
- #             # --- GCS Upload ---
- #             cloud_storage_path = f"gs://{bucket_name}/{file_name}"
- #             if overwrite_if_exists or increment_if_exists or upload_allowed:
- #                 with pipelinemon.context("uploading"):
- #                     while attempts < max_retries and not success:
- #                         try:
- #                             blob = bucket.blob(file_name) # Use the potentially updated file_name
- #                             pipelinemon.add_system_impacted(f"upload: {cloud_storage_ref}_bucket_file: {cloud_storage_path}")
- #                             blob.upload_from_string(data_str, content_type='application/json')
- #                             pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE, subject= cloud_storage_path, description=f"file uploaded to {cloud_storage_ref}"))
- #                             success = True
- #                         except Exception as e:
- #                             attempts += 1
- #                             if attempts < max_retries:
- #                                 time.sleep(2 ** attempts)
- #                             else:
- #                                 err_msg=f"Error uploading file to {cloud_storage_ref} bucket {bucket_name} with name {file_name} : {type(e).__name__}-{str(e)}"
- #                                 pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e, description=err_msg))
- #                                 return {"cloud_storage_upload_error": err_msg}
-
- #         except Exception as e:
- #             pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e))
- #             return {"cloud_storage_upload_error": f"Exception in GCS upload {type(e).__name__}-{str(e)}"}
- #         # --- Return Metadata ---
- #         return {
- #             "cloud_storage_path": cloud_storage_path if ((success or not upload_allowed) and not cloud_storage_upload_error ) else None,
- #             "cloud_storage_file_already_exists": cloud_storage_file_already_exists,
- #             "cloud_storage_file_exists_checked_on_name":cloud_storage_file_exists_checked_on_name ,
- #             "cloud_storage_file_overwritten": cloud_storage_file_overwritten,
- #             "cloud_storage_deleted_file_names": ",,,".join(cloud_storage_deleted_files) if cloud_storage_deleted_files else None,
- #             "cloud_storage_file_saved_with_increment": cloud_storage_file_saved_with_increment,
- #             "cloud_storage_upload_error": cloud_storage_upload_error
- #         }
-
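
Both deleted modules wrap the GCS upload in the same retry loop with exponential backoff (time.sleep(2 ** attempts)). A small standalone sketch of that pattern, independent of GCS and of this package; the operation callable and the default retry count are assumptions for illustration:

import time

def call_with_backoff(operation, max_retries=2):
    """Retry operation() with exponential backoff, mirroring the loop in the deleted helpers."""
    attempts = 0
    while attempts < max_retries:
        try:
            return operation()              # success: return whatever the operation returns
        except Exception:                   # broad catch, as in the original code
            attempts += 1
            if attempts < max_retries:
                time.sleep(2 ** attempts)   # wait 2s, then 4s, ... before retrying
            else:
                raise                       # retries exhausted: surface the error
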
ipulse_shared_core_ftredge/utils/utils_cloud_with_collectors.py (deleted)
@@ -1,27 +0,0 @@
- # pylint: disable=missing-module-docstring
- # pylint: disable=missing-function-docstring
- # pylint: disable=missing-class-docstring
- # pylint: disable=broad-exception-caught
- # pylint: disable=line-too-long
- # pylint: disable=unused-variable
- # pylint: disable=broad-exception-raised
- # from typing import Optional
- # from ipulse_shared_core_ftredge.enums import DataSourceType
- # from .utils_collector_pipelinemon import Pipelinemon
- # from .utils_cloud_gcp import write_json_to_gcs_extended
-
-
- # def write_json_to_cloud_storage_with_pipelinemon_extended(cloud_storage_type:DataSourceType, cloud_storage_client, pipelinemon:Pipelinemon,
- #                                                           data:dict | list | str, bucket_name: str, file_name: str,
-
-
- #                                                           max_retries:int=2, max_deletable_files:int=1):
-
- #     supported_cloud_storage_types = [DataSourceType.GCS]
- #     if cloud_storage_type == DataSourceType.GCS:
- #         return write_json_to_gcs_extended(pipelinemon=pipelinemon, storage_client=cloud_storage_client, data=data, bucket_name=bucket_name, file_name=file_name,
- #                                           ,
- #                                           max_retries=max_retries,
- #                                           max_deletable_files=max_deletable_files)
-
- #     raise ValueError(f"Unsupported cloud provider: {cloud_storage_type}. Supported cloud providers: {supported_cloud_storage_types}")
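
The commented-out dispatcher above was left unfinished (note the dangling comma in the forwarded arguments). A hedged sketch of the enum-based routing it appears to be aiming at, reusing only names visible in this diff; forwarding the duplication arguments through **duplication_kwargs is an assumption, since the stub's argument list is incomplete:

# Hypothetical completion; this function is not part of either released version.
from ipulse_shared_core_ftredge.enums import DataSourceType
from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import write_json_to_gcs_extended

SUPPORTED_CLOUD_STORAGE_TYPES = [DataSourceType.GCS]

def write_json_to_cloud_storage_with_pipelinemon_extended(cloud_storage_type, cloud_storage_client,
                                                          pipelinemon, data, bucket_name, file_name,
                                                          max_retries=2, max_deletable_files=1,
                                                          **duplication_kwargs):
    """Route the write to the provider-specific helper based on the DataSourceType enum."""
    if cloud_storage_type == DataSourceType.GCS:
        # The caller must still supply duplication_handling_enum and
        # duplication_match_condition_type_enum via **duplication_kwargs.
        return write_json_to_gcs_extended(pipelinemon=pipelinemon, storage_client=cloud_storage_client,
                                          data=data, bucket_name=bucket_name, file_name=file_name,
                                          max_retries=max_retries, max_deletable_files=max_deletable_files,
                                          **duplication_kwargs)
    raise ValueError(f"Unsupported cloud provider: {cloud_storage_type}. "
                     f"Supported cloud providers: {SUPPORTED_CLOUD_STORAGE_TYPES}")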