ipulse-shared-core-ftredge 2.6.1 → 2.7.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ipulse-shared-core-ftredge might be problematic.

Files changed (24):
  1. ipulse_shared_core_ftredge/__init__.py +10 -9
  2. ipulse_shared_core_ftredge/enums/__init__.py +12 -7
  3. ipulse_shared_core_ftredge/enums/enums_common_utils.py +9 -0
  4. ipulse_shared_core_ftredge/enums/enums_data_eng.py +280 -76
  5. ipulse_shared_core_ftredge/enums/{enums_logs.py → enums_logging.py} +30 -1
  6. ipulse_shared_core_ftredge/enums/enums_module_fincore.py +16 -2
  7. ipulse_shared_core_ftredge/enums/enums_modules.py +6 -0
  8. ipulse_shared_core_ftredge/enums/{enums_cloud.py → enums_solution_providers.py} +11 -4
  9. ipulse_shared_core_ftredge/utils/__init__.py +11 -7
  10. ipulse_shared_core_ftredge/utils/logs/context_log.py +2 -3
  11. ipulse_shared_core_ftredge/utils/logs/get_logger.py +47 -20
  12. ipulse_shared_core_ftredge/utils/utils_cloud.py +26 -17
  13. ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py +311 -180
  14. ipulse_shared_core_ftredge/utils/utils_cloud_gcp_with_collectors.py +150 -153
  15. ipulse_shared_core_ftredge/utils/utils_cloud_with_collectors.py +16 -15
  16. ipulse_shared_core_ftredge/utils/utils_collector_pipelinemon.py +2 -2
  17. ipulse_shared_core_ftredge/utils/utils_common.py +145 -110
  18. ipulse_shared_core_ftredge/utils/utils_templates_and_schemas.py +2 -2
  19. {ipulse_shared_core_ftredge-2.6.1.dist-info → ipulse_shared_core_ftredge-2.7.1.dist-info}/METADATA +1 -1
  20. ipulse_shared_core_ftredge-2.7.1.dist-info/RECORD +33 -0
  21. ipulse_shared_core_ftredge-2.6.1.dist-info/RECORD +0 -33
  22. {ipulse_shared_core_ftredge-2.6.1.dist-info → ipulse_shared_core_ftredge-2.7.1.dist-info}/LICENCE +0 -0
  23. {ipulse_shared_core_ftredge-2.6.1.dist-info → ipulse_shared_core_ftredge-2.7.1.dist-info}/WHEEL +0 -0
  24. {ipulse_shared_core_ftredge-2.6.1.dist-info → ipulse_shared_core_ftredge-2.7.1.dist-info}/top_level.txt +0 -0
@@ -11,76 +11,29 @@ from io import StringIO
  import os
  import time
  import logging
- from typing import Optional
  import traceback
  from google.api_core.exceptions import NotFound
  from google.cloud import error_reporting
  from google.cloud import logging as cloud_logging
  from google.cloud.storage import Client as GCSClient
  from google.cloud import bigquery
+ from ipulse_shared_core_ftredge.enums import DuplicationHandling, DuplicationHandlingStatus, MatchConditionType,DataSourceType, LogLevel
+ from ipulse_shared_core_ftredge.utils.utils_common import log_error, log_warning, log_info
+ from ipulse_shared_core_ftredge.utils.logs import ContextLog
+ from ipulse_shared_core_ftredge.utils.utils_collector_pipelinemon import Pipelinemon

  ############################################################################
- ##################### GOOGLE CLOUD UTILS ##################################
+ ##################### GOOGLE CLOUD PLATFORM UTILS ##################################
  ############################################################################

- def log_error(message, logger=None , print_out=False, exc_info=False):
-     if logger:
-         logger.error(message, exc_info=exc_info)
-     elif print_out:
-         print(message)
+ class CustomGCPLoggingHandler(cloud_logging.handlers.CloudLoggingHandler):
+     """Custom handler for Google Cloud Logging with a dynamic logName."""
+     def __init__(self, client, name, resource=None, labels=None):
+         super().__init__(client=client, name=name, resource=resource, labels=labels)
+         self.client = client # Ensure client is consistently used

- def log_warning(message, logger=None, print_out=False):
-     if logger:
-         logger.warning(message)
-     elif print_out:
-         print(message)
-
- def log_info(message, logger=None, print_out=False):
-     if logger:
-         logger.info(message)
-     elif print_out:
-         print(message)
-
-
-
- ############################################################################
- ##################### LOGGING and ERROR reporting ##########################
- ####DEPCREACATED: THIS APPROACH WAS GOOD, BUT ERRORS WERE NOT REPORTED TO ERROR REPORTING
- # logging.basicConfig(level=logging.INFO)
- # logging_client = google.cloud.logging.Client()
- # logging_client.setup_logging()
- ###################################
- def setup_gcp_logging(logger, formatter, enable_error_reporting=True):
-
-     class CustomGCPErrorReportingHandler(logging.Handler):
-         def __init__(self, level=logging.ERROR):
-             super().__init__(level)
-             self.error_client = error_reporting.Client()
-             self.propagate = True
-
-         def emit(self, record):
-             try:
-                 if record.levelno >= logging.ERROR:
-                     log_struct = {
-                         'message': self.format(record),
-                         'severity': record.levelname,
-                         'pathname': getattr(record, 'pathname', None),
-                         'lineno': getattr(record, 'lineno', None)
-                     }
-                     if record.exc_info:
-                         log_struct['exception'] = ''.join(
-                             traceback.format_exception(*record.exc_info)
-                         )
-                     self.error_client.report(str(log_struct))
-             except Exception as e:
-                 self.handleError(record)
-
-     class CustomGCPLoggingHandler(cloud_logging.handlers.CloudLoggingHandler):
-         """Custom handler for Google Cloud Logging with a dynamic logName."""
-         def __init__(self, client, name, resource=None, labels=None):
-             super().__init__(client=client, name=name, resource=resource, labels=labels)
-
-         def emit(self, record):
+     def emit(self, record):
+         try:
              # 1. Create the basic log entry dictionary
              log_entry = {
                  'message': record.msg,
@@ -109,17 +62,45 @@ def setup_gcp_logging(logger, formatter, enable_error_reporting=True):

              # 4. Send to Google Cloud Logging
              super().emit(record)
+         except Exception as e:
+             self.handleError(record)
+
+ class CustomGCPErrorReportingHandler(logging.Handler):
+     def __init__(self, client=None, level=logging.ERROR):
+         super().__init__(level)
+         self.error_client = error_reporting.Client() if client is None else client
+         self.propagate = True
+
+     def emit(self, record):
+         try:
+             if record.levelno >= logging.ERROR:
+                 log_struct = {
+                     'message': self.format(record),
+                     'severity': record.levelname,
+                     'pathname': getattr(record, 'pathname', None),
+                     'lineno': getattr(record, 'lineno', None)
+                 }
+                 if record.exc_info:
+                     log_struct['exception'] = ''.join(
+                         traceback.format_exception(*record.exc_info)
+                     )
+                 self.error_client.report(str(log_struct))
+         except Exception as e:
+             self.handleError(record)
+

-     # Create Google Cloud Logging handler
-     cloud_logging_client = cloud_logging.Client()
-     cloud_logging_handler = CustomGCPLoggingHandler(cloud_logging_client, logger.name) # No prefix needed
-     cloud_logging_handler.setFormatter(formatter)
-     logger.addHandler(cloud_logging_handler)
+ def add_gcp_cloud_logging(logger, formatter, client=None):
+     """Sets up Google Cloud Logging for the logger."""
+     client = client or cloud_logging.Client()
+     handler = CustomGCPLoggingHandler(client, logger.name)
+     handler.setFormatter(formatter)
+     logger.addHandler(handler)

-     if enable_error_reporting:
-         # Create and add Error Reporting handler
-         error_reporting_handler = CustomGCPErrorReportingHandler()
-         logger.addHandler(error_reporting_handler)
+ def add_gcp_error_reporting(logger, client=None):
+     """Sets up Google Cloud Error Reporting for the logger."""
+     client = client or error_reporting.Client()
+     handler = CustomGCPErrorReportingHandler(client=client)
+     logger.addHandler(handler)


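The refactor above splits the old setup_gcp_logging helper into two composable functions, add_gcp_cloud_logging and add_gcp_error_reporting, and promotes both handler classes to module level with an injectable client. A minimal usage sketch, assuming default GCP credentials and that the helpers are imported from utils_cloud_gcp (the logger name and format string below are illustrative):

```python
import logging

from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import (
    add_gcp_cloud_logging,
    add_gcp_error_reporting,
)

logger = logging.getLogger("my_pipeline")  # hypothetical logger name
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")

add_gcp_cloud_logging(logger, formatter)  # attaches CustomGCPLoggingHandler
add_gcp_error_reporting(logger)           # attaches CustomGCPErrorReportingHandler

logger.info("pipeline started")               # sent to Cloud Logging
logger.error("upload failed", exc_info=True)  # also reported to Error Reporting
```

Because each helper accepts an optional client argument, tests can presumably inject a stub client instead of letting the helper construct a real one.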
@@ -142,13 +123,13 @@ def read_json_from_gcs(storage_client:GCSClient, bucket_name:str, file_name:str,
          data = json.loads(data_string)
          return data
      except NotFound:
-         log_error(message=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
+         log_warning(msg=f"Warning: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
          return None
      except json.JSONDecodeError:
-         log_error(message=f"Error: The file {file_name} could not be decoded as JSON.", logger=logger, print_out=print_out)
+         log_error(msg=f"Error: The file {file_name} could not be decoded as JSON.", logger=logger, print_out=print_out)
          return None
      except Exception as e:
-         log_error(message=f"An unexpected error occurred: {e}", exc_info=True, logger=logger, print_out=print_out)
+         log_error(msg=f"An unexpected error occurred: {e}", exc_info=True, logger=logger, print_out=print_out)
          return None

  def read_csv_from_gcs(bucket_name:str, file_name:str, storage_client:GCSClient, logger=None, print_out=False):
@@ -162,114 +143,137 @@ def read_csv_from_gcs(bucket_name:str, file_name:str, storage_client:GCSClient,
          reader = csv.DictReader(data_file)
          return list(reader)
      except NotFound:
-         log_error(message=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
+         log_warning(msg=f"Error: The file {file_name} was not found in the bucket {bucket_name}.", logger=logger, print_out=print_out)
          return None
      except csv.Error:
-         log_error(message=f"Error: The file {file_name} could not be read as CSV.", logger=logger, print_out=print_out)
+         log_error(msg=f"Error: The file {file_name} could not be read as CSV.", logger=logger, print_out=print_out)
          return None
      except Exception as e:
-         log_error(message=f"An unexpected error occurred: {e}", logger=logger, print_out=print_out, exc_info=True)
+         log_error(msg=f"An unexpected error occurred: {e}", logger=logger, print_out=print_out, exc_info=True)
          return None

+ def write_json_to_gcs_extended(storage_client: GCSClient, data: dict | list | str, bucket_name: str, file_name: str,
+                                duplication_handling_enum: DuplicationHandling, duplication_match_condition_type_enum: MatchConditionType,
+                                duplication_match_condition: str = "", max_retries: int = 2, max_deletable_files: int = 1,
+                                logger=None, print_out=False, raise_e=False, pipelinemon: Pipelinemon = None):

+     """Saves data to Google Cloud Storage with optional Pipelinemon monitoring.

- def write_json_to_gcs_extended( storage_client:GCSClient, data:dict | list | str, bucket_name: str, file_name: str,
-                                 file_exists_if_starts_with_prefix:Optional[str] =None, overwrite_if_exists:bool=False, increment_if_exists:bool=False,
-                                 max_retries:int=2, max_deletable_files:int=1, logger=None, print_out=False):
-
-     """Saves data to Google Cloud Storage and optionally locally.
-
-     This function attempts to upload data to GCS.
-     - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
-     - It handles file name conflicts based on these rules:
-         - If `overwrite_if_exists` is True:
-             - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
-             - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
-         - If `increment_if_exists` is True:
-             - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
-             - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
-
-     -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
+     Handles duplication with strategies: OVERWRITE, INCREMENT, SKIP, or RAISE_ERROR.
      """
-     # GCS upload exception
-     # Input validation
-     if overwrite_if_exists and increment_if_exists:
-         raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
-     if not isinstance(data, (list, dict, str)):
-         raise ValueError("Data should be a list, dict, or string.")
-     if max_deletable_files > 10:
-         raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")
-
-     # Prepare data
-     if isinstance(data, (list, dict)):
-         data_str = json.dumps(data, indent=2)
-     else:
-         data_str = data
-
-     bucket = storage_client.bucket(bucket_name)
-     base_file_name, ext = os.path.splitext(file_name)
-     increment = 0
-     attempts = 0
-     success = False
+
+     max_deletable_files_allowed = 3
+     cloud_storage_ref=DataSourceType.GCS.value

      # GCS-related metadata
-     cloud_storage_path = None
-     cloud_storage_file_overwritten = False
-     cloud_storage_file_already_exists = False
-     cloud_storage_file_saved_with_increment = False
-     cloud_storage_file_exists_checked_on_name = file_name
-     cloud_storage_deleted_files=[]
-
-
-     upload_allowed = True
-     # --- Overwrite Logic ---
-     if overwrite_if_exists:
-         if file_exists_if_starts_with_prefix:
-             cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
-             blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
-             if len(blobs_to_delete) > max_deletable_files:
-                 raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
-             if blobs_to_delete:
-                 for blob in blobs_to_delete:
-                     cloud_storage_path_del = f"gs://{bucket_name}/{blob.name}"
+     saved_to_path = None
+     matched_duplicates_count = 0
+     matched_duplicates_deleted = []
+     duplication_handling_status = None
+     error_during_operation = None
+
+     response = {
+         "saved_to_path": saved_to_path,
+         "matched_duplicates_count": matched_duplicates_count,
+         "matched_duplicates_deleted": matched_duplicates_deleted,
+         "duplication_handling_status": duplication_handling_status,
+         "duplication_match_condition_type": duplication_match_condition_type_enum.value,
+         "duplication_match_condition": duplication_match_condition,
+         "error_during_operation": error_during_operation
+     }
+
+     supported_match_condition_types = [MatchConditionType.EXACT, MatchConditionType.PREFIX]
+     supported_duplication_handling = [DuplicationHandling.RAISE_ERROR, DuplicationHandling.OVERWRITE, DuplicationHandling.INCREMENT, DuplicationHandling.SKIP]
+
+     try:
+         if max_deletable_files > max_deletable_files_allowed:
+             raise ValueError(f"max_deletable_files should be less than or equal to {max_deletable_files_allowed} for safety.")
+         if duplication_handling_enum not in supported_duplication_handling:
+             msg = f"Error: Duplication handling not supported. Supported types: {[dh.value for dh in supported_duplication_handling]}"
+             raise ValueError(msg)
+         if duplication_match_condition_type_enum not in supported_match_condition_types:
+             msg = f"Error: Match condition type not supported. Supported types: {[mct.value for mct in supported_match_condition_types]}"
+             raise ValueError(msg)
+         elif duplication_match_condition_type_enum != MatchConditionType.EXACT and not duplication_match_condition:
+             msg = f"Error: Match condition is required for match condition type: {duplication_match_condition_type_enum.value}"
+             raise ValueError(msg)
+
+         # Prepare data
+         if isinstance(data, (list, dict)):
+             data_str = json.dumps(data, indent=2)
+         else:
+             data_str = data
+
+         increment = 0
+         attempts = 0
+         success = False
+
+         # Check for existing files based on duplication_match_condition_type
+         files_matched_on_condition = []
+         bucket = storage_client.bucket(bucket_name)
+         base_file_name, ext = os.path.splitext(file_name)
+         if duplication_match_condition_type_enum == MatchConditionType.PREFIX:
+             files_matched_on_condition = list(bucket.list_blobs(prefix=duplication_match_condition))
+         elif duplication_match_condition_type_enum == MatchConditionType.EXACT:
+             duplication_match_condition = file_name if not duplication_match_condition else duplication_match_condition
+             if bucket.blob(duplication_match_condition).exists():
+                 files_matched_on_condition = [bucket.blob(file_name)]
+
+         matched_duplicates_count = len(files_matched_on_condition)
+         response["matched_duplicates_count"] = matched_duplicates_count
+
+         # Handle duplication based on duplication_handling
+         if matched_duplicates_count:
+             log_msg = f"Duplicate FOUND, matched_duplicates_count: {matched_duplicates_count}"
+             if pipelinemon:
+                 pipelinemon.add_log(ContextLog(LogLevel.NOTICE_ALREADY_EXISTS, subject="duplicate_found", description=log_msg))
+
+             if duplication_handling_enum == DuplicationHandling.RAISE_ERROR:
+                 raise FileExistsError("File(s) matching the condition already exist.")
+
+             if duplication_handling_enum == DuplicationHandling.SKIP:
+                 response["duplication_handling_status"] = DuplicationHandlingStatus.SKIPPED.value
+                 log_msg = f"SKIPPING, response: {response}"
+                 log_info(log_msg, logger=logger, print_out=print_out) ## only logsor prints if logger is provided and print_out is True
+                 return response
+
+             if duplication_handling_enum == DuplicationHandling.OVERWRITE:
+                 if matched_duplicates_count > max_deletable_files:
+                     raise ValueError(f"Error: Attempt to delete {matched_duplicates_count} matched files, but limit is {max_deletable_files}. Operation Cancelled.")
+
+                 for blob in files_matched_on_condition:
+                     cloud_storage_path_to_delete = f"gs://{bucket_name}/{blob.name}"
                      blob.delete()
-                     cloud_storage_deleted_files.append(cloud_storage_path_del)
-                 cloud_storage_file_overwritten = True
-         elif bucket.blob(file_name).exists():
-             cloud_storage_file_already_exists = True
-             cloud_storage_path_del = f"gs://{bucket_name}/{file_name}"
-             blob.delete() # Delete the existing blob
-             cloud_storage_deleted_files.append(cloud_storage_path_del)
-             cloud_storage_file_overwritten = True
-     # --- Increment Logic ---
-     elif increment_if_exists:
-         cloud_storage_file_exists_checked_on_name = file_name # We only increment if the exact name exists
-         while bucket.blob(file_name).exists():
-             cloud_storage_file_already_exists = True
-             increment += 1
-             file_name = f"{base_file_name}_v{increment}{ext}"
-             cloud_storage_file_saved_with_increment = True
-         if increment>0:
-             cloud_storage_path = f"gs://{bucket_name}/{file_name}"
-     # --- Check for Conflicts (Including Prefix) ---
-     else:
-         if file_exists_if_starts_with_prefix:
-             blobs_matched = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
-             cloud_storage_file_exists_checked_on_name = file_exists_if_starts_with_prefix
-             if blobs_matched:
-                 upload_allowed = False
-                 cloud_storage_file_already_exists = True
-         elif bucket.blob(file_name).exists():
-             upload_allowed = False
-             cloud_storage_file_already_exists = True
-
-     # --- GCS Upload ---
-     cloud_storage_path = f"gs://{bucket_name}/{file_name}"
-     if overwrite_if_exists or increment_if_exists or upload_allowed:
+                     matched_duplicates_deleted.append(cloud_storage_path_to_delete)
+                     log_msg = f"File deleted as part of overwrite: {cloud_storage_path_to_delete}"
+                     if pipelinemon:
+                         pipelinemon.add_system_impacted(f"delete: {cloud_storage_ref}_bucket_file: {cloud_storage_path_to_delete}")
+                         pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_DELETE_COMPLETE, subject="delete_duplicate", description=log_msg))
+                     log_info(log_msg, logger=logger, print_out=print_out)
+
+                 response["matched_duplicates_deleted"] = matched_duplicates_deleted
+                 response["duplication_handling_status"] = DuplicationHandlingStatus.OVERWRITTEN.value
+
+             elif duplication_handling_enum == DuplicationHandling.INCREMENT:
+                 while bucket.blob(file_name).exists():
+                     increment += 1
+                     file_name = f"{base_file_name}_v{increment}{ext}"
+                 saved_to_path = f"gs://{bucket_name}/{file_name}"
+                 response["duplication_handling_status"] = DuplicationHandlingStatus.INCREMENTED.value
+                 log_msg = "INCREMENTING as Duplicate FOUND "
+                 log_info(log_msg, logger=logger, print_out=print_out) ## only logsor prints if logger is provided and print_out is True
+
+         # GCS Upload
+         saved_to_path = f"gs://{bucket_name}/{file_name}"
          while attempts < max_retries and not success:
              try:
-                 blob = bucket.blob(file_name) # Use the potentially updated file_name
+                 blob = bucket.blob(file_name)
                  blob.upload_from_string(data_str, content_type='application/json')
+                 log_msg = f"File uploaded to GCS: {saved_to_path}"
+                 if pipelinemon:
+                     pipelinemon.add_system_impacted(f"upload: {cloud_storage_ref}_bucket_file: {saved_to_path}")
+                     pipelinemon.add_log(ContextLog(LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE, subject="file_upload", description=log_msg))
+                 log_info(log_msg, logger=logger, print_out=print_out)
                  success = True
              except Exception as e:
                  attempts += 1
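Since the signature changed substantially, a hedged usage sketch of the new duplication-handling API may help; the bucket, object name, and payload below are illustrative, and the import path assumes the function stays in utils_cloud_gcp:

```python
from google.cloud.storage import Client as GCSClient

from ipulse_shared_core_ftredge.enums import DuplicationHandling, MatchConditionType
from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import write_json_to_gcs_extended

storage_client = GCSClient()  # assumes default application credentials

response = write_json_to_gcs_extended(
    storage_client=storage_client,
    data={"ticker": "ACME", "close": 123.4},      # dict, list, or JSON string
    bucket_name="my-bucket",                      # hypothetical bucket
    file_name="prices/acme_2024-06-01.json",      # hypothetical object name
    duplication_handling_enum=DuplicationHandling.INCREMENT,
    duplication_match_condition_type_enum=MatchConditionType.EXACT,
    print_out=True,
)
print(response["saved_to_path"], response["duplication_handling_status"])
```

With EXACT matching and no explicit duplication_match_condition, the condition falls back to file_name, so an existing object of the same name takes the _v1, _v2, ... increment path instead of being overwritten.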
@@ -278,18 +282,142 @@ def write_json_to_gcs_extended( storage_client:GCSClient, data:dict | list | str
                  else:
                      raise e

-     # --- Return Metadata ---
-     return {
-         "cloud_storage_path": cloud_storage_path if (success or not upload_allowed ) else None,
-         "cloud_storage_file_already_exists": cloud_storage_file_already_exists,
-         "cloud_storage_file_exists_checked_on_name":cloud_storage_file_exists_checked_on_name ,
-         "cloud_storage_file_overwritten": cloud_storage_file_overwritten,
-         "cloud_storage_deleted_file_names": ",,,".join(cloud_storage_deleted_files) if cloud_storage_deleted_files else None,
-         "cloud_storage_file_saved_with_increment": cloud_storage_file_saved_with_increment
-     }
-
-
- def write_csv_to_gcs(bucket_name:str, file_name:str, data:dict | list | str, storage_client:GCSClient, logger,log_info_verbose=True):
+     except Exception as e:
+         error_during_operation = f"Error occurred while writing JSON to GCS path: {saved_to_path} ; Error details: {type(e).__name__} - {str(e)}"
+         response["error_during_operation"] = error_during_operation
+         if pipelinemon:
+             pipelinemon.add_log(ContextLog(LogLevel.ERROR_EXCEPTION, e=e, description="response: {response}"))
+         log_error(response, logger=logger, print_out=print_out)
+         if raise_e:
+             raise e
+
+     response["saved_to_path"] = saved_to_path if success else None
+     return response
+
+ # def write_json_to_gcs_extended(storage_client: GCSClient, data: dict | list | str, bucket_name: str, file_name: str,
+ #                                duplication_handling: DuplicationHandling, duplication_match_condition_type: MatchConditionType,
+ #                                duplication_match_condition: str | List[str] = "", max_retries: int = 2, max_deletable_files: int = 1,
+ #                                logger=None, print_out=False, raise_e=False):
+
+ #     """Saves data to Google Cloud Storage.
+
+ #     Handles duplication with strategies: OVERWRITE, INCREMENT, SKIP, or RAISE_ERROR.
+ #     """
+
+ #     max_deletable_files_allowed = 3
+
+ #     # GCS-related metadata
+ #     saved_to_path = None
+ #     matched_duplicates_count = 0
+ #     matched_duplicates_deleted = []
+ #     duplication_handling_status = None
+ #     error_during_operation = None
+
+ #     response = {
+ #         "saved_to_path": saved_to_path,
+ #         "matched_duplicates_count": matched_duplicates_count,
+ #         "matched_duplicates_deleted": matched_duplicates_deleted,
+ #         "duplication_handling_status": duplication_handling_status,
+ #         "duplication_match_condition_type": duplication_match_condition_type,
+ #         "duplication_match_condition": duplication_match_condition,
+ #         "error_during_operation": error_during_operation
+ #     }
+
+ #     supported_match_condition_types = [MatchConditionType.EXACT, MatchConditionType.PREFIX]
+ #     supported_duplication_handling = [DuplicationHandling.RAISE_ERROR, DuplicationHandling.OVERWRITE, DuplicationHandling.INCREMENT, DuplicationHandling.SKIP]
+
+ #     try:
+ #         if max_deletable_files > max_deletable_files_allowed:
+ #             raise ValueError(f"max_deletable_files should be less than or equal to {max_deletable_files_allowed} for safety.")
+ #         if duplication_handling not in supported_duplication_handling:
+ #             msg = f"Error: Duplication handling not supported. Supported types: {supported_duplication_handling}"
+ #             raise ValueError(msg)
+ #         if duplication_match_condition_type not in supported_match_condition_types:
+ #             msg = f"Error: Match condition type not supported. Supported types: {supported_match_condition_types}"
+ #             raise ValueError(msg)
+ #         elif duplication_match_condition_type!=MatchConditionType.EXACT and not duplication_match_condition:
+ #             msg = f"Error: Match condition is required for match condition type: {duplication_match_condition_type}"
+ #             raise ValueError(msg)
+
+ #         # Prepare data
+ #         if isinstance(data, (list, dict)):
+ #             data_str = json.dumps(data, indent=2)
+ #         else:
+ #             data_str = data
+
+ #         increment = 0
+ #         attempts = 0
+ #         success = False
+
+ #         # Check for existing files based on duplication_match_condition_type
+ #         files_matched_on_condition = []
+ #         bucket = storage_client.bucket(bucket_name)
+ #         base_file_name, ext = os.path.splitext(file_name)
+ #         if duplication_match_condition_type == MatchConditionType.PREFIX:
+ #             files_matched_on_condition = list(bucket.list_blobs(prefix=duplication_match_condition))
+ #         elif duplication_match_condition_type == MatchConditionType.EXACT:
+ #             if bucket.blob(file_name).exists():
+ #                 files_matched_on_condition = [bucket.blob(file_name)]
+
+ #         matched_duplicates_count = len(files_matched_on_condition)
+ #         response["matched_duplicates_count"] = matched_duplicates_count
+
+ #         # Handle duplication based on duplication_handling
+ #         if matched_duplicates_count:
+ #             if duplication_handling == DuplicationHandling.RAISE_ERROR:
+ #                 raise FileExistsError("File(s) matching the condition already exist.")
+
+ #             if duplication_handling == DuplicationHandling.SKIP:
+ #                 log_warning("Skipping saving to GCS: file(s) matching the condition already exist.", logger=logger, print_out=print_out)
+ #                 response["duplication_handling_status"] = DuplicationHandlingStatus.SKIPPED.value
+ #                 return response
+
+ #             if duplication_handling == DuplicationHandling.OVERWRITE:
+ #                 if matched_duplicates_count > max_deletable_files:
+ #                     raise ValueError(f"Error: Attempt to delete {matched_duplicates_count} matched files, but limit is {max_deletable_files}. Operation Cancelled.")
+
+ #                 for blob in files_matched_on_condition:
+ #                     cloud_storage_path_to_delete = f"gs://{bucket_name}/{blob.name}"
+ #                     blob.delete()
+ #                     matched_duplicates_deleted.append(cloud_storage_path_to_delete)
+
+ #                 response["matched_duplicates_deleted"] = matched_duplicates_deleted
+ #                 response["duplication_handling_status"] = DuplicationHandlingStatus.OVERWRITTEN.value
+
+ #             elif duplication_handling == DuplicationHandling.INCREMENT:
+ #                 while bucket.blob(file_name).exists():
+ #                     increment += 1
+ #                     file_name = f"{base_file_name}_v{increment}{ext}"
+ #                 saved_to_path = f"gs://{bucket_name}/{file_name}"
+ #                 response["duplication_handling_status"] = DuplicationHandlingStatus.INCREMENTED.value
+
+ #         # GCS Upload
+ #         saved_to_path = f"gs://{bucket_name}/{file_name}"
+ #         while attempts < max_retries and not success:
+ #             try:
+ #                 blob = bucket.blob(file_name)
+ #                 blob.upload_from_string(data_str, content_type='application/json')
+ #                 success = True
+ #             except Exception as e:
+ #                 attempts += 1
+ #                 if attempts < max_retries:
+ #                     time.sleep(2 ** attempts)
+ #                 else:
+ #                     if raise_e:
+ #                         raise e
+
+ #     except Exception as e:
+ #         error_message = f"Error occurred while writing JSON to GCS path: {saved_to_path} : {type(e).__name__} - {str(e)}"
+ #         log_error(error_message, logger=logger, print_out=print_out)
+ #         response["error_during_operation"] = error_message
+ #         if raise_e:
+ #             raise e
+
+ #     response["saved_to_path"] = saved_to_path if success else None
+ #     return response
+
+
+ def write_csv_to_gcs(bucket_name:str, file_name:str, data:dict | list | str, storage_client:GCSClient, logger=None, print_out=False, raise_e=False):
      """ Helper function to write a CSV file to Google Cloud Storage """
      try:
          bucket = storage_client.bucket(bucket_name)
@@ -303,9 +431,12 @@ def write_csv_to_gcs(bucket_name:str, file_name:str, data:dict | list | str, sto
          else:
              raise ValueError("Data should be a list of dictionaries")
          blob.upload_from_string(data_file.getvalue(), content_type='text/csv')
-         if log_info_verbose:
-             logger.info(f"Successfully wrote CSV to {file_name} in bucket {bucket_name}.")
+         log_info(msg=f"Successfully wrote CSV to {file_name} in bucket {bucket_name}.", logger=logger, print_out=print_out)
      except ValueError as e:
-         logger.error(f"ValueError: {e}")
+         log_error(msg=f"ValueError: {e}",logger=logger, print_out=print_out)
+         if raise_e:
+             raise e
      except Exception as e:
-         logger.error(f"An unexpected error occurred while writing CSV to GCS: {e}", exc_info=True)
+         log_error(msg=f"An unexpected error occurred while writing CSV to GCS: {e}", logger=logger, print_out=print_out, exc_info=True)
+         if raise_e:
+             raise e
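For completeness, a hedged example of the updated write_csv_to_gcs signature; the bucket and object names are illustrative, and per the ValueError above, data must be a list of dictionaries:

```python
from google.cloud.storage import Client as GCSClient

from ipulse_shared_core_ftredge.utils.utils_cloud_gcp import write_csv_to_gcs

rows = [
    {"symbol": "ACME", "close": 123.4},
    {"symbol": "INIT", "close": 56.7},
]

write_csv_to_gcs(
    bucket_name="my-bucket",          # hypothetical bucket
    file_name="exports/prices.csv",   # hypothetical object name
    data=rows,
    storage_client=GCSClient(),       # assumes default application credentials
    print_out=True,                   # print output when no logger is passed
    raise_e=True,                     # re-raise failures instead of only logging them
)
```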