ipulse-shared-core-ftredge 2.55-py3-none-any.whl → 2.57-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -1,13 +1,18 @@
  from .models import (Organisation, UserAuth, UserProfile,
  UserStatus, UserProfileUpdate, pulse_enums)
- from .utils_gcp import (setup_gcp_logger_and_error_report,
- read_csv_from_gcs, read_json_from_gcs,
- write_csv_to_gcs, write_json_to_gcs)
+
+
+ from .utils_gcp import (setup_gcp_logger_and_error_report,
+ read_csv_from_gcs, read_json_from_gcs,
+ write_csv_to_gcs,write_json_to_gcs)
+ from .utils_custom_logs import (ContextLog)
+ from .utils_pipelinemon import ( Pipelinemon)
+ from .utils_gcp_for_pipelines import (write_json_to_gcs_in_pipeline )
+
  from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
  check_format_against_schema_template)
- from .utils_common import (ContextLog, Pipelinemon)

- from .enums import (TargetLogs, LogLevel, Unit, Frequency,
+ from .enums import (TargetLogs, LogStatus, LogLevel, Unit, Frequency,
  Module, SubModule, BaseDataCategory,
  FinCoreCategory, FincCoreSubCategory,
  FinCoreRecordsCategory, ExchangeOrPublisher,
@@ -4,6 +4,8 @@
  # pylint: disable=missing-class-docstring

  from .enums_common_utils import (LogLevel,
+ LogStatus,
+ SystemsImpacted,
  TargetLogs,
  Unit,
  Frequency)
@@ -2,10 +2,28 @@
  # pylint: disable=missing-module-docstring
  # pylint: disable=missing-function-docstring
  # pylint: disable=missing-class-docstring
+ # pylint: disable=line-too-long

  from enum import Enum


+ class SystemsImpacted(Enum):
+ NO = "__no"
+ YES = "__yes"
+ INVESTIGATE = "__investigate"
+ MULTIPLE = "__multiple"
+ DB = "db"
+ BQ_TABLE= "bq_table"
+ BQ_TABLES = "bq_tables"
+ GCS_BUCKET = "gcs_bucket"
+ GCS_BUCKETS = "gcs_buckets"
+ GCS_BUCKET_FILE = "gcs_bucket_file"
+ GCS_BUCKET_FILES = "gcs_bucket_files"
+ API = "api"
+ APIS = "apis"
+ LOCAL_FILE = "local_file"
+ LOCAL_FILES = "local_files"
+
  class TargetLogs(Enum):
  MIXED="mixed_logs"
  SUCCESSES = "success_logs"
@@ -15,22 +33,33 @@ class TargetLogs(Enum):
  WARNINGS_AND_ERRORS = "warn_n_err_logs"
  ERRORS = "error_logs"

-
  class LogLevel(Enum):
  """
  Standardized notice levels for data engineering pipelines,
  designed for easy analysis and identification of manual
  intervention needs.
  """
- DEBUG = 100 # Detailed debug information (for development/troubleshooting)
+ DEBUG = 10 # Detailed debug information (for development/troubleshooting)
+
+ INFO = 100
+ INFO_REMOTE_PERSISTNACE_COMPLETE= 101
+ INFO_REMOTE_UPDATE_COMPLETE = 102
+ INFO_REMOTE_DELETE_COMPLETE = 103
+
+ INFO_REMOTE_BULK_PERSISTNACE_COMPLETE= 111
+ INFO_REMOTE_BULK_UPDATE_COMPLETE = 112
+ INFO_REMOTE_BULK_DELETE_COMPLETE = 113
+
+ INFO_LOCAL_PERSISTNACE_COMPLETE = 121

- INFO = 200
  SUCCESS = 201
+ SUCCESS_WITH_NOTICES = 211
+ SUCCESS_WITH_WARNINGS = 212

  NOTICE = 300 # Maybe same file or data already fully or partially exists
  NOTICE_ALREADY_EXISTS = 301 # Data already exists, no action required
  NOTICE_PARTIAL_EXISTS = 302 # Partial data exists, no action required
- NOTICE_CANCELLED = 303 # Data processing cancelled, no action required
+ NOTICE_ACTION_CANCELLED = 303 # Data processing cancelled, no action required

  # Warnings indicate potential issues that might require attention:
  WARNING = 400 # General warning, no immediate action required
@@ -40,18 +69,22 @@ class LogLevel(Enum):
  WARNING_FIX_REQUIRED = 404 # Action required, pipeline can likely continue

  ERROR = 500 # General error, no immediate action required
- # Errors indicate a problem that disrupts normal pipeline execution:
- ERROR_EXCEPTION_REDO = 501
- ERROR_CUSTOM_REDO = 502 # Temporary error, automatic retry likely to succeed
-
- ERROR_EXCEPTION_INVESTIGATE = 601 # Exception occured after some data was likely persisted (e.g., to GCS or BQ)
- ERROR_CUSTOM_INVESTIGATE= 602
- ERROR_EXCEPTION_PERSTISTANCE = 603 # Exception occured after data was persisted (e.g., to GCS or BQ)
- ERROR_CUSTOM_PERSTISTANCE = 604

+ ERROR_EXCEPTION = 501
+ ERROR_CUSTOM = 502 # Temporary error, automatic retry likely to succeed
+ ERROR_OPERATION_PARTIALLY_FAILED = 511 # Partial or full failure, manual intervention required
+ ERROR_OPERATION_FAILED = 512 # Operation failed, manual intervention required
+ ERORR_OPERATION_WITH_WARNINGS = 513 # Partial or full failure, manual intervention required
+ ERORR_OPERATION_WITH_ERRORS = 514 # Partial or full failure, manual intervention required
+ ERORR_OPERATION_WITH_WARNINGS_OR_ERRORS = 515 # Partial or full failure, manual intervention required
+
+ ERROR_THRESHOLD_REACHED = 551
+ ERROR_PIPELINE_THRESHOLD_REACHED = 552 # Error due to threshold reached, no immediate action required
+ ERROR_SUBTHRESHOLD_REACHED = 553 # Error due to threshold reached, no immediate action required
+ ERROR_DATA_QUALITY_THRESHOLD_REACHED = 554 # Error due to threshold reached, no immediate action required
  # Critical errors indicate severe failures requiring immediate attention:
- CRITICAL_SYSTEM_FAILURE = 701 # System-level failure (e.g., infrastructure), requires immediate action
- CRITICAL_PIPELINE_FAILURE = 702 # Complete pipeline failure, requires investigation and potential rollback
+ CRITICAL=600 # General critical error, requires immediate action
+ CRITICAL_SYSTEM_FAILURE = 601 # System-level failure (e.g., infrastructure, stackoverflow ), requires immediate action

  UNKNOWN=1001 # Unknown error, should not be used in normal operation

@@ -63,8 +96,6 @@ class LogStatus(Enum):
  RESOLVED = "resolved"
  IGNORED = "ignored"
  CANCELLED = "cancelled"
-
-

  ### Exception during full exection, partially saved
  # Exception during ensemble pipeline; modifications collected in local object , nothing persisted
@@ -143,4 +174,4 @@ class Frequency(Enum):
  THREE_M="3m"
  SIX_M="6m"
  ONE_Y="1y"
- THREE_Y="3y"
+ THREE_Y="3y"
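
The enums above are ordinary Python Enum members, and the LogLevel codes are now ordered numerically (DEBUG=10 through CRITICAL=600), so downstream code can filter collected logs by a severity threshold. A minimal sketch, not part of the package, assuming the re-exports from ipulse_shared_core_ftredge.enums shown in the second hunk:

from ipulse_shared_core_ftredge.enums import LogLevel, SystemsImpacted

collected = [LogLevel.INFO, LogLevel.WARNING_FIX_REQUIRED, LogLevel.ERROR_OPERATION_FAILED]

# LogLevel values are integers, so a threshold comparison separates errors from lower severities.
errors = [lvl for lvl in collected if lvl.value >= LogLevel.ERROR.value]
print([lvl.name for lvl in errors])           # ['ERROR_OPERATION_FAILED']

# SystemsImpacted members carry plain string values, useful for tagging log entries.
print(SystemsImpacted.GCS_BUCKET_FILE.value)  # 'gcs_bucket_file'
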
@@ -33,9 +33,9 @@ class UserProfile(BaseModel):
  provider_id: str #User can Read only

  username: Optional[str] = None #User can Read and Edit
- dob: Optional[date] = None #User can Read and Edit
- first_name: Optional[str] = None #User can Read and Edit
- last_name: Optional[str] = None #User can Read and Edit
+ dob: Optional[date] = None #User can Read and Edit
+ first_name: Optional[str] = None #User can Read and Edit
+ last_name: Optional[str] = None #User can Read and Edit
  mobile: Optional[str] = None #User can Read and Edit
  class Config:
  extra = "forbid"
@@ -0,0 +1,201 @@
+
+ # pylint: disable=missing-module-docstring
+ # pylint: disable=missing-function-docstring
+ # pylint: disable=logging-fstring-interpolation
+ # pylint: disable=line-too-long
+ # pylint: disable=missing-class-docstring
+ # pylint: disable=broad-exception-caught
+ import traceback
+ import json
+ from datetime import datetime, timezone
+ from typing import List
+ from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel, LogStatus
+
+
+ class ContextLog:
+
+ def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
+ context: str = None, description: str = None,
+ e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
+ log_status: LogStatus = LogStatus.OPEN, subject: str = None, systems_impacted: List[str] = None,
+ ):
+
+ if e is not None:
+ e_type = type(e).__name__ if e_type is None else e_type
+ e_message = str(e) if e_message is None else e_message
+ e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
+ elif e_traceback is None and (e_type or e_message):
+ e_traceback = traceback.format_exc()
+
+ self.level = level
+ self.subject = subject
+ self.description = description
+ self._base_context = base_context
+ self._context = context
+ self._systems_impacted = systems_impacted if systems_impacted else []
+ self.collector_id = collector_id
+ self.exception_type = e_type
+ self.exception_message = e_message
+ self.exception_traceback = e_traceback
+ self.log_status = log_status
+ self.timestamp = datetime.now(timezone.utc).isoformat()
+
+ @property
+ def base_context(self):
+ return self._base_context
+
+ @base_context.setter
+ def base_context(self, value):
+ self._base_context = value
+
+ @property
+ def context(self):
+ return self._context
+
+ @context.setter
+ def context(self, value):
+ self._context = value
+
+ @property
+ def systems_impacted(self):
+ return self._systems_impacted
+
+ @systems_impacted.setter
+ def systems_impacted(self, list_of_si: List[str]):
+ self._systems_impacted = list_of_si
+
+ def add_system_impacted(self, system_impacted: str):
+ if self._systems_impacted is None:
+ self._systems_impacted = []
+ self._systems_impacted.append(system_impacted)
+
+ def remove_system_impacted(self, system_impacted: str):
+ if self._systems_impacted is not None:
+ self._systems_impacted.remove(system_impacted)
+
+ def clear_systems_impacted(self):
+ self._systems_impacted = []
+
+ def _format_traceback(self, e_traceback, e_message, max_field_len:int, max_traceback_lines:int):
+ if not e_traceback or e_traceback == 'None\n':
+ return None
+
+ traceback_lines = e_traceback.splitlines()
+
+ # Check if the traceback is within the limits
+ if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
+ return e_traceback
+
+ # Remove lines that are part of the exception message if they are present in traceback
+ message_lines = e_message.splitlines() if e_message else []
+ if message_lines:
+ for message_line in message_lines:
+ if message_line in traceback_lines:
+ traceback_lines.remove(message_line)
+
+ # Filter out lines from third-party libraries (like site-packages)
+ filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
+
+ # If filtering results in too few lines, revert to original traceback
+ if len(filtered_lines) < 2:
+ filtered_lines = traceback_lines
+
+ # Combine standalone bracket lines with previous or next lines
+ combined_lines = []
+ for line in filtered_lines:
+ if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
+ combined_lines[-1] += " " + line.strip()
+ else:
+ combined_lines.append(line)
+
+ # Ensure the number of lines doesn't exceed MAX_TRACEBACK_LINES
+ if len(combined_lines) > max_traceback_lines:
+ keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
+ keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
+ combined_lines = (
+ combined_lines[:keep_lines_start] +
+ ['... (truncated) ...'] +
+ combined_lines[-keep_lines_end:]
+ )
+
+ formatted_traceback = '\n'.join(combined_lines)
+
+ # Ensure the total length doesn't exceed MAX_TRACEBACK_LENGTH
+ if len(formatted_traceback) > max_field_len:
+ truncated_length = max_field_len - len('... (truncated) ...')
+ half_truncated_length = truncated_length // 2
+ formatted_traceback = (
+ formatted_traceback[:half_truncated_length] +
+ '\n... (truncated) ...\n' +
+ formatted_traceback[-half_truncated_length:]
+ )
+ return formatted_traceback
+
+ def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80,max_traceback_lines:int = 30):
+ size_limit = int(size_limit) # Ensure size_limit is an integer
+
+ # Unified list of all fields
+ systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
+ fields = [
+ ("log_status", str(self.log_status.name)),
+ ("level_code", self.level.value),
+ ("level_name", str(self.level.name)),
+ ("base_context", str(self.base_context)),
+ ("timestamp", str(self.timestamp)),
+ ("collector_id", str(self.collector_id)),
+ ("systems_impacted", systems_impacted_str),
+ ("context", str(self.context)), # special sizing rules apply to it
+ ("subject", str(self.subject)),
+ ("description", str(self.description)),
+ ("exception_type", str(self.exception_type)),
+ ("exception_message", str(self.exception_message)),
+ ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message, max_field_len, max_traceback_lines)))
+ ]
+
+ # Function to calculate the byte size of a JSON-encoded field
+ def field_size(key, value):
+ return len(json.dumps({key: value}).encode('utf-8'))
+
+ # Function to truncate a value based on its type
+ # Function to truncate a value based on its type
+ def truncate_value(value, max_size):
+ if isinstance(value, str):
+ half_size = max_size // 2
+ return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
+ return value
+
+ # Ensure no field exceeds max_field_len
+ for i, (key, value) in enumerate(fields):
+ if isinstance(value, str) and len(value) > max_field_len:
+ fields[i] = (key, truncate_value(value, max_field_len))
+
+ # Ensure total size of the dict doesn't exceed size_limit
+ total_size = sum(field_size(key, value) for key, value in fields)
+ log_dict = {}
+ truncated = False
+
+ if total_size > size_limit:
+ truncated = True
+ remaining_size = size_limit
+ remaining_fields = len(fields)
+
+ for key, value in fields:
+ if remaining_fields > 0:
+ max_size_per_field = remaining_size // remaining_fields
+ else:
+ max_size_per_field = 0
+
+ field_sz = field_size(key, value)
+ if field_sz > max_size_per_field:
+ value = truncate_value(value, max_size_per_field)
+ field_sz = field_size(key, value)
+
+ log_dict[key] = value
+ remaining_size -= field_sz
+ remaining_fields -= 1
+ else:
+ log_dict = dict(fields)
+
+ log_dict['trunc'] = truncated
+
+ return log_dict
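
The new utils_custom_logs module adds ContextLog as a structured log entry with automatic exception capture and size-aware serialization: to_dict() truncates oversized fields and keeps the payload within roughly 80% of 256 KiB by default. A minimal usage sketch, not part of the package, with hypothetical context and file names; ContextLog and LogLevel are re-exported from the package __init__ per the first hunk:

from ipulse_shared_core_ftredge import ContextLog, LogLevel

try:
    raise FileNotFoundError("prices.json not found")  # hypothetical failure
except FileNotFoundError as exc:
    log = ContextLog(
        level=LogLevel.ERROR_EXCEPTION,
        base_context="daily_prices_pipeline",   # hypothetical pipeline name
        context="fetch_prices",
        subject="prices.json",
        description="Source file missing in bucket",
        e=exc,                                   # e_type / e_message / e_traceback derived automatically
    )
    log.add_system_impacted("gcs_bucket_file: my-bucket/prices.json")  # hypothetical system tag
    entry = log.to_dict()                        # truncated fields are flagged via entry['trunc']
    print(entry["level_name"], entry["exception_type"])
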
@@ -1,6 +1,9 @@
  # pylint: disable=missing-module-docstring
  # pylint: disable=missing-function-docstring
  # pylint: disable=missing-class-docstring
+ # pylint: disable=broad-exception-caught
+ # pylint: disable=line-too-long
+ # pylint: disable=unused-variable
  import json
  import csv
  from io import StringIO
@@ -11,7 +14,6 @@ import traceback
  from google.cloud import error_reporting, logging as cloud_logging
  from google.api_core.exceptions import NotFound

-
  ############################################################################
  ##################### SETTING UP LOGGER ##########################

@@ -23,7 +25,6 @@ from google.api_core.exceptions import NotFound


  ##### THIS APPROACH IS USED NOW ########
- ## TODO Fix the issue with POST 0B Nan.... printed in Cloud Logging , which is referring to posting to Cloud Logging probably.
  ENV = os.getenv('ENV', 'LOCAL').strip("'")

  def setup_gcp_logger_and_error_report(logger_name,level=logging.INFO, use_cloud_logging=True):
@@ -122,19 +123,22 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):



- def write_json_to_gcs(bucket_name, storage_client, data, file_name,
- save_locally=False, local_path=None, logger=None, max_retries=2,
- overwrite_if_exists=False, increment_if_exists=False):
+ def write_json_to_gcs( storage_client, data, bucket_name, file_name,
+ file_exists_if_starts_with_prefix=None, overwrite_if_exists=False, increment_if_exists=False,
+ save_locally=False, local_path=None, max_retries=2, max_deletable_files=1, logger=None):
  """Saves data to Google Cloud Storage and optionally locally.
-
- This function attempts to upload data to GCS. If the upload fails after
- retries and `save_locally` is True or `local_path` is provided, it attempts
- to save the data locally.
-
- Returns:
- dict: A dictionary containing the GCS path (or None if upload failed),
- the local path (or None if not saved locally), a boolean indicating if the file was overwritten,
- a boolean indicating if the file already existed, and a boolean indicating if the file was saved with an incremented name.
+
+ This function attempts to upload data to GCS.
+ - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+ - It handles file name conflicts based on these rules:
+ - If `overwrite_if_exists` is True:
+ - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
+ - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
+ - If `increment_if_exists` is True:
+ - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+ - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+ -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
  """

  def log_message(message):
@@ -149,101 +153,141 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name,
  if logger:
  logger.warning(message)

- attempts = 0
- success = False
- gcs_path = None
- local_path_final = None
- gcs_file_overwritten = False
- gcs_file_already_exists = False
- gcs_file_saved_with_increment = False
- gcs_upload_exception = None # Store potential GCS exception
-
- # Check for conflicting options
+ # Input validation
  if overwrite_if_exists and increment_if_exists:
- raise ValueError("When writing JSON to GCS, both overwrite and increment_if_exists cannot be True at the same time.")
+ raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
+ if not isinstance(data, (list, dict, str)):
+ raise ValueError("Unsupported data type. Data must be a list, dict, or str.")
+ if max_deletable_files > 10:
+ raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")

+ # Prepare data
  if isinstance(data, (list, dict)):
  data_str = json.dumps(data, indent=2)
- elif isinstance(data, str):
- data_str = data
  else:
- raise ValueError("Unsupported data type. It should be a list, dict, or str.")
+ data_str = data

  bucket = storage_client.bucket(bucket_name)
  base_file_name, ext = os.path.splitext(file_name)
  increment = 0
+ attempts = 0
+ success = False

- while attempts < max_retries and not success:
- try:
- if increment_if_exists:
- while bucket.blob(file_name).exists():
- gcs_file_already_exists = True
- increment += 1
- file_name = f"{base_file_name}_{increment}{ext}"
- gcs_file_saved_with_increment = True
- log_warning(f"File {file_name} already exists in bucket {bucket_name}. Writing with increment: {increment_if_exists}")
+ # GCS-related metadata
+ gcs_path = None
+ gcs_file_overwritten = False
+ gcs_file_already_exists = False
+ gcs_file_saved_with_increment = False
+ gcs_file_exists_checked_on_name = file_name
+ gcs_deleted_files=[]
+
+ # GCS upload exception
+ gcs_upload_exception = None
+
+ # Local file path
+ local_path_final = None
+
+ try:
+ # --- Overwrite Logic ---
+ if overwrite_if_exists:
+ if file_exists_if_starts_with_prefix:
+ gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+ blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+ if len(blobs_to_delete) > max_deletable_files:
+ raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+ if blobs_to_delete:
+ log_message(f"Deleting files containing '{file_exists_if_starts_with_prefix}' for overwrite.")
+ for blob in blobs_to_delete:
+ blob.delete()
+ gcs_deleted_files.append(blob.name)
+ log_message(f"Deleted: gs://{bucket_name}/{blob.name}")
+ gcs_file_overwritten = True
  else:
  blob = bucket.blob(file_name)
-
- # Check if the file exists
  if blob.exists():
  gcs_file_already_exists = True
  gcs_path = f"gs://{bucket_name}/{file_name}"
- log_message(f"File {file_name} already exists in bucket {bucket_name}. Overwriting: {overwrite_if_exists}")
- if not overwrite_if_exists:
- log_warning(f"File {file_name} already exists and overwrite is set to False. Skipping save to GCS.")
- break
+ log_message(f"File '{file_name}' already exists. Overwriting.")
+ blob.delete() # Delete the existing blob
+ gcs_deleted_files.append(blob.name)
+ gcs_file_overwritten = True
+
+ # --- Increment Logic ---
+ elif increment_if_exists:
+ gcs_file_exists_checked_on_name = file_name # We only increment if the exact name exists
+ while bucket.blob(file_name).exists():
+ gcs_file_already_exists = True
+ increment += 1
+ file_name = f"{base_file_name}_v{increment}{ext}"
+ gcs_file_saved_with_increment = True
+ log_warning(f"File already exists. Using incremented name: {file_name}")
+
+ # --- GCS Upload ---
+ if overwrite_if_exists or increment_if_exists: # Only upload if either overwrite or increment is True
+ while attempts < max_retries and not success:
+ try:
+ blob = bucket.blob(file_name) # Use the potentially updated file_name
+ blob.upload_from_string(data_str, content_type='application/json')
+ gcs_path = f"gs://{bucket_name}/{file_name}"
+ log_message(f"Successfully saved file to GCS: {gcs_path}")
+ success = True
+ except Exception as e:
+ gcs_upload_exception=e
+ attempts += 1
+ if attempts < max_retries:
+ log_warning(f"Attempt {attempts} to upload to GCS failed. Retrying...")
+ time.sleep(2 ** attempts)
  else:
- gcs_file_overwritten = True
-
- blob.upload_from_string(data_str, content_type='application/json')
- gcs_path = f"gs://{bucket_name}/{file_name}"
- log_message(f"Successfully saved file to GCS {gcs_path}.")
- success = True
- except Exception as e:
- gcs_upload_exception = e
- attempts += 1
- if attempts < max_retries:
- time.sleep(2 ** attempts)
- else:
- log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")
+ log_error(f"Failed to write '{file_name}' to GCS bucket '{bucket_name}' after {max_retries} attempts: {e}", exc_info=True)
+ if save_locally or local_path:
+ log_message(f"Attempting to save '{file_name}' locally due to GCS upload failure.")
+ except Exception as e:
+ log_error(f"Error during GCS operations: {e}", exc_info=True)
+ gcs_upload_exception = e

+ # --- Save Locally ---
+ write_out=False
  if not success or save_locally or local_path:
  try:
- if not local_path:
- local_path_final = os.path.join("/tmp", file_name)
- else:
- local_path_final = os.path.join(local_path, file_name)
-
+ local_path=local_path if local_path else "/tmp"
+ local_path_final = os.path.join(local_path, file_name)
+
  if os.path.exists(local_path_final):
  if increment_if_exists:
  increment = 0
  while os.path.exists(local_path_final):
  increment += 1
- local_path_final = os.path.join(local_path, f"{base_file_name}_{increment}{ext}")
- gcs_file_saved_with_increment = True
- elif not overwrite_if_exists:
- log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
- success = True
- else:
+ local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+ log_warning(f"Local file already exists. Using incremented name: {local_path_final}")
+ write_out=True
+ elif overwrite_if_exists:
+ write_out=True
  log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+ else:
+ log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+ write_out=False
+ else:
+ write_out=True

- if not success:
+ if write_out:
  with open(local_path_final, 'w', encoding='utf-8') as f:
  f.write(data_str)
- log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
- success = True
+ log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+
  except Exception as local_e:
  log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)

  if gcs_upload_exception is not None:
  raise gcs_upload_exception # Propagate without nesting

+ # --- Return Metadata ---
  return {
- "gcs_path": gcs_path,
- "local_path": local_path_final,
+ "gcs_path": gcs_path if success else None, # Only set gcs_path if upload succeeded
+ "local_path": local_path_final if write_out else None, # Only set local_path if saved locally
  "gcs_file_already_exists": gcs_file_already_exists,
+ "gcs_file_exists_checked_on_name":gcs_file_exists_checked_on_name ,
  "gcs_file_overwritten": gcs_file_overwritten,
+ "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
  "gcs_file_saved_with_increment": gcs_file_saved_with_increment
  }

@@ -267,4 +311,4 @@ def write_csv_to_gcs(bucket_name, file_name, data, storage_client, logger,log_in
  except ValueError as e:
  logger.error(f"ValueError: {e}")
  except Exception as e:
- logger.error(f"An unexpected error occurred while writing CSV to GCS: {e}", exc_info=True)
+ logger.error(f"An unexpected error occurred while writing CSV to GCS: {e}", exc_info=True)