ipulse-shared-core-ftredge 2.56.tar.gz → 2.57.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge.egg-info → ipulse_shared_core_ftredge-2.57}/PKG-INFO +1 -1
  2. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/setup.py +1 -1
  3. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/__init__.py +10 -5
  4. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/__init__.py +1 -0
  5. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_common_utils.py +9 -3
  6. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_profile.py +3 -3
  7. ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_custom_logs.py +201 -0
  8. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/utils_gcp.py +112 -65
  9. ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py +201 -0
  10. ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/utils_common.py → ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_pipelinemon.py +25 -206
  11. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/utils_templates_and_schemas.py +7 -9
  12. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge.egg-info}/PKG-INFO +1 -1
  13. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/SOURCES.txt +4 -3
  14. ipulse_shared_core_ftredge-2.57/tests/test_utils_gcp.py +189 -0
  15. ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/tests/__init__.py +0 -0
  16. ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/tests/test.py +0 -17
  17. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/LICENCE +0 -0
  18. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/README.md +0 -0
  19. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/pyproject.toml +0 -0
  20. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/setup.cfg +0 -0
  21. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_data_eng.py +0 -0
  22. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_module_fincore.py +0 -0
  23. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_modules.py +0 -0
  24. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/__init__.py +0 -0
  25. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/audit_log_firestore.py +0 -0
  26. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/organisation.py +0 -0
  27. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/pulse_enums.py +0 -0
  28. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/resource_catalog_item.py +0 -0
  29. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_auth.py +0 -0
  30. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_profile_update.py +0 -0
  31. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_status.py +0 -0
  32. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/dependency_links.txt +0 -0
  33. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/requires.txt +0 -0
  34. {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ipulse_shared_core_ftredge
- Version: 2.56
+ Version: 2.57
  Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
  Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
  Author: Russlan Ramdowar
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 
  setup(
  name='ipulse_shared_core_ftredge',
- version='2.56',
+ version='2.57',
  package_dir={'': 'src'}, # Specify the source directory
  packages=find_packages(where='src'), # Look for packages in 'src'
  install_requires=[
@@ -1,13 +1,18 @@
  from .models import (Organisation, UserAuth, UserProfile,
  UserStatus, UserProfileUpdate, pulse_enums)
- from .utils_gcp import (setup_gcp_logger_and_error_report,
- read_csv_from_gcs, read_json_from_gcs,
- write_csv_to_gcs, write_json_to_gcs)
+
+
+ from .utils_gcp import (setup_gcp_logger_and_error_report,
+ read_csv_from_gcs, read_json_from_gcs,
+ write_csv_to_gcs,write_json_to_gcs)
+ from .utils_custom_logs import (ContextLog)
+ from .utils_pipelinemon import ( Pipelinemon)
+ from .utils_gcp_for_pipelines import (write_json_to_gcs_in_pipeline )
+
  from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
  check_format_against_schema_template)
- from .utils_common import (ContextLog, Pipelinemon)
 
- from .enums import (TargetLogs, LogLevel, Unit, Frequency,
+ from .enums import (TargetLogs, LogStatus, LogLevel, Unit, Frequency,
  Module, SubModule, BaseDataCategory,
  FinCoreCategory, FincCoreSubCategory,
  FinCoreRecordsCategory, ExchangeOrPublisher,
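
In 2.57 the ContextLog and Pipelinemon classes move out of utils_common into dedicated modules, and a pipeline-oriented GCS writer is re-exported from the package root. A minimal import sketch against the new layout (the exported names are taken directly from this __init__.py hunk; the surrounding application code is assumed):

    # Sketch only: importing the symbols re-exported by ipulse_shared_core_ftredge 2.57
    from ipulse_shared_core_ftredge import (
        ContextLog,                      # now defined in utils_custom_logs
        Pipelinemon,                     # now defined in utils_pipelinemon
        write_json_to_gcs,               # still re-exported from utils_gcp
        write_json_to_gcs_in_pipeline,   # new, from utils_gcp_for_pipelines
    )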
@@ -4,6 +4,7 @@
  # pylint: disable=missing-class-docstring
 
  from .enums_common_utils import (LogLevel,
+ LogStatus,
  SystemsImpacted,
  TargetLogs,
  Unit,
@@ -42,9 +42,15 @@ class LogLevel(Enum):
  DEBUG = 10 # Detailed debug information (for development/troubleshooting)
 
  INFO = 100
- INFO_PERSISTNACE_COMPLETE= 101
- INFO_UPDATE_COMPLETE = 102
- INFO_DELETE_COMPLETE = 103
+ INFO_REMOTE_PERSISTNACE_COMPLETE= 101
+ INFO_REMOTE_UPDATE_COMPLETE = 102
+ INFO_REMOTE_DELETE_COMPLETE = 103
+
+ INFO_REMOTE_BULK_PERSISTNACE_COMPLETE= 111
+ INFO_REMOTE_BULK_UPDATE_COMPLETE = 112
+ INFO_REMOTE_BULK_DELETE_COMPLETE = 113
+
+ INFO_LOCAL_PERSISTNACE_COMPLETE = 121
 
  SUCCESS = 201
  SUCCESS_WITH_NOTICES = 211
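
The replacement INFO members are banded by numeric value: 101-103 for single remote operations, 111-113 for bulk remote operations, 121 for local persistence, with SUCCESS still starting at 201. A small sketch of filtering on those bands (the band boundaries are read off this hunk; treating them as a stable contract is an assumption):

    # Sketch only: selecting remote-persistence INFO levels by their value band
    from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel

    remote_info = [lvl for lvl in LogLevel if 101 <= lvl.value <= 113]
    assert LogLevel.INFO_REMOTE_BULK_UPDATE_COMPLETE in remote_info
    assert LogLevel.SUCCESS not in remote_info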
@@ -33,9 +33,9 @@ class UserProfile(BaseModel):
  provider_id: str #User can Read only
 
  username: Optional[str] = None #User can Read and Edit
- dob: Optional[date] = None #User can Read and Edit
- first_name: Optional[str] = None #User can Read and Edit
- last_name: Optional[str] = None #User can Read and Edit
+ dob: Optional[date] = None #User can Read and Edit
+ first_name: Optional[str] = None #User can Read and Edit
+ last_name: Optional[str] = None #User can Read and Edit
  mobile: Optional[str] = None #User can Read and Edit
  class Config:
  extra = "forbid"
@@ -0,0 +1,201 @@
+
+ # pylint: disable=missing-module-docstring
+ # pylint: disable=missing-function-docstring
+ # pylint: disable=logging-fstring-interpolation
+ # pylint: disable=line-too-long
+ # pylint: disable=missing-class-docstring
+ # pylint: disable=broad-exception-caught
+ import traceback
+ import json
+ from datetime import datetime, timezone
+ from typing import List
+ from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel, LogStatus
+
+
+ class ContextLog:
+
+ def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
+ context: str = None, description: str = None,
+ e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
+ log_status: LogStatus = LogStatus.OPEN, subject: str = None, systems_impacted: List[str] = None,
+ ):
+
+ if e is not None:
+ e_type = type(e).__name__ if e_type is None else e_type
+ e_message = str(e) if e_message is None else e_message
+ e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
+ elif e_traceback is None and (e_type or e_message):
+ e_traceback = traceback.format_exc()
+
+ self.level = level
+ self.subject = subject
+ self.description = description
+ self._base_context = base_context
+ self._context = context
+ self._systems_impacted = systems_impacted if systems_impacted else []
+ self.collector_id = collector_id
+ self.exception_type = e_type
+ self.exception_message = e_message
+ self.exception_traceback = e_traceback
+ self.log_status = log_status
+ self.timestamp = datetime.now(timezone.utc).isoformat()
+
+ @property
+ def base_context(self):
+ return self._base_context
+
+ @base_context.setter
+ def base_context(self, value):
+ self._base_context = value
+
+ @property
+ def context(self):
+ return self._context
+
+ @context.setter
+ def context(self, value):
+ self._context = value
+
+ @property
+ def systems_impacted(self):
+ return self._systems_impacted
+
+ @systems_impacted.setter
+ def systems_impacted(self, list_of_si: List[str]):
+ self._systems_impacted = list_of_si
+
+ def add_system_impacted(self, system_impacted: str):
+ if self._systems_impacted is None:
+ self._systems_impacted = []
+ self._systems_impacted.append(system_impacted)
+
+ def remove_system_impacted(self, system_impacted: str):
+ if self._systems_impacted is not None:
+ self._systems_impacted.remove(system_impacted)
+
+ def clear_systems_impacted(self):
+ self._systems_impacted = []
+
+ def _format_traceback(self, e_traceback, e_message, max_field_len:int, max_traceback_lines:int):
+ if not e_traceback or e_traceback == 'None\n':
+ return None
+
+ traceback_lines = e_traceback.splitlines()
+
+ # Check if the traceback is within the limits
+ if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
+ return e_traceback
+
+ # Remove lines that are part of the exception message if they are present in traceback
+ message_lines = e_message.splitlines() if e_message else []
+ if message_lines:
+ for message_line in message_lines:
+ if message_line in traceback_lines:
+ traceback_lines.remove(message_line)
+
+ # Filter out lines from third-party libraries (like site-packages)
+ filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
+
+ # If filtering results in too few lines, revert to original traceback
+ if len(filtered_lines) < 2:
+ filtered_lines = traceback_lines
+
+ # Combine standalone bracket lines with previous or next lines
+ combined_lines = []
+ for line in filtered_lines:
+ if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
+ combined_lines[-1] += " " + line.strip()
+ else:
+ combined_lines.append(line)
+
+ # Ensure the number of lines doesn't exceed MAX_TRACEBACK_LINES
+ if len(combined_lines) > max_traceback_lines:
+ keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
+ keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
+ combined_lines = (
+ combined_lines[:keep_lines_start] +
+ ['... (truncated) ...'] +
+ combined_lines[-keep_lines_end:]
+ )
+
+ formatted_traceback = '\n'.join(combined_lines)
+
+ # Ensure the total length doesn't exceed MAX_TRACEBACK_LENGTH
+ if len(formatted_traceback) > max_field_len:
+ truncated_length = max_field_len - len('... (truncated) ...')
+ half_truncated_length = truncated_length // 2
+ formatted_traceback = (
+ formatted_traceback[:half_truncated_length] +
+ '\n... (truncated) ...\n' +
+ formatted_traceback[-half_truncated_length:]
+ )
+ return formatted_traceback
+
+ def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80,max_traceback_lines:int = 30):
+ size_limit = int(size_limit) # Ensure size_limit is an integer
+
+ # Unified list of all fields
+ systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
+ fields = [
+ ("log_status", str(self.log_status.name)),
+ ("level_code", self.level.value),
+ ("level_name", str(self.level.name)),
+ ("base_context", str(self.base_context)),
+ ("timestamp", str(self.timestamp)),
+ ("collector_id", str(self.collector_id)),
+ ("systems_impacted", systems_impacted_str),
+ ("context", str(self.context)), # special sizing rules apply to it
+ ("subject", str(self.subject)),
+ ("description", str(self.description)),
+ ("exception_type", str(self.exception_type)),
+ ("exception_message", str(self.exception_message)),
+ ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message, max_field_len, max_traceback_lines)))
+ ]
+
+ # Function to calculate the byte size of a JSON-encoded field
+ def field_size(key, value):
+ return len(json.dumps({key: value}).encode('utf-8'))
+
+ # Function to truncate a value based on its type
+ # Function to truncate a value based on its type
+ def truncate_value(value, max_size):
+ if isinstance(value, str):
+ half_size = max_size // 2
+ return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
+ return value
+
+ # Ensure no field exceeds max_field_len
+ for i, (key, value) in enumerate(fields):
+ if isinstance(value, str) and len(value) > max_field_len:
+ fields[i] = (key, truncate_value(value, max_field_len))
+
+ # Ensure total size of the dict doesn't exceed size_limit
+ total_size = sum(field_size(key, value) for key, value in fields)
+ log_dict = {}
+ truncated = False
+
+ if total_size > size_limit:
+ truncated = True
+ remaining_size = size_limit
+ remaining_fields = len(fields)
+
+ for key, value in fields:
+ if remaining_fields > 0:
+ max_size_per_field = remaining_size // remaining_fields
+ else:
+ max_size_per_field = 0
+
+ field_sz = field_size(key, value)
+ if field_sz > max_size_per_field:
+ value = truncate_value(value, max_size_per_field)
+ field_sz = field_size(key, value)
+
+ log_dict[key] = value
+ remaining_size -= field_sz
+ remaining_fields -= 1
+ else:
+ log_dict = dict(fields)
+
+ log_dict['trunc'] = truncated
+
+ return log_dict
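
ContextLog is a structured log record: it captures a level, contexts, optional exception details, and a status, and to_dict() flattens it while truncating oversized fields (the default size_limit of 256 * 1024 * 0.80 tracks roughly 80% of a 256 KiB log-entry quota). A hedged usage sketch based only on the constructor and to_dict() shown above; the field values are invented for illustration, and LogLevel.ERROR is assumed to exist (this hunk only shows the DEBUG/INFO/SUCCESS bands):

    # Sketch only: building a ContextLog from a caught exception and serialising it
    from ipulse_shared_core_ftredge import ContextLog
    from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel

    try:
        raise RuntimeError("example failure while persisting records")
    except RuntimeError as exc:
        log = ContextLog(
            level=LogLevel.ERROR,         # assumed member; substitute any LogLevel the package defines
            base_context="daily_ingest",  # illustrative values
            context="write_to_gcs",
            subject="records batch 42",
            e=exc,                        # type, message and traceback are derived automatically
        )
        log.add_system_impacted("gcs: example-bucket/eod_prices.json")
        entry = log.to_dict()             # truncates fields to respect the size limit
        print(entry["level_name"], entry["exception_type"], entry["trunc"])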
@@ -14,7 +14,6 @@ import traceback
  from google.cloud import error_reporting, logging as cloud_logging
  from google.api_core.exceptions import NotFound
 
-
  ############################################################################
  ##################### SETTING UP LOGGER ##########################
 
@@ -124,15 +123,22 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):
 
 
 
- def write_json_to_gcs(bucket_name, storage_client, data, file_name,
- save_locally=False, local_path=None, logger=None, max_retries=2,
- overwrite_if_exists=False, increment_if_exists=False):
+ def write_json_to_gcs( storage_client, data, bucket_name, file_name,
+ file_exists_if_starts_with_prefix=None, overwrite_if_exists=False, increment_if_exists=False,
+ save_locally=False, local_path=None, max_retries=2, max_deletable_files=1, logger=None):
  """Saves data to Google Cloud Storage and optionally locally.
-
- This function attempts to upload data to GCS. If the upload fails after
- retries and `save_locally` is True or `local_path` is provided, it attempts
- to save the data locally.
- It also tries to handle file name conflicts by overwriting or incrementing. If both are provided as Ture, an exception will be raised.
+
+ This function attempts to upload data to GCS.
+ - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+ - It handles file name conflicts based on these rules:
+ - If `overwrite_if_exists` is True:
+ - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
+ - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
+ - If `increment_if_exists` is True:
+ - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+ - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+ -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
  """
 
  def log_message(message):
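
The 2.57 signature reorders the arguments (client first) and adds prefix-based overwrites plus a max_deletable_files guard. A hedged call sketch against that signature; the bucket name, prefix and payload are made up, and per the logic in the next hunk one of overwrite_if_exists or increment_if_exists must be True for the GCS upload to happen:

    # Sketch only: uploading a JSON snapshot, replacing any older files sharing the prefix
    from google.cloud import storage
    from ipulse_shared_core_ftredge import write_json_to_gcs

    client = storage.Client()
    result = write_json_to_gcs(
        storage_client=client,
        data={"symbol": "XYZ", "rows": 1234},             # list, dict or str are accepted
        bucket_name="example-market-data",                # illustrative bucket
        file_name="eod_prices_2024-06-01.json",
        file_exists_if_starts_with_prefix="eod_prices_",  # delete prior files with this prefix
        overwrite_if_exists=True,                         # mutually exclusive with increment_if_exists
        max_deletable_files=5,                            # safety cap on prefix deletions (must be <= 10)
        save_locally=False,
    )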
@@ -147,100 +153,141 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name,
  if logger:
  logger.warning(message)
 
- attempts = 0
- success = False
- gcs_path = None
- local_path_final = None
- gcs_file_overwritten = False
- gcs_file_already_exists = False
- gcs_file_saved_with_increment = False
- gcs_upload_exception = None # Store potential GCS exception
-
- # Check for conflicting options
+ # Input validation
  if overwrite_if_exists and increment_if_exists:
- raise ValueError("When writing JSON to GCS, both overwrite and increment_if_exists cannot be True at the same time.")
+ raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
+ if not isinstance(data, (list, dict, str)):
+ raise ValueError("Unsupported data type. Data must be a list, dict, or str.")
+ if max_deletable_files > 10:
+ raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")
 
+ # Prepare data
  if isinstance(data, (list, dict)):
  data_str = json.dumps(data, indent=2)
- elif isinstance(data, str):
- data_str = data
  else:
- raise ValueError("Unsupported data type. It should be a list, dict, or str.")
+ data_str = data
 
  bucket = storage_client.bucket(bucket_name)
  base_file_name, ext = os.path.splitext(file_name)
  increment = 0
+ attempts = 0
+ success = False
 
- while attempts < max_retries and not success:
- try:
- if increment_if_exists:
- while bucket.blob(file_name).exists():
- gcs_file_already_exists = True
- increment += 1
- file_name = f"{base_file_name}_{increment}{ext}"
- gcs_file_saved_with_increment = True
- log_warning(f"File {file_name} already exists in bucket {bucket_name}. Writing with increment: {increment_if_exists}")
+ # GCS-related metadata
+ gcs_path = None
+ gcs_file_overwritten = False
+ gcs_file_already_exists = False
+ gcs_file_saved_with_increment = False
+ gcs_file_exists_checked_on_name = file_name
+ gcs_deleted_files=[]
+
+ # GCS upload exception
+ gcs_upload_exception = None
+
+ # Local file path
+ local_path_final = None
+
+ try:
+ # --- Overwrite Logic ---
+ if overwrite_if_exists:
+ if file_exists_if_starts_with_prefix:
+ gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+ blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+ if len(blobs_to_delete) > max_deletable_files:
+ raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+ if blobs_to_delete:
+ log_message(f"Deleting files containing '{file_exists_if_starts_with_prefix}' for overwrite.")
+ for blob in blobs_to_delete:
+ blob.delete()
+ gcs_deleted_files.append(blob.name)
+ log_message(f"Deleted: gs://{bucket_name}/{blob.name}")
+ gcs_file_overwritten = True
  else:
  blob = bucket.blob(file_name)
-
- # Check if the file exists
  if blob.exists():
  gcs_file_already_exists = True
  gcs_path = f"gs://{bucket_name}/{file_name}"
- log_message(f"File {file_name} already exists in bucket {bucket_name}. Overwriting: {overwrite_if_exists}")
- if not overwrite_if_exists:
- log_warning(f"File {file_name} already exists and overwrite is set to False. Skipping save to GCS.")
- break
+ log_message(f"File '{file_name}' already exists. Overwriting.")
+ blob.delete() # Delete the existing blob
+ gcs_deleted_files.append(blob.name)
+ gcs_file_overwritten = True
+
+ # --- Increment Logic ---
+ elif increment_if_exists:
+ gcs_file_exists_checked_on_name = file_name # We only increment if the exact name exists
+ while bucket.blob(file_name).exists():
+ gcs_file_already_exists = True
+ increment += 1
+ file_name = f"{base_file_name}_v{increment}{ext}"
+ gcs_file_saved_with_increment = True
+ log_warning(f"File already exists. Using incremented name: {file_name}")
+
+ # --- GCS Upload ---
+ if overwrite_if_exists or increment_if_exists: # Only upload if either overwrite or increment is True
+ while attempts < max_retries and not success:
+ try:
+ blob = bucket.blob(file_name) # Use the potentially updated file_name
+ blob.upload_from_string(data_str, content_type='application/json')
+ gcs_path = f"gs://{bucket_name}/{file_name}"
+ log_message(f"Successfully saved file to GCS: {gcs_path}")
+ success = True
+ except Exception as e:
+ gcs_upload_exception=e
+ attempts += 1
+ if attempts < max_retries:
+ log_warning(f"Attempt {attempts} to upload to GCS failed. Retrying...")
+ time.sleep(2 ** attempts)
  else:
- gcs_file_overwritten = True
-
- blob.upload_from_string(data_str, content_type='application/json')
- gcs_path = f"gs://{bucket_name}/{file_name}"
- log_message(f"Successfully saved file to GCS {gcs_path}.")
- success = True
- except Exception as e:
- gcs_upload_exception = e
- attempts += 1
- if attempts < max_retries:
- time.sleep(2 ** attempts)
- else:
- log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")
+ log_error(f"Failed to write '{file_name}' to GCS bucket '{bucket_name}' after {max_retries} attempts: {e}", exc_info=True)
+ if save_locally or local_path:
+ log_message(f"Attempting to save '{file_name}' locally due to GCS upload failure.")
+ except Exception as e:
+ log_error(f"Error during GCS operations: {e}", exc_info=True)
+ gcs_upload_exception = e
 
+ # --- Save Locally ---
+ write_out=False
  if not success or save_locally or local_path:
  try:
- if not local_path:
- local_path_final = os.path.join("/tmp", file_name)
- else:
- local_path_final = os.path.join(local_path, file_name)
+ local_path=local_path if local_path else "/tmp"
+ local_path_final = os.path.join(local_path, file_name)
 
  if os.path.exists(local_path_final):
  if increment_if_exists:
  increment = 0
  while os.path.exists(local_path_final):
  increment += 1
- local_path_final = os.path.join(local_path, f"{base_file_name}_{increment}{ext}")
- elif not overwrite_if_exists:
- log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
- success = True
- else:
+ local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+ log_warning(f"Local file already exists. Using incremented name: {local_path_final}")
+ write_out=True
+ elif overwrite_if_exists:
+ write_out=True
  log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+ else:
+ log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+ write_out=False
+ else:
+ write_out=True
 
- if not success:
+ if write_out:
  with open(local_path_final, 'w', encoding='utf-8') as f:
  f.write(data_str)
- log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
- success = True
+ log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+
  except Exception as local_e:
  log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)
 
  if gcs_upload_exception is not None:
  raise gcs_upload_exception # Propagate without nesting
 
+ # --- Return Metadata ---
  return {
- "gcs_path": gcs_path,
- "local_path": local_path_final,
+ "gcs_path": gcs_path if success else None, # Only set gcs_path if upload succeeded
+ "local_path": local_path_final if write_out else None, # Only set local_path if saved locally
  "gcs_file_already_exists": gcs_file_already_exists,
+ "gcs_file_exists_checked_on_name":gcs_file_exists_checked_on_name ,
  "gcs_file_overwritten": gcs_file_overwritten,
+ "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
  "gcs_file_saved_with_increment": gcs_file_saved_with_increment
  }
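
The function now reports what it did through the returned metadata rather than only logging it. A short sketch of consuming that dict, continuing the call shown earlier (the key names are copied from the return statement in this hunk):

    # Sketch only: acting on the metadata returned by write_json_to_gcs
    if result["gcs_path"]:
        print(f"Uploaded to {result['gcs_path']}")
        if result["gcs_file_overwritten"]:
            print(f"Replaced: {result['gcs_deleted_file_names']}")
    elif result["local_path"]:
        print(f"GCS upload skipped or failed; saved locally at {result['local_path']}")
    if result["gcs_file_saved_with_increment"]:
        print("An incremented file name was used to avoid a collision.")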