ipulse-shared-core-ftredge 2.56__tar.gz → 2.57__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge.egg-info → ipulse_shared_core_ftredge-2.57}/PKG-INFO +1 -1
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/setup.py +1 -1
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/__init__.py +10 -5
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/__init__.py +1 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_common_utils.py +9 -3
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_profile.py +3 -3
- ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_custom_logs.py +201 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/utils_gcp.py +112 -65
- ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_gcp_for_pipelines.py +201 -0
- ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/utils_common.py → ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge/utils_pipelinemon.py +25 -206
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/utils_templates_and_schemas.py +7 -9
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57/src/ipulse_shared_core_ftredge.egg-info}/PKG-INFO +1 -1
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/SOURCES.txt +4 -3
- ipulse_shared_core_ftredge-2.57/tests/test_utils_gcp.py +189 -0
- ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/tests/__init__.py +0 -0
- ipulse_shared_core_ftredge-2.56/src/ipulse_shared_core_ftredge/tests/test.py +0 -17
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/README.md +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/pyproject.toml +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/setup.cfg +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_data_eng.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_module_fincore.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/enums/enums_modules.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/__init__.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/audit_log_firestore.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/organisation.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/pulse_enums.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/resource_catalog_item.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_auth.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_profile_update.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge/models/user_status.py +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/dependency_links.txt +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/requires.txt +0 -0
- {ipulse_shared_core_ftredge-2.56 → ipulse_shared_core_ftredge-2.57}/src/ipulse_shared_core_ftredge.egg-info/top_level.txt +0 -0

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipulse_shared_core_ftredge
-Version: 2.56
+Version: 2.57
 Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
 Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
 Author: Russlan Ramdowar

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='ipulse_shared_core_ftredge',
-    version='2.56',
+    version='2.57',
     package_dir={'': 'src'},  # Specify the source directory
     packages=find_packages(where='src'),  # Look for packages in 'src'
     install_requires=[

@@ -1,13 +1,18 @@
 from .models import (Organisation, UserAuth, UserProfile,
                      UserStatus, UserProfileUpdate, pulse_enums)
-
-
-
+
+
+from .utils_gcp import (setup_gcp_logger_and_error_report,
+                        read_csv_from_gcs, read_json_from_gcs,
+                        write_csv_to_gcs,write_json_to_gcs)
+from .utils_custom_logs import (ContextLog)
+from .utils_pipelinemon import ( Pipelinemon)
+from .utils_gcp_for_pipelines import (write_json_to_gcs_in_pipeline )
+
 from .utils_templates_and_schemas import (create_bigquery_schema_from_json,
                                           check_format_against_schema_template)
-from .utils_common import (ContextLog, Pipelinemon)
 
-from .enums import (TargetLogs, LogLevel, Unit, Frequency,
+from .enums import (TargetLogs, LogStatus, LogLevel, Unit, Frequency,
                     Module, SubModule, BaseDataCategory,
                     FinCoreCategory, FincCoreSubCategory,
                     FinCoreRecordsCategory, ExchangeOrPublisher,
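
The practical effect of this __init__.py change is that the new logging and pipeline helpers are re-exported from the package root. A minimal, hypothetical import sketch using only the names visible in the hunk above:

from ipulse_shared_core_ftredge import (
    ContextLog,                     # new, re-exported from utils_custom_logs
    Pipelinemon,                    # now sourced from utils_pipelinemon instead of the removed utils_common
    write_json_to_gcs,              # re-exported from utils_gcp
    write_json_to_gcs_in_pipeline,  # new, re-exported from utils_gcp_for_pipelines
)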

@@ -42,9 +42,15 @@ class LogLevel(Enum):
     DEBUG = 10 # Detailed debug information (for development/troubleshooting)
 
     INFO = 100
-
-
-
+    INFO_REMOTE_PERSISTNACE_COMPLETE= 101
+    INFO_REMOTE_UPDATE_COMPLETE = 102
+    INFO_REMOTE_DELETE_COMPLETE = 103
+
+    INFO_REMOTE_BULK_PERSISTNACE_COMPLETE= 111
+    INFO_REMOTE_BULK_UPDATE_COMPLETE = 112
+    INFO_REMOTE_BULK_DELETE_COMPLETE = 113
+
+    INFO_LOCAL_PERSISTNACE_COMPLETE = 121
 
     SUCCESS = 201
     SUCCESS_WITH_NOTICES = 211

@@ -33,9 +33,9 @@ class UserProfile(BaseModel):
     provider_id: str #User can Read only
 
     username: Optional[str] = None #User can Read and Edit
-    dob: Optional[date] = None #User can Read and Edit
-    first_name: Optional[str] = None #User can Read and Edit
-    last_name: Optional[str] = None #User can Read and Edit
+    dob: Optional[date] = None #User can Read and Edit
+    first_name: Optional[str] = None #User can Read and Edit
+    last_name: Optional[str] = None #User can Read and Edit
     mobile: Optional[str] = None #User can Read and Edit
     class Config:
         extra = "forbid"

@@ -0,0 +1,201 @@
+
+# pylint: disable=missing-module-docstring
+# pylint: disable=missing-function-docstring
+# pylint: disable=logging-fstring-interpolation
+# pylint: disable=line-too-long
+# pylint: disable=missing-class-docstring
+# pylint: disable=broad-exception-caught
+import traceback
+import json
+from datetime import datetime, timezone
+from typing import List
+from ipulse_shared_core_ftredge.enums.enums_common_utils import LogLevel, LogStatus
+
+
+class ContextLog:
+
+    def __init__(self, level: LogLevel, base_context: str = None, collector_id: str = None,
+                 context: str = None, description: str = None,
+                 e: Exception = None, e_type: str = None, e_message: str = None, e_traceback: str = None,
+                 log_status: LogStatus = LogStatus.OPEN, subject: str = None, systems_impacted: List[str] = None,
+                 ):
+
+        if e is not None:
+            e_type = type(e).__name__ if e_type is None else e_type
+            e_message = str(e) if e_message is None else e_message
+            e_traceback = traceback.format_exc() if e_traceback is None else e_traceback
+        elif e_traceback is None and (e_type or e_message):
+            e_traceback = traceback.format_exc()
+
+        self.level = level
+        self.subject = subject
+        self.description = description
+        self._base_context = base_context
+        self._context = context
+        self._systems_impacted = systems_impacted if systems_impacted else []
+        self.collector_id = collector_id
+        self.exception_type = e_type
+        self.exception_message = e_message
+        self.exception_traceback = e_traceback
+        self.log_status = log_status
+        self.timestamp = datetime.now(timezone.utc).isoformat()
+
+    @property
+    def base_context(self):
+        return self._base_context
+
+    @base_context.setter
+    def base_context(self, value):
+        self._base_context = value
+
+    @property
+    def context(self):
+        return self._context
+
+    @context.setter
+    def context(self, value):
+        self._context = value
+
+    @property
+    def systems_impacted(self):
+        return self._systems_impacted
+
+    @systems_impacted.setter
+    def systems_impacted(self, list_of_si: List[str]):
+        self._systems_impacted = list_of_si
+
+    def add_system_impacted(self, system_impacted: str):
+        if self._systems_impacted is None:
+            self._systems_impacted = []
+        self._systems_impacted.append(system_impacted)
+
+    def remove_system_impacted(self, system_impacted: str):
+        if self._systems_impacted is not None:
+            self._systems_impacted.remove(system_impacted)
+
+    def clear_systems_impacted(self):
+        self._systems_impacted = []
+
+    def _format_traceback(self, e_traceback, e_message, max_field_len:int, max_traceback_lines:int):
+        if not e_traceback or e_traceback == 'None\n':
+            return None
+
+        traceback_lines = e_traceback.splitlines()
+
+        # Check if the traceback is within the limits
+        if len(traceback_lines) <= max_traceback_lines and len(e_traceback) <= max_field_len:
+            return e_traceback
+
+        # Remove lines that are part of the exception message if they are present in traceback
+        message_lines = e_message.splitlines() if e_message else []
+        if message_lines:
+            for message_line in message_lines:
+                if message_line in traceback_lines:
+                    traceback_lines.remove(message_line)
+
+        # Filter out lines from third-party libraries (like site-packages)
+        filtered_lines = [line for line in traceback_lines if "site-packages" not in line]
+
+        # If filtering results in too few lines, revert to original traceback
+        if len(filtered_lines) < 2:
+            filtered_lines = traceback_lines
+
+        # Combine standalone bracket lines with previous or next lines
+        combined_lines = []
+        for line in filtered_lines:
+            if line.strip() in {"(", ")", "{", "}", "[", "]"} and combined_lines:
+                combined_lines[-1] += " " + line.strip()
+            else:
+                combined_lines.append(line)
+
+        # Ensure the number of lines doesn't exceed MAX_TRACEBACK_LINES
+        if len(combined_lines) > max_traceback_lines:
+            keep_lines_start = min(max_traceback_lines // 2, len(combined_lines))
+            keep_lines_end = min(max_traceback_lines // 2, len(combined_lines) - keep_lines_start)
+            combined_lines = (
+                combined_lines[:keep_lines_start] +
+                ['... (truncated) ...'] +
+                combined_lines[-keep_lines_end:]
+            )
+
+        formatted_traceback = '\n'.join(combined_lines)
+
+        # Ensure the total length doesn't exceed MAX_TRACEBACK_LENGTH
+        if len(formatted_traceback) > max_field_len:
+            truncated_length = max_field_len - len('... (truncated) ...')
+            half_truncated_length = truncated_length // 2
+            formatted_traceback = (
+                formatted_traceback[:half_truncated_length] +
+                '\n... (truncated) ...\n' +
+                formatted_traceback[-half_truncated_length:]
+            )
+        return formatted_traceback
+
+    def to_dict(self, max_field_len:int =10000, size_limit:float=256 * 1024 * 0.80,max_traceback_lines:int = 30):
+        size_limit = int(size_limit) # Ensure size_limit is an integer
+
+        # Unified list of all fields
+        systems_impacted_str = f"{len(self.systems_impacted)} system(s): " + " ,,, ".join(self.systems_impacted) if self.systems_impacted else None
+        fields = [
+            ("log_status", str(self.log_status.name)),
+            ("level_code", self.level.value),
+            ("level_name", str(self.level.name)),
+            ("base_context", str(self.base_context)),
+            ("timestamp", str(self.timestamp)),
+            ("collector_id", str(self.collector_id)),
+            ("systems_impacted", systems_impacted_str),
+            ("context", str(self.context)), # special sizing rules apply to it
+            ("subject", str(self.subject)),
+            ("description", str(self.description)),
+            ("exception_type", str(self.exception_type)),
+            ("exception_message", str(self.exception_message)),
+            ("exception_traceback", str(self._format_traceback(self.exception_traceback,self.exception_message, max_field_len, max_traceback_lines)))
+        ]
+
+        # Function to calculate the byte size of a JSON-encoded field
+        def field_size(key, value):
+            return len(json.dumps({key: value}).encode('utf-8'))
+
+        # Function to truncate a value based on its type
+        # Function to truncate a value based on its type
+        def truncate_value(value, max_size):
+            if isinstance(value, str):
+                half_size = max_size // 2
+                return value[:half_size] + '...' + value[-(max_size - half_size - 3):]
+            return value
+
+        # Ensure no field exceeds max_field_len
+        for i, (key, value) in enumerate(fields):
+            if isinstance(value, str) and len(value) > max_field_len:
+                fields[i] = (key, truncate_value(value, max_field_len))
+
+        # Ensure total size of the dict doesn't exceed size_limit
+        total_size = sum(field_size(key, value) for key, value in fields)
+        log_dict = {}
+        truncated = False
+
+        if total_size > size_limit:
+            truncated = True
+            remaining_size = size_limit
+            remaining_fields = len(fields)
+
+            for key, value in fields:
+                if remaining_fields > 0:
+                    max_size_per_field = remaining_size // remaining_fields
+                else:
+                    max_size_per_field = 0
+
+                field_sz = field_size(key, value)
+                if field_sz > max_size_per_field:
+                    value = truncate_value(value, max_size_per_field)
+                    field_sz = field_size(key, value)
+
+                log_dict[key] = value
+                remaining_size -= field_sz
+                remaining_fields -= 1
+        else:
+            log_dict = dict(fields)
+
+        log_dict['trunc'] = truncated
+
+        return log_dict
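
For orientation, a minimal, hypothetical usage sketch of the new ContextLog class, based only on the constructor, add_system_impacted and to_dict shown in the hunk above. The pipeline, step, run and bucket names are illustrative placeholders, and the LogLevel member used is one of those added in the enums_common_utils.py hunk earlier in this diff.

# Hypothetical usage of the new ContextLog; placeholder names are not from the package.
from ipulse_shared_core_ftredge import ContextLog, LogLevel

log = ContextLog(
    level=LogLevel.INFO_REMOTE_PERSISTNACE_COMPLETE,  # member added in this release
    base_context="daily-market-data-import",          # example pipeline name
    context="write_prices_to_gcs",                    # example step name
    collector_id="run-0001",                          # example run identifier
    subject="prices.json",
    description="Persisted daily prices to GCS",
)
log.add_system_impacted("gcs: example-bucket/prices.json")  # example system reference

# to_dict() truncates oversized fields so the whole entry stays under the default
# size_limit (~80% of 256 KiB) and records whether truncation happened in 'trunc'.
entry = log.to_dict()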

@@ -14,7 +14,6 @@ import traceback
 from google.cloud import error_reporting, logging as cloud_logging
 from google.api_core.exceptions import NotFound
 
-
 ############################################################################
 ##################### SETTING UP LOGGER ##########################
 

@@ -124,15 +123,22 @@ def read_csv_from_gcs(bucket_name, file_name, storage_client, logger):
 
 
 
-def write_json_to_gcs(bucket_name, storage_client, data, file_name,
-
-
+def write_json_to_gcs( storage_client, data, bucket_name, file_name,
+                       file_exists_if_starts_with_prefix=None, overwrite_if_exists=False, increment_if_exists=False,
+                       save_locally=False, local_path=None, max_retries=2, max_deletable_files=1, logger=None):
     """Saves data to Google Cloud Storage and optionally locally.
-
-    This function attempts to upload data to GCS.
-    retries and `save_locally` is True or `local_path` is provided, it attempts
-
-
+
+    This function attempts to upload data to GCS.
+    - If the upload fails after retries and `save_locally` is True or `local_path` is provided, it attempts to save the data locally.
+    - It handles file name conflicts based on these rules:
+        - If `overwrite_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, ANY existing file containing the substring is deleted, and the new file is saved with the provided `file_name`.
+            - If `file_exists_if_contains_substr` is None, and a file with the exact `file_name` exists, it's overwritten.
+        - If `increment_if_exists` is True:
+            - If `file_exists_if_contains_substr` is provided, a new file with an incremented version is created ONLY if a file with the EXACT `file_name` exists.
+            - If `file_exists_if_contains_substr` is None, a new file with an incremented version is created if a file with the exact `file_name` exists.
+
+    -If both overwrite_if_exists and increment_if_exists are provided as Ture, an exception will be raised.
     """
 
     def log_message(message):

@@ -147,100 +153,141 @@ def write_json_to_gcs(bucket_name, storage_client, data, file_name,
         if logger:
             logger.warning(message)
 
-
-    success = False
-    gcs_path = None
-    local_path_final = None
-    gcs_file_overwritten = False
-    gcs_file_already_exists = False
-    gcs_file_saved_with_increment = False
-    gcs_upload_exception = None # Store potential GCS exception
-
-    # Check for conflicting options
+    # Input validation
     if overwrite_if_exists and increment_if_exists:
-        raise ValueError("
+        raise ValueError("Both 'overwrite_if_exists' and 'increment_if_exists' cannot be True simultaneously.")
+    if not isinstance(data, (list, dict, str)):
+        raise ValueError("Unsupported data type. Data must be a list, dict, or str.")
+    if max_deletable_files > 10:
+        raise ValueError("max_deletable_files should be less than 10 for safety. For more use another method.")
 
+    # Prepare data
     if isinstance(data, (list, dict)):
         data_str = json.dumps(data, indent=2)
-    elif isinstance(data, str):
-        data_str = data
     else:
-
+        data_str = data
 
     bucket = storage_client.bucket(bucket_name)
     base_file_name, ext = os.path.splitext(file_name)
     increment = 0
+    attempts = 0
+    success = False
 
-
-
-
-
-
-
-
-
-
+    # GCS-related metadata
+    gcs_path = None
+    gcs_file_overwritten = False
+    gcs_file_already_exists = False
+    gcs_file_saved_with_increment = False
+    gcs_file_exists_checked_on_name = file_name
+    gcs_deleted_files=[]
+
+    # GCS upload exception
+    gcs_upload_exception = None
+
+    # Local file path
+    local_path_final = None
+
+    try:
+        # --- Overwrite Logic ---
+        if overwrite_if_exists:
+            if file_exists_if_starts_with_prefix:
+                gcs_file_exists_checked_on_name = file_exists_if_starts_with_prefix
+                blobs_to_delete = list(bucket.list_blobs(prefix=file_exists_if_starts_with_prefix))
+                if len(blobs_to_delete) > max_deletable_files:
+                    raise Exception(f"Error: Attempt to delete {len(blobs_to_delete)} matched files, but limit is {max_deletable_files}.")
+                if blobs_to_delete:
+                    log_message(f"Deleting files containing '{file_exists_if_starts_with_prefix}' for overwrite.")
+                    for blob in blobs_to_delete:
+                        blob.delete()
+                        gcs_deleted_files.append(blob.name)
+                        log_message(f"Deleted: gs://{bucket_name}/{blob.name}")
+                    gcs_file_overwritten = True
             else:
                 blob = bucket.blob(file_name)
-
-                # Check if the file exists
                 if blob.exists():
                     gcs_file_already_exists = True
                     gcs_path = f"gs://{bucket_name}/{file_name}"
-                    log_message(f"File {file_name} already exists
-
-
-
+                    log_message(f"File '{file_name}' already exists. Overwriting.")
+                    blob.delete() # Delete the existing blob
+                    gcs_deleted_files.append(blob.name)
+                    gcs_file_overwritten = True
+
+        # --- Increment Logic ---
+        elif increment_if_exists:
+            gcs_file_exists_checked_on_name = file_name # We only increment if the exact name exists
+            while bucket.blob(file_name).exists():
+                gcs_file_already_exists = True
+                increment += 1
+                file_name = f"{base_file_name}_v{increment}{ext}"
+                gcs_file_saved_with_increment = True
+                log_warning(f"File already exists. Using incremented name: {file_name}")
+
+        # --- GCS Upload ---
+        if overwrite_if_exists or increment_if_exists: # Only upload if either overwrite or increment is True
+            while attempts < max_retries and not success:
+                try:
+                    blob = bucket.blob(file_name) # Use the potentially updated file_name
+                    blob.upload_from_string(data_str, content_type='application/json')
+                    gcs_path = f"gs://{bucket_name}/{file_name}"
+                    log_message(f"Successfully saved file to GCS: {gcs_path}")
+                    success = True
+                except Exception as e:
+                    gcs_upload_exception=e
+                    attempts += 1
+                    if attempts < max_retries:
+                        log_warning(f"Attempt {attempts} to upload to GCS failed. Retrying...")
+                        time.sleep(2 ** attempts)
                     else:
-
-
-
-
-
-
-            except Exception as e:
-                gcs_upload_exception = e
-                attempts += 1
-                if attempts < max_retries:
-                    time.sleep(2 ** attempts)
-                else:
-                    log_error(f"Failed to write {file_name} to GCS bucket {bucket_name} after {max_retries} attempts: {e}")
+                        log_error(f"Failed to write '{file_name}' to GCS bucket '{bucket_name}' after {max_retries} attempts: {e}", exc_info=True)
+                        if save_locally or local_path:
+                            log_message(f"Attempting to save '{file_name}' locally due to GCS upload failure.")
+    except Exception as e:
+        log_error(f"Error during GCS operations: {e}", exc_info=True)
+        gcs_upload_exception = e
 
+    # --- Save Locally ---
+    write_out=False
     if not success or save_locally or local_path:
         try:
-            if
-
-            else:
-                local_path_final = os.path.join(local_path, file_name)
+            local_path=local_path if local_path else "/tmp"
+            local_path_final = os.path.join(local_path, file_name)
 
             if os.path.exists(local_path_final):
                 if increment_if_exists:
                     increment = 0
                     while os.path.exists(local_path_final):
                         increment += 1
-                        local_path_final = os.path.join(local_path, f"{base_file_name}
-
-
-
-
+                        local_path_final = os.path.join(local_path, f"{base_file_name}_v{increment}{ext}")
+                    log_warning(f"Local file already exists. Using incremented name: {local_path_final}")
+                    write_out=True
+                elif overwrite_if_exists:
+                    write_out=True
                     log_message(f"File {file_name} already exists locally at {local_path_final}. Overwriting: {overwrite_if_exists}")
+                else:
+                    log_message(f"File {file_name} already exists locally at {local_path_final} and overwrite is set to False. Skipping save.")
+                    write_out=False
+            else:
+                write_out=True
 
-            if
+            if write_out:
                 with open(local_path_final, 'w', encoding='utf-8') as f:
                     f.write(data_str)
-
-
+                log_message(f"Saved {file_name} locally at {local_path_final}. Overwritten: {overwrite_if_exists}")
+
         except Exception as local_e:
             log_error(f"Failed to write {file_name} locally: {local_e}", exc_info=True)
 
     if gcs_upload_exception is not None:
         raise gcs_upload_exception # Propagate without nesting
 
+    # --- Return Metadata ---
     return {
-        "gcs_path": gcs_path,
-        "local_path": local_path_final,
+        "gcs_path": gcs_path if success else None, # Only set gcs_path if upload succeeded
+        "local_path": local_path_final if write_out else None, # Only set local_path if saved locally
        "gcs_file_already_exists": gcs_file_already_exists,
+        "gcs_file_exists_checked_on_name":gcs_file_exists_checked_on_name ,
         "gcs_file_overwritten": gcs_file_overwritten,
+        "gcs_deleted_file_names": ",,,".join(gcs_deleted_files) if gcs_deleted_files else None,
         "gcs_file_saved_with_increment": gcs_file_saved_with_increment
     }
 
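
To illustrate the reworked write_json_to_gcs, a hypothetical call using only the parameters and return keys visible in the hunks above; the bucket and object names are placeholders, and a google.cloud.storage client with application-default credentials is assumed.

# Hypothetical call to the updated write_json_to_gcs; bucket/file names are placeholders.
from google.cloud import storage
from ipulse_shared_core_ftredge import write_json_to_gcs

client = storage.Client()  # assumes application-default GCP credentials

result = write_json_to_gcs(
    storage_client=client,
    data={"symbol": "EXAMPLE", "close": 123.45},
    bucket_name="example-bucket",
    file_name="prices_2024-01-01.json",
    file_exists_if_starts_with_prefix="prices_2024-01-01",  # matching objects are deleted before upload
    overwrite_if_exists=True,   # mutually exclusive with increment_if_exists (ValueError otherwise)
    save_locally=True,          # also writes a local copy, defaulting to /tmp when local_path is None
    max_deletable_files=5,      # values above 10 are rejected by the new validation
)

print(result["gcs_path"], result["gcs_deleted_file_names"], result["gcs_file_overwritten"])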