datamarket 0.7.41__py3-none-any.whl → 0.7.125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamarket/__init__.py CHANGED
@@ -1 +0,0 @@
1
- # init file
@@ -0,0 +1 @@
1
+ from .main import * # noqa: F403
@@ -0,0 +1,118 @@
1
+ ########################################################################################################################
2
+ # CLASSES
3
+
4
+
5
+ from typing import Optional
6
+
7
+ from requests import Request, Response
8
+ from requests.exceptions import HTTPError
9
+
10
+
11
class ManagedHTTPError(HTTPError):
    """Signal that this HTTP status was handled and should not be retried.

    Args:
        message: Human-readable description. When empty or ``None`` a default
            of the form ``"HTTP <status> for <url>"`` is built, using
            ``"unknown"`` for any part that cannot be read.
        response: The response associated with the error, if any.
        request: The originating request. When omitted (or falsy) the
            ``request`` attribute of ``response`` is used, if present.
        *args: Extra positional arguments forwarded to ``HTTPError``.
        **kwargs: Extra keyword arguments forwarded to ``HTTPError``.
    """

    def __init__(
        self,
        message: Optional[str] = None,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
        *args,
        **kwargs,
    ):
        resolved_request = request or getattr(response, "request", None)

        # Build a safe default message
        if not message:
            status = getattr(response, "status_code", "unknown")
            url = getattr(resolved_request, "url", "unknown")
            message = f"HTTP {status} for {url}"

        # requests' base exception initializer assigns ``self.request`` from its
        # ``request`` keyword, so the request has to be forwarded; otherwise an
        # explicitly supplied request is overwritten by the base class.
        super().__init__(message, *args, response=response, request=resolved_request, **kwargs)

        # Set after the base initializer so these values are the final ones.
        self.response = response
        self.request = resolved_request
        self.message = message
34
+
35
+
36
class IgnoredHTTPError(ManagedHTTPError):
    """Managed HTTP error that retry logic is expected to ignore.

    Adds no behavior of its own; it exists only as a distinct type.
    """
40
+
41
+
42
class NotFoundError(ManagedHTTPError):
    """Managed HTTP error whose default message falls back to status 404.

    The default message is ``"HTTP <status> for <url>"``; ``404`` is used when
    the response exposes no ``status_code`` and ``"unknown"`` when no URL can
    be read from the request.
    """

    def __init__(
        self,
        message: Optional[str] = None,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
        *args,
        **kwargs,
    ):
        if message:
            final_message = message
        else:
            code = getattr(response, "status_code", 404)
            origin = request or getattr(response, "request", None)
            target = getattr(origin, "url", "unknown")
            final_message = f"HTTP {code} for {target}"
        super().__init__(final_message, response, request, *args, **kwargs)
57
+
58
+
59
class BadRequestError(ManagedHTTPError):
    """Managed HTTP error whose default message falls back to status 400.

    The default message is ``"HTTP <status> for <url>"``; ``400`` is used when
    the response exposes no ``status_code`` and ``"unknown"`` when no URL can
    be read from the request.
    """

    def __init__(
        self,
        message: Optional[str] = None,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
        *args,
        **kwargs,
    ):
        if message:
            final_message = message
        else:
            code = getattr(response, "status_code", 400)
            origin = request or getattr(response, "request", None)
            target = getattr(origin, "url", "unknown")
            final_message = f"HTTP {code} for {target}"
        super().__init__(final_message, response, request, *args, **kwargs)
74
+
75
+
76
class EmptyResponseError(ManagedHTTPError):
    """Managed HTTP error with the default message ``"Empty response for <url>"``.

    ``"unknown"`` replaces the URL when none can be read from the request.
    """

    def __init__(
        self,
        message: Optional[str] = None,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
        *args,
        **kwargs,
    ):
        if message:
            final_message = message
        else:
            origin = request or getattr(response, "request", None)
            target = getattr(origin, "url", "unknown")
            final_message = f"Empty response for {target}"
        super().__init__(final_message, response, request, *args, **kwargs)
90
+
91
+
92
class RedirectionDetectedError(ManagedHTTPError):
    """Managed HTTP error whose default message falls back to status 300.

    The default message is ``"HTTP <status> for <url>"``; ``300`` is used when
    the response exposes no ``status_code`` and ``"unknown"`` when no URL can
    be read from the request.
    """

    def __init__(
        self,
        message: Optional[str] = None,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
        *args,
        **kwargs,
    ):
        if message:
            final_message = message
        else:
            code = getattr(response, "status_code", 300)
            origin = request or getattr(response, "request", None)
            target = getattr(origin, "url", "unknown")
            final_message = f"HTTP {code} for {target}"
        super().__init__(final_message, response, request, *args, **kwargs)
107
+
108
+
109
class NoWorkingProxiesError(Exception):
    """Exception carrying a ``message`` attribute, defaulting to "No working proxies available"."""

    def __init__(self, message="No working proxies available"):
        # The same text is stored on the instance and passed on as the exception argument.
        super().__init__(message)
        self.message = message
113
+
114
+
115
class EnsureNewIPTimeoutError(Exception):
    """Exception carrying a ``message`` attribute, defaulting to "Timed out waiting for new IP"."""

    def __init__(self, message="Timed out waiting for new IP"):
        # The same text is stored on the instance and passed on as the exception argument.
        super().__init__(message)
        self.message = message
@@ -3,14 +3,16 @@
3
3
 
4
4
  import logging
5
5
  from collections.abc import MutableMapping
6
- from typing import Any, Iterator, List, Type, TypeVar
6
+ from enum import Enum, auto
7
+ from typing import Any, Iterator, List, Optional, Type, TypeVar, Union
7
8
  from urllib.parse import quote_plus
8
9
 
9
- from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
10
+ from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, text
10
11
  from sqlalchemy.dialects.postgresql import insert
11
12
  from sqlalchemy.exc import IntegrityError
12
13
  from sqlalchemy.ext.declarative import DeclarativeMeta
13
- from sqlalchemy.orm import sessionmaker
14
+ from sqlalchemy.orm import Session, sessionmaker
15
+ from sqlalchemy.sql.expression import ClauseElement
14
16
 
15
17
  ########################################################################################################################
16
18
  # CLASSES
@@ -20,6 +22,11 @@ logger = logging.getLogger(__name__)
20
22
  ModelType = TypeVar("ModelType", bound=DeclarativeMeta)
21
23
 
22
24
 
25
class CommitStrategy(Enum):
    """How ``windowed_query`` ends the per-window session.

    Members can also be selected by name, case-insensitively, when passed to
    ``windowed_query`` as a string.
    """

    # Values are the ones ``auto()`` assigns: consecutive integers from 1.
    # Commit only when the window was yielded and the generator resumed normally.
    COMMIT_ON_SUCCESS = 1
    # Always attempt a commit when the window's session is stopped.
    FORCE_COMMIT = 2
28
+
29
+
23
30
  class MockContext:
24
31
  def __init__(self, column: SQLColumnExpression) -> None:
25
32
  self.current_parameters = {}
@@ -29,16 +36,58 @@ class MockContext:
29
36
 
30
37
  class AlchemyInterface:
31
38
  def __init__(self, config: MutableMapping) -> None:
39
+ self.session: Optional[Session] = None
32
40
  if "db" in config:
33
41
  self.config = config["db"]
34
-
35
42
  self.engine = create_engine(self.get_conn_str())
36
- self.session = sessionmaker(bind=self.engine)()
37
- self.cursor = self.session.connection().connection.cursor()
38
-
43
+ self.Session = sessionmaker(bind=self.engine)
39
44
  else:
40
45
  logger.warning("no db section in config")
41
46
 
47
+ def __enter__(self) -> "AlchemyInterface":
48
+ """Enter the runtime context related to this object (starts session)."""
49
+ self.start()
50
+ return self
51
+
52
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
53
+ """Exit the runtime context related to this object (stops session)."""
54
+ should_commit = exc_type is None
55
+ self.stop(commit=should_commit)
56
+
57
def start(self) -> None:
    """Create a new session from the configured factory.

    Raises:
        AttributeError: If no ``Session`` factory exists on this object.
        RuntimeError: If a session is already active.
    """
    factory_available = hasattr(self, "Session")
    if not factory_available:
        raise AttributeError("Database configuration not initialized. Cannot create session.")

    already_running = self.session is not None
    if already_running:
        raise RuntimeError("Session already active.")

    self.session = self.Session()
    logger.debug("SQLAlchemy session started manually.")
65
+
66
def stop(self, commit: bool = True) -> None:
    """End the active session, committing or rolling back first.

    When no session is active a warning is logged and nothing else happens.
    The session is always closed and ``self.session`` reset to ``None``,
    even if the commit or rollback fails.

    Args:
        commit: Commit pending work when true, roll it back when false.

    Raises:
        Exception: Whatever the commit or rollback raised, re-raised after a
            further rollback attempt.
    """
    active = self.session
    if active is None:
        logger.warning("No active session to stop.")
        return

    try:
        if commit:
            logger.debug("Committing SQLAlchemy session before stopping.")
            active.commit()
        else:
            logger.debug("Rolling back SQLAlchemy session before stopping.")
            active.rollback()
    except Exception as e:
        logger.error(f"Exception during session commit/rollback on stop: {e}", exc_info=True)
        # Best-effort cleanup; a failure here is logged but does not mask the original error.
        try:
            active.rollback()
        except Exception as rb_exc:
            logger.error(f"Exception during secondary rollback attempt on stop: {rb_exc}", exc_info=True)
        raise
    finally:
        logger.debug("Closing SQLAlchemy session.")
        active.close()
        self.session = None
90
+
42
91
  def get_conn_str(self):
43
92
  return (
44
93
  f"{self.config['engine']}://"
@@ -110,6 +159,16 @@ class AlchemyInterface:
110
159
  self.create_tables(tables)
111
160
 
112
161
  def reset_column(self, query_results: List[Result[Any]], column_name: str) -> None:
162
+ """
163
+ Reset a column to its default value for a list of query results.
164
+
165
+ Args:
166
+ query_results: List of query results to update
167
+ column_name: Name of the column to reset
168
+ """
169
+ if self.session is None:
170
+ raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
171
+
113
172
  if not query_results:
114
173
  logger.warning("No objects to reset column for.")
115
174
  return
@@ -124,109 +183,192 @@ class AlchemyInterface:
124
183
 
125
184
  column = table.columns[column_name]
126
185
 
186
+ # Determine the default value to use
127
187
  if column.server_default is not None:
128
- query_results.update({column_name: text("DEFAULT")}, synchronize_session=False)
188
+ default_value = text("DEFAULT")
129
189
  elif column.default is not None:
130
190
  default_value = column.default.arg
131
191
  if callable(default_value):
132
192
  default_value = default_value(MockContext(column))
133
- query_results.update({column_name: default_value}, synchronize_session=False)
134
193
  else:
135
194
  raise ValueError(f"Column '{column_name}' doesn't have a default value defined.")
136
195
 
137
- self.session.commit()
196
+ query_results.update({column_name: default_value}, synchronize_session=False)
138
197
 
139
- def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> None:
140
- try:
141
- if not silent:
142
- logger.info(f"adding {alchemy_obj}...")
198
@staticmethod
def _log_integrity_error(ex: IntegrityError, alchemy_obj, action="insert"):
    """Log an IntegrityError as one compact line keyed on its SQLSTATE code.

    Unique violations (23505) are logged at info level without the driver
    message; every other code is logged at error level with the raw message.
    See https://www.postgresql.org/docs/current/errcodes-appendix.html.

    Args:
        ex: The caught IntegrityError; ``ex.orig.pgcode`` is read if present.
        alchemy_obj: Object being written, interpolated into the log line.
        action: Verb used in the log line (e.g. "insert", "upsert").
    """
    labels_by_sqlstate = {
        "23000": "Integrity constraint violation",
        "23001": "Restrict violation",
        "23502": "NOT NULL violation",
        "23503": "Foreign key violation",
        "23505": "Unique violation",
        "23514": "Check constraint violation",
        "23P01": "Exclusion constraint violation",
    }
    sqlstate = getattr(ex.orig, "pgcode", None)
    description = labels_by_sqlstate.get(sqlstate, "Integrity error (unspecified)")
    headline = f"{description} trying to {action} {alchemy_obj}"

    if sqlstate == "23505":
        # A simple info log for unique violations
        logger.info(headline)
        return

    logger.error(f"{headline}\nPostgreSQL message: {ex.orig}")
222
+
223
def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> bool:
    """Add an object to the active session inside a savepoint.

    Args:
        alchemy_obj: The mapped object to add.
        silent: When true, suppress the "adding" log line and the
            integrity-error log.

    Returns:
        True when the savepoint block completed, False when it raised an
        IntegrityError (which is logged unless ``silent`` and not re-raised).

    Raises:
        RuntimeError: If no session is active.
    """
    if self.session is None:
        raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")

    try:
        # Nested transaction: a failure here does not discard the outer transaction.
        with self.session.begin_nested():
            if not silent:
                logger.info(f"adding {alchemy_obj}...")
            self.session.add(alchemy_obj)
    except IntegrityError as ex:
        # The begin_nested() context manager has already rolled the savepoint back.
        if not silent:
            self._log_integrity_error(ex, alchemy_obj, action="insert")
        return False
    else:
        return True
150
241
 
151
- self.session.rollback()
242
def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> bool:
    """Insert an object, or update the conflicting row, inside a savepoint.

    The INSERT uses every column whose value on the object is not ``None``.
    On conflict, the UPDATE sets the same values minus the primary-key
    columns, plus any column whose ``onupdate`` is a SQL expression.

    Args:
        alchemy_obj: The mapped object to write.
        index_elements: Column names forming the conflict target.
        silent: When true, suppress the "upserting" log line and the
            integrity-error log.

    Returns:
        True when the statement executed, False when it raised an
        IntegrityError (which is logged unless ``silent`` and not re-raised).

    Raises:
        RuntimeError: If no session is active.
    """
    if self.session is None:
        raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")

    if not silent:
        logger.info(f"upserting {alchemy_obj}")

    table = alchemy_obj.__table__
    pk_names = [pk_col.name for pk_col in table.primary_key.columns.values()]

    # Values for the INSERT: every column the object has a non-None value for.
    insert_values = {}
    for col in table.columns:
        current = getattr(alchemy_obj, col.name)
        if current is not None:
            insert_values[col.name] = current

    # Values for the ON CONFLICT UPDATE: same as above, minus the primary keys.
    update_set_values = {name: val for name, val in insert_values.items() if name not in pk_names}

    # Columns with a SQL-expression onupdate are added explicitly to the SET clause.
    for column in table.columns:
        onupdate = column.onupdate
        if onupdate is None:
            continue
        wrapped = getattr(onupdate, "arg", None)
        if isinstance(wrapped, ClauseElement):
            # Wrapper object whose actual SQL expression lives in .arg
            update_set_values[column.name] = wrapped
        elif isinstance(onupdate, ClauseElement):
            # onupdate is itself the SQL expression
            update_set_values[column.name] = onupdate

    statement = insert(table).values(insert_values)
    statement = statement.on_conflict_do_update(index_elements=index_elements, set_=update_set_values)

    try:
        # Nested transaction: a failure here does not discard the outer transaction.
        with self.session.begin_nested():
            self.session.execute(statement)
    except IntegrityError as ex:
        # The begin_nested() context manager has already rolled the savepoint back.
        if not silent:
            self._log_integrity_error(ex, alchemy_obj, action="upsert")
        return False
    else:
        return True
178
300
 
179
301
  def windowed_query(
180
302
  self,
181
303
  stmt: Select[Any],
182
304
  order_by: List[SQLColumnExpression[Any]],
183
305
  windowsize: int,
306
+ commit_strategy: Union[CommitStrategy, str] = CommitStrategy.COMMIT_ON_SUCCESS,
184
307
  ) -> Iterator[Result[Any]]:
185
308
  """
186
- Executes a windowed query on the given statement.
309
+ Executes a windowed query, fetching each window in a separate, short-lived session.
187
310
 
188
311
  Args:
189
312
  stmt: The SQL select statement to execute.
190
313
  order_by: The columns to use for ordering.
191
314
  windowsize: The number of rows to fetch in each window.
315
+ commit_strategy: The strategy to use for committing the session after each window.
316
+ Defaults to `CommitStrategy.COMMIT_ON_SUCCESS`.
192
317
 
193
318
  Returns:
194
319
  An iterator of Result objects, each containing a window of data.
320
+ The session used to fetch the Result is closed immediately after yielding.
195
321
 
196
322
  More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
197
323
  """
198
- # Add row_number over the specified order
199
- row_number = func.row_number().over(order_by=order_by).label("row_number")
324
+ # Parameter mapping
325
+ if isinstance(commit_strategy, str):
326
+ commit_strategy = CommitStrategy[commit_strategy.upper()]
200
327
 
201
- # Add the windowing column to the statement and sort by it for deterministic results
202
- inner_stmt = stmt.add_columns(row_number).order_by(row_number)
203
- subq = inner_stmt.subquery()
204
-
205
- # Create an outer query that selects from the subquery
206
- cols = [c for c in subq.c]
207
- outer_query = select(*cols)
208
-
209
- last_row_number = 0
328
+ # Find id column in stmt
329
+ if not any(column.get("entity").id for column in stmt.column_descriptions):
330
+ raise Exception("Column 'id' not found in any entity of the query.")
331
+ id_column = stmt.column_descriptions[0]["entity"].id
210
332
 
333
+ last_id = 0
211
334
  while True:
212
- # Filter on row_number in the outer query
213
- current_query = outer_query.where(subq.c.row_number > last_row_number).limit(windowsize)
214
- result = self.session.execute(current_query)
215
-
216
- # Create a FrozenResult to allow peeking at the data without consuming
217
- frozen_result: FrozenResult = result.freeze()
218
- chunk = frozen_result().all()
219
-
220
- if not chunk:
221
- break
222
-
223
- # Update for next iteration
224
- last_row_number = chunk[-1].row_number - 1
225
-
226
- # Create a new Result object from the FrozenResult
227
- yield_result = frozen_result()
228
-
229
- # Remove row_number from result
230
- yield_result = yield_result.columns(*range(len(cols) - 1))
231
-
232
- yield yield_result
335
+ session_active = False
336
+ commit_needed = False
337
+ try:
338
+ self.start()
339
+ session_active = True
340
+
341
+ # Filter on row_number in the outer query
342
+ current_query = stmt.where(id_column > last_id).order_by(order_by[0], *order_by[1:]).limit(windowsize)
343
+ result = self.session.execute(current_query)
344
+
345
+ # Create a FrozenResult to allow peeking at the data without consuming
346
+ frozen_result: FrozenResult = result.freeze()
347
+ chunk = frozen_result().all()
348
+
349
+ if not chunk:
350
+ break
351
+
352
+ # Update for next iteration
353
+ last_id = chunk[-1].id
354
+
355
+ # Create a new Result object from the FrozenResult
356
+ yield_result = frozen_result()
357
+
358
+ yield yield_result
359
+ commit_needed = True
360
+
361
+ finally:
362
+ if session_active and self.session:
363
+ if commit_strategy == CommitStrategy.FORCE_COMMIT:
364
+ # For forced commit, always attempt to commit.
365
+ # The self.stop() method already handles potential exceptions during commit/rollback.
366
+ self.stop(commit=True)
367
+ elif commit_strategy == CommitStrategy.COMMIT_ON_SUCCESS:
368
+ # Commit only if no exception occurred before yielding the result.
369
+ self.stop(commit=commit_needed)
370
+ else:
371
+ # Fallback or error for unknown strategy, though type hinting should prevent this.
372
+ # For safety, default to rollback.
373
+ logger.warning(f"Unknown commit strategy: {commit_strategy}. Defaulting to rollback.")
374
+ self.stop(commit=False)
@@ -3,6 +3,8 @@
3
3
 
4
4
  import io
5
5
  import logging
6
+ from typing import Any, Dict, List, Optional
7
+
6
8
  import boto3
7
9
 
8
10
  ########################################################################################################################
@@ -12,34 +14,44 @@ logger = logging.getLogger(__name__)
12
14
 
13
15
 
14
16
  class AWSInterface:
15
def __init__(self, config) -> None:
    """Collect AWS profiles from the config and activate the first one.

    Every section named ``aws:<profile>`` yields one profile entry holding
    the profile name, its bucket list (the section's comma-separated
    ``buckets`` value, blanks dropped) and a ``boto3.Session`` for that
    profile. A config object without a ``sections`` attribute yields no
    profiles.

    Args:
        config: Configuration object; presumably a ConfigParser-like object
            with ``sections()`` and per-section ``get`` - confirm with callers.
    """
    self.profiles: List[Dict[str, Any]] = []
    self.config = config

    list_sections = getattr(self.config, "sections", lambda: [])
    for section in list_sections():
        if not section.startswith("aws:"):
            continue

        profile_name = section.split(":", 1)[1]
        raw_buckets = self.config[section].get("buckets", "")
        self.profiles.append(
            {
                "profile": profile_name,
                "buckets": [name.strip() for name in raw_buckets.split(",") if name.strip()],
                "session": boto3.Session(profile_name=profile_name),
            }
        )

    if not self.profiles:
        logger.warning("No AWS profiles found in config file")

    self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
    self._update_resources()
35
41
 
36
- def _update_resources(self):
42
+ def _update_resources(self) -> None:
43
+ """Refresh S3 resources for the current profile and set default bucket (first in list)"""
37
44
  if self.current_profile:
38
45
  self.s3 = self.current_profile["session"].resource("s3")
39
46
  self.s3_client = self.s3.meta.client
40
- self.bucket = self.current_profile["bucket"]
47
+ buckets = self.current_profile.get("buckets", [])
48
+ self.bucket = buckets[0] if buckets else None
49
+ else:
50
+ self.s3 = None
51
+ self.s3_client = None
52
+ self.bucket = None
41
53
 
42
- def switch_profile(self, profile_name):
54
+ def switch_profile(self, profile_name: str) -> None:
43
55
  for profile in self.profiles:
44
56
  if profile["profile"] == profile_name:
45
57
  self.current_profile = profile
@@ -47,14 +59,69 @@ class AWSInterface:
47
59
  return
48
60
  logger.warning(f"Profile {profile_name} not found")
49
61
 
50
- def get_file(self, s3_path):
62
def switch_bucket(self, bucket: str) -> None:
    """Make ``bucket`` the active bucket if the current profile lists it.

    Logs a warning and leaves the active bucket unchanged when there is no
    current profile or the bucket is not among the profile's buckets.

    Args:
        bucket: Name of the bucket to activate.
    """
    profile = self.current_profile
    if not profile:
        logger.warning("No current AWS profile to switch bucket on")
        return

    known_buckets = profile.get("buckets") or []
    if bucket in known_buckets:
        self.bucket = bucket
        return

    logger.warning(f"Bucket {bucket} not found in profile {profile.get('profile')}")
73
+
74
def switch_bucket_for_profile(self, profile_name: str, bucket: str) -> None:
    """Activate the named profile, then try to activate ``bucket`` on it.

    The profile's resources are refreshed first (which selects its default
    bucket); ``switch_bucket`` then replaces it only if ``bucket`` is valid.
    A warning is logged when no profile has the given name.

    Args:
        profile_name: Name of the profile to select.
        bucket: Name of the bucket to activate on that profile.
    """
    selected = next((entry for entry in self.profiles if entry["profile"] == profile_name), None)
    if selected is None:
        logger.warning(f"Profile {profile_name} not found")
        return

    self.current_profile = selected
    self._update_resources()
    self.switch_bucket(bucket)
85
+
86
def get_bucket_url(self) -> Optional[str]:
    """Build the URL of the active bucket from its name and the client's region.

    Returns:
        ``https://<bucket>.s3.<region>.amazonaws.com``, or ``None`` (with a
        warning logged) when no bucket is active.
    """
    if self.bucket:
        region = self.s3_client.meta.region_name
        return f"https://{self.bucket}.s3.{region}.amazonaws.com"

    logger.warning("No active bucket selected")
    return None
93
+
94
def get_file(self, s3_path: str):
    """Fetch an object from the active bucket.

    Args:
        s3_path: Key of the object inside the active bucket.

    Returns:
        The result of the object's ``get()`` call, or ``None`` when no bucket
        is active (warning logged) or the key does not exist (info logged).
    """
    if not self.bucket:
        logger.warning("No active bucket selected")
        return None

    target = self.s3.Object(self.bucket, s3_path)
    try:
        return target.get()
    except self.s3_client.exceptions.NoSuchKey:
        logger.info(f"{s3_path} does not exist")
    return None
103
+
104
def file_exists(self, s3_path: str) -> bool:
    """Check whether a key exists in the active bucket.

    Args:
        s3_path: Key of the object inside the active bucket.

    Returns:
        True when the HEAD request succeeds; False when no bucket is active
        (warning logged) or the service reports the key as missing.

    Raises:
        Exception: Any other error from the HEAD request, logged and re-raised.
    """
    if not self.bucket:
        logger.warning("No active bucket selected")
        return False

    try:
        self.s3_client.head_object(Bucket=self.bucket, Key=s3_path)
    except self.s3_client.exceptions.ClientError as e:
        # A HEAD response has no body, so a missing key surfaces as a generic
        # ClientError with code "404" rather than the modeled NoSuchKey.
        error_code = str(e.response.get("Error", {}).get("Code", ""))
        if error_code in ("404", "NoSuchKey", "NotFound"):
            return False
        logger.error(f"Error checking existence of {s3_path}: {e}")
        raise
    except Exception as e:
        logger.error(f"Error checking existence of {s3_path}: {e}")
        raise

    return True
55
116
 
56
def read_file_as_bytes(self, s3_path: str) -> Optional[io.BytesIO]:
    """Read an object's body into an in-memory byte stream.

    Args:
        s3_path: Key of the object inside the active bucket.

    Returns:
        A ``BytesIO`` holding the object's body, or ``None`` when
        ``get_file`` returned nothing.
    """
    fetched = self.get_file(s3_path)
    if fetched:
        payload = fetched["Body"].read()
        return io.BytesIO(payload)
    return None
58
122
 
59
def upload_file(self, local_path: str, s3_path: str, **kwargs) -> None:
    """Upload a local file to the active bucket.

    Does nothing beyond logging a warning when no bucket is active.

    Args:
        local_path: Path of the file to upload.
        s3_path: Destination key inside the active bucket.
        **kwargs: Passed through unchanged to the bucket's ``upload_file``.
    """
    if self.bucket:
        destination = self.s3.Bucket(self.bucket)
        destination.upload_file(local_path, s3_path, **kwargs)
        return

    logger.warning("No active bucket selected")