nebu 0.1.94__py3-none-any.whl → 0.1.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nebu/data.py CHANGED
@@ -1,7 +1,9 @@
+ import fnmatch
  import os
  import subprocess
+ from abc import ABC, abstractmethod
  from datetime import datetime, timedelta, timezone
- from typing import Any, Dict, List, Optional, Tuple
+ from typing import Any, Callable, Dict, List, Optional, Tuple
  from urllib.parse import urlparse

  import boto3
@@ -10,6 +12,25 @@ from botocore.exceptions import ClientError
  from nebu.logging import logger


+ # For RcloneBucket with direct subprocess calls
+ def is_rclone_installed() -> bool:
+     """Check if rclone is installed and available in the PATH."""
+     try:
+         result = subprocess.run(
+             ["rclone", "--version"],
+             stdout=subprocess.PIPE,
+             stderr=subprocess.PIPE,
+             text=True,
+             check=False,
+         )
+         return result.returncode == 0
+     except Exception:
+         return False
+
+
+ import logging  # For logging.DEBUG etc.
+
+
  def rclone_copy(
      source_dir: str,
      destination: str,
@@ -115,7 +136,46 @@ def _parse_s3_path(path: str) -> Tuple[Optional[str], Optional[str]]:
      return bucket, prefix


- class Bucket:
+ class StorageBucket(ABC):
+     """Abstract base class for bucket operations."""
+
+     def __init__(self, verbose: bool = True):
+         self.verbose = verbose
+
+     @abstractmethod
+     def sync(
+         self,
+         source: str,
+         destination: str,
+         delete: bool = False,
+         dry_run: bool = False,
+         excludes: Optional[List[str]] = None,
+     ) -> None:
+         """
+         Synchronizes files between a source and a destination.
+         """
+         pass
+
+     @abstractmethod
+     def copy(
+         self,
+         source: str,
+         destination: str,
+     ) -> None:
+         """
+         Copies files or directories between a source and a destination.
+         """
+         pass
+
+     @abstractmethod
+     def check(self, path_uri: str) -> bool:
+         """
+         Checks if an object or prefix exists.
+         """
+         pass
+
+
+ class S3Bucket(StorageBucket):
      """Handles interactions with AWS S3."""

      def __init__(
@@ -124,6 +184,7 @@ class Bucket:
          aws_access_key_id: Optional[str] = None,
          aws_secret_access_key: Optional[str] = None,
          aws_session_token: Optional[str] = None,
+         region: str = "us-east-1",
      ):
          """
          Initializes the S3 handler. Can use default credentials or provided temporary ones.
@@ -133,23 +194,25 @@ class Bucket:
              aws_access_key_id (Optional[str]): Temporary AWS Access Key ID.
              aws_secret_access_key (Optional[str]): Temporary AWS Secret Access Key.
              aws_session_token (Optional[str]): Temporary AWS Session Token (required if keys are temporary).
+             region (str): AWS region for S3 operations. Defaults to "us-east-1".
          """
+         super().__init__(verbose=verbose)
          if aws_access_key_id and aws_secret_access_key:
-             if verbose:
+             if self.verbose:
                  logger.info(
                      "Initializing S3 client with provided temporary credentials."
                  )
              self.client = boto3.client(
                  "s3",
+                 region_name=region,
                  aws_access_key_id=aws_access_key_id,
                  aws_secret_access_key=aws_secret_access_key,
                  aws_session_token=aws_session_token,  # Pass session token if provided
              )
          else:
-             if verbose:
+             if self.verbose:
                  logger.info("Initializing S3 client with default credentials.")
-             self.client = boto3.client("s3")
-             self.verbose = verbose
+             self.client = boto3.client("s3", region_name=region)

      def _parse_path(self, path: str) -> Tuple[Optional[str], Optional[str]]:
          """Class method: Parses an S3 path (s3://bucket/prefix) into bucket and prefix."""
@@ -224,7 +287,9 @@ class Bucket:
              logger.info(f"Found {len(objects)} objects in S3.")
          return objects

-     def _list_local(self, local_dir: str) -> Dict[str, Dict[str, Any]]:
+     def _list_local(
+         self, local_dir: str, excludes: Optional[List[str]] = None
+     ) -> Dict[str, Dict[str, Any]]:
          """Class method: Lists files in a local directory."""
          files: Dict[str, Dict[str, Any]] = {}
          if not os.path.exists(local_dir):
@@ -253,8 +318,41 @@ class Bucket:
              return files
          if self.verbose:
              logger.info(f"Scanning local directory: {local_dir}...")
-         for root, _, file_list in os.walk(local_dir):
+         for root, dirs, file_list in os.walk(local_dir):
+             # Exclude __pycache__ directories
+             if "__pycache__" in dirs:
+                 dirs.remove("__pycache__")
+
+             # Apply custom excludes for directories
+             if excludes:
+                 dirs[:] = [
+                     d
+                     for d in dirs
+                     if not any(fnmatch.fnmatch(d, pattern) for pattern in excludes)
+                 ]
+
              for file_name in file_list:
+                 # Exclude .pyc files
+                 if file_name.endswith(".pyc"):
+                     continue
+
+                 # Apply custom excludes for files
+                 if excludes and any(
+                     fnmatch.fnmatch(file_name, pattern) for pattern in excludes
+                 ):
+                     continue
+
+                 # Also check full relative path for excludes
+                 # This allows patterns like 'subdir/*' or '*.log' to work across the tree
+                 potential_relative_path = os.path.relpath(
+                     os.path.join(root, file_name), local_dir
+                 ).replace("\\", "/")
+                 if excludes and any(
+                     fnmatch.fnmatch(potential_relative_path, pattern)
+                     for pattern in excludes
+                 ):
+                     continue
+
                  local_path = os.path.join(root, file_name)
                  try:
                      relative_path = os.path.relpath(local_path, local_dir).replace(
@@ -286,6 +384,7 @@ class Bucket:
          destination: str,
          delete: bool = False,
          dry_run: bool = False,
+         excludes: Optional[List[str]] = None,
      ) -> None:
          """
          Synchronizes files between a source and a destination (local or S3).
@@ -296,6 +395,7 @@ class Bucket:
              destination (str): The destination path (local directory or s3://...).
              delete (bool): If True, delete extraneous files from the destination.
              dry_run (bool): If True, print actions without performing them.
+             excludes (Optional[List[str]]): List of patterns to exclude from sync.
          """
          mtime_tolerance = timedelta(seconds=2)
          src_bucket, src_prefix = self._parse_path(source)
@@ -307,7 +407,7 @@ class Bucket:

          if src_bucket is None and dest_bucket is not None:
              sync_direction = "upload"
-             source_items = self._list_local(source)
+             source_items = self._list_local(source, excludes=excludes)
              dest_items = self._list_objects(dest_bucket, dest_prefix)
              if not source_items and not os.path.exists(source):
                  logger.warning(
@@ -326,7 +426,7 @@ class Bucket:
                    f"Error: Local destination '{destination}' exists but is not a directory."
                )
                return
-             dest_items = self._list_local(destination)
+             dest_items = self._list_local(destination, excludes=excludes)
              if not dry_run:
                  os.makedirs(destination, exist_ok=True)
              elif not os.path.isdir(destination) and self.verbose:
@@ -843,23 +943,23 @@ class Bucket:
          else:
              logger.error("Error: Unknown copy operation type.")

-     def check(self, s3_uri: str) -> bool:
+     def check(self, path_uri: str) -> bool:
          """
          Check if an object or prefix exists in an S3 bucket using an S3 URI.

          Args:
-             s3_uri (str): The S3 URI (e.g., 's3://my-bucket/my-key' or 's3://my-bucket/my-prefix/').
+             path_uri (str): The S3 URI (e.g., 's3://my-bucket/my-key' or 's3://my-bucket/my-prefix/').
                  Use a trailing '/' to check for a prefix/directory.

          Returns:
              bool: True if the object or prefix exists, False otherwise.
          """
          # Use the class client and parse method
-         bucket_name, s3_key = self._parse_path(s3_uri)
+         bucket_name, s3_key = self._parse_path(path_uri)

          if bucket_name is None or s3_key is None:
              # _parse_path returns None, None if scheme is not 's3'
-             logger.error(f"Error: Invalid S3 URI format: {s3_uri}")
+             logger.error(f"Error: Invalid S3 URI format: {path_uri}")
              return False

          is_prefix = s3_key.endswith("/")
@@ -886,12 +986,508 @@ class Bucket:
              elif e.response["Error"]["Code"] == "NoSuchBucket":
                  if self.verbose:
                      logger.error(
-                         f"Error: Bucket '{bucket_name}' not found (from URI: {s3_uri})."
+                         f"Error: Bucket '{bucket_name}' not found (from URI: {path_uri})."
                      )
                  return False
              # Handle other potential errors like AccessDenied differently if needed
-             logger.error(f"Error checking {s3_uri}: {e}")
+             logger.error(f"Error checking {path_uri}: {e}")
              return False
          except Exception as e:
-             logger.error(f"An unexpected error occurred checking {s3_uri}: {e}")
+             logger.error(f"An unexpected error occurred checking {path_uri}: {e}")
              return False
+
+
+ # Standalone helper for RcloneBucket._is_rclone_path
+ def _is_rclone_path_standalone(path: str) -> bool:
+     """
+     Standalone helper: Determines if a path string is an rclone path (e.g., "remote:path",
+     ":backend:path", or "s3://bucket/path").
+     """
+     parsed_url = urlparse(path)
+
+     # Explicitly allow s3:// paths as rclone paths
+     if parsed_url.scheme == "s3" and parsed_url.netloc:
+         return True
+
+     # Check for Windows drive letter paths (e.g., C:\path or C:) - these are local.
+     if os.name == "nt":
+         if len(path) >= 2 and path[0].isalpha() and path[1] == ":":
+             if len(path) == 2:  # e.g., "C:"
+                 return False  # Local current directory on drive
+             if path[2] in ["\\", "/"]:  # e.g., "C:\foo" or "C:/foo"
+                 return False  # Local absolute path
+
+     # Handle file:// scheme as local
+     if parsed_url.scheme == "file":
+         return False
+
+     # If it has another scheme (e.g., http, ftp) and a network location,
+     # it's a URL, not typically an rclone path for copy/sync operations in this context.
+     if parsed_url.scheme and parsed_url.scheme != "s3" and parsed_url.netloc:
+         return False
+
+     # If the path contains a colon, it's likely an rclone remote path
+     # (e.g., "myremote:path" or ":s3:path" or "s3:path" if scheme not picked up by urlparse for s3:).
+     # This check comes after specific local/URL patterns are ruled out.
+     if ":" in path:
+         return True
+
+     # Default to local if none of the above (e.g., "/abs/path", "rel/path")
+     return False
+
+
+ class RcloneBucket(StorageBucket):
+     """Handles interactions with storage using the rclone-python library."""
+
+     def __init__(
+         self,
+         verbose: bool = True,
+         aws_access_key_id: Optional[str] = None,
+         aws_secret_access_key: Optional[str] = None,
+         aws_session_token: Optional[str] = None,
+         region: str = "us-east-1",
+     ):
+         """
+         Initializes the RcloneBucket handler.
+
+         Args:
+             verbose (bool): If True, prints status messages and sets rclone-python log level.
+             aws_access_key_id (Optional[str]): AWS Access Key ID for rclone S3 remotes.
+             aws_secret_access_key (Optional[str]): AWS Secret Access Key for rclone S3 remotes.
+             aws_session_token (Optional[str]): AWS Session Token for rclone S3 remotes.
+             region (str): AWS region for S3 operations. Defaults to "us-east-1".
+         """
+         super().__init__(verbose=verbose)
+         self.aws_access_key_id = aws_access_key_id
+         self.aws_secret_access_key = aws_secret_access_key
+         self.aws_session_token = aws_session_token
+         self.region = region
+
+         if not is_rclone_installed():
+             logger.error(
+                 "rclone command not found. Please ensure rclone is installed and configured correctly (https://rclone.org/install/)."
+             )
+             # Consider raising an exception if rclone CLI is essential for rclone-python to function.
+             return
+
+         if self.verbose:
+             logger.info("Initialized RcloneBucket with rclone-python.")
+             logger.info("rclone-python log level set to DEBUG.")
+         else:
+             logger.info("rclone-python log level set to WARNING.")
+
+         # Store a mtime tolerance, similar to S3 Bucket
+         self.mtime_tolerance = timedelta(seconds=2)
+
+     def _is_rclone_path(self, path: str) -> bool:
+         """
+         Determines if a path string is an rclone path (e.g., "remote:path").
+         """
+         return _is_rclone_path_standalone(path)
+
+     def _execute_with_aws_env(
+         self, func: Callable[..., Any], *args: Any, **kwargs: Any
+     ) -> Any:
+         """Helper to execute rclone functions with temporary AWS env vars if provided."""
+         old_env: Dict[str, Optional[str]] = {}
+         aws_vars = {
+             "AWS_ACCESS_KEY_ID": self.aws_access_key_id,
+             "AWS_SECRET_ACCESS_KEY": self.aws_secret_access_key,
+             "AWS_SESSION_TOKEN": self.aws_session_token,
+             # Add rclone-specific S3 configuration
+             "RCLONE_CONFIG_S3_TYPE": "s3",
+             "RCLONE_CONFIG_S3_PROVIDER": "AWS",
+             "RCLONE_CONFIG_S3_ENV_AUTH": "true",
+             "RCLONE_CONFIG_S3_REGION": self.region,
+         }
+
+         try:
+             for key, value in aws_vars.items():
+                 if value is not None:
+                     old_env[key] = os.environ.get(key)
+                     os.environ[key] = value
+                 elif key in os.environ:  # Value is None but was set in env
+                     old_env[key] = os.environ.get(key)
+                     del os.environ[key]
+
+             # Ensure stderr is captured by setting show_progress to False
+             if "show_progress" not in kwargs:
+                 kwargs["show_progress"] = False
+
+             # Set DEBUG log level for rclone to get more verbose output
+             old_log_level = logging.getLogger("rclone").level
+             logging.getLogger("rclone").setLevel(logging.DEBUG)
+
+             # Convert s3:// URLs if needed
+             modified_args = list(args)
+             for i, arg in enumerate(modified_args):
+                 if isinstance(arg, str) and arg.startswith("s3://"):
+                     # Convert s3://bucket/path to s3:bucket/path
+                     arg_parts = arg[5:].split("/", 1)
+                     bucket_name = arg_parts[0]
+                     path = arg_parts[1] if len(arg_parts) > 1 else ""
+                     modified_args[i] = f"s3:{bucket_name}/{path}"
+
+             try:
+                 return func(*modified_args, **kwargs)
+             finally:
+                 # Restore the original log level
+                 logging.getLogger("rclone").setLevel(old_log_level)
+         finally:
+             for key, value in old_env.items():
+                 if value is None:
+                     if key in os.environ:  # It was set by us, now remove
+                         del os.environ[key]
+                 else:
+                     os.environ[key] = value
+
+             # Clean up any env vars we set but weren't in old_env
+             for key in aws_vars.keys():
+                 if key not in old_env and key in os.environ:
+                     del os.environ[key]
+
+     def sync(
+         self,
+         source: str,
+         destination: str,
+         delete: bool = False,
+         dry_run: bool = False,
+         excludes: Optional[List[str]] = None,
+     ) -> None:
+         if not is_rclone_installed():
+             logger.error("Cannot sync: rclone command not found.")
+             return
+
+         if self.verbose:
+             logger.info(f"Rclone sync: {source} -> {destination}")
+             if delete:
+                 logger.info("Deletion enabled.")
+             if dry_run:
+                 logger.info("Dry run mode.")
+             if excludes:
+                 logger.info(f"Excludes: {excludes}")
+
+         rc_args = [
+             "--modify-window=2s",
+             "--log-level=DEBUG" if self.verbose else "--log-level=INFO",
+             "--log-format=date,time,level,message",
+             "--progress",  # Add progress display
+         ]
+         if dry_run:
+             rc_args.append("--dry-run")
+         if delete:
+             rc_args.append("--delete-after")
+
+         if excludes:
+             for ex_pattern in excludes:
+                 rc_args.append(f"--exclude={ex_pattern}")
+
+         # Set environment variables for AWS credentials if they exist
+         env = os.environ.copy()
+         if self.aws_access_key_id:
+             env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+         if self.aws_secret_access_key:
+             env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+         if self.aws_session_token:
+             env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+         # Set rclone-specific environment variables
+         env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+         env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+         env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+         env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+         # Convert s3:// URLs if needed
+         rclone_src = source
+         rclone_dest = destination
+
+         # If source or destination uses s3:// URL format, convert it for rclone CLI
+         if source.startswith("s3://"):
+             # Convert s3://bucket/path to s3:bucket/path
+             source_parts = source[5:].split("/", 1)
+             bucket_name = source_parts[0]
+             path = source_parts[1] if len(source_parts) > 1 else ""
+             rclone_src = f"s3:{bucket_name}/{path}"
+
+         if destination.startswith("s3://"):
+             # Convert s3://bucket/path to s3:bucket/path
+             destination_parts = destination[5:].split("/", 1)
+             bucket_name = destination_parts[0]
+             path = destination_parts[1] if len(destination_parts) > 1 else ""
+             rclone_dest = f"s3:{bucket_name}/{path}"
+
+         # Build the rclone command
+         cmd = ["rclone", "sync", rclone_src, rclone_dest] + rc_args
+
+         if self.verbose:
+             logger.info(f"Running command: {' '.join(cmd)}")
+
+         try:
+             # Run the command and capture output
+             process = subprocess.run(
+                 cmd,
+                 env=env,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             # Always log the stdout and stderr for verbose mode
+             if self.verbose:
+                 if process.stdout:
+                     logger.info(f"Rclone stdout:\n{process.stdout}")
+                 if process.stderr:
+                     logger.info(f"Rclone stderr:\n{process.stderr}")
+
+             if process.returncode == 0:
+                 logger.info(
+                     f"Rclone sync completed successfully from {source} to {destination}."
+                 )
+                 if dry_run:
+                     logger.info(
+                         "Dry run summary (see rclone output above for details)."
+                     )
+             else:
+                 logger.error(f"Rclone sync failed with exit code: {process.returncode}")
+
+                 if (
+                     not self.verbose
+                 ):  # Only log these again if not already logged in verbose mode
+                     if process.stdout:
+                         logger.error(f"Rclone stdout:\n{process.stdout}")
+                     if process.stderr:
+                         logger.error(f"Rclone stderr:\n{process.stderr}")
+         except Exception as e:
+             logger.error(f"Error running rclone sync command: {e}")
+
+     def copy(
+         self,
+         source: str,
+         destination: str,
+     ) -> None:
+         if not is_rclone_installed():
+             logger.error("Cannot copy: rclone command not found.")
+             return
+
+         # Determine if source/destination are rclone paths or local
+         is_src_rclone = self._is_rclone_path(source)
+         is_dest_rclone = self._is_rclone_path(destination)
+
+         if not is_src_rclone and not is_dest_rclone:
+             logger.error(
+                 "Error: Both source and destination are local. Use 'shutil.copy' or 'shutil.copytree'."
+             )
+             return
+
+         if self.verbose:
+             logger.info(f"Rclone copy: {source} -> {destination}")
+
+         rc_args = [
+             "--log-level=DEBUG" if self.verbose else "--log-level=INFO",
+             "--log-format=date,time,level,message",
+             "--progress",  # Add progress display
+         ]
+
+         # Set environment variables for AWS credentials if they exist
+         env = os.environ.copy()
+         if self.aws_access_key_id:
+             env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+         if self.aws_secret_access_key:
+             env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+         if self.aws_session_token:
+             env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+         # Set rclone-specific environment variables
+         env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+         env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+         env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+         env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+         # Convert s3:// URLs if needed
+         rclone_src = source
+         rclone_dest = destination
+
+         # If source or destination uses s3:// URL format, convert it for rclone CLI
+         if source.startswith("s3://"):
+             # Convert s3://bucket/path to s3:bucket/path
+             source_parts = source[5:].split("/", 1)
+             bucket_name = source_parts[0]
+             path = source_parts[1] if len(source_parts) > 1 else ""
+             rclone_src = f"s3:{bucket_name}/{path}"
+
+         if destination.startswith("s3://"):
+             # Convert s3://bucket/path to s3:bucket/path
+             destination_parts = destination[5:].split("/", 1)
+             bucket_name = destination_parts[0]
+             path = destination_parts[1] if len(destination_parts) > 1 else ""
+             rclone_dest = f"s3:{bucket_name}/{path}"
+
+         # Build the rclone command
+         cmd = ["rclone", "copy", rclone_src, rclone_dest] + rc_args
+
+         if self.verbose:
+             logger.info(f"Running command: {' '.join(cmd)}")
+
+         try:
+             # Run the command and capture output
+             process = subprocess.run(
+                 cmd,
+                 env=env,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             # Always log the stdout and stderr for verbose mode
+             if self.verbose:
+                 if process.stdout:
+                     logger.info(f"Rclone stdout:\n{process.stdout}")
+                 if process.stderr:
+                     logger.info(f"Rclone stderr:\n{process.stderr}")
+
+             if process.returncode == 0:
+                 logger.info(
+                     f"Rclone copy completed successfully from {source} to {destination}."
+                 )
+             else:
+                 logger.error(f"Rclone copy failed with exit code: {process.returncode}")
+
+                 if (
+                     not self.verbose
+                 ):  # Only log these again if not already logged in verbose mode
+                     if process.stdout:
+                         logger.error(f"Rclone stdout:\n{process.stdout}")
+                     if process.stderr:
+                         logger.error(f"Rclone stderr:\n{process.stderr}")
+         except Exception as e:
+             logger.error(f"Error running rclone copy command: {e}")
+
+     def check(self, path_uri: str) -> bool:
+         if not is_rclone_installed():
+             logger.error("Cannot check path: rclone command not found.")
+             return False
+
+         if self.verbose:
+             logger.info(f"Checking existence of path: {path_uri}")
+
+         # Convert s3:// URL if needed
+         rclone_path = path_uri
+         if path_uri.startswith("s3://"):
+             # Convert s3://bucket/path to s3:bucket/path
+             path_parts = path_uri[5:].split("/", 1)
+             bucket_name = path_parts[0]
+             path = path_parts[1] if len(path_parts) > 1 else ""
+             rclone_path = f"s3:{bucket_name}/{path}"
+
+         # Set environment variables for AWS credentials if they exist
+         env = os.environ.copy()
+         if self.aws_access_key_id:
+             env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+         if self.aws_secret_access_key:
+             env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+         if self.aws_session_token:
+             env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+         # Set rclone-specific environment variables
+         env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+         env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+         env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+         env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+         # Build the rclone command (size is a good way to check existence)
+         cmd = ["rclone", "size", rclone_path, "--json"]
+
+         try:
+             # Run the command and capture output
+             process = subprocess.run(
+                 cmd,
+                 env=env,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             if process.returncode == 0:
+                 if self.verbose:
+                     logger.debug(f"Path {path_uri} exists and is accessible.")
+                 return True
+             else:
+                 # Check if this is a "not found" error vs. another type of error
+                 err_lower = process.stderr.lower() if process.stderr else ""
+                 not_found_indicators = [
+                     "directory not found",
+                     "object not found",
+                     "path not found",
+                     "no such file or directory",
+                     "source or destination not found",
+                     "error: couldn't find file",
+                     "failed to size: can't find object",
+                     "can't find source directory",
+                     "source not found",
+                 ]
+                 is_not_found = any(
+                     indicator in err_lower for indicator in not_found_indicators
+                 )
+
+                 if is_not_found:
+                     if self.verbose:
+                         logger.debug(f"Path {path_uri} not found.")
+                     return False
+                 else:
+                     # Other type of error (permissions, network, etc.)
+                     logger.warning(f"Error checking path {path_uri}: {process.stderr}")
+                     return False
+         except Exception as e:
+             logger.error(f"Error running rclone check command: {e}")
+             return False
+
+
+ # Factory function Bucket()
+ def Bucket(
+     verbose: bool = True,
+     aws_access_key_id: Optional[str] = None,
+     aws_secret_access_key: Optional[str] = None,
+     aws_session_token: Optional[str] = None,
+     region: str = "us-east-1",
+ ) -> StorageBucket:
+     """
+     Factory function to create a bucket instance.
+     Returns RcloneBucket if rclone is available and installed, otherwise S3Bucket.
+
+     Args:
+         verbose (bool): If True, prints status messages. Defaults to True.
+         aws_access_key_id (Optional[str]): Temporary AWS Access Key ID (for S3Bucket).
+         aws_secret_access_key (Optional[str]): Temporary AWS Secret Access Key (for S3Bucket).
+         aws_session_token (Optional[str]): Temporary AWS Session Token (for S3Bucket).
+         region (str): AWS region for S3 operations. Defaults to "us-east-1".
+
+     Returns:
+         StorageBucket: An instance of RcloneBucket or S3Bucket.
+     """
+     if is_rclone_installed():
+         print("rclone is installed and available")
+         if verbose:
+             logger.info("rclone is installed and available. Using RcloneBucket.")
+         return RcloneBucket(
+             verbose=verbose,
+             aws_access_key_id=aws_access_key_id,
+             aws_secret_access_key=aws_secret_access_key,
+             aws_session_token=aws_session_token,
+             region=region,
+         )
+     else:
+         print("rclone not installed or not available")
+         if verbose:
+             logger.info(
+                 "rclone not installed or not available. Falling back to S3Bucket."
+             )
+         return S3Bucket(
+             verbose=verbose,
+             aws_access_key_id=aws_access_key_id,
+             aws_secret_access_key=aws_secret_access_key,
+             aws_session_token=aws_session_token,
+             region=region,
+         )
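
The new Bucket() factory above returns an RcloneBucket when the rclone CLI is on the PATH and falls back to the boto3-based S3Bucket otherwise; both expose the same StorageBucket interface (sync, copy, check). A minimal usage sketch based only on the interface shown in this diff (the bucket name and paths are hypothetical):

    from nebu.data import Bucket

    # Picks RcloneBucket if `rclone` is installed, otherwise S3Bucket.
    bucket = Bucket(verbose=True, region="us-east-1")

    # Upload a local directory to S3, skipping logs and checkpoint files.
    bucket.sync(
        source="./outputs",
        destination="s3://my-bucket/runs/outputs",
        delete=False,
        dry_run=False,
        excludes=["*.log", "checkpoints/*"],
    )

    # check() returns True if the object or prefix exists; copy() transfers files.
    if bucket.check("s3://my-bucket/runs/outputs/"):
        bucket.copy("s3://my-bucket/runs/outputs", "./outputs-copy")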
nebu/processors/default.py CHANGED
@@ -15,4 +15,4 @@ DEFAULT_SCALE = V1Scale(
  )

  DEFAULT_MIN_REPLICAS = 1
- DEFAULT_MAX_REPLICAS = 10
+ DEFAULT_MAX_REPLICAS = 3
nebu/processors/processor.py CHANGED
@@ -214,11 +214,14 @@ class Processor(Generic[InputType, OutputType]):
          wait: bool = False,
          logs: bool = False,
          api_key: Optional[str] = None,
+         user_key: Optional[str] = None,
      ) -> OutputType | Dict[str, Any] | None:
          """
          Allows the Processor instance to be called like a function, sending data.
          """
-         return self.send(data=data, wait=wait, logs=logs, api_key=api_key)
+         return self.send(
+             data=data, wait=wait, logs=logs, api_key=api_key, user_key=user_key
+         )

      def send(
          self,
@@ -226,6 +229,7 @@ class Processor(Generic[InputType, OutputType]):
          wait: bool = False,
          logs: bool = False,
          api_key: Optional[str] = None,
+         user_key: Optional[str] = None,
      ) -> OutputType | Dict[str, Any] | None:
          """
          Send data to the processor and optionally stream logs in the background.
@@ -250,6 +254,7 @@ class Processor(Generic[InputType, OutputType]):
          stream_data = V1StreamData(
              content=data,
              wait=wait,
+             user_key=user_key,
          )
          response = requests.post(
              messages_url,
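
These changes thread an optional user_key argument through Processor.__call__ and Processor.send and into the V1StreamData payload. A minimal usage sketch, assuming an existing Processor instance (the variable name, payload, and key value are hypothetical):

    # `proc` is a Processor[InputType, OutputType] created elsewhere.
    result = proc(
        data={"prompt": "hello"},
        wait=True,
        user_key="user-key-123",  # forwarded to send() and set on V1StreamData
    )

    # Equivalent explicit call:
    result = proc.send(
        data={"prompt": "hello"}, wait=True, user_key="user-key-123"
    )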
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nebu
- Version: 0.1.94
+ Version: 0.1.97
  Summary: A globally distributed container runtime
  Requires-Python: >=3.10.14
  Description-Content-Type: text/markdown
@@ -13,6 +13,7 @@ Requires-Dist: pillow>=10.4.0
  Requires-Dist: pydantic>=2.10.6
  Requires-Dist: pysocks>=1.7.1
  Requires-Dist: pyyaml>=6.0.2
+ Requires-Dist: rclone-python>=0.1.21
  Requires-Dist: redis>=5.0
  Requires-Dist: requests>=2.32.3
  Dynamic: license-file
@@ -2,7 +2,7 @@ nebu/__init__.py,sha256=xNtWiN29MJZK_WBEUP-9hDmlkfLxoASVI-f4tNTXO58,454
  nebu/auth.py,sha256=N_v6SPFD9HU_UoRDTaouH03g2Hmo9C-xxqInE1FweXE,1471
  nebu/cache.py,sha256=JqRb4FdZrRrO4ePlwvsKdxRC8dNEFMxfTWag0aJz8Gw,4893
  nebu/config.py,sha256=C5Jt9Bd0i0HrgzBSVNJ-Ml3KwX_gaYbYYZEtNL2gvJg,7031
- nebu/data.py,sha256=randOsFQxOQdw7Yhg7mb6PvH5Efjfc-KpvqdBdpWTEE,40923
+ nebu/data.py,sha256=4fEvO_xNCeTNW4RFr1u6iZ7j92S-5zWwxmFdXopxae8,63502
  nebu/errors.py,sha256=bBnK5YQ6qZg4OMY81AN2k03ppefg89FUwF_SHEMlqCA,170
  nebu/logging.py,sha256=VzpjCEoXm3c4i0sKJL5GTsPIhTQ6Y4BPUTzPmwhve7o,950
  nebu/meta.py,sha256=CzFHMND9seuewzq9zNNx9WTr6JvrCBExe7BLqDSr7lM,745
@@ -16,13 +16,13 @@ nebu/namespaces/namespace.py,sha256=oeZyGqsIGIrppyjif1ZONsdTmqRgd9oSLFE1BChXTTE,
  nebu/processors/consumer.py,sha256=j6iKF_wc8RUNKrFqjB5keUX-Gj9hGZUbmAjEyTm-Oj0,55367
  nebu/processors/consumer_process_worker.py,sha256=h--eNFKaLbUayxn88mB8oGGdrU2liE1dnwm_TPlewX8,36960
  nebu/processors/decorate.py,sha256=pGA0m8cxTnEu7ullKTaAgjf_FFP5RhRqBnQnsdSwtJs,55476
- nebu/processors/default.py,sha256=W4slJenG59rvyTlJ7gRp58eFfXcNOTT2Hfi6zzJAobI,365
+ nebu/processors/default.py,sha256=cy4ETMdbdRGkrvbYec1o60h7mGDlGN5JsuUph0ENtDU,364
  nebu/processors/models.py,sha256=g4B1t6Rgoy-NUEHBLeQc0EENzHXLDlWSio8Muv7cTDU,4093
- nebu/processors/processor.py,sha256=dCQ9si_P03Lw_dLQESE_TUD6ZGGTYZce28GmiyWm1Hg,16525
+ nebu/processors/processor.py,sha256=xxduD632g_2lBhAF2tZ3g0aoZJFgbfNwrDgtOAio0Is,16677
  nebu/redis/models.py,sha256=coPovAcVXnOU1Xh_fpJL4PO3QctgK9nBe5QYoqEcnxg,1230
  nebu/services/service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nebu-0.1.94.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- nebu-0.1.94.dist-info/METADATA,sha256=cWPAvvbux2Uv8x40BYTFcS2FV2mack_5fsJ2SBQrLC8,1760
- nebu-0.1.94.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- nebu-0.1.94.dist-info/top_level.txt,sha256=uLIbEKJeGSHWOAJN5S0i5XBGwybALlF9bYoB1UhdEgQ,5
- nebu-0.1.94.dist-info/RECORD,,
+ nebu-0.1.97.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ nebu-0.1.97.dist-info/METADATA,sha256=aHHT7MjCgIZVBjWxTIK5bDp3aCaIsy0wMbUTDKmzolI,1797
+ nebu-0.1.97.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+ nebu-0.1.97.dist-info/top_level.txt,sha256=uLIbEKJeGSHWOAJN5S0i5XBGwybALlF9bYoB1UhdEgQ,5
+ nebu-0.1.97.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.1.0)
+ Generator: setuptools (80.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any