nebu 0.1.94__py3-none-any.whl → 0.1.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nebu/data.py +613 -17
- nebu/processors/default.py +1 -1
- nebu/processors/processor.py +6 -1
- {nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/METADATA +2 -1
- {nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/RECORD +8 -8
- {nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/WHEEL +1 -1
- {nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/licenses/LICENSE +0 -0
- {nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/top_level.txt +0 -0
nebu/data.py
CHANGED
@@ -1,7 +1,9 @@
+import fnmatch
 import os
 import subprocess
+from abc import ABC, abstractmethod
 from datetime import datetime, timedelta, timezone
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
 from urllib.parse import urlparse

 import boto3
@@ -10,6 +12,25 @@ from botocore.exceptions import ClientError
 from nebu.logging import logger


+# For RcloneBucket with direct subprocess calls
+def is_rclone_installed() -> bool:
+    """Check if rclone is installed and available in the PATH."""
+    try:
+        result = subprocess.run(
+            ["rclone", "--version"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=False,
+        )
+        return result.returncode == 0
+    except Exception:
+        return False
+
+
+import logging  # For logging.DEBUG etc.
+
+
 def rclone_copy(
     source_dir: str,
     destination: str,
@@ -115,7 +136,46 @@ def _parse_s3_path(path: str) -> Tuple[Optional[str], Optional[str]]:
     return bucket, prefix


-class Bucket:
+class StorageBucket(ABC):
+    """Abstract base class for bucket operations."""
+
+    def __init__(self, verbose: bool = True):
+        self.verbose = verbose
+
+    @abstractmethod
+    def sync(
+        self,
+        source: str,
+        destination: str,
+        delete: bool = False,
+        dry_run: bool = False,
+        excludes: Optional[List[str]] = None,
+    ) -> None:
+        """
+        Synchronizes files between a source and a destination.
+        """
+        pass
+
+    @abstractmethod
+    def copy(
+        self,
+        source: str,
+        destination: str,
+    ) -> None:
+        """
+        Copies files or directories between a source and a destination.
+        """
+        pass
+
+    @abstractmethod
+    def check(self, path_uri: str) -> bool:
+        """
+        Checks if an object or prefix exists.
+        """
+        pass
+
+
+class S3Bucket(StorageBucket):
     """Handles interactions with AWS S3."""

     def __init__(
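The abstract interface above is what downstream code can now type against; both backends added in this release implement it. A minimal sketch (not part of the package) of consuming it polymorphically, where the helper name backup_dir and the pattern list are made up for illustration:

from nebu.data import StorageBucket

def backup_dir(bucket: StorageBucket, local_dir: str, remote_uri: str) -> None:
    # Works against any StorageBucket implementation (S3Bucket, or RcloneBucket below).
    if bucket.check(remote_uri):
        print(f"{remote_uri} already exists; syncing changes only.")
    bucket.sync(local_dir, remote_uri, delete=False, dry_run=False, excludes=["*.pyc"])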
@@ -124,6 +184,7 @@ class Bucket:
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
         aws_session_token: Optional[str] = None,
+        region: str = "us-east-1",
     ):
         """
         Initializes the S3 handler. Can use default credentials or provided temporary ones.
@@ -133,23 +194,25 @@ class Bucket:
             aws_access_key_id (Optional[str]): Temporary AWS Access Key ID.
             aws_secret_access_key (Optional[str]): Temporary AWS Secret Access Key.
             aws_session_token (Optional[str]): Temporary AWS Session Token (required if keys are temporary).
+            region (str): AWS region for S3 operations. Defaults to "us-east-1".
         """
+        super().__init__(verbose=verbose)
         if aws_access_key_id and aws_secret_access_key:
-            if verbose:
+            if self.verbose:
                 logger.info(
                     "Initializing S3 client with provided temporary credentials."
                 )
             self.client = boto3.client(
                 "s3",
+                region_name=region,
                 aws_access_key_id=aws_access_key_id,
                 aws_secret_access_key=aws_secret_access_key,
                 aws_session_token=aws_session_token,  # Pass session token if provided
             )
         else:
-            if verbose:
+            if self.verbose:
                 logger.info("Initializing S3 client with default credentials.")
-            self.client = boto3.client("s3")
-        self.verbose = verbose
+            self.client = boto3.client("s3", region_name=region)

     def _parse_path(self, path: str) -> Tuple[Optional[str], Optional[str]]:
         """Class method: Parses an S3 path (s3://bucket/prefix) into bucket and prefix."""
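As a rough illustration (values are placeholders, not taken from the package), the new region argument is passed through to boto3 on either credential path:

from nebu.data import S3Bucket

# Default credentials, explicit region (placeholder value).
bucket = S3Bucket(verbose=True, region="us-west-2")

# Temporary credentials; the session token is forwarded alongside the region.
scoped = S3Bucket(
    verbose=False,
    aws_access_key_id="AKIA_EXAMPLE",        # placeholder
    aws_secret_access_key="SECRET_EXAMPLE",  # placeholder
    aws_session_token="TOKEN_EXAMPLE",       # placeholder
    region="eu-central-1",
)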
@@ -224,7 +287,9 @@ class Bucket:
             logger.info(f"Found {len(objects)} objects in S3.")
         return objects

-    def _list_local(
+    def _list_local(
+        self, local_dir: str, excludes: Optional[List[str]] = None
+    ) -> Dict[str, Dict[str, Any]]:
         """Class method: Lists files in a local directory."""
         files: Dict[str, Dict[str, Any]] = {}
         if not os.path.exists(local_dir):
@@ -253,8 +318,41 @@ class Bucket:
             return files
         if self.verbose:
             logger.info(f"Scanning local directory: {local_dir}...")
-        for root,
+        for root, dirs, file_list in os.walk(local_dir):
+            # Exclude __pycache__ directories
+            if "__pycache__" in dirs:
+                dirs.remove("__pycache__")
+
+            # Apply custom excludes for directories
+            if excludes:
+                dirs[:] = [
+                    d
+                    for d in dirs
+                    if not any(fnmatch.fnmatch(d, pattern) for pattern in excludes)
+                ]
+
             for file_name in file_list:
+                # Exclude .pyc files
+                if file_name.endswith(".pyc"):
+                    continue
+
+                # Apply custom excludes for files
+                if excludes and any(
+                    fnmatch.fnmatch(file_name, pattern) for pattern in excludes
+                ):
+                    continue
+
+                # Also check full relative path for excludes
+                # This allows patterns like 'subdir/*' or '*.log' to work across the tree
+                potential_relative_path = os.path.relpath(
+                    os.path.join(root, file_name), local_dir
+                ).replace("\\", "/")
+                if excludes and any(
+                    fnmatch.fnmatch(potential_relative_path, pattern)
+                    for pattern in excludes
+                ):
+                    continue
+
                 local_path = os.path.join(root, file_name)
                 try:
                     relative_path = os.path.relpath(local_path, local_dir).replace(
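For reference, the exclude matching added above runs fnmatch against directory names, bare file names, and the full relative path, so a single pattern can target any of the three. A small standalone illustration (patterns chosen arbitrarily, not from the package):

import fnmatch

excludes = ["*.log", "node_modules", "tmp/*"]

# Directory names are matched as bare names, so "node_modules" gets pruned from os.walk.
print(any(fnmatch.fnmatch("node_modules", p) for p in excludes))     # True

# "tmp/scratch.txt" is skipped via the relative-path check even though
# its bare file name matches none of the patterns.
print(any(fnmatch.fnmatch("scratch.txt", p) for p in excludes))      # False
print(any(fnmatch.fnmatch("tmp/scratch.txt", p) for p in excludes))  # True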
@@ -286,6 +384,7 @@ class Bucket:
         destination: str,
         delete: bool = False,
         dry_run: bool = False,
+        excludes: Optional[List[str]] = None,
     ) -> None:
         """
         Synchronizes files between a source and a destination (local or S3).
@@ -296,6 +395,7 @@ class Bucket:
             destination (str): The destination path (local directory or s3://...).
             delete (bool): If True, delete extraneous files from the destination.
             dry_run (bool): If True, print actions without performing them.
+            excludes (Optional[List[str]]): List of patterns to exclude from sync.
         """
         mtime_tolerance = timedelta(seconds=2)
         src_bucket, src_prefix = self._parse_path(source)
@@ -307,7 +407,7 @@ class Bucket:

         if src_bucket is None and dest_bucket is not None:
             sync_direction = "upload"
-            source_items = self._list_local(source)
+            source_items = self._list_local(source, excludes=excludes)
             dest_items = self._list_objects(dest_bucket, dest_prefix)
             if not source_items and not os.path.exists(source):
                 logger.warning(
@@ -326,7 +426,7 @@ class Bucket:
                     f"Error: Local destination '{destination}' exists but is not a directory."
                 )
                 return
-            dest_items = self._list_local(destination)
+            dest_items = self._list_local(destination, excludes=excludes)
             if not dry_run:
                 os.makedirs(destination, exist_ok=True)
             elif not os.path.isdir(destination) and self.verbose:
@@ -843,23 +943,23 @@ class Bucket:
         else:
             logger.error("Error: Unknown copy operation type.")

-    def check(self,
+    def check(self, path_uri: str) -> bool:
         """
         Check if an object or prefix exists in an S3 bucket using an S3 URI.

         Args:
-
+            path_uri (str): The S3 URI (e.g., 's3://my-bucket/my-key' or 's3://my-bucket/my-prefix/').
                 Use a trailing '/' to check for a prefix/directory.

         Returns:
             bool: True if the object or prefix exists, False otherwise.
         """
         # Use the class client and parse method
-        bucket_name, s3_key = self._parse_path(
+        bucket_name, s3_key = self._parse_path(path_uri)

         if bucket_name is None or s3_key is None:
             # _parse_path returns None, None if scheme is not 's3'
-            logger.error(f"Error: Invalid S3 URI format: {
+            logger.error(f"Error: Invalid S3 URI format: {path_uri}")
             return False

         is_prefix = s3_key.endswith("/")
@@ -886,12 +986,508 @@ class Bucket:
             elif e.response["Error"]["Code"] == "NoSuchBucket":
                 if self.verbose:
                     logger.error(
-                        f"Error: Bucket '{bucket_name}' not found (from URI: {
+                        f"Error: Bucket '{bucket_name}' not found (from URI: {path_uri})."
                     )
                 return False
             # Handle other potential errors like AccessDenied differently if needed
-            logger.error(f"Error checking {
+            logger.error(f"Error checking {path_uri}: {e}")
             return False
         except Exception as e:
-            logger.error(f"An unexpected error occurred checking {
+            logger.error(f"An unexpected error occurred checking {path_uri}: {e}")
             return False
+
+
+# Standalone helper for RcloneBucket._is_rclone_path
+def _is_rclone_path_standalone(path: str) -> bool:
+    """
+    Standalone helper: Determines if a path string is an rclone path (e.g., "remote:path",
+    ":backend:path", or "s3://bucket/path").
+    """
+    parsed_url = urlparse(path)
+
+    # Explicitly allow s3:// paths as rclone paths
+    if parsed_url.scheme == "s3" and parsed_url.netloc:
+        return True
+
+    # Check for Windows drive letter paths (e.g., C:\path or C:) - these are local.
+    if os.name == "nt":
+        if len(path) >= 2 and path[0].isalpha() and path[1] == ":":
+            if len(path) == 2:  # e.g., "C:"
+                return False  # Local current directory on drive
+            if path[2] in ["\\", "/"]:  # e.g., "C:\foo" or "C:/foo"
+                return False  # Local absolute path
+
+    # Handle file:// scheme as local
+    if parsed_url.scheme == "file":
+        return False
+
+    # If it has another scheme (e.g., http, ftp) and a network location,
+    # it's a URL, not typically an rclone path for copy/sync operations in this context.
+    if parsed_url.scheme and parsed_url.scheme != "s3" and parsed_url.netloc:
+        return False
+
+    # If the path contains a colon, it's likely an rclone remote path
+    # (e.g., "myremote:path" or ":s3:path" or "s3:path" if scheme not picked up by urlparse for s3:).
+    # This check comes after specific local/URL patterns are ruled out.
+    if ":" in path:
+        return True
+
+    # Default to local if none of the above (e.g., "/abs/path", "rel/path")
+    return False
+
+
+class RcloneBucket(StorageBucket):
+    """Handles interactions with storage using the rclone-python library."""
+
+    def __init__(
+        self,
+        verbose: bool = True,
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_session_token: Optional[str] = None,
+        region: str = "us-east-1",
+    ):
+        """
+        Initializes the RcloneBucket handler.
+
+        Args:
+            verbose (bool): If True, prints status messages and sets rclone-python log level.
+            aws_access_key_id (Optional[str]): AWS Access Key ID for rclone S3 remotes.
+            aws_secret_access_key (Optional[str]): AWS Secret Access Key for rclone S3 remotes.
+            aws_session_token (Optional[str]): AWS Session Token for rclone S3 remotes.
+            region (str): AWS region for S3 operations. Defaults to "us-east-1".
+        """
+        super().__init__(verbose=verbose)
+        self.aws_access_key_id = aws_access_key_id
+        self.aws_secret_access_key = aws_secret_access_key
+        self.aws_session_token = aws_session_token
+        self.region = region
+
+        if not is_rclone_installed():
+            logger.error(
+                "rclone command not found. Please ensure rclone is installed and configured correctly (https://rclone.org/install/)."
+            )
+            # Consider raising an exception if rclone CLI is essential for rclone-python to function.
+            return
+
+        if self.verbose:
+            logger.info("Initialized RcloneBucket with rclone-python.")
+            logger.info("rclone-python log level set to DEBUG.")
+        else:
+            logger.info("rclone-python log level set to WARNING.")
+
+        # Store a mtime tolerance, similar to S3 Bucket
+        self.mtime_tolerance = timedelta(seconds=2)
+
+    def _is_rclone_path(self, path: str) -> bool:
+        """
+        Determines if a path string is an rclone path (e.g., "remote:path").
+        """
+        return _is_rclone_path_standalone(path)
+
+    def _execute_with_aws_env(
+        self, func: Callable[..., Any], *args: Any, **kwargs: Any
+    ) -> Any:
+        """Helper to execute rclone functions with temporary AWS env vars if provided."""
+        old_env: Dict[str, Optional[str]] = {}
+        aws_vars = {
+            "AWS_ACCESS_KEY_ID": self.aws_access_key_id,
+            "AWS_SECRET_ACCESS_KEY": self.aws_secret_access_key,
+            "AWS_SESSION_TOKEN": self.aws_session_token,
+            # Add rclone-specific S3 configuration
+            "RCLONE_CONFIG_S3_TYPE": "s3",
+            "RCLONE_CONFIG_S3_PROVIDER": "AWS",
+            "RCLONE_CONFIG_S3_ENV_AUTH": "true",
+            "RCLONE_CONFIG_S3_REGION": self.region,
+        }
+
+        try:
+            for key, value in aws_vars.items():
+                if value is not None:
+                    old_env[key] = os.environ.get(key)
+                    os.environ[key] = value
+                elif key in os.environ:  # Value is None but was set in env
+                    old_env[key] = os.environ.get(key)
+                    del os.environ[key]
+
+            # Ensure stderr is captured by setting show_progress to False
+            if "show_progress" not in kwargs:
+                kwargs["show_progress"] = False
+
+            # Set DEBUG log level for rclone to get more verbose output
+            old_log_level = logging.getLogger("rclone").level
+            logging.getLogger("rclone").setLevel(logging.DEBUG)
+
+            # Convert s3:// URLs if needed
+            modified_args = list(args)
+            for i, arg in enumerate(modified_args):
+                if isinstance(arg, str) and arg.startswith("s3://"):
+                    # Convert s3://bucket/path to s3:bucket/path
+                    arg_parts = arg[5:].split("/", 1)
+                    bucket_name = arg_parts[0]
+                    path = arg_parts[1] if len(arg_parts) > 1 else ""
+                    modified_args[i] = f"s3:{bucket_name}/{path}"
+
+            try:
+                return func(*modified_args, **kwargs)
+            finally:
+                # Restore the original log level
+                logging.getLogger("rclone").setLevel(old_log_level)
+        finally:
+            for key, value in old_env.items():
+                if value is None:
+                    if key in os.environ:  # It was set by us, now remove
+                        del os.environ[key]
+                else:
+                    os.environ[key] = value
+
+            # Clean up any env vars we set but weren't in old_env
+            for key in aws_vars.keys():
+                if key not in old_env and key in os.environ:
+                    del os.environ[key]
+
+    def sync(
+        self,
+        source: str,
+        destination: str,
+        delete: bool = False,
+        dry_run: bool = False,
+        excludes: Optional[List[str]] = None,
+    ) -> None:
+        if not is_rclone_installed():
+            logger.error("Cannot sync: rclone command not found.")
+            return
+
+        if self.verbose:
+            logger.info(f"Rclone sync: {source} -> {destination}")
+            if delete:
+                logger.info("Deletion enabled.")
+            if dry_run:
+                logger.info("Dry run mode.")
+            if excludes:
+                logger.info(f"Excludes: {excludes}")
+
+        rc_args = [
+            "--modify-window=2s",
+            "--log-level=DEBUG" if self.verbose else "--log-level=INFO",
+            "--log-format=date,time,level,message",
+            "--progress",  # Add progress display
+        ]
+        if dry_run:
+            rc_args.append("--dry-run")
+        if delete:
+            rc_args.append("--delete-after")
+
+        if excludes:
+            for ex_pattern in excludes:
+                rc_args.append(f"--exclude={ex_pattern}")
+
+        # Set environment variables for AWS credentials if they exist
+        env = os.environ.copy()
+        if self.aws_access_key_id:
+            env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+        if self.aws_secret_access_key:
+            env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+        if self.aws_session_token:
+            env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+        # Set rclone-specific environment variables
+        env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+        env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+        env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+        env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+        # Convert s3:// URLs if needed
+        rclone_src = source
+        rclone_dest = destination
+
+        # If source or destination uses s3:// URL format, convert it for rclone CLI
+        if source.startswith("s3://"):
+            # Convert s3://bucket/path to s3:bucket/path
+            source_parts = source[5:].split("/", 1)
+            bucket_name = source_parts[0]
+            path = source_parts[1] if len(source_parts) > 1 else ""
+            rclone_src = f"s3:{bucket_name}/{path}"
+
+        if destination.startswith("s3://"):
+            # Convert s3://bucket/path to s3:bucket/path
+            destination_parts = destination[5:].split("/", 1)
+            bucket_name = destination_parts[0]
+            path = destination_parts[1] if len(destination_parts) > 1 else ""
+            rclone_dest = f"s3:{bucket_name}/{path}"
+
+        # Build the rclone command
+        cmd = ["rclone", "sync", rclone_src, rclone_dest] + rc_args
+
+        if self.verbose:
+            logger.info(f"Running command: {' '.join(cmd)}")
+
+        try:
+            # Run the command and capture output
+            process = subprocess.run(
+                cmd,
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False,
+            )
+
+            # Always log the stdout and stderr for verbose mode
+            if self.verbose:
+                if process.stdout:
+                    logger.info(f"Rclone stdout:\n{process.stdout}")
+                if process.stderr:
+                    logger.info(f"Rclone stderr:\n{process.stderr}")
+
+            if process.returncode == 0:
+                logger.info(
+                    f"Rclone sync completed successfully from {source} to {destination}."
+                )
+                if dry_run:
+                    logger.info(
+                        "Dry run summary (see rclone output above for details)."
+                    )
+            else:
+                logger.error(f"Rclone sync failed with exit code: {process.returncode}")
+
+                if (
+                    not self.verbose
+                ):  # Only log these again if not already logged in verbose mode
+                    if process.stdout:
+                        logger.error(f"Rclone stdout:\n{process.stdout}")
+                    if process.stderr:
+                        logger.error(f"Rclone stderr:\n{process.stderr}")
+        except Exception as e:
+            logger.error(f"Error running rclone sync command: {e}")
+
+    def copy(
+        self,
+        source: str,
+        destination: str,
+    ) -> None:
+        if not is_rclone_installed():
+            logger.error("Cannot copy: rclone command not found.")
+            return
+
+        # Determine if source/destination are rclone paths or local
+        is_src_rclone = self._is_rclone_path(source)
+        is_dest_rclone = self._is_rclone_path(destination)
+
+        if not is_src_rclone and not is_dest_rclone:
+            logger.error(
+                "Error: Both source and destination are local. Use 'shutil.copy' or 'shutil.copytree'."
+            )
+            return
+
+        if self.verbose:
+            logger.info(f"Rclone copy: {source} -> {destination}")
+
+        rc_args = [
+            "--log-level=DEBUG" if self.verbose else "--log-level=INFO",
+            "--log-format=date,time,level,message",
+            "--progress",  # Add progress display
+        ]
+
+        # Set environment variables for AWS credentials if they exist
+        env = os.environ.copy()
+        if self.aws_access_key_id:
+            env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+        if self.aws_secret_access_key:
+            env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+        if self.aws_session_token:
+            env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+        # Set rclone-specific environment variables
+        env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+        env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+        env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+        env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+        # Convert s3:// URLs if needed
+        rclone_src = source
+        rclone_dest = destination
+
+        # If source or destination uses s3:// URL format, convert it for rclone CLI
+        if source.startswith("s3://"):
+            # Convert s3://bucket/path to s3:bucket/path
+            source_parts = source[5:].split("/", 1)
+            bucket_name = source_parts[0]
+            path = source_parts[1] if len(source_parts) > 1 else ""
+            rclone_src = f"s3:{bucket_name}/{path}"
+
+        if destination.startswith("s3://"):
+            # Convert s3://bucket/path to s3:bucket/path
+            destination_parts = destination[5:].split("/", 1)
+            bucket_name = destination_parts[0]
+            path = destination_parts[1] if len(destination_parts) > 1 else ""
+            rclone_dest = f"s3:{bucket_name}/{path}"
+
+        # Build the rclone command
+        cmd = ["rclone", "copy", rclone_src, rclone_dest] + rc_args
+
+        if self.verbose:
+            logger.info(f"Running command: {' '.join(cmd)}")
+
+        try:
+            # Run the command and capture output
+            process = subprocess.run(
+                cmd,
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False,
+            )
+
+            # Always log the stdout and stderr for verbose mode
+            if self.verbose:
+                if process.stdout:
+                    logger.info(f"Rclone stdout:\n{process.stdout}")
+                if process.stderr:
+                    logger.info(f"Rclone stderr:\n{process.stderr}")
+
+            if process.returncode == 0:
+                logger.info(
+                    f"Rclone copy completed successfully from {source} to {destination}."
+                )
+            else:
+                logger.error(f"Rclone copy failed with exit code: {process.returncode}")
+
+                if (
+                    not self.verbose
+                ):  # Only log these again if not already logged in verbose mode
+                    if process.stdout:
+                        logger.error(f"Rclone stdout:\n{process.stdout}")
+                    if process.stderr:
+                        logger.error(f"Rclone stderr:\n{process.stderr}")
+        except Exception as e:
+            logger.error(f"Error running rclone copy command: {e}")
+
+    def check(self, path_uri: str) -> bool:
+        if not is_rclone_installed():
+            logger.error("Cannot check path: rclone command not found.")
+            return False
+
+        if self.verbose:
+            logger.info(f"Checking existence of path: {path_uri}")
+
+        # Convert s3:// URL if needed
+        rclone_path = path_uri
+        if path_uri.startswith("s3://"):
+            # Convert s3://bucket/path to s3:bucket/path
+            path_parts = path_uri[5:].split("/", 1)
+            bucket_name = path_parts[0]
+            path = path_parts[1] if len(path_parts) > 1 else ""
+            rclone_path = f"s3:{bucket_name}/{path}"
+
+        # Set environment variables for AWS credentials if they exist
+        env = os.environ.copy()
+        if self.aws_access_key_id:
+            env["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id
+        if self.aws_secret_access_key:
+            env["AWS_SECRET_ACCESS_KEY"] = self.aws_secret_access_key
+        if self.aws_session_token:
+            env["AWS_SESSION_TOKEN"] = self.aws_session_token
+
+        # Set rclone-specific environment variables
+        env["RCLONE_CONFIG_S3_TYPE"] = "s3"
+        env["RCLONE_CONFIG_S3_PROVIDER"] = "AWS"
+        env["RCLONE_CONFIG_S3_ENV_AUTH"] = "true"
+        env["RCLONE_CONFIG_S3_REGION"] = self.region
+
+        # Build the rclone command (size is a good way to check existence)
+        cmd = ["rclone", "size", rclone_path, "--json"]
+
+        try:
+            # Run the command and capture output
+            process = subprocess.run(
+                cmd,
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False,
+            )
+
+            if process.returncode == 0:
+                if self.verbose:
+                    logger.debug(f"Path {path_uri} exists and is accessible.")
+                return True
+            else:
+                # Check if this is a "not found" error vs. another type of error
+                err_lower = process.stderr.lower() if process.stderr else ""
+                not_found_indicators = [
+                    "directory not found",
+                    "object not found",
+                    "path not found",
+                    "no such file or directory",
+                    "source or destination not found",
+                    "error: couldn't find file",
+                    "failed to size: can't find object",
+                    "can't find source directory",
+                    "source not found",
+                ]
+                is_not_found = any(
+                    indicator in err_lower for indicator in not_found_indicators
+                )
+
+                if is_not_found:
+                    if self.verbose:
+                        logger.debug(f"Path {path_uri} not found.")
+                    return False
+                else:
+                    # Other type of error (permissions, network, etc.)
+                    logger.warning(f"Error checking path {path_uri}: {process.stderr}")
+                    return False
+        except Exception as e:
+            logger.error(f"Error running rclone check command: {e}")
+            return False
+
+
+# Factory function Bucket()
+def Bucket(
+    verbose: bool = True,
+    aws_access_key_id: Optional[str] = None,
+    aws_secret_access_key: Optional[str] = None,
+    aws_session_token: Optional[str] = None,
+    region: str = "us-east-1",
+) -> StorageBucket:
+    """
+    Factory function to create a bucket instance.
+    Returns RcloneBucket if rclone is available and installed, otherwise S3Bucket.
+
+    Args:
+        verbose (bool): If True, prints status messages. Defaults to True.
+        aws_access_key_id (Optional[str]): Temporary AWS Access Key ID (for S3Bucket).
+        aws_secret_access_key (Optional[str]): Temporary AWS Secret Access Key (for S3Bucket).
+        aws_session_token (Optional[str]): Temporary AWS Session Token (for S3Bucket).
+        region (str): AWS region for S3 operations. Defaults to "us-east-1".
+
+    Returns:
+        StorageBucket: An instance of RcloneBucket or S3Bucket.
+    """
+    if is_rclone_installed():
+        print("rclone is installed and available")
+        if verbose:
+            logger.info("rclone is installed and available. Using RcloneBucket.")
+        return RcloneBucket(
+            verbose=verbose,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_session_token=aws_session_token,
+            region=region,
+        )
+    else:
+        print("rclone not installed or not available")
+        if verbose:
+            logger.info(
+                "rclone not installed or not available. Falling back to S3Bucket."
+            )
+        return S3Bucket(
+            verbose=verbose,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_session_token=aws_session_token,
+            region=region,
+        )
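Taken together, the new entry point is the Bucket() factory at the end of the file: it returns an RcloneBucket when the rclone CLI is on PATH and falls back to S3Bucket otherwise, and both honor the same sync/copy/check interface. A usage sketch (the bucket name, paths, and patterns are illustrative only, not from the package):

from nebu.data import Bucket

bucket = Bucket(verbose=True, region="us-east-1")

# Preview an upload, skipping logs and caches; drop dry_run=True to actually sync.
bucket.sync(
    "./artifacts",
    "s3://my-bucket/artifacts",
    delete=False,
    dry_run=True,
    excludes=["*.log", "__pycache__"],
)

# A trailing slash checks a prefix rather than a single object (S3Bucket semantics).
print(bucket.check("s3://my-bucket/artifacts/"))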
nebu/processors/default.py
CHANGED
nebu/processors/processor.py
CHANGED
@@ -214,11 +214,14 @@ class Processor(Generic[InputType, OutputType]):
         wait: bool = False,
         logs: bool = False,
         api_key: Optional[str] = None,
+        user_key: Optional[str] = None,
     ) -> OutputType | Dict[str, Any] | None:
         """
         Allows the Processor instance to be called like a function, sending data.
         """
-        return self.send(
+        return self.send(
+            data=data, wait=wait, logs=logs, api_key=api_key, user_key=user_key
+        )

     def send(
         self,
@@ -226,6 +229,7 @@ class Processor(Generic[InputType, OutputType]):
         wait: bool = False,
         logs: bool = False,
         api_key: Optional[str] = None,
+        user_key: Optional[str] = None,
     ) -> OutputType | Dict[str, Any] | None:
         """
         Send data to the processor and optionally stream logs in the background.
@@ -250,6 +254,7 @@ class Processor(Generic[InputType, OutputType]):
         stream_data = V1StreamData(
             content=data,
             wait=wait,
+            user_key=user_key,
         )
         response = requests.post(
             messages_url,
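The user_key added here is threaded from __call__ through send() into V1StreamData. A rough sketch of the call path, assuming proc is an already-constructed Processor instance (the helper name is made up for illustration):

from typing import Any, Optional

from nebu.processors.processor import Processor

def send_as_user(proc: Processor, data: Any, user_key: Optional[str] = None) -> Any:
    # Equivalent to proc.send(data=data, wait=True, user_key=user_key);
    # __call__ simply forwards user_key to send().
    return proc(data, wait=True, user_key=user_key)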
{nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nebu
-Version: 0.1.94
+Version: 0.1.97
 Summary: A globally distributed container runtime
 Requires-Python: >=3.10.14
 Description-Content-Type: text/markdown
@@ -13,6 +13,7 @@ Requires-Dist: pillow>=10.4.0
 Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pysocks>=1.7.1
 Requires-Dist: pyyaml>=6.0.2
+Requires-Dist: rclone-python>=0.1.21
 Requires-Dist: redis>=5.0
 Requires-Dist: requests>=2.32.3
 Dynamic: license-file
{nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ nebu/__init__.py,sha256=xNtWiN29MJZK_WBEUP-9hDmlkfLxoASVI-f4tNTXO58,454
 nebu/auth.py,sha256=N_v6SPFD9HU_UoRDTaouH03g2Hmo9C-xxqInE1FweXE,1471
 nebu/cache.py,sha256=JqRb4FdZrRrO4ePlwvsKdxRC8dNEFMxfTWag0aJz8Gw,4893
 nebu/config.py,sha256=C5Jt9Bd0i0HrgzBSVNJ-Ml3KwX_gaYbYYZEtNL2gvJg,7031
-nebu/data.py,sha256=
+nebu/data.py,sha256=4fEvO_xNCeTNW4RFr1u6iZ7j92S-5zWwxmFdXopxae8,63502
 nebu/errors.py,sha256=bBnK5YQ6qZg4OMY81AN2k03ppefg89FUwF_SHEMlqCA,170
 nebu/logging.py,sha256=VzpjCEoXm3c4i0sKJL5GTsPIhTQ6Y4BPUTzPmwhve7o,950
 nebu/meta.py,sha256=CzFHMND9seuewzq9zNNx9WTr6JvrCBExe7BLqDSr7lM,745
@@ -16,13 +16,13 @@ nebu/namespaces/namespace.py,sha256=oeZyGqsIGIrppyjif1ZONsdTmqRgd9oSLFE1BChXTTE,
 nebu/processors/consumer.py,sha256=j6iKF_wc8RUNKrFqjB5keUX-Gj9hGZUbmAjEyTm-Oj0,55367
 nebu/processors/consumer_process_worker.py,sha256=h--eNFKaLbUayxn88mB8oGGdrU2liE1dnwm_TPlewX8,36960
 nebu/processors/decorate.py,sha256=pGA0m8cxTnEu7ullKTaAgjf_FFP5RhRqBnQnsdSwtJs,55476
-nebu/processors/default.py,sha256=
+nebu/processors/default.py,sha256=cy4ETMdbdRGkrvbYec1o60h7mGDlGN5JsuUph0ENtDU,364
 nebu/processors/models.py,sha256=g4B1t6Rgoy-NUEHBLeQc0EENzHXLDlWSio8Muv7cTDU,4093
-nebu/processors/processor.py,sha256=
+nebu/processors/processor.py,sha256=xxduD632g_2lBhAF2tZ3g0aoZJFgbfNwrDgtOAio0Is,16677
 nebu/redis/models.py,sha256=coPovAcVXnOU1Xh_fpJL4PO3QctgK9nBe5QYoqEcnxg,1230
 nebu/services/service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nebu-0.1.
-nebu-0.1.
-nebu-0.1.
-nebu-0.1.
-nebu-0.1.
+nebu-0.1.97.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nebu-0.1.97.dist-info/METADATA,sha256=aHHT7MjCgIZVBjWxTIK5bDp3aCaIsy0wMbUTDKmzolI,1797
+nebu-0.1.97.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+nebu-0.1.97.dist-info/top_level.txt,sha256=uLIbEKJeGSHWOAJN5S0i5XBGwybALlF9bYoB1UhdEgQ,5
+nebu-0.1.97.dist-info/RECORD,,
{nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/licenses/LICENSE
File without changes
{nebu-0.1.94.dist-info → nebu-0.1.97.dist-info}/top_level.txt
File without changes