apppy-aws 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apppy_aws-0.3.0/.gitignore +28 -0
- apppy_aws-0.3.0/PKG-INFO +17 -0
- apppy_aws-0.3.0/README.md +0 -0
- apppy_aws-0.3.0/aws.mk +23 -0
- apppy_aws-0.3.0/pyproject.toml +45 -0
- apppy_aws-0.3.0/src/apppy/aws/__init__.py +0 -0
- apppy_aws-0.3.0/src/apppy/aws/fs.py +284 -0
- apppy_aws-0.3.0/src/apppy/aws/fs_unit_test.py +273 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
__generated__/
|
|
2
|
+
dist/
|
|
3
|
+
*.egg-info
|
|
4
|
+
.env
|
|
5
|
+
.env.*
|
|
6
|
+
*.env
|
|
7
|
+
!.env.ci
|
|
8
|
+
.file_store/
|
|
9
|
+
*.pid
|
|
10
|
+
.python-version
|
|
11
|
+
*.secrets
|
|
12
|
+
.secrets
|
|
13
|
+
*.tar.gz
|
|
14
|
+
*.test_output/
|
|
15
|
+
.test_output/
|
|
16
|
+
uv.lock
|
|
17
|
+
*.whl
|
|
18
|
+
|
|
19
|
+
# System files
|
|
20
|
+
__pycache__
|
|
21
|
+
.DS_Store
|
|
22
|
+
|
|
23
|
+
# Editor files
|
|
24
|
+
*.sublime-project
|
|
25
|
+
*.sublime-workspace
|
|
26
|
+
.vscode/*
|
|
27
|
+
!.vscode/settings.json
|
|
28
|
+
|
apppy_aws-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: apppy-aws
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: AWS integrations for server development
|
|
5
|
+
Project-URL: Homepage, https://github.com/spals/apppy
|
|
6
|
+
Author: Tim Kral
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Requires-Dist: anyio==4.11.0
|
|
12
|
+
Requires-Dist: apppy-env>=0.3.0
|
|
13
|
+
Requires-Dist: apppy-fs>=0.3.0
|
|
14
|
+
Requires-Dist: apppy-generic>=0.3.0
|
|
15
|
+
Requires-Dist: apppy-logger>=0.3.0
|
|
16
|
+
Requires-Dist: boto3-stubs[s3]==1.40.49
|
|
17
|
+
Requires-Dist: boto3==1.40.49
|
|
File without changes
|
apppy_aws-0.3.0/aws.mk
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Make targets for building and installing the apppy-aws package.
# Include-guarded so this file can be safely included from multiple makefiles.
ifndef APPPY_AWS_MK_INCLUDED
APPPY_AWS_MK_INCLUDED := 1
# Absolute directory containing this makefile (trailing slash stripped).
AWS_PKG_DIR := $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST)))))

.PHONY: aws aws-dev aws/build aws/clean aws/install aws/install-dev

# Clean rebuild + install of the packaged wheel.
aws: aws/clean aws/install

# Clean rebuild + editable (development) install.
aws-dev: aws/clean aws/install-dev

# Build sdist/wheel into dist/ using the PEP 517 build frontend via uvx.
aws/build:
	cd $(AWS_PKG_DIR) && uvx --from build pyproject-build

# Remove build artifacts and the local virtualenv.
aws/clean:
	cd $(AWS_PKG_DIR) && rm -rf dist/ *.egg-info .venv

# Install the freshly built wheel.
aws/install: aws/build
	cd $(AWS_PKG_DIR) && uv pip install dist/*.whl

# Editable install for local development (no wheel build).
aws/install-dev:
	cd $(AWS_PKG_DIR) && uv pip install -e .

endif # APPPY_AWS_MK_INCLUDED
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.25"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[dependency-groups]
|
|
6
|
+
dev = []
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "apppy-aws"
|
|
10
|
+
description = "AWS integrations for server development"
|
|
11
|
+
dynamic = ["version"]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.11"
|
|
14
|
+
license = {text = "MIT"}
|
|
15
|
+
authors = [{ name = "Tim Kral" }]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"anyio==4.11.0",
|
|
22
|
+
"apppy-env>=0.3.0",
|
|
23
|
+
"apppy-fs>=0.3.0",
|
|
24
|
+
"apppy-generic>=0.3.0",
|
|
25
|
+
"apppy-logger>=0.3.0",
|
|
26
|
+
"boto3==1.40.49",
|
|
27
|
+
"boto3-stubs[s3]==1.40.49",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/spals/apppy"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/apppy"]
|
|
35
|
+
|
|
36
|
+
[tool.hatch.version]
|
|
37
|
+
path = "../../VERSION"
|
|
38
|
+
pattern = "^(?P<version>\\d+\\.\\d+\\.\\d+)$"
|
|
39
|
+
source = "regex"
|
|
40
|
+
|
|
41
|
+
[tool.uv.sources]
|
|
42
|
+
apppy-env = { workspace = true }
|
|
43
|
+
apppy-fs = { workspace = true }
|
|
44
|
+
apppy-generic = { workspace = true }
|
|
45
|
+
apppy-logger = { workspace = true }
|
|
File without changes
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import boto3
|
|
6
|
+
from anyio.to_thread import run_sync
|
|
7
|
+
from botocore.exceptions import ClientError as BotoClientError
|
|
8
|
+
from fastapi_lifespan_manager import LifespanManager
|
|
9
|
+
from fsspec import AbstractFileSystem, register_implementation
|
|
10
|
+
from mypy_boto3_s3 import S3Client
|
|
11
|
+
from pydantic import Field
|
|
12
|
+
from s3fs import S3FileSystem as NativeS3FileSystem
|
|
13
|
+
|
|
14
|
+
from apppy.env import Env, EnvSettings
|
|
15
|
+
from apppy.fs import (
|
|
16
|
+
FileSystem,
|
|
17
|
+
FileSystemBucket,
|
|
18
|
+
FileSystemPermission,
|
|
19
|
+
FileUrl,
|
|
20
|
+
GenericFileUrl,
|
|
21
|
+
ProxyFileSystem,
|
|
22
|
+
)
|
|
23
|
+
from apppy.fs.errors import (
|
|
24
|
+
FileSystemInvalidProtocolError,
|
|
25
|
+
MalformedFileUrlError,
|
|
26
|
+
)
|
|
27
|
+
from apppy.generic.encrypt import BytesEncrypter
|
|
28
|
+
from apppy.logger import WithLogger
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class S3FileUrl(GenericFileUrl):
    """A :class:`GenericFileUrl` specialized for AWS S3.

    Accepts only the ``s3`` and ``enc://s3`` (encrypted) protocols and adds
    S3-specific derived values: the object key prefix (the URL path relative
    to the bucket) and that prefix's parent path.
    """

    def __init__(
        self,
        _filesystem_protocol: str,
        _filesystem_bucket: FileSystemBucket,
        _filesystem_external_id: str | None,
        _partition: str,
        _directory: str | None,
        _file_name: str | None,
    ) -> None:
        """Build and validate an S3 file URL.

        Raises:
            FileSystemInvalidProtocolError: if the protocol is not ``s3`` or
                ``enc://s3``.
            MalformedFileUrlError: if an external id is given without a file
                name (S3 ids are always attached to a concrete file).
        """
        super().__init__(
            _filesystem_protocol=_filesystem_protocol,
            _filesystem_bucket=_filesystem_bucket,
            _filesystem_external_id=_filesystem_external_id,
            _partition=_partition,
            _directory=_directory,
            _file_name=_file_name,
        )
        str_instance = self.as_str_internal()

        # Validation
        if _filesystem_protocol != "enc://s3" and _filesystem_protocol != "s3":
            raise FileSystemInvalidProtocolError(protocol=_filesystem_protocol)
        # If we have an id, we must also have a file name
        if _filesystem_external_id is not None and _file_name is None:
            raise MalformedFileUrlError(
                url=str_instance, code="s3_file_url_external_id_without_file_name"
            )

        # The S3 object key prefix: everything after "<protocol>://<bucket>/"
        # in the internal string form (the +1 skips the separating slash).
        # NOTE(review): for encrypted URLs this assumes self.filesystem_protocol
        # matches the prefix of as_str_internal() — confirm against
        # GenericFileUrl; the unit tests only exercise key_prefix for
        # unencrypted URLs.
        self._key_prefix = str_instance[
            len(f"{self.filesystem_protocol}://{_filesystem_bucket.value}") + 1 :
        ]
        self._key_prefix_parent = str(Path(self.key_prefix).parent)

    @property
    def key_prefix(self) -> str:
        """The S3 object key prefix (path relative to the bucket)."""
        return self._key_prefix

    @property
    def key_prefix_parent(self) -> str:
        """The parent path of :attr:`key_prefix`."""
        return self._key_prefix_parent

    @staticmethod
    def split_path(path: str, protocol: str, bucket: FileSystemBucket) -> "S3FileUrl":
        """Parse a protocol-less path (e.g. ``bucket/partition/dir/f.txt``)
        into an :class:`S3FileUrl`, revalidating via the S3 constructor."""
        generic_file_url = GenericFileUrl.split_path(path=path, protocol=protocol, bucket=bucket)

        return S3FileUrl(
            _filesystem_protocol=generic_file_url.filesystem_protocol,
            _filesystem_bucket=bucket,
            _filesystem_external_id=generic_file_url.filesystem_external_id,
            _partition=generic_file_url.partition,
            _directory=generic_file_url.directory,
            _file_name=generic_file_url.file_name,
        )

    @staticmethod
    def split_url(url: str, bucket: FileSystemBucket) -> "S3FileUrl":
        """Parse a full URL (``s3://...`` or ``enc://s3://...``) into an
        :class:`S3FileUrl` for the given bucket."""
        url = url.strip()
        protocol = GenericFileUrl._parse_protocol(url, unencrypted=False)
        path = url[len(f"{protocol}://") :]

        return S3FileUrl.split_path(path=path, protocol=protocol, bucket=bucket)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class S3FileSystemSettings(EnvSettings):
    """Environment-backed settings for :class:`S3FileSystem`.

    Each field is read from the environment with the ``S3_FS`` domain prefix
    (the expected variable name is noted above each field). Secrets are
    excluded from serialized output via ``Field(exclude=True)``.
    """

    # S3_FS_BUCKET_EXTERNAL — bucket name for externally-visible files
    bucket_external: str = Field()
    # S3_FS_BUCKET_INTERNAL — bucket name for internal-only files
    bucket_internal: str = Field()
    # S3_FS_REGION — AWS region used for bucket creation
    region: str = Field()
    # S3_FS_ENDPOINT — S3 endpoint URL (allows non-AWS/S3-compatible stores)
    endpoint: str = Field()
    # S3_FS_USE_SSL — whether the boto3 client uses SSL (default: on)
    use_ssl: bool = Field(default=True)
    # S3_FS_VERSION_AWARE — whether the filesystem relies on S3 object versioning
    version_aware: bool = Field(default=False)
    # S3_FS_ACCESS_KEY_ID
    access_key_id: str = Field()
    # S3_FS_SECRET_ACCESS_KEY — secret; excluded from serialization
    secret_access_key: str = Field(exclude=True)
    # S3_FS_ENCRYPT_PASSPHRASE — optional; enables encrypted storage when set
    encrypt_passphrase: str | None = Field(default=None, exclude=True)
    # S3_FS_ENCRYPT_SALT — optional; required together with the passphrase
    encrypt_salt: str | None = Field(default=None, exclude=True)

    def __init__(self, env: Env) -> None:
        """Load all ``S3_FS_*`` settings from the given environment."""
        super().__init__(env=env, domain_prefix="S3_FS")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class S3FileSystem(ProxyFileSystem, WithLogger):
    """A :class:`ProxyFileSystem` backed by AWS S3 (via s3fs/fsspec).

    On construction it wires an ``s3fs`` native filesystem into the apppy
    :class:`FileSystem` registry and, through the FastAPI lifespan manager,
    ensures the configured external/internal buckets exist at startup.
    Optional at-rest encryption is enabled when both passphrase and salt
    settings are present.
    """

    def __init__(
        self,
        settings: S3FileSystemSettings,
        lifespan: LifespanManager,
        fs: FileSystem,
    ) -> None:
        """Configure the native s3fs filesystem now; defer bucket
        bootstrapping to the application lifespan."""
        self._settings: S3FileSystemSettings = settings

        # Encryption is enabled only when BOTH passphrase and salt are
        # present and non-empty; otherwise files are stored in plaintext.
        self._bytes_encrypter: BytesEncrypter | None = None
        if (
            settings.encrypt_passphrase is not None
            and len(settings.encrypt_passphrase) > 0
            and settings.encrypt_salt is not None
            and len(settings.encrypt_salt) > 0
        ):
            self._bytes_encrypter = BytesEncrypter(
                settings.encrypt_passphrase, settings.encrypt_salt
            )

        self._configure_nativefs(settings, fs)
        # Bucket creation happens at app startup (and cleanup logging at
        # shutdown) via the lifespan generator below.
        lifespan.add(self.__configure_s3_storage)

    def _configure_nativefs(
        self,
        settings: S3FileSystemSettings,
        fs: FileSystem,
    ) -> None:
        """Create the s3fs filesystem and register it (and this proxy)
        under the ``s3`` protocol."""
        # Use generic test buckets for all tests
        self._bucket_external = FileSystemBucket(
            bucket_type="external",
            value=settings.bucket_external,
        )
        self._bucket_internal = FileSystemBucket(
            bucket_type="internal",
            value=settings.bucket_internal,
        )

        self._nativefs: NativeS3FileSystem = NativeS3FileSystem(
            endpoint_url=settings.endpoint,
            key=settings.access_key_id,
            secret=settings.secret_access_key,
            version_aware=settings.version_aware,
        )
        fs.register_proxyfs(self, "s3")
        fs.register_nativefs(self._nativefs, "s3")

        if self.is_encrypted:
            # In the encrypted case, we'll need to also register the file
            # system with fsspec itself so that the encrypted filesystem
            # can instantiate it independently
            # NOTE(review): the protocol string "sb" looks suspicious next to
            # the "s3" registrations above — confirm it isn't a typo for "s3".
            register_implementation("sb", NativeS3FileSystem, clobber=True)

    async def __configure_s3_storage(self):
        """Lifespan generator: ensure buckets exist on startup, log on
        shutdown. The boto3 client is used only for bucket management and
        closed before the app starts serving."""
        self._logger.info("Creating S3 boto3 client for S3 filesystem bucket management")
        s3: S3Client = boto3.client(
            "s3",
            region_name=self._settings.region,
            endpoint_url=self._settings.endpoint,
            aws_access_key_id=self._settings.access_key_id,
            aws_secret_access_key=self._settings.secret_access_key,
            use_ssl=self._settings.use_ssl,
        )
        try:
            for bucket in (self._bucket_external, self._bucket_internal):
                # head_bucket/create_bucket are blocking; run off the event loop.
                await run_sync(self.__ensure_s3_bucket, s3, bucket.value)
        finally:
            self._logger.info("Closing S3 boto3 client for S3 filesystem bucket management")
            s3.close()

        # Application runs between startup (above) and shutdown (below).
        yield

        self._logger.info("Closing S3 filesystem clients")

    def __ensure_s3_bucket(self, s3: S3Client, bucket_name: str):
        """Create ``bucket_name`` if it does not already exist.

        A 403 (access forbidden) is logged and tolerated — the bucket may
        exist but be owned elsewhere; any other client error is re-raised.
        """
        try:
            s3.head_bucket(Bucket=bucket_name)
            self._logger.info("Found bucket for S3 filesystem", extra={"bucket": bucket_name})
        except BotoClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                self._logger.info("Bucket missing for S3 filesystem", extra={"bucket": bucket_name})
                if self._settings.region == "us-east-1":
                    # us-east-1 rejects an explicit LocationConstraint.
                    s3.create_bucket(Bucket=bucket_name)
                else:
                    s3.create_bucket(
                        Bucket=bucket_name,
                        CreateBucketConfiguration={"LocationConstraint": self._settings.region},  # type: ignore[typeddict-item]
                    )
                self._logger.info(
                    "Created new bucket for S3 filesystem",
                    extra={"bucket": bucket_name},
                )
            elif error_code == "403":
                self._logger.warning(
                    "Bucket access is forbidden while bootstrapping S3 filesystem",
                    extra={"bucket": bucket_name},
                )
            else:
                raise

    def convert_file_url(self, file_url: FileUrl) -> FileUrl:
        """Convert a generic :class:`FileUrl` into an :class:`S3FileUrl`
        targeting this filesystem's buckets and encryption mode."""
        if isinstance(file_url, S3FileUrl) and self.is_encrypted == file_url.is_encrypted:
            return file_url

        if self.is_encrypted is True and file_url.is_encrypted is False:
            # Add the encryption protocol if we need to
            filesystem_protocol = f"enc://{file_url.filesystem_protocol}"
        else:
            filesystem_protocol = file_url.filesystem_protocol

        # If we're not version aware, ensure that we have a unique id associated
        # with the file_url. Unfortunately, it's not possible to get the actual
        # unique identifier out of S3 storage so we'll just make one up here and
        # pass it along
        filesystem_external_id: str | None = (
            str(uuid.uuid4())
            if file_url.filesystem_external_id is None and self._settings.version_aware is False
            else file_url.filesystem_external_id
        )
        return S3FileUrl(
            _filesystem_protocol=filesystem_protocol,
            _filesystem_bucket=(
                self._bucket_external
                if file_url.filesystem_bucket.is_external
                else self._bucket_internal
            ),
            _filesystem_external_id=filesystem_external_id,
            _partition=file_url.partition,
            _directory=file_url.directory,
            _file_name=file_url.file_name,
        )

    def file_url_kwargs(self, file_url: FileUrl) -> dict[str, Any]:
        """Extra kwargs passed to the native filesystem for this URL."""
        # No extra parameters needed
        return {}

    def parse_file_url(self, url: str) -> S3FileUrl:
        """Parse ``url`` into an :class:`S3FileUrl`, choosing the internal
        bucket when its name appears anywhere in the URL, else external."""
        if url.find(self._bucket_internal.value) > -1:
            return S3FileUrl.split_url(url, self._bucket_internal)

        return S3FileUrl.split_url(url, self._bucket_external)

    def rm(self, url: str, recursive=False, maxdepth=None, **kwargs) -> None:
        """Delete ``url`` via the native s3fs filesystem.

        Extra ``kwargs`` are accepted for interface compatibility but not
        forwarded.
        """
        self.native.rm(url, recursive=recursive, maxdepth=maxdepth)

    @property
    def encryption(self) -> BytesEncrypter | None:
        """The configured encrypter, or ``None`` when storing plaintext."""
        return self._bytes_encrypter

    @property
    def name(self) -> str:
        """Human-readable filesystem name."""
        return "AWS S3"

    @property
    def native(self) -> AbstractFileSystem:
        """The underlying fsspec (s3fs) filesystem."""
        return self._nativefs

    @property
    def permissions(self) -> list[FileSystemPermission]:
        """Permissions supported by this filesystem."""
        return [
            FileSystemPermission.PRIVATE_INTERNAL,
            FileSystemPermission.READWRITE,
        ]
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from apppy.aws.fs import S3FileUrl
|
|
4
|
+
from apppy.fs import FileSystemBucket, FileUrl
|
|
5
|
+
|
|
6
|
+
# Shared fixtures: one external test bucket and a set of S3FileUrl cases
# covering directories, files, external ids, and the encrypted protocol.
_s3_filesystem_bucket_external = FileSystemBucket(bucket_type="external", value="fs_s3_test")

# Directory-only URL (no file name, no external id).
_case_dir_only: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id=None,
    _partition="partition",
    _directory="dir",
    _file_name=None,
)
# Nested directory URL.
_case_dir_with_subdir: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id=None,
    _partition="partition",
    _directory="dir/subdir",
    _file_name=None,
)
# Valid url for version aware case
_case_file_name_only: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id=None,
    _partition="partition",
    _directory=None,
    _file_name="f.txt",
)
# Valid url for version aware case
_case_file_name_with_dir: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id=None,
    _partition="partition",
    _directory="dir",
    _file_name="f.txt",
)
# Malformed url for the S3 case
# (external id without a file name raises MalformedFileUrlError)
# _case_unique_id_only: S3FileUrl = S3FileUrl(
#     _filesystem_protocol="s3",
#     _filesystem_bucket=_s3_filesystem_bucket_external,
#     _filesystem_external_id="123-abc",
#     _partition="partition",
#     _directory=None,
#     _file_name=None,
# )
# Malformed url for the S3 case
# _case_unique_id_with_dir: S3FileUrl = S3FileUrl(
#     _filesystem_protocol="s3",
#     _filesystem_bucket=_s3_filesystem_bucket_external,
#     _filesystem_external_id="123-abc",
#     _partition="partition",
#     _directory="dir",
#     _file_name=None,
# )
# File with external id and directory.
_case_unique_id_with_dir_and_file_name: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id="123-abc",
    _partition="partition",
    _directory="dir",
    _file_name="f.txt",
)
# File with external id but no directory.
_case_unique_id_with_file_name: S3FileUrl = S3FileUrl(
    _filesystem_protocol="s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id="123-abc",
    _partition="partition",
    _directory=None,
    _file_name="f.txt",
)
# Same as above but using the encrypted protocol.
_case_unique_id_with_dir_and_file_name_encrypted: S3FileUrl = S3FileUrl(
    _filesystem_protocol="enc://s3",
    _filesystem_bucket=_s3_filesystem_bucket_external,
    _filesystem_external_id="123-abc",
    _partition="partition",
    _directory="dir",
    _file_name="f.txt",
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.mark.parametrize(
    "file_url, expected_str",
    [
        (_case_dir_only, "s3://external/partition/dir"),
        (_case_dir_with_subdir, "s3://external/partition/dir/subdir"),
        (_case_file_name_only, "s3://external/partition/f.txt"),
        (_case_file_name_with_dir, "s3://external/partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "s3://external/partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "s3://external/partition/@123-abc$f.txt"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "enc://s3://external/partition/dir/@123-abc$f.txt",
        ),
    ],
)
def test_s3_file_url_str(file_url: FileUrl, expected_str: str):
    """str() obfuscates the bucket name as its bucket type ("external")."""
    assert str(file_url) == expected_str


@pytest.mark.parametrize(
    "file_url, expected_str",
    [
        (_case_dir_only, "s3://fs_s3_test/partition/dir"),
        (_case_dir_with_subdir, "s3://fs_s3_test/partition/dir/subdir"),
        (_case_file_name_only, "s3://fs_s3_test/partition/f.txt"),
        (_case_file_name_with_dir, "s3://fs_s3_test/partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "s3://fs_s3_test/partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "s3://fs_s3_test/partition/@123-abc$f.txt"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "s3://fs_s3_test/partition/dir/@123-abc$f.txt",
        ),
    ],
)
def test_s3_file_url_str_internal(file_url: FileUrl, expected_str: str):
    """as_str_internal() exposes the real bucket name and drops enc://."""
    assert file_url.as_str_internal() == expected_str
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@pytest.mark.parametrize(
    "path, expected_file_url",
    [
        ("external/partition/dir", _case_dir_only),
        ("external/partition/dir/subdir", _case_dir_with_subdir),
        ("external/partition/f.txt", _case_file_name_only),
        ("external/partition/dir/f.txt", _case_file_name_with_dir),
        ("external/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("external/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
    ],
)
def test_s3_file_url_split_path(path: str, expected_file_url: FileUrl):
    """split_path parses obfuscated ("external/...") bucket paths."""
    file_url = S3FileUrl.split_path(path, protocol="s3", bucket=_s3_filesystem_bucket_external)
    assert file_url == expected_file_url


@pytest.mark.parametrize(
    "path, expected_file_url",
    [
        ("fs_s3_test/partition/dir", _case_dir_only),
        ("fs_s3_test/partition/dir/subdir", _case_dir_with_subdir),
        ("fs_s3_test/partition/f.txt", _case_file_name_only),
        ("fs_s3_test/partition/dir/f.txt", _case_file_name_with_dir),
        ("fs_s3_test/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("fs_s3_test/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
    ],
)
def test_s3_file_url_split_path_unobfuscated(path: str, expected_file_url: FileUrl):
    """split_path also parses paths using the real bucket name."""
    file_url = S3FileUrl.split_path(path, protocol="s3", bucket=_s3_filesystem_bucket_external)
    assert file_url == expected_file_url


@pytest.mark.parametrize(
    "url, expected_file_url",
    [
        ("s3://external/partition/dir", _case_dir_only),
        ("s3://external/partition/dir/subdir", _case_dir_with_subdir),
        ("s3://external/partition/f.txt", _case_file_name_only),
        ("s3://external/partition/dir/f.txt", _case_file_name_with_dir),
        ("s3://external/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("s3://external/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
        (
            "enc://s3://external/partition/dir/@123-abc$f.txt",
            _case_unique_id_with_dir_and_file_name_encrypted,
        ),
    ],
)
def test_s3_file_url_split_url(url: str, expected_file_url: FileUrl):
    """split_url parses full URLs, including the enc://s3 protocol."""
    file_url = S3FileUrl.split_url(url, bucket=_s3_filesystem_bucket_external)
    assert file_url == expected_file_url
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@pytest.mark.parametrize(
    "file_url, expected_key_prefix",
    [
        (_case_dir_only, "partition/dir"),
        (_case_dir_with_subdir, "partition/dir/subdir"),
        (_case_file_name_only, "partition/f.txt"),
        (_case_file_name_with_dir, "partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "partition/@123-abc$f.txt"),
    ],
)
def test_s3_file_url_key_prefix(file_url: S3FileUrl, expected_key_prefix: str):
    """key_prefix is the URL path relative to the bucket (no protocol/bucket)."""
    assert file_url.key_prefix == expected_key_prefix


@pytest.mark.parametrize(
    "file_url, expected_key_prefix_parent",
    [
        (_case_dir_only, "partition"),
        (_case_dir_with_subdir, "partition/dir"),
        (_case_file_name_only, "partition"),
        (_case_file_name_with_dir, "partition/dir"),
        (_case_unique_id_with_dir_and_file_name, "partition/dir"),
        (_case_unique_id_with_file_name, "partition"),
    ],
)
def test_s3_file_url_key_prefix_parent(file_url: S3FileUrl, expected_key_prefix_parent: str):
    """key_prefix_parent drops the last path component of key_prefix."""
    assert file_url.key_prefix_parent == expected_key_prefix_parent
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@pytest.mark.parametrize(
    "file_url",
    [
        (_case_dir_only),
        (_case_dir_with_subdir),
    ],
)
def test_s3_file_url_is_directory(file_url: FileUrl):
    """URLs without a file name are valid directories."""
    assert file_url.is_valid is True
    assert file_url.is_directory is True
    assert file_url.is_file is False


@pytest.mark.parametrize(
    "file_url",
    [
        (_case_file_name_only),
        (_case_file_name_with_dir),
        (_case_unique_id_with_dir_and_file_name),
        (_case_unique_id_with_file_name),
    ],
)
def test_s3_file_url_is_file(file_url: FileUrl):
    """URLs with a file name are valid files, not directories."""
    assert file_url.is_valid is True
    assert file_url.is_directory is False
    assert file_url.is_file is True
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@pytest.mark.parametrize(
    "file_url, join_dir, join_file_name, expected_joined_path",
    [
        (_case_dir_only, None, None, "s3://fs_s3_test/partition/dir"),
        (_case_dir_only, "join_dir", None, "s3://fs_s3_test/partition/dir/join_dir"),
        (_case_dir_only, None, "join_f.txt", "s3://fs_s3_test/partition/dir/join_f.txt"),
        (
            _case_dir_only,
            "join_dir",
            "join_f.txt",
            "s3://fs_s3_test/partition/dir/join_dir/join_f.txt",
        ),
    ],
)
def test_s3_file_url_join(
    file_url: S3FileUrl,
    join_dir: str | None,
    join_file_name: str | None,
    expected_joined_path: str,
):
    """join appends an optional sub-directory and/or file name."""
    joined_file_url = file_url.join(directory=join_dir, file_name=join_file_name)
    assert joined_file_url.as_str_internal() == expected_joined_path


@pytest.mark.parametrize(
    "file_url, expected_parent_path",
    [
        (_case_dir_only, "s3://fs_s3_test/partition"),
        (_case_dir_with_subdir, "s3://fs_s3_test/partition/dir"),
        (_case_unique_id_with_dir_and_file_name, "s3://fs_s3_test/partition/dir"),
        (_case_unique_id_with_file_name, "s3://fs_s3_test/partition"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "s3://fs_s3_test/partition/dir",
        ),
    ],
)
def test_s3_file_url_parent(file_url: S3FileUrl, expected_parent_path: str):
    """parent() drops the last component (file name or directory)."""
    parent_file_url = file_url.parent()
    assert parent_file_url.as_str_internal() == expected_parent_path
|