apppy-aws 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ __generated__/
2
+ dist/
3
+ *.egg-info
4
+ .env
5
+ .env.*
6
+ *.env
7
+ !.env.ci
8
+ .file_store/
9
+ *.pid
10
+ .python-version
11
+ *.secrets
12
+ .secrets
13
+ *.tar.gz
14
+ *.test_output/
15
+ .test_output/
16
+ uv.lock
17
+ *.whl
18
+
19
+ # System files
20
+ __pycache__
21
+ .DS_Store
22
+
23
+ # Editor files
24
+ *.sublime-project
25
+ *.sublime-workspace
26
+ .vscode/*
27
+ !.vscode/settings.json
28
+
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: apppy-aws
3
+ Version: 0.3.0
4
+ Summary: AWS integrations for server development
5
+ Project-URL: Homepage, https://github.com/spals/apppy
6
+ Author: Tim Kral
7
+ License: MIT
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.11
11
+ Requires-Dist: anyio==4.11.0
12
+ Requires-Dist: apppy-env>=0.3.0
13
+ Requires-Dist: apppy-fs>=0.3.0
14
+ Requires-Dist: apppy-generic>=0.3.0
15
+ Requires-Dist: apppy-logger>=0.3.0
16
+ Requires-Dist: boto3-stubs[s3]==1.40.49
17
+ Requires-Dist: boto3==1.40.49
File without changes
apppy_aws-0.3.0/aws.mk ADDED
@@ -0,0 +1,23 @@
1
# Include guard: the rules below are defined only the first time this file is
# included, so several parent makefiles can include it safely.
ifndef APPPY_AWS_MK_INCLUDED
APPPY_AWS_MK_INCLUDED := 1

# Absolute directory containing this makefile, without a trailing slash.
# Must be evaluated while this file is the last entry in MAKEFILE_LIST,
# i.e. before any further `include` directive.
AWS_PKG_DIR := $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST)))))

# None of these targets name real files.
.PHONY: aws aws-dev aws/build aws/clean aws/install aws/install-dev

# Clean, then build the wheel and install it.
# NOTE(review): aws/clean and aws/build have no ordering edge between them, so
# under `make -j` they may run concurrently -- confirm whether that matters.
aws: aws/clean aws/install

# Clean, then install the package in editable (development) mode.
aws-dev: aws/clean aws/install-dev

# Build the distributions into $(AWS_PKG_DIR)/dist.
aws/build:
	cd $(AWS_PKG_DIR) && uvx --from build pyproject-build

# Remove build output, egg-info metadata and the local virtual environment.
aws/clean:
	cd $(AWS_PKG_DIR) && rm -rf dist/ *.egg-info .venv

# Install the freshly built wheel(s).
aws/install: aws/build
	cd $(AWS_PKG_DIR) && uv pip install dist/*.whl

# Install the package from source in editable mode.
aws/install-dev:
	cd $(AWS_PKG_DIR) && uv pip install -e .

endif # APPPY_AWS_MK_INCLUDED
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.25"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [dependency-groups]
6
+ dev = []
7
+
8
+ [project]
9
+ name = "apppy-aws"
10
+ description = "AWS integrations for server development"
11
+ dynamic = ["version"]
12
+ readme = "README.md"
13
+ requires-python = ">=3.11"
14
+ license = {text = "MIT"}
15
+ authors = [{ name = "Tim Kral" }]
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ ]
20
+ dependencies = [
21
+ "anyio==4.11.0",
22
+ "apppy-env>=0.3.0",
23
+ "apppy-fs>=0.3.0",
24
+ "apppy-generic>=0.3.0",
25
+ "apppy-logger>=0.3.0",
26
+ "boto3==1.40.49",
27
+ "boto3-stubs[s3]==1.40.49",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/spals/apppy"
32
+
33
+ [tool.hatch.build.targets.wheel]
34
+ packages = ["src/apppy"]
35
+
36
+ [tool.hatch.version]
37
+ path = "../../VERSION"
38
+ pattern = "^(?P<version>\\d+\\.\\d+\\.\\d+)$"
39
+ source = "regex"
40
+
41
+ [tool.uv.sources]
42
+ apppy-env = { workspace = true }
43
+ apppy-fs = { workspace = true }
44
+ apppy-generic = { workspace = true }
45
+ apppy-logger = { workspace = true }
File without changes
@@ -0,0 +1,284 @@
1
+ import uuid
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ import boto3
6
+ from anyio.to_thread import run_sync
7
+ from botocore.exceptions import ClientError as BotoClientError
8
+ from fastapi_lifespan_manager import LifespanManager
9
+ from fsspec import AbstractFileSystem, register_implementation
10
+ from mypy_boto3_s3 import S3Client
11
+ from pydantic import Field
12
+ from s3fs import S3FileSystem as NativeS3FileSystem
13
+
14
+ from apppy.env import Env, EnvSettings
15
+ from apppy.fs import (
16
+ FileSystem,
17
+ FileSystemBucket,
18
+ FileSystemPermission,
19
+ FileUrl,
20
+ GenericFileUrl,
21
+ ProxyFileSystem,
22
+ )
23
+ from apppy.fs.errors import (
24
+ FileSystemInvalidProtocolError,
25
+ MalformedFileUrlError,
26
+ )
27
+ from apppy.generic.encrypt import BytesEncrypter
28
+ from apppy.logger import WithLogger
29
+
30
+
31
class S3FileUrl(GenericFileUrl):
    """A file URL that addresses a key (or key prefix) inside an S3 bucket.

    Accepts only the ``s3`` and ``enc://s3`` protocols. In addition to what
    ``GenericFileUrl`` provides, it exposes the S3 object key (``key_prefix``)
    and that key's parent (``key_prefix_parent``).
    """

    def __init__(
        self,
        _filesystem_protocol: str,
        _filesystem_bucket: FileSystemBucket,
        _filesystem_external_id: str | None,
        _partition: str,
        _directory: str | None,
        _file_name: str | None,
    ) -> None:
        """Build and validate the URL.

        Raises:
            FileSystemInvalidProtocolError: the protocol is neither ``s3``
                nor ``enc://s3``.
            MalformedFileUrlError: an external id was given without a file name.
        """
        super().__init__(
            _filesystem_protocol=_filesystem_protocol,
            _filesystem_bucket=_filesystem_bucket,
            _filesystem_external_id=_filesystem_external_id,
            _partition=_partition,
            _directory=_directory,
            _file_name=_file_name,
        )
        str_instance = self.as_str_internal()

        # Validation
        if _filesystem_protocol not in ("enc://s3", "s3"):
            raise FileSystemInvalidProtocolError(protocol=_filesystem_protocol)
        # If we have an id, we must also have a file name
        if _filesystem_external_id is not None and _file_name is None:
            raise MalformedFileUrlError(
                url=str_instance, code="s3_file_url_external_id_without_file_name"
            )

        # The key is whatever follows "<protocol>://<bucket>/" in the internal
        # string. Locate the bucket marker instead of slicing by the length of
        # the protocol: for encrypted URLs the protocol is "enc://s3" while the
        # internal string starts with plain "s3://", so a length-based slice
        # would cut six characters off the front of the key.
        bucket_marker = f"://{_filesystem_bucket.value}/"
        _, marker_found, key_prefix = str_instance.partition(bucket_marker)
        if not marker_found:
            # No "<bucket>/" segment (e.g. a bare bucket URL): keep the
            # original length-based slice so existing behavior is unchanged.
            key_prefix = str_instance[
                len(f"{self.filesystem_protocol}://{_filesystem_bucket.value}") + 1 :
            ]
        self._key_prefix = key_prefix
        self._key_prefix_parent = str(Path(self.key_prefix).parent)

    @property
    def key_prefix(self) -> str:
        """The part of the internal URL after the bucket, i.e. the S3 key."""
        return self._key_prefix

    @property
    def key_prefix_parent(self) -> str:
        """``key_prefix`` with its last path component removed."""
        return self._key_prefix_parent

    @staticmethod
    def split_path(path: str, protocol: str, bucket: FileSystemBucket) -> "S3FileUrl":
        """Parse a protocol-less ``path`` into an ``S3FileUrl`` for ``bucket``.

        Parsing is delegated to ``GenericFileUrl.split_path``; the parsed
        components are then re-wrapped so S3 validation is applied.
        """
        generic_file_url = GenericFileUrl.split_path(path=path, protocol=protocol, bucket=bucket)

        return S3FileUrl(
            _filesystem_protocol=generic_file_url.filesystem_protocol,
            _filesystem_bucket=bucket,
            _filesystem_external_id=generic_file_url.filesystem_external_id,
            _partition=generic_file_url.partition,
            _directory=generic_file_url.directory,
            _file_name=generic_file_url.file_name,
        )

    @staticmethod
    def split_url(url: str, bucket: FileSystemBucket) -> "S3FileUrl":
        """Parse a full URL (protocol included) into an ``S3FileUrl``.

        Surrounding whitespace is stripped before the protocol is parsed.
        """
        url = url.strip()
        protocol = GenericFileUrl._parse_protocol(url, unencrypted=False)
        # Drop "<protocol>://" so only the path is handed to split_path.
        path = url[len(f"{protocol}://") :]

        return S3FileUrl.split_path(path=path, protocol=protocol, bucket=bucket)
93
+
94
+
95
class S3FileSystemSettings(EnvSettings):
    # Settings for the S3 filesystem. Each field is annotated with the
    # environment variable it corresponds to (domain prefix "S3_FS").

    # --- Buckets -----------------------------------------------------------
    bucket_external: str = Field()  # S3_FS_BUCKET_EXTERNAL
    bucket_internal: str = Field()  # S3_FS_BUCKET_INTERNAL

    # --- Connection --------------------------------------------------------
    region: str = Field()  # S3_FS_REGION
    endpoint: str = Field()  # S3_FS_ENDPOINT
    use_ssl: bool = Field(default=True)  # S3_FS_USE_SSL
    version_aware: bool = Field(default=False)  # S3_FS_VERSION_AWARE

    # --- Credentials -------------------------------------------------------
    access_key_id: str = Field()  # S3_FS_ACCESS_KEY_ID
    # Secret values are declared with exclude=True.
    secret_access_key: str = Field(exclude=True)  # S3_FS_SECRET_ACCESS_KEY

    # --- Optional encryption ----------------------------------------------
    encrypt_passphrase: str | None = Field(default=None, exclude=True)  # S3_FS_ENCRYPT_PASSPHRASE
    encrypt_salt: str | None = Field(default=None, exclude=True)  # S3_FS_ENCRYPT_SALT

    def __init__(self, env: Env) -> None:
        # Delegate to EnvSettings with this domain's prefix.
        super().__init__(env=env, domain_prefix="S3_FS")
119
+
120
+
121
class S3FileSystem(ProxyFileSystem, WithLogger):
    """Proxy filesystem that stores files in S3 through ``s3fs``.

    On construction it builds the native ``s3fs`` filesystem, registers itself
    and the native filesystem under the ``s3`` protocol, and adds a lifespan
    hook that makes sure the external and internal buckets exist at startup.
    Encryption is enabled only when both a passphrase and a salt are configured.
    """

    def __init__(
        self,
        settings: S3FileSystemSettings,
        lifespan: LifespanManager,
        fs: FileSystem,
    ) -> None:
        """Wire the filesystem into ``fs`` and schedule bucket bootstrapping.

        Args:
            settings: connection, credential and encryption settings.
            lifespan: manager the bucket-bootstrap hook is added to.
            fs: registry the proxy and native filesystems are registered with.
        """
        self._settings: S3FileSystemSettings = settings

        # An encrypter is created only if BOTH values are present and non-empty.
        self._bytes_encrypter: BytesEncrypter | None = None
        if settings.encrypt_passphrase and settings.encrypt_salt:
            self._bytes_encrypter = BytesEncrypter(
                settings.encrypt_passphrase, settings.encrypt_salt
            )

        self._configure_nativefs(settings, fs)
        lifespan.add(self.__configure_s3_storage)

    def _configure_nativefs(
        self,
        settings: S3FileSystemSettings,
        fs: FileSystem,
    ) -> None:
        """Create the bucket descriptors and the native ``s3fs`` filesystem."""
        # Bucket names come straight from the settings.
        self._bucket_external = FileSystemBucket(
            bucket_type="external",
            value=settings.bucket_external,
        )
        self._bucket_internal = FileSystemBucket(
            bucket_type="internal",
            value=settings.bucket_internal,
        )

        self._nativefs: NativeS3FileSystem = NativeS3FileSystem(
            endpoint_url=settings.endpoint,
            key=settings.access_key_id,
            secret=settings.secret_access_key,
            # Honor S3_FS_USE_SSL here as well, so the data path uses the same
            # transport setting as the boto3 client used for bootstrapping.
            use_ssl=settings.use_ssl,
            version_aware=settings.version_aware,
        )
        fs.register_proxyfs(self, "s3")
        fs.register_nativefs(self._nativefs, "s3")

        if self.is_encrypted:
            # In the encrypted case, we'll need to also register the file
            # system with fsspec itself so that the encrypted filesystem
            # can instantiate it independently. It must be registered under
            # this filesystem's own protocol ("s3"); any other protocol name
            # would, with clobber=True, overwrite an unrelated implementation.
            register_implementation("s3", NativeS3FileSystem, clobber=True)

    async def __configure_s3_storage(self):
        """Lifespan hook: ensure both buckets exist, then yield until shutdown.

        A short-lived boto3 client is used for bucket management only; it is
        closed before the hook yields, whether or not bootstrapping succeeded.
        """
        self._logger.info("Creating S3 boto3 client for S3 filesystem bucket management")
        s3: S3Client = boto3.client(
            "s3",
            region_name=self._settings.region,
            endpoint_url=self._settings.endpoint,
            aws_access_key_id=self._settings.access_key_id,
            aws_secret_access_key=self._settings.secret_access_key,
            use_ssl=self._settings.use_ssl,
        )
        try:
            for bucket in (self._bucket_external, self._bucket_internal):
                # boto3 calls are blocking, so run them in a worker thread.
                await run_sync(self.__ensure_s3_bucket, s3, bucket.value)
        finally:
            self._logger.info("Closing S3 boto3 client for S3 filesystem bucket management")
            s3.close()

        yield

        self._logger.info("Closing S3 filesystem clients")

    def __ensure_s3_bucket(self, s3: S3Client, bucket_name: str):
        """Create ``bucket_name`` if ``head_bucket`` reports it as missing.

        A "404" error code triggers creation; "403" is logged as a warning and
        tolerated; any other client error is re-raised.
        """
        try:
            s3.head_bucket(Bucket=bucket_name)
            self._logger.info("Found bucket for S3 filesystem", extra={"bucket": bucket_name})
        except BotoClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                self._logger.info("Bucket missing for S3 filesystem", extra={"bucket": bucket_name})
                # us-east-1 is created without a location constraint; every
                # other region passes its name as the LocationConstraint.
                if self._settings.region == "us-east-1":
                    s3.create_bucket(Bucket=bucket_name)
                else:
                    s3.create_bucket(
                        Bucket=bucket_name,
                        CreateBucketConfiguration={"LocationConstraint": self._settings.region},  # type: ignore[typeddict-item]
                    )
                self._logger.info(
                    "Created new bucket for S3 filesystem",
                    extra={"bucket": bucket_name},
                )
            elif error_code == "403":
                self._logger.warning(
                    "Bucket access is forbidden while bootstrapping S3 filesystem",
                    extra={"bucket": bucket_name},
                )
            else:
                raise

    def convert_file_url(self, file_url: FileUrl) -> FileUrl:
        """Return ``file_url`` as an ``S3FileUrl`` bound to this filesystem.

        An ``S3FileUrl`` whose encryption flag already matches this filesystem
        is returned unchanged. Otherwise a new ``S3FileUrl`` is built that
        targets this filesystem's external or internal bucket.
        """
        if isinstance(file_url, S3FileUrl) and self.is_encrypted == file_url.is_encrypted:
            return file_url

        if self.is_encrypted is True and file_url.is_encrypted is False:
            # Add the encryption protocol if we need to
            filesystem_protocol = f"enc://{file_url.filesystem_protocol}"
        else:
            filesystem_protocol = file_url.filesystem_protocol

        # If we're not version aware, ensure that we have a unique id associated
        # with the file_url. Unfortunately, it's not possible to get the actual
        # unique identifier out of S3 storage so we'll just make one up here and
        # pass it along
        filesystem_external_id: str | None = (
            str(uuid.uuid4())
            if file_url.filesystem_external_id is None and self._settings.version_aware is False
            else file_url.filesystem_external_id
        )
        return S3FileUrl(
            _filesystem_protocol=filesystem_protocol,
            _filesystem_bucket=(
                self._bucket_external
                if file_url.filesystem_bucket.is_external
                else self._bucket_internal
            ),
            _filesystem_external_id=filesystem_external_id,
            _partition=file_url.partition,
            _directory=file_url.directory,
            _file_name=file_url.file_name,
        )

    def file_url_kwargs(self, file_url: FileUrl) -> dict[str, Any]:
        """Return extra keyword arguments for ``file_url``; S3 needs none."""
        # No extra parameters needed
        return {}

    def parse_file_url(self, url: str) -> S3FileUrl:
        """Parse ``url``, choosing the bucket by substring match.

        The internal bucket is used when its name occurs anywhere in ``url``;
        otherwise the external bucket is used.
        """
        if url.find(self._bucket_internal.value) > -1:
            return S3FileUrl.split_url(url, self._bucket_internal)

        return S3FileUrl.split_url(url, self._bucket_external)

    def rm(self, url: str, recursive=False, maxdepth=None, **kwargs) -> None:
        """Remove ``url`` via the native filesystem.

        Extra ``kwargs`` are accepted but not forwarded.
        """
        self.native.rm(url, recursive=recursive, maxdepth=maxdepth)

    @property
    def encryption(self) -> BytesEncrypter | None:
        """The configured encrypter, or ``None`` when encryption is off."""
        return self._bytes_encrypter

    @property
    def name(self) -> str:
        """Human-readable name of this filesystem."""
        return "AWS S3"

    @property
    def native(self) -> AbstractFileSystem:
        """The underlying ``s3fs`` filesystem instance."""
        return self._nativefs

    @property
    def permissions(self) -> list[FileSystemPermission]:
        """Permissions this filesystem declares."""
        return [
            FileSystemPermission.PRIVATE_INTERNAL,
            FileSystemPermission.READWRITE,
        ]
@@ -0,0 +1,273 @@
1
+ import pytest
2
+
3
+ from apppy.aws.fs import S3FileUrl
4
+ from apppy.fs import FileSystemBucket, FileUrl
5
+
6
# Bucket shared by every case below.
_s3_filesystem_bucket_external = FileSystemBucket(bucket_type="external", value="fs_s3_test")


def _make_s3_url(
    *,
    protocol: str = "s3",
    external_id: str | None = None,
    directory: str | None = None,
    file_name: str | None = None,
) -> S3FileUrl:
    # Build a case URL in the shared bucket under the fixed partition "partition".
    return S3FileUrl(
        _filesystem_protocol=protocol,
        _filesystem_bucket=_s3_filesystem_bucket_external,
        _filesystem_external_id=external_id,
        _partition="partition",
        _directory=directory,
        _file_name=file_name,
    )


# Directory-only cases
_case_dir_only: S3FileUrl = _make_s3_url(directory="dir")
_case_dir_with_subdir: S3FileUrl = _make_s3_url(directory="dir/subdir")

# Valid url for version aware case
_case_file_name_only: S3FileUrl = _make_s3_url(file_name="f.txt")
# Valid url for version aware case
_case_file_name_with_dir: S3FileUrl = _make_s3_url(directory="dir", file_name="f.txt")

# Malformed url for the S3 case
# _case_unique_id_only: S3FileUrl = _make_s3_url(external_id="123-abc")
# Malformed url for the S3 case
# _case_unique_id_with_dir: S3FileUrl = _make_s3_url(external_id="123-abc", directory="dir")

# Cases carrying an external id
_case_unique_id_with_dir_and_file_name: S3FileUrl = _make_s3_url(
    external_id="123-abc", directory="dir", file_name="f.txt"
)
_case_unique_id_with_file_name: S3FileUrl = _make_s3_url(
    external_id="123-abc", file_name="f.txt"
)
_case_unique_id_with_dir_and_file_name_encrypted: S3FileUrl = _make_s3_url(
    protocol="enc://s3", external_id="123-abc", directory="dir", file_name="f.txt"
)
84
+
85
+
86
@pytest.mark.parametrize(
    "file_url, expected_str",
    [
        (_case_dir_only, "s3://external/partition/dir"),
        (_case_dir_with_subdir, "s3://external/partition/dir/subdir"),
        (_case_file_name_only, "s3://external/partition/f.txt"),
        (_case_file_name_with_dir, "s3://external/partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "s3://external/partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "s3://external/partition/@123-abc$f.txt"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "enc://s3://external/partition/dir/@123-abc$f.txt",
        ),
    ],
)
def test_s3_file_url_str(file_url: FileUrl, expected_str: str):
    """``str()`` shows the bucket type, not the bucket name, and keeps ``enc://``."""
    rendered = str(file_url)
    assert rendered == expected_str
103
+
104
+
105
@pytest.mark.parametrize(
    "file_url, expected_str",
    [
        (_case_dir_only, "s3://fs_s3_test/partition/dir"),
        (_case_dir_with_subdir, "s3://fs_s3_test/partition/dir/subdir"),
        (_case_file_name_only, "s3://fs_s3_test/partition/f.txt"),
        (_case_file_name_with_dir, "s3://fs_s3_test/partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "s3://fs_s3_test/partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "s3://fs_s3_test/partition/@123-abc$f.txt"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "s3://fs_s3_test/partition/dir/@123-abc$f.txt",
        ),
    ],
)
def test_s3_file_url_str_internal(file_url: FileUrl, expected_str: str):
    """The internal form uses the real bucket name and drops any ``enc://`` prefix."""
    internal = file_url.as_str_internal()
    assert internal == expected_str
122
+
123
+
124
@pytest.mark.parametrize(
    "path, expected_file_url",
    [
        ("external/partition/dir", _case_dir_only),
        ("external/partition/dir/subdir", _case_dir_with_subdir),
        ("external/partition/f.txt", _case_file_name_only),
        ("external/partition/dir/f.txt", _case_file_name_with_dir),
        ("external/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("external/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
    ],
)
def test_s3_file_url_split_path(path: str, expected_file_url: FileUrl):
    """Paths that name the bucket by its type parse to the expected URL."""
    parsed = S3FileUrl.split_path(path, protocol="s3", bucket=_s3_filesystem_bucket_external)
    assert parsed == expected_file_url
138
+
139
+
140
@pytest.mark.parametrize(
    "path, expected_file_url",
    [
        ("fs_s3_test/partition/dir", _case_dir_only),
        ("fs_s3_test/partition/dir/subdir", _case_dir_with_subdir),
        ("fs_s3_test/partition/f.txt", _case_file_name_only),
        ("fs_s3_test/partition/dir/f.txt", _case_file_name_with_dir),
        ("fs_s3_test/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("fs_s3_test/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
    ],
)
def test_s3_file_url_split_path_unobfuscated(path: str, expected_file_url: FileUrl):
    """Paths that name the bucket by its real name parse to the same URL."""
    parsed = S3FileUrl.split_path(path, protocol="s3", bucket=_s3_filesystem_bucket_external)
    assert parsed == expected_file_url
154
+
155
+
156
@pytest.mark.parametrize(
    "url, expected_file_url",
    [
        ("s3://external/partition/dir", _case_dir_only),
        ("s3://external/partition/dir/subdir", _case_dir_with_subdir),
        ("s3://external/partition/f.txt", _case_file_name_only),
        ("s3://external/partition/dir/f.txt", _case_file_name_with_dir),
        ("s3://external/partition/dir/@123-abc$f.txt", _case_unique_id_with_dir_and_file_name),
        ("s3://external/partition/@123-abc$f.txt", _case_unique_id_with_file_name),
        (
            "enc://s3://external/partition/dir/@123-abc$f.txt",
            _case_unique_id_with_dir_and_file_name_encrypted,
        ),
    ],
)
def test_s3_file_url_split_url(url: str, expected_file_url: FileUrl):
    """Full URLs, including the encrypted form, parse to the expected URL."""
    parsed = S3FileUrl.split_url(url, bucket=_s3_filesystem_bucket_external)
    assert parsed == expected_file_url
174
+
175
+
176
@pytest.mark.parametrize(
    "file_url, expected_key_prefix",
    [
        (_case_dir_only, "partition/dir"),
        (_case_dir_with_subdir, "partition/dir/subdir"),
        (_case_file_name_only, "partition/f.txt"),
        (_case_file_name_with_dir, "partition/dir/f.txt"),
        (_case_unique_id_with_dir_and_file_name, "partition/dir/@123-abc$f.txt"),
        (_case_unique_id_with_file_name, "partition/@123-abc$f.txt"),
    ],
)
def test_s3_file_url_key_prefix(file_url: S3FileUrl, expected_key_prefix: str):
    """``key_prefix`` is everything after the bucket in the internal URL."""
    actual_key_prefix = file_url.key_prefix
    assert actual_key_prefix == expected_key_prefix
189
+
190
+
191
@pytest.mark.parametrize(
    "file_url, expected_key_prefix_parent",
    [
        (_case_dir_only, "partition"),
        (_case_dir_with_subdir, "partition/dir"),
        (_case_file_name_only, "partition"),
        (_case_file_name_with_dir, "partition/dir"),
        (_case_unique_id_with_dir_and_file_name, "partition/dir"),
        (_case_unique_id_with_file_name, "partition"),
    ],
)
def test_s3_file_url_key_prefix_parent(file_url: S3FileUrl, expected_key_prefix_parent: str):
    """``key_prefix_parent`` is ``key_prefix`` without its last component."""
    actual_parent = file_url.key_prefix_parent
    assert actual_parent == expected_key_prefix_parent
204
+
205
+
206
@pytest.mark.parametrize(
    "file_url",
    [
        _case_dir_only,
        _case_dir_with_subdir,
    ],
)
def test_s3_file_url_is_directory(file_url: FileUrl):
    """URLs without a file name are valid directories and not files."""
    assert file_url.is_valid is True
    assert file_url.is_directory is True
    assert file_url.is_file is False
217
+
218
+
219
@pytest.mark.parametrize(
    "file_url",
    [
        _case_file_name_only,
        _case_file_name_with_dir,
        _case_unique_id_with_dir_and_file_name,
        _case_unique_id_with_file_name,
    ],
)
def test_s3_file_url_is_file(file_url: FileUrl):
    """URLs with a file name are valid files and not directories."""
    assert file_url.is_valid is True
    assert file_url.is_directory is False
    assert file_url.is_file is True
232
+
233
+
234
@pytest.mark.parametrize(
    "file_url, join_dir, join_file_name, expected_joined_path",
    [
        (_case_dir_only, None, None, "s3://fs_s3_test/partition/dir"),
        (_case_dir_only, "join_dir", None, "s3://fs_s3_test/partition/dir/join_dir"),
        (_case_dir_only, None, "join_f.txt", "s3://fs_s3_test/partition/dir/join_f.txt"),
        (
            _case_dir_only,
            "join_dir",
            "join_f.txt",
            "s3://fs_s3_test/partition/dir/join_dir/join_f.txt",
        ),
    ],
)
def test_s3_file_url_join(
    file_url: S3FileUrl,
    join_dir: str | None,
    join_file_name: str | None,
    expected_joined_path: str,
):
    """Joining a directory and/or file name appends them to the directory URL."""
    joined = file_url.join(directory=join_dir, file_name=join_file_name)
    assert joined.as_str_internal() == expected_joined_path
256
+
257
+
258
@pytest.mark.parametrize(
    "file_url, expected_parent_path",
    [
        (_case_dir_only, "s3://fs_s3_test/partition"),
        (_case_dir_with_subdir, "s3://fs_s3_test/partition/dir"),
        (_case_unique_id_with_dir_and_file_name, "s3://fs_s3_test/partition/dir"),
        (_case_unique_id_with_file_name, "s3://fs_s3_test/partition"),
        (
            _case_unique_id_with_dir_and_file_name_encrypted,
            "s3://fs_s3_test/partition/dir",
        ),
    ],
)
def test_s3_file_url_parent(file_url: S3FileUrl, expected_parent_path: str):
    """``parent()`` returns the URL one level up, compared in internal form."""
    parent = file_url.parent()
    assert parent.as_str_internal() == expected_parent_path