pos3 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pos3-0.2.0 → pos3-0.2.2}/PKG-INFO +1 -1
- {pos3-0.2.0 → pos3-0.2.2}/pos3/__init__.py +28 -9
- {pos3-0.2.0 → pos3-0.2.2}/pos3.egg-info/PKG-INFO +1 -1
- {pos3-0.2.0 → pos3-0.2.2}/pyproject.toml +1 -1
- {pos3-0.2.0 → pos3-0.2.2}/tests/test_s3.py +177 -0
- {pos3-0.2.0 → pos3-0.2.2}/LICENSE +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/README.md +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/pos3.egg-info/SOURCES.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/pos3.egg-info/dependency_links.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/pos3.egg-info/requires.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/pos3.egg-info/top_level.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.2}/setup.cfg +0 -0
|
@@ -142,6 +142,17 @@ def _s3_paths_conflict(left: str, right: str) -> bool:
|
|
|
142
142
|
return left_norm.startswith(right_norm + "/") or right_norm.startswith(left_norm + "/")
|
|
143
143
|
|
|
144
144
|
|
|
145
|
+
def _make_s3_key(prefix: str, info: FileInfo) -> str:
    """Build the canonical S3 key for ``info`` located under ``prefix``.

    Args:
        prefix: Key prefix the entry lives under. May be empty, and may or may
            not already end with "/".
        info: Entry description; only ``relative_path`` and ``is_dir`` are read.

    Returns:
        ``prefix`` alone when the relative path is empty, the relative path
        alone when the prefix is empty, otherwise the two joined by "/". No
        extra separator is inserted when ``prefix`` already ends with "/".
        Directory entries always end with "/".
    """
    relative = info.relative_path
    if not relative:
        key = prefix
    elif not prefix:
        key = relative
    elif prefix.endswith("/"):
        # The separator is already present; adding another would yield "a//b",
        # which is a different S3 key from "a/b".
        key = prefix + relative
    else:
        key = prefix + "/" + relative

    # Directory markers are stored with a trailing slash.
    if info.is_dir and not key.endswith("/"):
        key += "/"
    return key
|
|
154
|
+
|
|
155
|
+
|
|
145
156
|
def _process_futures(futures, operation: str) -> None:
|
|
146
157
|
for future in futures:
|
|
147
158
|
try:
|
|
@@ -618,12 +629,12 @@ class _Mirror:
|
|
|
618
629
|
)
|
|
619
630
|
|
|
620
631
|
for info in to_copy:
|
|
621
|
-
s3_key = prefix
|
|
632
|
+
s3_key = _make_s3_key(prefix, info)
|
|
622
633
|
to_put.append((info, local_path, bucket, s3_key, profile))
|
|
623
634
|
total_bytes += info.size
|
|
624
635
|
|
|
625
636
|
for info in to_delete if delete else []:
|
|
626
|
-
s3_key = prefix
|
|
637
|
+
s3_key = _make_s3_key(prefix, info)
|
|
627
638
|
to_remove.append((bucket, s3_key, profile))
|
|
628
639
|
|
|
629
640
|
if to_put:
|
|
@@ -679,7 +690,7 @@ class _Mirror:
|
|
|
679
690
|
total_bytes = 0
|
|
680
691
|
|
|
681
692
|
for info in to_copy:
|
|
682
|
-
s3_key = prefix
|
|
693
|
+
s3_key = _make_s3_key(prefix, info)
|
|
683
694
|
to_put.append((info, bucket, s3_key, local_path))
|
|
684
695
|
total_bytes += info.size
|
|
685
696
|
|
|
@@ -709,26 +720,35 @@ class _Mirror:
|
|
|
709
720
|
def _list_s3_objects(self, bucket: str, key: str, profile: Profile | None = None) -> Iterator[dict]:
    """Yield S3 object dicts for ``key``, as a single object or as a directory prefix.

    A non-empty ``key`` without a trailing "/" is first probed with
    ``head_object``. If the probe succeeds, exactly one dict is yielded: the
    head response with ``"Key"`` set to ``key`` and ``"Size"`` copied from
    ``"ContentLength"`` when ``"Size"`` is absent. If the probe fails with
    error code "404", the key is treated as a directory and listed with
    ``key + "/"``, so that e.g. "droid/recovery" does not also match
    "droid/recovery_towels". Any other ``ClientError`` propagates.

    A ``key`` that already ends with "/" (or is empty) skips the probe and is
    passed to ``list_objects_v2`` unchanged; an empty key therefore lists with
    an empty prefix.
    """
    logger.debug("Listing S3 objects: bucket=%s, key=%s", bucket, key)
    client = self._get_client(profile)

    list_prefix = key
    if key and not key.endswith("/"):
        try:
            head = client.head_object(Bucket=bucket, Key=key)
        except ClientError as exc:
            if exc.response["Error"]["Code"] != "404":
                raise
            # No object with this exact key: fall back to a directory listing.
            list_prefix = key + "/"
        else:
            logger.debug("Found single object via head_object: %s", key)
            # head_object reports ContentLength; listing entries report Size.
            if "ContentLength" in head and "Size" not in head:
                head["Size"] = head["ContentLength"]
            yield {**head, "Key": key}
            return

    pages = client.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=list_prefix)
    for page in pages:
        contents = page.get("Contents", [])
        logger.debug("Listed %d objects with prefix %s", len(contents), list_prefix)
        yield from contents
|
|
733
753
|
|
|
734
754
|
def _scan_s3(self, bucket: str, prefix: str, profile: Profile | None = None) -> Iterator[FileInfo]:
|
|
@@ -773,7 +793,6 @@ class _Mirror:
|
|
|
773
793
|
try:
|
|
774
794
|
client = self._get_client(profile)
|
|
775
795
|
if info.is_dir:
|
|
776
|
-
key += "/" if not key.endswith("/") else ""
|
|
777
796
|
client.put_object(Bucket=bucket, Key=key, Body=b"")
|
|
778
797
|
else:
|
|
779
798
|
file_path = local_path / info.relative_path if info.relative_path else local_path
|
|
@@ -29,6 +29,43 @@ def _setup_s3_mock(mock_boto_client, paginate_return_value=None):
|
|
|
29
29
|
return mock_s3
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
class TestMakeS3Key:
    """Unit tests for ``s3._make_s3_key``: files, directories, empty prefixes and nesting."""

    def test_file_with_prefix(self):
        """A file under a prefix is joined with a single slash."""
        entry = s3.FileInfo(relative_path="file.txt", size=100, is_dir=False)
        key = s3._make_s3_key("data", entry)
        assert key == "data/file.txt"

    def test_dir_with_prefix(self):
        """A directory under a prefix gets a trailing slash."""
        entry = s3.FileInfo(relative_path="subdir", size=0, is_dir=True)
        key = s3._make_s3_key("data", entry)
        assert key == "data/subdir/"

    def test_root_dir(self):
        """A directory with an empty relative path maps to the prefix plus a slash."""
        entry = s3.FileInfo(relative_path="", size=0, is_dir=True)
        key = s3._make_s3_key("data", entry)
        assert key == "data/"

    def test_empty_prefix_file(self):
        """With an empty prefix, a file key is just its relative path."""
        entry = s3.FileInfo(relative_path="file.txt", size=100, is_dir=False)
        key = s3._make_s3_key("", entry)
        assert key == "file.txt"

    def test_empty_prefix_dir(self):
        """With an empty prefix, a directory key is its relative path plus a slash."""
        entry = s3.FileInfo(relative_path="subdir", size=0, is_dir=True)
        key = s3._make_s3_key("", entry)
        assert key == "subdir/"

    def test_nested_path(self):
        """Nested relative file paths are appended to the prefix unchanged."""
        entry = s3.FileInfo(relative_path="a/b/c.txt", size=50, is_dir=False)
        key = s3._make_s3_key("prefix", entry)
        assert key == "prefix/a/b/c.txt"

    def test_nested_dir(self):
        """Nested directories get a trailing slash after the full path."""
        entry = s3.FileInfo(relative_path="a/b", size=0, is_dir=True)
        key = s3._make_s3_key("prefix", entry)
        assert key == "prefix/a/b/"

    def test_dir_already_trailing_slash_prefix(self):
        """A directory whose relative_path already ends with '/' must not get a second slash."""
        entry = s3.FileInfo(relative_path="sub/", size=0, is_dir=True)
        result = s3._make_s3_key("data", entry)
        assert result == "data/sub/"
        assert "//" not in result
|
|
67
|
+
|
|
68
|
+
|
|
32
69
|
class TestS3URLParsing:
|
|
33
70
|
def test_parse_s3_url_valid(self):
|
|
34
71
|
assert s3._parse_s3_url("s3://bucket/path/to/data") == (
|
|
@@ -159,6 +196,36 @@ class TestUpload:
|
|
|
159
196
|
assert mock_s3.upload_file.call_count >= 1
|
|
160
197
|
assert mock_s3.delete_object.call_count == 1
|
|
161
198
|
|
|
199
|
+
@patch(BOTO3_PATCH_TARGET)
def test_upload_delete_directory_marker_trailing_slash(self, mock_boto_client):
    """Deleting a directory from S3 must target the marker key including its trailing slash."""
    # Remote state: a directory marker "output/subdir/" plus a file "output/file.txt".
    remote_pages = [
        {
            "Contents": [
                {"Key": "output/subdir/", "Size": 0},  # Directory marker with trailing slash
                {"Key": "output/file.txt", "Size": 5},
            ]
        }
    ]
    mock_s3 = _setup_s3_mock(mock_boto_client, remote_pages)

    with tempfile.TemporaryDirectory() as tmpdir:
        output = Path(tmpdir) / "output"
        output.mkdir()
        # Local state: only file.txt remains, subdir no longer exists locally.
        (output / "file.txt").write_text("content")

        with s3.mirror(cache_root=tmpdir, show_progress=False):
            s3.upload("s3://bucket/output", local=output, interval=None)

        # Collect every key passed to delete_object and look for the marker.
        deleted_keys = [call[1]["Key"] for call in mock_s3.delete_object.call_args_list]
        assert "output/subdir/" in deleted_keys, (
            f"Expected delete of 'output/subdir/' but got: {deleted_keys}"
        )
|
|
228
|
+
|
|
162
229
|
@patch(BOTO3_PATCH_TARGET)
|
|
163
230
|
def test_background_sync_uploads_repeatedly(self, mock_boto_client):
|
|
164
231
|
mock_s3 = _setup_s3_mock(mock_boto_client)
|
|
@@ -871,6 +938,116 @@ class TestExclude:
|
|
|
871
938
|
assert "file.txt" in call_args[1]
|
|
872
939
|
|
|
873
940
|
|
|
941
|
+
class TestPrefixBoundaryMatching:
    """Checks which prefix ``_list_s3_objects`` hands to the S3 paginator."""

    @patch(BOTO3_PATCH_TARGET)
    def test_prefix_boundary_prevents_spurious_matches(self, mock_boto_client):
        """A key without a trailing slash must be listed as ``key + "/"``.

        Listing "data" must not also match "data_backup/". Regression test for
        the bug where "droid/recovery" matched "droid/recovery_towels".
        """
        mock_s3 = _setup_s3_mock(mock_boto_client)

        with tempfile.TemporaryDirectory() as tmpdir, s3.mirror(cache_root=tmpdir, show_progress=False):
            mirror_obj = s3._require_active_mirror()

            # Drain the generator so the client calls actually happen.
            list(mirror_obj._list_s3_objects("bucket", "data", None))

            paginate_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
            assert len(paginate_calls) == 1
            used_prefix = paginate_calls[0][1]["Prefix"]
            assert used_prefix == "data/", f"Expected Prefix='data/' but got Prefix='{used_prefix}'"

    @patch(BOTO3_PATCH_TARGET)
    def test_prefix_boundary_with_trailing_slash(self, mock_boto_client):
        """A key that already ends with '/' is listed as-is, not as "data//"."""
        mock_s3 = _setup_s3_mock(mock_boto_client)

        with tempfile.TemporaryDirectory() as tmpdir, s3.mirror(cache_root=tmpdir, show_progress=False):
            mirror_obj = s3._require_active_mirror()

            list(mirror_obj._list_s3_objects("bucket", "data/", None))

            paginate_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
            assert len(paginate_calls) == 1
            assert paginate_calls[0][1]["Prefix"] == "data/"

    @patch(BOTO3_PATCH_TARGET)
    def test_single_file_download_bypasses_list(self, mock_boto_client):
        """When head_object finds the key, it is returned directly and nothing is listed."""
        mock_s3 = Mock()
        mock_boto_client.return_value = mock_s3

        # head_object answers as if the key is an existing object.
        mock_s3.head_object.return_value = {"ContentLength": 1234, "ETag": "abc123"}

        with tempfile.TemporaryDirectory() as tmpdir, s3.mirror(cache_root=tmpdir, show_progress=False):
            mirror_obj = s3._require_active_mirror()

            # Key without a trailing slash, i.e. a single-file request.
            results = list(mirror_obj._list_s3_objects("bucket", "data/file.txt", None))

            assert mock_s3.head_object.call_count == 1
            assert len(results) == 1
            only = results[0]
            assert only["Key"] == "data/file.txt"
            assert only["Size"] == 1234

            # The paginator must never have been requested.
            assert mock_s3.get_paginator.call_count == 0

    @patch(BOTO3_PATCH_TARGET)
    def test_directory_without_trailing_slash_gets_slash_added(self, mock_boto_client):
        """Listing a directory given without a trailing slash still works.

        Scenario: download('s3://bucket/my_dir') where my_dir is a directory.
        Expected sequence:
        1. head_object('my_dir') is tried first
        2. it reports 404 (not a single file)
        3. the listing uses Prefix='my_dir/'
        4. so only 'my_dir/*' can match, NOT 'my_dir_backup/*'
        """
        mock_s3 = _setup_s3_mock(mock_boto_client)

        # Replace the paginator with one that returns the directory contents.
        mock_paginator = Mock()
        mock_s3.get_paginator.return_value = mock_paginator
        mock_paginator.paginate.return_value = [
            {"Contents": [{"Key": "my_dir/file1.txt", "Size": 100}, {"Key": "my_dir/file2.txt", "Size": 200}]}
        ]

        with tempfile.TemporaryDirectory() as tmpdir, s3.mirror(cache_root=tmpdir, show_progress=False):
            mirror_obj = s3._require_active_mirror()

            # After normalization the directory key has no trailing slash: "my_dir".
            results = list(mirror_obj._list_s3_objects("bucket", "my_dir", None))

            # The single-object probe came first, with the key as given.
            assert mock_s3.head_object.call_count == 1
            head_call_key = mock_s3.head_object.call_args[1]["Key"]
            assert head_call_key == "my_dir"

            # Then exactly one listing, using the slash-terminated prefix.
            paginate_calls = mock_paginator.paginate.call_args_list
            assert len(paginate_calls) == 1
            prefix_used = paginate_calls[0][1]["Prefix"]
            assert prefix_used == "my_dir/", f"Expected 'my_dir/' but got '{prefix_used}'"

            # Both listed objects are passed through.
            assert len(results) == 2
|
|
1049
|
+
|
|
1050
|
+
|
|
874
1051
|
class TestProfile:
|
|
875
1052
|
def setup_method(self):
|
|
876
1053
|
"""Clear registered profiles before each test."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|