pos3 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pos3-0.2.0 → pos3-0.2.1}/PKG-INFO +1 -1
- {pos3-0.2.0 → pos3-0.2.1}/pos3/__init__.py +14 -5
- {pos3-0.2.0 → pos3-0.2.1}/pos3.egg-info/PKG-INFO +1 -1
- {pos3-0.2.0 → pos3-0.2.1}/pyproject.toml +1 -1
- {pos3-0.2.0 → pos3-0.2.1}/tests/test_s3.py +110 -0
- {pos3-0.2.0 → pos3-0.2.1}/LICENSE +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/README.md +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/pos3.egg-info/SOURCES.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/pos3.egg-info/dependency_links.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/pos3.egg-info/requires.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/pos3.egg-info/top_level.txt +0 -0
- {pos3-0.2.0 → pos3-0.2.1}/setup.cfg +0 -0
|
@@ -709,26 +709,35 @@ class _Mirror:
|
|
|
709
709
|
def _list_s3_objects(self, bucket: str, key: str, profile: Profile | None = None) -> Iterator[dict]:
|
|
710
710
|
logger.debug("Listing S3 objects: bucket=%s, key=%s", bucket, key)
|
|
711
711
|
client = self._get_client(profile)
|
|
712
|
-
|
|
713
|
-
#
|
|
714
|
-
|
|
712
|
+
|
|
713
|
+
# Determine the listing prefix - ensure it ends with "/" for directory-like operations
|
|
714
|
+
# This prevents "droid/recovery" from matching "droid/recovery_towels"
|
|
715
|
+
list_prefix = key
|
|
716
|
+
|
|
717
|
+
# If key doesn't end with "/", try to fetch it as a single object first
|
|
718
|
+
if key and not key.endswith("/"):
|
|
715
719
|
try:
|
|
716
720
|
obj = client.head_object(Bucket=bucket, Key=key)
|
|
717
721
|
except ClientError as exc:
|
|
718
722
|
error_code = exc.response["Error"]["Code"]
|
|
719
723
|
if error_code != "404":
|
|
720
724
|
raise
|
|
725
|
+
# Not a single file - treat as directory by adding "/"
|
|
726
|
+
list_prefix = key + "/"
|
|
721
727
|
else:
|
|
728
|
+
# Found single object
|
|
722
729
|
logger.debug("Found single object via head_object: %s", key)
|
|
723
730
|
if "ContentLength" in obj and "Size" not in obj:
|
|
724
731
|
obj["Size"] = obj["ContentLength"]
|
|
725
732
|
yield {**obj, "Key": key}
|
|
726
733
|
return
|
|
734
|
+
# If key already ends with "/", skip head_object - it's clearly a directory prefix
|
|
727
735
|
|
|
736
|
+
# List with the resolved prefix (ends with "/" unless key is empty, in which case the whole bucket is listed)
|
|
728
737
|
paginator = client.get_paginator("list_objects_v2")
|
|
729
|
-
for page in paginator.paginate(Bucket=bucket, Prefix=key):
|
|
738
|
+
for page in paginator.paginate(Bucket=bucket, Prefix=list_prefix):
|
|
730
739
|
objects = page.get("Contents", [])
|
|
731
|
-
logger.debug("Listed %d objects with prefix %s", len(objects), key)
|
|
740
|
+
logger.debug("Listed %d objects with prefix %s", len(objects), list_prefix)
|
|
732
741
|
yield from objects
|
|
733
742
|
|
|
734
743
|
def _scan_s3(self, bucket: str, prefix: str, profile: Profile | None = None) -> Iterator[FileInfo]:
|
|
@@ -871,6 +871,116 @@ class TestExclude:
|
|
|
871
871
|
assert "file.txt" in call_args[1]
|
|
872
872
|
|
|
873
873
|
|
|
874
|
+
class TestPrefixBoundaryMatching:
|
|
875
|
+
@patch(BOTO3_PATCH_TARGET)
|
|
876
|
+
def test_prefix_boundary_prevents_spurious_matches(self, mock_boto_client):
|
|
877
|
+
"""Test that S3 prefix matching respects path boundaries.
|
|
878
|
+
|
|
879
|
+
When downloading s3://bucket/data/, should NOT match s3://bucket/data_backup/
|
|
880
|
+
This is a regression test for the bug where "droid/recovery" matched "droid/recovery_towels"
|
|
881
|
+
"""
|
|
882
|
+
mock_s3 = _setup_s3_mock(mock_boto_client)
|
|
883
|
+
|
|
884
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
885
|
+
with s3.mirror(cache_root=tmpdir, show_progress=False):
|
|
886
|
+
mirror_obj = s3._require_active_mirror()
|
|
887
|
+
|
|
888
|
+
# Simulate listing objects - should add "/" to prefix when key doesn't end with "/"
|
|
889
|
+
_ = list(mirror_obj._list_s3_objects("bucket", "data", None))
|
|
890
|
+
|
|
891
|
+
# Verify that paginate was called with "data/" (with trailing slash)
|
|
892
|
+
paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
|
|
893
|
+
assert len(paginator_calls) == 1
|
|
894
|
+
call_kwargs = paginator_calls[0][1]
|
|
895
|
+
assert (
|
|
896
|
+
call_kwargs["Prefix"] == "data/"
|
|
897
|
+
), f"Expected Prefix='data/' but got Prefix='{call_kwargs['Prefix']}'"
|
|
898
|
+
|
|
899
|
+
@patch(BOTO3_PATCH_TARGET)
|
|
900
|
+
def test_prefix_boundary_with_trailing_slash(self, mock_boto_client):
|
|
901
|
+
"""Test that keys already ending with '/' don't get double slashes."""
|
|
902
|
+
mock_s3 = _setup_s3_mock(mock_boto_client)
|
|
903
|
+
|
|
904
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
905
|
+
with s3.mirror(cache_root=tmpdir, show_progress=False):
|
|
906
|
+
mirror_obj = s3._require_active_mirror()
|
|
907
|
+
|
|
908
|
+
# List with trailing slash already present
|
|
909
|
+
_ = list(mirror_obj._list_s3_objects("bucket", "data/", None))
|
|
910
|
+
|
|
911
|
+
# Should use "data/" as-is, not "data//"
|
|
912
|
+
paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
|
|
913
|
+
assert len(paginator_calls) == 1
|
|
914
|
+
call_kwargs = paginator_calls[0][1]
|
|
915
|
+
assert call_kwargs["Prefix"] == "data/"
|
|
916
|
+
|
|
917
|
+
@patch(BOTO3_PATCH_TARGET)
|
|
918
|
+
def test_single_file_download_bypasses_list(self, mock_boto_client):
|
|
919
|
+
"""Test that single file downloads use head_object and don't list with prefix."""
|
|
920
|
+
mock_s3 = Mock()
|
|
921
|
+
mock_boto_client.return_value = mock_s3
|
|
922
|
+
|
|
923
|
+
# Mock head_object to return a valid file
|
|
924
|
+
mock_s3.head_object.return_value = {"ContentLength": 1234, "ETag": "abc123"}
|
|
925
|
+
|
|
926
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
927
|
+
with s3.mirror(cache_root=tmpdir, show_progress=False):
|
|
928
|
+
mirror_obj = s3._require_active_mirror()
|
|
929
|
+
|
|
930
|
+
# List a single file (no trailing slash)
|
|
931
|
+
results = list(mirror_obj._list_s3_objects("bucket", "data/file.txt", None))
|
|
932
|
+
|
|
933
|
+
# Should have called head_object and returned the file
|
|
934
|
+
assert mock_s3.head_object.call_count == 1
|
|
935
|
+
assert len(results) == 1
|
|
936
|
+
assert results[0]["Key"] == "data/file.txt"
|
|
937
|
+
assert results[0]["Size"] == 1234
|
|
938
|
+
|
|
939
|
+
# Should NOT have called paginate
|
|
940
|
+
assert mock_s3.get_paginator.call_count == 0
|
|
941
|
+
|
|
942
|
+
@patch(BOTO3_PATCH_TARGET)
|
|
943
|
+
def test_directory_without_trailing_slash_gets_slash_added(self, mock_boto_client):
|
|
944
|
+
"""Test that downloading a directory without trailing slash still works correctly.
|
|
945
|
+
|
|
946
|
+
User scenario: download('s3://bucket/my_dir') where my_dir is a directory.
|
|
947
|
+
The fix should:
|
|
948
|
+
1. Try head_object('my_dir') first
|
|
949
|
+
2. Get 404 (not a single file)
|
|
950
|
+
3. Add trailing slash and list with Prefix='my_dir/'
|
|
951
|
+
4. Only match 'my_dir/*', NOT 'my_dir_backup/*'
|
|
952
|
+
"""
|
|
953
|
+
mock_s3 = _setup_s3_mock(mock_boto_client)
|
|
954
|
+
|
|
955
|
+
# Mock paginator to return directory contents
|
|
956
|
+
mock_paginator = Mock()
|
|
957
|
+
mock_s3.get_paginator.return_value = mock_paginator
|
|
958
|
+
mock_paginator.paginate.return_value = [
|
|
959
|
+
{"Contents": [{"Key": "my_dir/file1.txt", "Size": 100}, {"Key": "my_dir/file2.txt", "Size": 200}]}
|
|
960
|
+
]
|
|
961
|
+
|
|
962
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
963
|
+
with s3.mirror(cache_root=tmpdir, show_progress=False):
|
|
964
|
+
mirror_obj = s3._require_active_mirror()
|
|
965
|
+
|
|
966
|
+
# User downloads directory without trailing slash (after normalization: key="my_dir")
|
|
967
|
+
results = list(mirror_obj._list_s3_objects("bucket", "my_dir", None))
|
|
968
|
+
|
|
969
|
+
# Should have tried head_object first
|
|
970
|
+
assert mock_s3.head_object.call_count == 1
|
|
971
|
+
head_call_key = mock_s3.head_object.call_args[1]["Key"]
|
|
972
|
+
assert head_call_key == "my_dir"
|
|
973
|
+
|
|
974
|
+
# After getting 404, should have listed with trailing slash
|
|
975
|
+
paginate_calls = mock_paginator.paginate.call_args_list
|
|
976
|
+
assert len(paginate_calls) == 1
|
|
977
|
+
prefix_used = paginate_calls[0][1]["Prefix"]
|
|
978
|
+
assert prefix_used == "my_dir/", f"Expected 'my_dir/' but got '{prefix_used}'"
|
|
979
|
+
|
|
980
|
+
# Should have returned the directory contents
|
|
981
|
+
assert len(results) == 2
|
|
982
|
+
|
|
983
|
+
|
|
874
984
|
class TestProfile:
|
|
875
985
|
def setup_method(self):
|
|
876
986
|
"""Clear registered profiles before each test."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|