pos3 0.2.0__tar.gz → 0.2.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pos3
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: S3 Simple Sync - Make using S3 as simple as using local files
5
5
  Author-email: Positronic Robotics <hi@positronic.ro>
6
6
  License: Apache-2.0
@@ -709,26 +709,35 @@ class _Mirror:
709
709
  def _list_s3_objects(self, bucket: str, key: str, profile: Profile | None = None) -> Iterator[dict]:
710
710
  logger.debug("Listing S3 objects: bucket=%s, key=%s", bucket, key)
711
711
  client = self._get_client(profile)
712
- # Skip head_object for directory-like keys ending with '/'
713
- # as we want to list contents, not check if the directory marker exists
714
- if not key.endswith("/"):
712
+
713
+ # Determine the listing prefix - ensure it ends with "/" for directory-like operations
714
+ # This prevents "droid/recovery" from matching "droid/recovery_towels"
715
+ list_prefix = key
716
+
717
+ # If key doesn't end with "/", try to fetch it as a single object first
718
+ if key and not key.endswith("/"):
715
719
  try:
716
720
  obj = client.head_object(Bucket=bucket, Key=key)
717
721
  except ClientError as exc:
718
722
  error_code = exc.response["Error"]["Code"]
719
723
  if error_code != "404":
720
724
  raise
725
+ # Not a single file - treat as directory by adding "/"
726
+ list_prefix = key + "/"
721
727
  else:
728
+ # Found single object
722
729
  logger.debug("Found single object via head_object: %s", key)
723
730
  if "ContentLength" in obj and "Size" not in obj:
724
731
  obj["Size"] = obj["ContentLength"]
725
732
  yield {**obj, "Key": key}
726
733
  return
734
+ # If key already ends with "/", skip head_object - it's clearly a directory prefix
727
735
 
736
+ # List with the directory prefix (guaranteed to end with "/")
728
737
  paginator = client.get_paginator("list_objects_v2")
729
- for page in paginator.paginate(Bucket=bucket, Prefix=key):
738
+ for page in paginator.paginate(Bucket=bucket, Prefix=list_prefix):
730
739
  objects = page.get("Contents", [])
731
- logger.debug("Listed %d objects with prefix %s", len(objects), key)
740
+ logger.debug("Listed %d objects with prefix %s", len(objects), list_prefix)
732
741
  yield from objects
733
742
 
734
743
  def _scan_s3(self, bucket: str, prefix: str, profile: Profile | None = None) -> Iterator[FileInfo]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pos3
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: S3 Simple Sync - Make using S3 as simple as using local files
5
5
  Author-email: Positronic Robotics <hi@positronic.ro>
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pos3"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "S3 Simple Sync - Make using S3 as simple as using local files"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -871,6 +871,116 @@ class TestExclude:
871
871
  assert "file.txt" in call_args[1]
872
872
 
873
873
 
874
+ class TestPrefixBoundaryMatching:
875
+ @patch(BOTO3_PATCH_TARGET)
876
+ def test_prefix_boundary_prevents_spurious_matches(self, mock_boto_client):
877
+ """Test that S3 prefix matching respects path boundaries.
878
+
879
+ When downloading s3://bucket/data/, should NOT match s3://bucket/data_backup/
880
+ This is a regression test for the bug where "droid/recovery" matched "droid/recovery_towels"
881
+ """
882
+ mock_s3 = _setup_s3_mock(mock_boto_client)
883
+
884
+ with tempfile.TemporaryDirectory() as tmpdir:
885
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
886
+ mirror_obj = s3._require_active_mirror()
887
+
888
+ # Simulate listing objects - should add "/" to prefix when key doesn't end with "/"
889
+ _ = list(mirror_obj._list_s3_objects("bucket", "data", None))
890
+
891
+ # Verify that paginate was called with "data/" (with trailing slash)
892
+ paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
893
+ assert len(paginator_calls) == 1
894
+ call_kwargs = paginator_calls[0][1]
895
+ assert (
896
+ call_kwargs["Prefix"] == "data/"
897
+ ), f"Expected Prefix='data/' but got Prefix='{call_kwargs['Prefix']}'"
898
+
899
+ @patch(BOTO3_PATCH_TARGET)
900
+ def test_prefix_boundary_with_trailing_slash(self, mock_boto_client):
901
+ """Test that keys already ending with '/' don't get double slashes."""
902
+ mock_s3 = _setup_s3_mock(mock_boto_client)
903
+
904
+ with tempfile.TemporaryDirectory() as tmpdir:
905
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
906
+ mirror_obj = s3._require_active_mirror()
907
+
908
+ # List with trailing slash already present
909
+ _ = list(mirror_obj._list_s3_objects("bucket", "data/", None))
910
+
911
+ # Should use "data/" as-is, not "data//"
912
+ paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
913
+ assert len(paginator_calls) == 1
914
+ call_kwargs = paginator_calls[0][1]
915
+ assert call_kwargs["Prefix"] == "data/"
916
+
917
+ @patch(BOTO3_PATCH_TARGET)
918
+ def test_single_file_download_bypasses_list(self, mock_boto_client):
919
+ """Test that single file downloads use head_object and don't list with prefix."""
920
+ mock_s3 = Mock()
921
+ mock_boto_client.return_value = mock_s3
922
+
923
+ # Mock head_object to return a valid file
924
+ mock_s3.head_object.return_value = {"ContentLength": 1234, "ETag": "abc123"}
925
+
926
+ with tempfile.TemporaryDirectory() as tmpdir:
927
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
928
+ mirror_obj = s3._require_active_mirror()
929
+
930
+ # List a single file (no trailing slash)
931
+ results = list(mirror_obj._list_s3_objects("bucket", "data/file.txt", None))
932
+
933
+ # Should have called head_object and returned the file
934
+ assert mock_s3.head_object.call_count == 1
935
+ assert len(results) == 1
936
+ assert results[0]["Key"] == "data/file.txt"
937
+ assert results[0]["Size"] == 1234
938
+
939
+ # Should NOT have called paginate
940
+ assert mock_s3.get_paginator.call_count == 0
941
+
942
+ @patch(BOTO3_PATCH_TARGET)
943
+ def test_directory_without_trailing_slash_gets_slash_added(self, mock_boto_client):
944
+ """Test that downloading a directory without trailing slash still works correctly.
945
+
946
+ User scenario: download('s3://bucket/my_dir') where my_dir is a directory.
947
+ The fix should:
948
+ 1. Try head_object('my_dir') first
949
+ 2. Get 404 (not a single file)
950
+ 3. Add trailing slash and list with Prefix='my_dir/'
951
+ 4. Only match 'my_dir/*', NOT 'my_dir_backup/*'
952
+ """
953
+ mock_s3 = _setup_s3_mock(mock_boto_client)
954
+
955
+ # Mock paginator to return directory contents
956
+ mock_paginator = Mock()
957
+ mock_s3.get_paginator.return_value = mock_paginator
958
+ mock_paginator.paginate.return_value = [
959
+ {"Contents": [{"Key": "my_dir/file1.txt", "Size": 100}, {"Key": "my_dir/file2.txt", "Size": 200}]}
960
+ ]
961
+
962
+ with tempfile.TemporaryDirectory() as tmpdir:
963
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
964
+ mirror_obj = s3._require_active_mirror()
965
+
966
+ # User downloads directory without trailing slash (after normalization: key="my_dir")
967
+ results = list(mirror_obj._list_s3_objects("bucket", "my_dir", None))
968
+
969
+ # Should have tried head_object first
970
+ assert mock_s3.head_object.call_count == 1
971
+ head_call_key = mock_s3.head_object.call_args[1]["Key"]
972
+ assert head_call_key == "my_dir"
973
+
974
+ # After getting 404, should have listed with trailing slash
975
+ paginate_calls = mock_paginator.paginate.call_args_list
976
+ assert len(paginate_calls) == 1
977
+ prefix_used = paginate_calls[0][1]["Prefix"]
978
+ assert prefix_used == "my_dir/", f"Expected 'my_dir/' but got '{prefix_used}'"
979
+
980
+ # Should have returned the directory contents
981
+ assert len(results) == 2
982
+
983
+
874
984
  class TestProfile:
875
985
  def setup_method(self):
876
986
  """Clear registered profiles before each test."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes