pos3 0.2.0__tar.gz → 0.2.2__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pos3
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: S3 Simple Sync - Make using S3 as simple as using local files
5
5
  Author-email: Positronic Robotics <hi@positronic.ro>
6
6
  License: Apache-2.0
@@ -142,6 +142,17 @@ def _s3_paths_conflict(left: str, right: str) -> bool:
142
142
  return left_norm.startswith(right_norm + "/") or right_norm.startswith(left_norm + "/")
143
143
 
144
144
 
145
+ def _make_s3_key(prefix: str, info: FileInfo) -> str:
146
+ """Build the canonical S3 key from a prefix and FileInfo, including trailing '/' for directories."""
147
+ if info.relative_path:
148
+ key = prefix + "/" + info.relative_path if prefix else info.relative_path
149
+ else:
150
+ key = prefix
151
+ if info.is_dir and not key.endswith("/"):
152
+ key += "/"
153
+ return key
154
+
155
+
145
156
  def _process_futures(futures, operation: str) -> None:
146
157
  for future in futures:
147
158
  try:
@@ -618,12 +629,12 @@ class _Mirror:
618
629
  )
619
630
 
620
631
  for info in to_copy:
621
- s3_key = prefix + ("/" + info.relative_path if info.relative_path else "")
632
+ s3_key = _make_s3_key(prefix, info)
622
633
  to_put.append((info, local_path, bucket, s3_key, profile))
623
634
  total_bytes += info.size
624
635
 
625
636
  for info in to_delete if delete else []:
626
- s3_key = prefix + ("/" + info.relative_path if info.relative_path else "")
637
+ s3_key = _make_s3_key(prefix, info)
627
638
  to_remove.append((bucket, s3_key, profile))
628
639
 
629
640
  if to_put:
@@ -679,7 +690,7 @@ class _Mirror:
679
690
  total_bytes = 0
680
691
 
681
692
  for info in to_copy:
682
- s3_key = prefix + ("/" + info.relative_path if info.relative_path else "")
693
+ s3_key = _make_s3_key(prefix, info)
683
694
  to_put.append((info, bucket, s3_key, local_path))
684
695
  total_bytes += info.size
685
696
 
@@ -709,26 +720,35 @@ class _Mirror:
709
720
  def _list_s3_objects(self, bucket: str, key: str, profile: Profile | None = None) -> Iterator[dict]:
710
721
  logger.debug("Listing S3 objects: bucket=%s, key=%s", bucket, key)
711
722
  client = self._get_client(profile)
712
- # Skip head_object for directory-like keys ending with '/'
713
- # as we want to list contents, not check if the directory marker exists
714
- if not key.endswith("/"):
723
+
724
+ # Determine the listing prefix - ensure it ends with "/" for directory-like operations
725
+ # This prevents "droid/recovery" from matching "droid/recovery_towels"
726
+ list_prefix = key
727
+
728
+ # If key doesn't end with "/", try to fetch it as a single object first
729
+ if key and not key.endswith("/"):
715
730
  try:
716
731
  obj = client.head_object(Bucket=bucket, Key=key)
717
732
  except ClientError as exc:
718
733
  error_code = exc.response["Error"]["Code"]
719
734
  if error_code != "404":
720
735
  raise
736
+ # Not a single file - treat as directory by adding "/"
737
+ list_prefix = key + "/"
721
738
  else:
739
+ # Found single object
722
740
  logger.debug("Found single object via head_object: %s", key)
723
741
  if "ContentLength" in obj and "Size" not in obj:
724
742
  obj["Size"] = obj["ContentLength"]
725
743
  yield {**obj, "Key": key}
726
744
  return
745
+ # If key already ends with "/", skip head_object - it's clearly a directory prefix
727
746
 
747
+ # List with the directory prefix (ends with "/" unless key is empty, in which case the prefix stays empty)
728
748
  paginator = client.get_paginator("list_objects_v2")
729
- for page in paginator.paginate(Bucket=bucket, Prefix=key):
749
+ for page in paginator.paginate(Bucket=bucket, Prefix=list_prefix):
730
750
  objects = page.get("Contents", [])
731
- logger.debug("Listed %d objects with prefix %s", len(objects), key)
751
+ logger.debug("Listed %d objects with prefix %s", len(objects), list_prefix)
732
752
  yield from objects
733
753
 
734
754
  def _scan_s3(self, bucket: str, prefix: str, profile: Profile | None = None) -> Iterator[FileInfo]:
@@ -773,7 +793,6 @@ class _Mirror:
773
793
  try:
774
794
  client = self._get_client(profile)
775
795
  if info.is_dir:
776
- key += "/" if not key.endswith("/") else ""
777
796
  client.put_object(Bucket=bucket, Key=key, Body=b"")
778
797
  else:
779
798
  file_path = local_path / info.relative_path if info.relative_path else local_path
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pos3
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: S3 Simple Sync - Make using S3 as simple as using local files
5
5
  Author-email: Positronic Robotics <hi@positronic.ro>
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pos3"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "S3 Simple Sync - Make using S3 as simple as using local files"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -29,6 +29,43 @@ def _setup_s3_mock(mock_boto_client, paginate_return_value=None):
29
29
  return mock_s3
30
30
 
31
31
 
32
+ class TestMakeS3Key:
33
+ def test_file_with_prefix(self):
34
+ info = s3.FileInfo(relative_path="file.txt", size=100, is_dir=False)
35
+ assert s3._make_s3_key("data", info) == "data/file.txt"
36
+
37
+ def test_dir_with_prefix(self):
38
+ info = s3.FileInfo(relative_path="subdir", size=0, is_dir=True)
39
+ assert s3._make_s3_key("data", info) == "data/subdir/"
40
+
41
+ def test_root_dir(self):
42
+ info = s3.FileInfo(relative_path="", size=0, is_dir=True)
43
+ assert s3._make_s3_key("data", info) == "data/"
44
+
45
+ def test_empty_prefix_file(self):
46
+ info = s3.FileInfo(relative_path="file.txt", size=100, is_dir=False)
47
+ assert s3._make_s3_key("", info) == "file.txt"
48
+
49
+ def test_empty_prefix_dir(self):
50
+ info = s3.FileInfo(relative_path="subdir", size=0, is_dir=True)
51
+ assert s3._make_s3_key("", info) == "subdir/"
52
+
53
+ def test_nested_path(self):
54
+ info = s3.FileInfo(relative_path="a/b/c.txt", size=50, is_dir=False)
55
+ assert s3._make_s3_key("prefix", info) == "prefix/a/b/c.txt"
56
+
57
+ def test_nested_dir(self):
58
+ info = s3.FileInfo(relative_path="a/b", size=0, is_dir=True)
59
+ assert s3._make_s3_key("prefix", info) == "prefix/a/b/"
60
+
61
+ def test_dir_already_trailing_slash_prefix(self):
62
+ """Prefix with trailing slash in relative_path shouldn't double-slash."""
63
+ info = s3.FileInfo(relative_path="sub/", size=0, is_dir=True)
64
+ result = s3._make_s3_key("data", info)
65
+ assert result == "data/sub/"
66
+ assert "//" not in result
67
+
68
+
32
69
  class TestS3URLParsing:
33
70
  def test_parse_s3_url_valid(self):
34
71
  assert s3._parse_s3_url("s3://bucket/path/to/data") == (
@@ -159,6 +196,36 @@ class TestUpload:
159
196
  assert mock_s3.upload_file.call_count >= 1
160
197
  assert mock_s3.delete_object.call_count == 1
161
198
 
199
+ @patch(BOTO3_PATCH_TARGET)
200
+ def test_upload_delete_directory_marker_trailing_slash(self, mock_boto_client):
201
+ """Test that deleting a directory from S3 uses trailing slash to match directory markers."""
202
+ # S3 has a directory marker "output/subdir/" and a file "output/file.txt"
203
+ paginate = [
204
+ {
205
+ "Contents": [
206
+ {"Key": "output/subdir/", "Size": 0}, # Directory marker with trailing slash
207
+ {"Key": "output/file.txt", "Size": 5},
208
+ ]
209
+ }
210
+ ]
211
+ mock_s3 = _setup_s3_mock(mock_boto_client, paginate)
212
+
213
+ with tempfile.TemporaryDirectory() as tmpdir:
214
+ output = Path(tmpdir) / "output"
215
+ output.mkdir()
216
+ # Local only has file.txt - subdir was deleted locally
217
+ (output / "file.txt").write_text("content")
218
+
219
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
220
+ s3.upload("s3://bucket/output", local=output, interval=None)
221
+
222
+ # The directory marker should be deleted with trailing slash
223
+ delete_calls = mock_s3.delete_object.call_args_list
224
+ deleted_keys = [call[1]["Key"] for call in delete_calls]
225
+ assert "output/subdir/" in deleted_keys, (
226
+ f"Expected delete of 'output/subdir/' but got: {deleted_keys}"
227
+ )
228
+
162
229
  @patch(BOTO3_PATCH_TARGET)
163
230
  def test_background_sync_uploads_repeatedly(self, mock_boto_client):
164
231
  mock_s3 = _setup_s3_mock(mock_boto_client)
@@ -871,6 +938,116 @@ class TestExclude:
871
938
  assert "file.txt" in call_args[1]
872
939
 
873
940
 
941
+ class TestPrefixBoundaryMatching:
942
+ @patch(BOTO3_PATCH_TARGET)
943
+ def test_prefix_boundary_prevents_spurious_matches(self, mock_boto_client):
944
+ """Test that S3 prefix matching respects path boundaries.
945
+
946
+ When downloading s3://bucket/data/, should NOT match s3://bucket/data_backup/
947
+ This is a regression test for the bug where "droid/recovery" matched "droid/recovery_towels"
948
+ """
949
+ mock_s3 = _setup_s3_mock(mock_boto_client)
950
+
951
+ with tempfile.TemporaryDirectory() as tmpdir:
952
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
953
+ mirror_obj = s3._require_active_mirror()
954
+
955
+ # Simulate listing objects - should add "/" to prefix when key doesn't end with "/"
956
+ _ = list(mirror_obj._list_s3_objects("bucket", "data", None))
957
+
958
+ # Verify that paginate was called with "data/" (with trailing slash)
959
+ paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
960
+ assert len(paginator_calls) == 1
961
+ call_kwargs = paginator_calls[0][1]
962
+ assert (
963
+ call_kwargs["Prefix"] == "data/"
964
+ ), f"Expected Prefix='data/' but got Prefix='{call_kwargs['Prefix']}'"
965
+
966
+ @patch(BOTO3_PATCH_TARGET)
967
+ def test_prefix_boundary_with_trailing_slash(self, mock_boto_client):
968
+ """Test that keys already ending with '/' don't get double slashes."""
969
+ mock_s3 = _setup_s3_mock(mock_boto_client)
970
+
971
+ with tempfile.TemporaryDirectory() as tmpdir:
972
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
973
+ mirror_obj = s3._require_active_mirror()
974
+
975
+ # List with trailing slash already present
976
+ _ = list(mirror_obj._list_s3_objects("bucket", "data/", None))
977
+
978
+ # Should use "data/" as-is, not "data//"
979
+ paginator_calls = mock_s3.get_paginator.return_value.paginate.call_args_list
980
+ assert len(paginator_calls) == 1
981
+ call_kwargs = paginator_calls[0][1]
982
+ assert call_kwargs["Prefix"] == "data/"
983
+
984
+ @patch(BOTO3_PATCH_TARGET)
985
+ def test_single_file_download_bypasses_list(self, mock_boto_client):
986
+ """Test that single file downloads use head_object and don't list with prefix."""
987
+ mock_s3 = Mock()
988
+ mock_boto_client.return_value = mock_s3
989
+
990
+ # Mock head_object to return a valid file
991
+ mock_s3.head_object.return_value = {"ContentLength": 1234, "ETag": "abc123"}
992
+
993
+ with tempfile.TemporaryDirectory() as tmpdir:
994
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
995
+ mirror_obj = s3._require_active_mirror()
996
+
997
+ # List a single file (no trailing slash)
998
+ results = list(mirror_obj._list_s3_objects("bucket", "data/file.txt", None))
999
+
1000
+ # Should have called head_object and returned the file
1001
+ assert mock_s3.head_object.call_count == 1
1002
+ assert len(results) == 1
1003
+ assert results[0]["Key"] == "data/file.txt"
1004
+ assert results[0]["Size"] == 1234
1005
+
1006
+ # Should NOT have called paginate
1007
+ assert mock_s3.get_paginator.call_count == 0
1008
+
1009
+ @patch(BOTO3_PATCH_TARGET)
1010
+ def test_directory_without_trailing_slash_gets_slash_added(self, mock_boto_client):
1011
+ """Test that downloading a directory without trailing slash still works correctly.
1012
+
1013
+ User scenario: download('s3://bucket/my_dir') where my_dir is a directory.
1014
+ The fix should:
1015
+ 1. Try head_object('my_dir') first
1016
+ 2. Get 404 (not a single file)
1017
+ 3. Add trailing slash and list with Prefix='my_dir/'
1018
+ 4. Only match 'my_dir/*', NOT 'my_dir_backup/*'
1019
+ """
1020
+ mock_s3 = _setup_s3_mock(mock_boto_client)
1021
+
1022
+ # Mock paginator to return directory contents
1023
+ mock_paginator = Mock()
1024
+ mock_s3.get_paginator.return_value = mock_paginator
1025
+ mock_paginator.paginate.return_value = [
1026
+ {"Contents": [{"Key": "my_dir/file1.txt", "Size": 100}, {"Key": "my_dir/file2.txt", "Size": 200}]}
1027
+ ]
1028
+
1029
+ with tempfile.TemporaryDirectory() as tmpdir:
1030
+ with s3.mirror(cache_root=tmpdir, show_progress=False):
1031
+ mirror_obj = s3._require_active_mirror()
1032
+
1033
+ # User downloads directory without trailing slash (after normalization: key="my_dir")
1034
+ results = list(mirror_obj._list_s3_objects("bucket", "my_dir", None))
1035
+
1036
+ # Should have tried head_object first
1037
+ assert mock_s3.head_object.call_count == 1
1038
+ head_call_key = mock_s3.head_object.call_args[1]["Key"]
1039
+ assert head_call_key == "my_dir"
1040
+
1041
+ # After getting 404, should have listed with trailing slash
1042
+ paginate_calls = mock_paginator.paginate.call_args_list
1043
+ assert len(paginate_calls) == 1
1044
+ prefix_used = paginate_calls[0][1]["Prefix"]
1045
+ assert prefix_used == "my_dir/", f"Expected 'my_dir/' but got '{prefix_used}'"
1046
+
1047
+ # Should have returned the directory contents
1048
+ assert len(results) == 2
1049
+
1050
+
874
1051
  class TestProfile:
875
1052
  def setup_method(self):
876
1053
  """Clear registered profiles before each test."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes