plexus-python-common 1.0.32__tar.gz → 1.0.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/PKG-INFO +1 -1
  2. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/s3utils.py +111 -49
  3. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/strutils.py +19 -0
  4. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/PKG-INFO +1 -1
  5. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/s3utils_test.py +4 -5
  6. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/strutils_test.py +150 -0
  7. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/.editorconfig +0 -0
  8. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/.github/workflows/pr.yml +0 -0
  9. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/.github/workflows/push.yml +0 -0
  10. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/.gitignore +0 -0
  11. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/MANIFEST.in +0 -0
  12. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/README.md +0 -0
  13. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/VERSION +0 -0
  14. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/pyproject.toml +0 -0
  15. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.0.jsonl +0 -0
  16. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.1.jsonl +0 -0
  17. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.2.jsonl +0 -0
  18. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.bar.baz +0 -0
  19. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.bar +0 -0
  20. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.baz +0 -0
  21. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/dir.foo.bar.baz/file.foo.bar.baz +0 -0
  22. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.bar.baz +0 -0
  23. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.bar +0 -0
  24. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.baz +0 -0
  25. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.bar +0 -0
  26. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.baz +0 -0
  27. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.foo +0 -0
  28. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.compressed.zip +0 -0
  29. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.uncompressed.zip +0 -0
  30. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/0-dummy +0 -0
  31. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/1-dummy +0 -0
  32. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/2-dummy +0 -0
  33. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.jsonl +0 -0
  34. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.vol-0.jsonl +0 -0
  35. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.jsonl +0 -0
  36. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.jsonl +0 -0
  37. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.vol-1.jsonl +0 -0
  38. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.jsonl +0 -0
  39. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.jsonl +0 -0
  40. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.vol-2.jsonl +0 -0
  41. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.jsonl +0 -0
  42. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part0 +0 -0
  43. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part1 +0 -0
  44. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part2 +0 -0
  45. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.txt +0 -0
  46. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/setup.cfg +0 -0
  47. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/setup.py +0 -0
  48. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/__init__.py +0 -0
  49. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMFile.py +0 -0
  50. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMNode.py +0 -0
  51. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMTags.py +0 -0
  52. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMWay.py +0 -0
  53. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/carto/__init__.py +0 -0
  54. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/pose.py +0 -0
  55. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/proj.py +0 -0
  56. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/__init__.py +0 -0
  57. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/apiutils.py +0 -0
  58. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/bagutils.py +0 -0
  59. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/config.py +0 -0
  60. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/datautils.py +0 -0
  61. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/jsonutils.py +0 -0
  62. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/ormutils.py +0 -0
  63. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/shutils.py +0 -0
  64. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/sqlutils.py +0 -0
  65. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus/common/utils/testutils.py +0 -0
  66. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/SOURCES.txt +0 -0
  67. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/dependency_links.txt +0 -0
  68. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/not-zip-safe +0 -0
  69. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/requires.txt +0 -0
  70. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/top_level.txt +0 -0
  71. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_test.py +0 -0
  72. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/__init__.py +0 -0
  73. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_file_test.py +0 -0
  74. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_tags_test.py +0 -0
  75. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/pose_test.py +0 -0
  76. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/proj_test.py +0 -0
  77. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/bagutils_test.py +0 -0
  78. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/datautils_test.py +0 -0
  79. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/jsonutils_test.py +0 -0
  80. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/ormutils_test.py +0 -0
  81. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/shutils_test.py +0 -0
  82. {plexus_python_common-1.0.32 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/testutils_test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plexus-python-common
3
- Version: 1.0.32
3
+ Version: 1.0.33
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.12
6
6
  Classifier: Programming Language :: Python :: 3.13
@@ -37,11 +37,14 @@ __all__ = [
37
37
  "s3_pull_text",
38
38
  "s3_push_text",
39
39
  "S3TransferCallbackClient",
40
+ "ArchiveMemberChunk",
40
41
  "s3_make_progress_callback",
41
42
  "s3_make_progressed_client",
42
43
  "s3_archive_member_tree",
43
44
  "s3_archive_listfile",
44
45
  "s3_archive_open_member",
46
+ "s3_archive_use_ranged_requests",
47
+ "s3_archive_use_chunked_reads",
45
48
  "s3_archive_open_members",
46
49
  ]
47
50
 
@@ -658,17 +661,86 @@ def s3_archive_open_member(
658
661
  yield s3_fh
659
662
 
660
663
 
661
- ZIPFILE_HEADER_MIN_SIZE = 30
662
- ZIPFILE_HEADER_FN_LEN_OFFSET = 26
663
- ZIPFILE_HEADER_EX_LEN_OFFSET = 28
664
+ ZIP_CENTRAL_DIR_ESTIMATED_SIZE = 64 * 1024
665
+ ZIP_INFO_HDR_MIN_SIZE = 30
666
+ ZIP_INFO_HDR_ESTIMATED_SIZE = 128
667
+ ZIP_INFO_HDR_FN_LEN_OFFSET = 26
668
+ ZIP_INFO_HDR_EX_LEN_OFFSET = 28
664
669
 
665
670
 
666
- def zipfile_hdr_fn_len_slice(offset: int) -> slice:
667
- return slice(offset + ZIPFILE_HEADER_FN_LEN_OFFSET, offset + ZIPFILE_HEADER_FN_LEN_OFFSET + 2)
671
+ @dataclasses.dataclass(frozen=True)
672
+ class ArchiveMemberChunk(object):
673
+ name: str
674
+ header_offset: int
675
+ compress_size: int
676
+ compress_type: int
677
+ header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE
668
678
 
679
+ @property
680
+ def begin(self) -> int:
681
+ return self.header_offset
669
682
 
670
- def zipfile_hdr_ex_len_slice(offset: int) -> slice:
671
- return slice(offset + ZIPFILE_HEADER_EX_LEN_OFFSET, offset + ZIPFILE_HEADER_EX_LEN_OFFSET + 2)
683
+ @property
684
+ def end(self) -> int:
685
+ return self.header_offset + self.header_overhead + self.compress_size
686
+
687
+
688
+ def s3_archive_use_ranged_requests(
689
+ threshold: float = 0.5,
690
+ central_directory_overhead: int = 64 * ZIP_CENTRAL_DIR_ESTIMATED_SIZE,
691
+ zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
692
+ ) -> Callable[[int, list[zipfile.ZipInfo]], bool]:
693
+ """
694
+ Decide whether to use ranged requests for accessing members of a ZIP archive in S3
695
+ based on estimated transfer size ratio.
696
+
697
+ :param threshold: If (estimated ranged transfer bytes / archive bytes) <= threshold, use ranged per-member access;
698
+ otherwise download the whole archive.
699
+ :param central_directory_overhead: Estimated overhead size for the central directory in bytes.
700
+ :param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
701
+ :return: A callable that takes (``archive_size``, ``member_zip_infos``) and returns a boolean
702
+ indicating whether to use ranged requests.
703
+ """
704
+
705
+ if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
706
+ raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
707
+
708
+ def use_ranged_requests(
709
+ archive_size: int,
710
+ member_zip_infos: list[zipfile.ZipInfo],
711
+ ) -> bool:
712
+ estimated_ranged_total_size = central_directory_overhead + sum(info.compress_size + zip_info_header_overhead
713
+ for info in member_zip_infos)
714
+
715
+ # Avoid division by zero; prefer ranged if archive size is zero (degenerate case)
716
+ return (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
717
+
718
+ return use_ranged_requests
719
+
720
+
721
+ def s3_archive_use_chunked_reads(
722
+ zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
723
+ ) -> Callable[[zipfile.ZipInfo], ArchiveMemberChunk]:
724
+ """
725
+ Map each ``ZipInfo`` to an ``ArchiveMemberChunk`` for grouping adjacent members into single ranged reads.
726
+
727
+ :param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
728
+ :return: A callable that takes a ``ZipInfo`` and returns an ``ArchiveMemberChunk``.
729
+ """
730
+
731
+ if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
732
+ raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
733
+
734
+ def use_chunked_reads(
735
+ zip_info: zipfile.ZipInfo,
736
+ ) -> ArchiveMemberChunk:
737
+ return ArchiveMemberChunk(zip_info.filename,
738
+ zip_info.header_offset,
739
+ zip_info.compress_size,
740
+ zip_info.compress_type,
741
+ header_overhead=zip_info_header_overhead)
742
+
743
+ return use_chunked_reads
672
744
 
673
745
 
674
746
  def s3_archive_open_members(
@@ -678,10 +750,8 @@ def s3_archive_open_members(
678
750
  members: list[str] | None = None,
679
751
  mode: Literal["r", "rb"] = "r",
680
752
  *,
681
- threshold: float = 0.5,
682
- central_directory_overhead: int = 64 * 1024,
683
- member_header_overhead: int = 128,
684
- use_chunked_reads: bool = False,
753
+ use_ranged_requests: bool | Callable[[int, list[zipfile.ZipInfo]], bool] = True,
754
+ use_chunked_reads: bool | Callable[[zipfile.ZipInfo], ArchiveMemberChunk] = False,
685
755
  ) -> Generator[tuple[str, Callable[[], typing.IO]], None, None]:
686
756
  """
687
757
  Choose the best transfer strategy (ranged requests per-member vs full archive transfer)
@@ -712,44 +782,30 @@ def s3_archive_open_members(
712
782
  :param key: Object key of the ZIP archive.
713
783
  :param members: List of member names to stream.
714
784
  :param mode: File mode for opening members ("r" for text, "rb" for binary).
715
- :param threshold: If (estimated ranged transfer bytes / archive bytes) <= threshold,
716
- use ranged per-member access; otherwise download the whole archive.
717
- :param central_directory_overhead: Passed to s3_estimate_archive_ranged_requests.
718
- :param member_header_overhead: Passed to s3_estimate_archive_ranged_requests.
719
- :param use_chunked_reads: If ``True`` and ranged access is chosen, group adjacent members into single ranged reads.
785
+ :param use_ranged_requests: If ``True``, always use ranged requests to access the archive; if callable, use it as
786
+ custom logic to decide based on the archive size and member infos; if ``False``, always
787
+ download the whole archive.
788
+ :param use_chunked_reads: If ``True``, group adjacent members into single ranged reads using default ``ZipInfo``
789
+ to ``ArchiveMemberChunk`` mapping; if callable, use it as custom mapping from
790
+ ``ZipInfo`` to ``ArchiveMemberChunk`` for grouping adjacent members; if ``False``,
791
+ read each member with individual ranged requests. If ``use_ranged_requests`` is ``False``,
792
+ this parameter is ignored.
720
793
 
721
794
  :return: An iterable of callables that return file-like objects for each requested member.
722
795
  """
723
796
  if mode not in ("r", "rb"):
724
797
  raise ValueError("mode must be either 'r' or 'rb'")
725
- if member_header_overhead < ZIPFILE_HEADER_MIN_SIZE:
726
- raise ValueError(f"member_header_overhead must be at least {ZIPFILE_HEADER_MIN_SIZE} bytes")
727
798
 
728
799
  s3_options = s3_options_from_s3_client(client)
729
800
 
730
801
  archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, bucket, key, members)
802
+ if missed_members:
803
+ raise FileNotFoundError(f"Archive members not found: {', '.join(missed_members)}")
731
804
 
732
- if len(missed_members) > 0:
733
- raise FileNotFoundError(f"members not found in archive '{missed_members}'")
734
-
735
- estimated_ranged_total_size = central_directory_overhead + sum(info.compress_size + member_header_overhead
736
- for info in member_zip_infos)
737
-
738
- # Avoid division by zero; prefer ranged if archive size is zero (degenerate case)
739
- use_ranged = (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
805
+ if callable(use_ranged_requests):
806
+ use_ranged_requests = use_ranged_requests(archive_size, member_zip_infos)
740
807
 
741
- @dataclasses.dataclass(frozen=True)
742
- class MemberChunk(object):
743
- name: str
744
- header_offset: int
745
- compress_size: int
746
- compress_type: int
747
-
748
- @property
749
- def end(self) -> int:
750
- return self.header_offset + self.compress_size + member_header_overhead
751
-
752
- if use_ranged and not use_chunked_reads:
808
+ if use_ranged_requests and not use_chunked_reads:
753
809
  for info in member_zip_infos:
754
810
  opener = functools.partial(s3_archive_open_member, client, bucket, key, info.filename, mode)
755
811
  yield info.filename, opener
@@ -757,17 +813,23 @@ def s3_archive_open_members(
757
813
 
758
814
  archive_url = f"s3://{bucket}/{key}"
759
815
 
760
- if use_ranged and use_chunked_reads:
816
+ if use_ranged_requests and use_chunked_reads:
817
+
818
+ fn_len_slice = lambda index: slice(index + ZIP_INFO_HDR_FN_LEN_OFFSET, index + ZIP_INFO_HDR_FN_LEN_OFFSET + 2)
819
+ ex_len_slice = lambda index: slice(index + ZIP_INFO_HDR_EX_LEN_OFFSET, index + ZIP_INFO_HDR_EX_LEN_OFFSET + 2)
820
+
761
821
  # Open archive once to read central directory and gather ZipInfo for requested members.
762
822
  # We will group adjacent members (by local header offsets) and issue one ranged read per group,
763
823
  # then extract each member from the group's bytes to avoid many small ranged requests.
764
824
  with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
825
+ if callable(use_chunked_reads):
826
+ chunks = [use_chunked_reads(info) for info in member_zip_infos]
827
+ else:
828
+ chunks = [ArchiveMemberChunk(info.filename, info.header_offset, info.compress_size, info.compress_type)
829
+ for info in member_zip_infos]
765
830
 
766
- chunks = [MemberChunk(info.filename, info.header_offset, info.compress_size, info.compress_type)
767
- for info in member_zip_infos]
768
-
769
- chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.header_offset),
770
- chunk_func=lambda x, y: y.header_offset > x.end + member_header_overhead)
831
+ chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.begin),
832
+ chunk_func=lambda x, y: y.begin > x.end)
771
833
 
772
834
  # For each group, create openers for members inside the group.
773
835
  for group in chunks_groups:
@@ -778,18 +840,18 @@ def s3_archive_open_members(
778
840
  s3_fh.seek(group_offset)
779
841
  group_bytes = s3_fh.read(group_size)
780
842
 
781
- def make_opener(chunk: MemberChunk) -> Callable[[], typing.IO]:
843
+ def make_opener(chunk: ArchiveMemberChunk) -> Callable[[], typing.IO]:
782
844
 
783
845
  def opener() -> typing.IO:
784
846
  index = chunk.header_offset - group_offset
785
847
 
786
- if index + ZIPFILE_HEADER_MIN_SIZE > len(group_bytes):
848
+ if index + ZIP_INFO_HDR_MIN_SIZE > len(group_bytes):
787
849
  raise IOError("unexpected short read of member header")
788
850
 
789
- fn_len = int.from_bytes(group_bytes[zipfile_hdr_fn_len_slice(index)], "little")
790
- ex_len = int.from_bytes(group_bytes[zipfile_hdr_ex_len_slice(index)], "little")
851
+ fn_len = int.from_bytes(group_bytes[fn_len_slice(index)], "little")
852
+ ex_len = int.from_bytes(group_bytes[ex_len_slice(index)], "little")
791
853
 
792
- raw_data_begin = index + ZIPFILE_HEADER_MIN_SIZE + fn_len + ex_len
854
+ raw_data_begin = index + ZIP_INFO_HDR_MIN_SIZE + fn_len + ex_len
793
855
  raw_data_end = raw_data_begin + chunk.compress_size
794
856
 
795
857
  if raw_data_end > len(group_bytes):
@@ -22,6 +22,10 @@ __all__ = [
22
22
  "strict_relpath_parser",
23
23
  "strict_abspath_pattern",
24
24
  "strict_abspath_parser",
25
+ "strict_archive_relpath_pattern",
26
+ "strict_archive_relpath_parser",
27
+ "strict_archive_abspath_pattern",
28
+ "strict_archive_abspath_parser",
25
29
  "email_address_pattern",
26
30
  "email_address_parser",
27
31
  "semver_pattern",
@@ -82,6 +86,7 @@ period_token: pp.ParserElement = pp.Char(".")
82
86
  colon_token: pp.ParserElement = pp.Char(":")
83
87
  slash_token: pp.ParserElement = pp.Char("/")
84
88
  plus_token: pp.ParserElement = pp.Char("+")
89
+ sharp_token: pp.ParserElement = pp.Char("#")
85
90
 
86
91
  lower_regexp: re.Pattern[str] = re.compile(r"[a-z]")
87
92
  upper_regexp: re.Pattern[str] = re.compile(r"[A-Z]")
@@ -155,6 +160,10 @@ strict_relpath_regexp: re.Pattern[str] = re.compile(
155
160
  rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
156
161
  strict_abspath_regexp: re.Pattern[str] = re.compile(
157
162
  rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
163
+ strict_archive_relpath_regexp: re.Pattern[str] = re.compile(
164
+ rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
165
+ strict_archive_abspath_regexp: re.Pattern[str] = re.compile(
166
+ rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
158
167
 
159
168
  strict_path_chars_element = strict_chars_element.copy()
160
169
  strict_path_chars_element.add_condition(token_reparse(period_token[1, ...], negate=True),
@@ -164,6 +173,12 @@ strict_relpath_element: pp.ParserElement = pp.Combine(
164
173
  (strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
165
174
  strict_abspath_element: pp.ParserElement = pp.Combine(
166
175
  slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
176
+ strict_archive_relpath_element: pp.ParserElement = pp.Combine(
177
+ (strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
178
+ (sharp_token + strict_relpath_element)[0, 1])
179
+ strict_archive_abspath_element: pp.ParserElement = pp.Combine(
180
+ slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
181
+ (sharp_token + strict_relpath_element)[0, 1])
167
182
 
168
183
  email_address_regexp: re.Pattern[str] = re.compile(
169
184
  rf"(({lower_digit_regexp.pattern}|[_-])+)(?:\.({lower_digit_regexp.pattern}|[_-])+)*@(?:{kebab_case_regexp.pattern}\.)+({lower_digit_regexp.pattern}{{2,63}})")
@@ -206,6 +221,10 @@ strict_relpath_pattern = make_string_pattern(strict_relpath_regexp)
206
221
  strict_relpath_parser = make_string_parser(strict_relpath_element)
207
222
  strict_abspath_pattern = make_string_pattern(strict_abspath_regexp)
208
223
  strict_abspath_parser = make_string_parser(strict_abspath_element)
224
+ strict_archive_relpath_pattern = make_string_pattern(strict_archive_relpath_regexp)
225
+ strict_archive_relpath_parser = make_string_parser(strict_archive_relpath_element)
226
+ strict_archive_abspath_pattern = make_string_pattern(strict_archive_abspath_regexp)
227
+ strict_archive_abspath_parser = make_string_parser(strict_archive_abspath_element)
209
228
 
210
229
  email_address_pattern = make_string_pattern(email_address_regexp)
211
230
  email_address_parser = make_string_parser(email_address_element)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plexus-python-common
3
- Version: 1.0.32
3
+ Version: 1.0.33
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.12
6
6
  Classifier: Programming Language :: Python :: 3.13
@@ -575,12 +575,12 @@ class S3UtilsTest(unittest.TestCase):
575
575
  local_root = os.path.join(resources_directory, "unittest", "s3utils")
576
576
  local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
577
577
 
578
- for archive_key, members, mode, threshold, use_chunked_reads in itertools.product(
578
+ for archive_key, members, mode, use_ranged_requests, use_chunked_reads in itertools.product(
579
579
  ["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
580
580
  [local_members, None],
581
581
  ["r", "rb"],
582
- [1.0, 0.0],
583
- [False, True],
582
+ [False, True, s3_archive_use_ranged_requests()],
583
+ [False, True, s3_archive_use_chunked_reads()],
584
584
  ):
585
585
  for member, opener in s3_archive_open_members(
586
586
  client,
@@ -588,8 +588,7 @@ class S3UtilsTest(unittest.TestCase):
588
588
  archive_key,
589
589
  members,
590
590
  mode,
591
- threshold=threshold,
592
- central_directory_overhead=0,
591
+ use_ranged_requests=use_ranged_requests,
593
592
  use_chunked_reads=use_chunked_reads,
594
593
  ):
595
594
  with open(os.path.join(local_root, member), mode) as local_fh, opener() as s3_fh:
@@ -13,6 +13,8 @@ from plexus.common.utils.strutils import kebab_case_parser, kebab_case_pattern
13
13
  from plexus.common.utils.strutils import parse_bag_name, parse_user_email, parse_user_name, parse_vehicle_name
14
14
  from plexus.common.utils.strutils import snake_case_parser, snake_case_pattern
15
15
  from plexus.common.utils.strutils import strict_abspath_parser, strict_abspath_pattern
16
+ from plexus.common.utils.strutils import strict_archive_abspath_parser, strict_archive_abspath_pattern
17
+ from plexus.common.utils.strutils import strict_archive_relpath_parser, strict_archive_relpath_pattern
16
18
  from plexus.common.utils.strutils import strict_relpath_parser, strict_relpath_pattern
17
19
  from plexus.common.utils.strutils import topic_parser, topic_pattern
18
20
  from plexus.common.utils.strutils import vin_code_parser, vin_code_pattern
@@ -377,6 +379,154 @@ class StrUtilsTest(unittest.TestCase):
377
379
  with self.assertRaises(pp.ParseException):
378
380
  strict_abspath_parser.parse_string(data, parse_all=True)
379
381
 
382
+ data_strict_archive_relpath_pattern = [
383
+ ("archive.zip",),
384
+ ("directory/archive.zip",),
385
+ ("directory/dummy/archive.zip",),
386
+ ("directory/dummy/path/archive.zip",),
387
+ ("directory/dummy/path/to/archive.zip",),
388
+ ("directory/dummy/path_to-archive.zip",),
389
+ ("directory/dummy/.path_to-archive.zip",),
390
+ ("archive.zip#",),
391
+ ("archive.zip#directory",),
392
+ ("archive.zip#directory/",),
393
+ ("directory/archive.zip#directory/dummy",),
394
+ ("directory/archive.zip#directory/dummy/",),
395
+ ("directory/dummy/archive.zip#directory/dummy/path",),
396
+ ("directory/dummy/path/archive.zip#directory/dummy/path/to",),
397
+ ("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
398
+ ("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
399
+ ("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
400
+ ]
401
+
402
+ @ddt.idata(data_strict_archive_relpath_pattern)
403
+ @ddt.unpack
404
+ def test_strict_archive_relpath_pattern(self, data):
405
+ self.assertIsNotNone(strict_archive_relpath_pattern.match(data))
406
+ self.assertIsNotNone(strict_archive_relpath_parser.parse_string(data, parse_all=True))
407
+
408
+ data_strict_archive_relpath_pattern__bad_cases = [
409
+ ("/archive.zip",),
410
+ ("/directory/archive.zip",),
411
+ ("/directory/dummy/archive.zip",),
412
+ ("/directory/dummy/path/archive.zip",),
413
+ ("/directory/dummy/path/to/archive.zip",),
414
+ ("/directory/dummy/path_to-archive.zip",),
415
+ ("/directory/dummy/.path_to-archive.zip",),
416
+ ("/archive.zip#directory",),
417
+ ("/archive.zip#directory/",),
418
+ ("/directory/archive.zip#directory/dummy",),
419
+ ("/directory/archive.zip#directory/dummy/",),
420
+ ("/directory/dummy/archive.zip#directory/dummy/path",),
421
+ ("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
422
+ ("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
423
+ ("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
424
+ ("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
425
+ ("archive.zip/",),
426
+ ("directory/archive.zip/",),
427
+ ("directory/dummy/archive.zip/",),
428
+ ("directory/dummy/path/archive.zip/",),
429
+ ("directory/dummy/path/to/archive.zip/",),
430
+ ("directory/dummy/path_to-archive.zip/",),
431
+ ("directory/dummy/.path_to-archive.zip/",),
432
+ ("archive.zip#/directory",),
433
+ ("archive.zip#/directory/",),
434
+ ("directory/archive.zip#/directory/dummy",),
435
+ ("directory/archive.zip#/directory/dummy/",),
436
+ ("directory/dummy/archive.zip#/directory/dummy/path",),
437
+ ("directory/dummy/path/archive.zip#/directory/dummy/path/to",),
438
+ ("directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
439
+ ("directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
440
+ ("directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
441
+ ("directory/archive.zip#directory/./dummy",),
442
+ ("directory/archive.zip#directory/../dummy",),
443
+ ("directory/archive.zip#directory/.../dummy",),
444
+ ("directory/archive.zip#directory/\t/dummy",),
445
+ ("directory/archive.zip#directory/\r/dummy",),
446
+ ("directory/archive.zip#directory/\n/dummy",),
447
+ ]
448
+
449
+ @ddt.idata(data_strict_archive_relpath_pattern__bad_cases)
450
+ @ddt.unpack
451
+ def test_strict_archive_relpath_pattern__bad_cases(self, data):
452
+ self.assertIsNone(strict_archive_relpath_pattern.match(data))
453
+ with self.assertRaises(pp.ParseException):
454
+ strict_archive_relpath_parser.parse_string(data, parse_all=True)
455
+
456
+ data_strict_archive_abspath_pattern = [
457
+ ("/archive.zip",),
458
+ ("/directory/archive.zip",),
459
+ ("/directory/dummy/archive.zip",),
460
+ ("/directory/dummy/path/archive.zip",),
461
+ ("/directory/dummy/path/to/archive.zip",),
462
+ ("/directory/dummy/path_to-archive.zip",),
463
+ ("/directory/dummy/.path_to-archive.zip",),
464
+ ("/archive.zip#",),
465
+ ("/archive.zip#directory",),
466
+ ("/archive.zip#directory/",),
467
+ ("/directory/archive.zip#directory/dummy",),
468
+ ("/directory/archive.zip#directory/dummy/",),
469
+ ("/directory/dummy/archive.zip#directory/dummy/path",),
470
+ ("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
471
+ ("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
472
+ ("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
473
+ ("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
474
+ ]
475
+
476
+ @ddt.idata(data_strict_archive_abspath_pattern)
477
+ @ddt.unpack
478
+ def test_strict_archive_abspath_pattern(self, data):
479
+ self.assertIsNotNone(strict_archive_abspath_pattern.match(data))
480
+ self.assertIsNotNone(strict_archive_abspath_parser.parse_string(data, parse_all=True))
481
+
482
+ data_strict_archive_abspath_pattern__bad_cases = [
483
+ ("archive.zip",),
484
+ ("directory/archive.zip",),
485
+ ("directory/dummy/archive.zip",),
486
+ ("directory/dummy/path/archive.zip",),
487
+ ("directory/dummy/path/to/archive.zip",),
488
+ ("directory/dummy/path_to-archive.zip",),
489
+ ("directory/dummy/.path_to-archive.zip",),
490
+ ("archive.zip#directory",),
491
+ ("archive.zip#directory/",),
492
+ ("directory/archive.zip#directory/dummy",),
493
+ ("directory/archive.zip#directory/dummy/",),
494
+ ("directory/dummy/archive.zip#directory/dummy/path",),
495
+ ("directory/dummy/path/archive.zip#directory/dummy/path/to",),
496
+ ("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
497
+ ("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
498
+ ("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
499
+ ("/archive.zip/",),
500
+ ("/directory/archive.zip/",),
501
+ ("/directory/dummy/archive.zip/",),
502
+ ("/directory/dummy/path/archive.zip/",),
503
+ ("/directory/dummy/path/to/archive.zip/",),
504
+ ("/directory/dummy/path_to-archive.zip/",),
505
+ ("/directory/dummy/.path_to-archive.zip/",),
506
+ ("/archive.zip#/directory",),
507
+ ("/archive.zip#/directory/",),
508
+ ("/directory/archive.zip#/directory/dummy",),
509
+ ("/directory/archive.zip#/directory/dummy/",),
510
+ ("/directory/dummy/archive.zip#/directory/dummy/path",),
511
+ ("/directory/dummy/path/archive.zip#/directory/dummy/path/to",),
512
+ ("/directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
513
+ ("/directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
514
+ ("/directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
515
+ ("/directory/archive.zip#directory/./dummy",),
516
+ ("/directory/archive.zip#directory/../dummy",),
517
+ ("/directory/archive.zip#directory/.../dummy",),
518
+ ("/directory/archive.zip#directory/\t/dummy",),
519
+ ("/directory/archive.zip#directory/\r/dummy",),
520
+ ("/directory/archive.zip#directory/\n/dummy",),
521
+ ]
522
+
523
+ @ddt.idata(data_strict_archive_abspath_pattern__bad_cases)
524
+ @ddt.unpack
525
+ def test_strict_archive_abspath_pattern__bad_cases(self, data):
526
+ self.assertIsNone(strict_archive_abspath_pattern.match(data))
527
+ with self.assertRaises(pp.ParseException):
528
+ strict_archive_abspath_parser.parse_string(data, parse_all=True)
529
+
380
530
  data_email_address_pattern = [
381
531
  ("someone@dummy.com",),
382
532
  ("some.one@dummy.com",),