plexus-python-common 1.0.31__tar.gz → 1.0.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/PKG-INFO +1 -1
  2. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/s3utils.py +219 -125
  3. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/strutils.py +19 -0
  4. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/PKG-INFO +1 -1
  5. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/s3utils_test.py +22 -23
  6. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/strutils_test.py +150 -0
  7. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.editorconfig +0 -0
  8. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.github/workflows/pr.yml +0 -0
  9. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.github/workflows/push.yml +0 -0
  10. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.gitignore +0 -0
  11. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/MANIFEST.in +0 -0
  12. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/README.md +0 -0
  13. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/VERSION +0 -0
  14. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/pyproject.toml +0 -0
  15. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.0.jsonl +0 -0
  16. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.1.jsonl +0 -0
  17. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.2.jsonl +0 -0
  18. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.bar.baz +0 -0
  19. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.bar +0 -0
  20. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.baz +0 -0
  21. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/dir.foo.bar.baz/file.foo.bar.baz +0 -0
  22. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.bar.baz +0 -0
  23. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.bar +0 -0
  24. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.baz +0 -0
  25. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.bar +0 -0
  26. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.baz +0 -0
  27. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.foo +0 -0
  28. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.compressed.zip +0 -0
  29. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.uncompressed.zip +0 -0
  30. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/0-dummy +0 -0
  31. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/1-dummy +0 -0
  32. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/2-dummy +0 -0
  33. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.jsonl +0 -0
  34. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.vol-0.jsonl +0 -0
  35. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.jsonl +0 -0
  36. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.jsonl +0 -0
  37. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.vol-1.jsonl +0 -0
  38. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.jsonl +0 -0
  39. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.jsonl +0 -0
  40. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.vol-2.jsonl +0 -0
  41. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.jsonl +0 -0
  42. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part0 +0 -0
  43. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part1 +0 -0
  44. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part2 +0 -0
  45. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.txt +0 -0
  46. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/setup.cfg +0 -0
  47. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/setup.py +0 -0
  48. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/__init__.py +0 -0
  49. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMFile.py +0 -0
  50. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMNode.py +0 -0
  51. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMTags.py +0 -0
  52. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMWay.py +0 -0
  53. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/__init__.py +0 -0
  54. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/pose.py +0 -0
  55. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/proj.py +0 -0
  56. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/__init__.py +0 -0
  57. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/apiutils.py +0 -0
  58. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/bagutils.py +0 -0
  59. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/config.py +0 -0
  60. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/datautils.py +0 -0
  61. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/jsonutils.py +0 -0
  62. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/ormutils.py +0 -0
  63. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/shutils.py +0 -0
  64. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/sqlutils.py +0 -0
  65. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/testutils.py +0 -0
  66. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/SOURCES.txt +0 -0
  67. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/dependency_links.txt +0 -0
  68. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/not-zip-safe +0 -0
  69. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/requires.txt +0 -0
  70. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/top_level.txt +0 -0
  71. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_test.py +0 -0
  72. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/__init__.py +0 -0
  73. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_file_test.py +0 -0
  74. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_tags_test.py +0 -0
  75. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/pose_test.py +0 -0
  76. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/proj_test.py +0 -0
  77. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/bagutils_test.py +0 -0
  78. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/datautils_test.py +0 -0
  79. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/jsonutils_test.py +0 -0
  80. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/ormutils_test.py +0 -0
  81. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/shutils_test.py +0 -0
  82. {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/testutils_test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plexus-python-common
3
- Version: 1.0.31
3
+ Version: 1.0.33
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.12
6
6
  Classifier: Programming Language :: Python :: 3.13
@@ -37,10 +37,14 @@ __all__ = [
37
37
  "s3_pull_text",
38
38
  "s3_push_text",
39
39
  "S3TransferCallbackClient",
40
+ "ArchiveMemberChunk",
40
41
  "s3_make_progress_callback",
41
42
  "s3_make_progressed_client",
43
+ "s3_archive_member_tree",
42
44
  "s3_archive_listfile",
43
45
  "s3_archive_open_member",
46
+ "s3_archive_use_ranged_requests",
47
+ "s3_archive_use_chunked_reads",
44
48
  "s3_archive_open_members",
45
49
  ]
46
50
 
@@ -88,7 +92,12 @@ def s3_make_client(
88
92
  client.close()
89
93
 
90
94
 
91
- def s3_list_objects(client: S3Client, bucket: str, prefix: str, limit: int = None) -> Generator[S3ObjectMeta]:
95
+ def s3_list_objects(
96
+ client: S3Client,
97
+ bucket: str,
98
+ prefix: str,
99
+ limit: int = None,
100
+ ) -> Generator[S3ObjectMeta, None, None]:
92
101
  """
93
102
  Lists all objects from the given S3 ``bucket`` and ``prefix``.
94
103
 
@@ -130,7 +139,7 @@ def s3_listfile(
130
139
  include_patterns: list[str] | None = None,
131
140
  exclude_patterns: list[str] | None = None,
132
141
  depth: int = 0,
133
- ) -> Generator[S3ObjectMeta]:
142
+ ) -> Generator[S3ObjectMeta, None, None]:
134
143
  """
135
144
  Lists all objects from the given S3 ``bucket`` and ``prefix``, filtered by patterns and directory depth.
136
145
 
@@ -426,7 +435,7 @@ def s3_make_progressed_client(
426
435
  secret_access_key: str = None,
427
436
  region_name: str = None,
428
437
  endpoint_url: str = None,
429
- ) -> Generator[S3Client]:
438
+ ) -> Generator[S3Client, None, None]:
430
439
  """
431
440
  Creates an S3 client with progress callback as a context manager for safe resource handling.
432
441
 
@@ -483,9 +492,65 @@ def s3_options_from_s3_client(client: S3Client) -> dict[str, Any]:
483
492
  return s3_options
484
493
 
485
494
 
495
+ type ArchiveMemberTree = dict[str, tuple[zipfile.ZipInfo, ArchiveMemberTree | None]]
496
+
497
+
498
+ def s3_archive_member_tree(
499
+ client: S3Client,
500
+ bucket: str,
501
+ key: str,
502
+ ) -> ArchiveMemberTree:
503
+ """
504
+ Builds a tree structure of members in a ZIP archive stored in S3 for efficient lookup.
505
+ Directories have ZipInfo and a nested dict; files have ZipInfo and None.
506
+ Directory members are recognized by names ending with a trailing slash ("/").
507
+
508
+ Example:
509
+ {
510
+ "dir1/": (ZipInfo, {
511
+ "file1.txt": (ZipInfo, None),
512
+ "subdir/": (ZipInfo, {
513
+ "file2.txt": (ZipInfo, None)
514
+ })
515
+ }),
516
+ "file3.txt": (ZipInfo, None)
517
+ }
518
+
519
+ :param client: An instance of ``S3Client``.
520
+ :param bucket: Bucket name.
521
+ :param key: Object key of the ZIP archive.
522
+ :return: A tree structure of members in the ZIP archive.
523
+ """
524
+ s3_options = s3_options_from_s3_client(client)
525
+
526
+ archive_url = f"s3://{bucket}/{key}"
527
+
528
+ with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
529
+ member_zip_infos = archive.infolist()
530
+
531
+ root_member_tree: ArchiveMemberTree = {}
532
+
533
+ def build_member_tree(info: zipfile.ZipInfo):
534
+ *parts, last_part = info.filename.rstrip("/").split("/")
535
+ current_member_tree = root_member_tree
536
+ for part in parts:
537
+ _, current_member_tree = current_member_tree.setdefault(part + "/", (None, {}))
538
+ if info.is_dir():
539
+ current_member_tree[last_part + "/"] = info, {}
540
+ else:
541
+ current_member_tree[last_part] = info, None
542
+
543
+ # Sort by filename to ensure directories are created before their contents
544
+ for info in sorted(member_zip_infos, key=lambda x: x.filename):
545
+ build_member_tree(info)
546
+
547
+ return root_member_tree
548
+
549
+
486
550
  def s3_archive_listfile(
487
551
  client: S3Client,
488
- archive_url: str,
552
+ bucket: str,
553
+ key: str,
489
554
  members: list[str] | None = None,
490
555
  ) -> tuple[int, list[zipfile.ZipInfo], list[str]]:
491
556
  """
@@ -494,14 +559,15 @@ def s3_archive_listfile(
494
559
  under that directory will be included in the results.
495
560
 
496
561
  Example usage:
497
- >>> archive_size, member_infos, missed_members = s3_archive_listfile(client, archive_url, members=["file1.txt", "dir1/"])
498
- >>> for info in member_infos:
562
+ >>> archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, bucket, key, members=["file1.txt", "dir1/"])
563
+ >>> for info in member_zip_infos:
499
564
  ... print(info.filename, info.file_size)
500
565
  >>> if missed_members:
501
566
  ... print("Members not found:", missed_members)
502
567
 
503
568
  :param client: An instance of ``S3Client``.
504
- :param archive_url: The URL of the ZIP archive in S3.
569
+ :param bucket: Bucket name.
570
+ :param key: Object key of the ZIP archive.
505
571
  :param members: Optional list of member names to filter; if ``None``, all members are returned.
506
572
  :return: A tuple containing:
507
573
  - The size of the archive in bytes.
@@ -510,82 +576,49 @@ def s3_archive_listfile(
510
576
  """
511
577
  s3_options = s3_options_from_s3_client(client)
512
578
 
513
- storage_opts = fsspec.utils.infer_storage_options(archive_url)
514
- protocol = storage_opts.get("protocol")
515
- if protocol != "s3":
516
- raise ValueError(f"unsupported protocol '{protocol}', only 's3' is supported")
579
+ archive_url = f"s3://{bucket}/{key}"
517
580
 
518
581
  fs = fsspec.filesystem("s3", **s3_options)
519
582
  archive_size = fs.size(archive_url)
520
583
 
521
- with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
522
- member_zip_infos = archive.infolist()
523
-
524
- if members is None:
525
- return archive_size, [info for info in member_zip_infos if not info.is_dir()], []
526
-
527
- # Build a tree structure of members for efficient lookup
528
- # Directories have ZipInfo and a nested dict; files have ZipInfo and None
529
- # Directory members are recognized by names ending with a trailing slash ("/")
530
- # Example:
531
- # {
532
- # "dir1/": (ZipInfo, {
533
- # "file1.txt": (ZipInfo, None),
534
- # "subdir/": (ZipInfo, {
535
- # "file2.txt": (ZipInfo, None)
536
- # })
537
- # }),
538
- # "file3.txt": (ZipInfo, None)
539
- # }
540
- members_tree: dict[str, tuple[zipfile.ZipInfo, dict | None]] = {}
541
-
542
- def build_members_tree(info: zipfile.ZipInfo):
543
- *parts, last_part = info.filename.rstrip("/").split("/")
544
- current = members_tree
545
- for part in parts:
546
- _, current = current.setdefault(part + "/", (None, {}))
547
- if info.is_dir():
548
- current[last_part + "/"] = info, {}
549
- else:
550
- current[last_part] = info, None
551
-
552
- # Sort by filename to ensure directories are created before their contents
553
- for info in sorted(member_zip_infos, key=lambda x: x.filename):
554
- build_members_tree(info)
584
+ root_member_tree = s3_archive_member_tree(client, bucket, key)
555
585
 
556
586
  def search_members_tree(member: str) -> tuple[zipfile.ZipInfo | None, dict | None]:
557
587
  *parts, last_part = member.rstrip("/").split("/")
558
- current = members_tree
588
+ current_member_tree = root_member_tree
559
589
  for part in parts:
560
- _, current = current.get(part + "/", (None, None))
561
- if current is None:
590
+ _, current_member_tree = current_member_tree.get(part + "/", (None, None))
591
+ if current_member_tree is None:
562
592
  return None, None
563
593
  if member.endswith("/"): # Directory member recognized by trailing slash
564
- return current.get(last_part + "/", (None, None))
594
+ return current_member_tree.get(last_part + "/", (None, None))
565
595
  else:
566
- return current.get(last_part, (None, None))
596
+ return current_member_tree.get(last_part, (None, None))
567
597
 
568
- def collect_member_infos(tree: dict[str, tuple[zipfile.ZipInfo, dict | None]]) -> Generator[zipfile.ZipInfo]:
569
- for _, (member_info, member_tree) in tree.items():
570
- if member_info is None:
598
+ def collect_member_zip_infos(tree: ArchiveMemberTree) -> Generator[zipfile.ZipInfo, None, None]:
599
+ for member_zip_info, member_tree in tree.values():
600
+ if member_zip_info is None:
571
601
  continue
572
- if member_info.is_dir():
573
- yield from collect_member_infos(member_tree)
602
+ if member_zip_info.is_dir():
603
+ yield from collect_member_zip_infos(member_tree)
574
604
  else:
575
- yield member_info
605
+ yield member_zip_info
606
+
607
+ if members is None:
608
+ return archive_size, list(collect_member_zip_infos(root_member_tree)), []
576
609
 
577
610
  included_member_zip_infos = []
578
611
  missed_members = []
579
612
 
580
613
  for member in members:
581
- member_info, member_tree = search_members_tree(member)
582
- if member_info is None:
614
+ member_zip_info, member_tree = search_members_tree(member)
615
+ if member_zip_info is None:
583
616
  missed_members.append(member)
584
617
  continue
585
- if not member_info.is_dir():
586
- included_member_zip_infos.append(member_info)
618
+ if not member_zip_info.is_dir():
619
+ included_member_zip_infos.append(member_zip_info)
587
620
  else:
588
- included_member_zip_infos.extend(collect_member_infos(member_tree or {}))
621
+ included_member_zip_infos.extend(collect_member_zip_infos(member_tree or {}))
589
622
 
590
623
  return archive_size, included_member_zip_infos, missed_members
591
624
 
@@ -593,7 +626,8 @@ def s3_archive_listfile(
593
626
  if typing.TYPE_CHECKING:
594
627
  def s3_archive_open_member(
595
628
  client: S3Client,
596
- archive_url: str,
629
+ bucket: str,
630
+ key: str,
597
631
  member: str,
598
632
  mode: Literal["r", "rb"] = "r",
599
633
  ) -> contextlib.AbstractContextManager[typing.IO]: ...
@@ -602,7 +636,8 @@ if typing.TYPE_CHECKING:
602
636
  @contextlib.contextmanager
603
637
  def s3_archive_open_member(
604
638
  client: S3Client,
605
- archive_url: str,
639
+ bucket: str,
640
+ key: str,
606
641
  member: str,
607
642
  mode: Literal["r", "rb"] = "r",
608
643
  ) -> Generator[typing.IO, None, None]:
@@ -610,7 +645,8 @@ def s3_archive_open_member(
610
645
  Opens a specific member file from a ZIP archive stored in S3.
611
646
 
612
647
  :param client: An instance of ``S3Client``.
613
- :param archive_url: The URL of the ZIP archive in S3.
648
+ :param bucket: Bucket name.
649
+ :param key: Object key of the ZIP archive.
614
650
  :param member: The member file name to open from the archive.
615
651
  :param mode: File mode for opening the member ("r" for text, "rb" for binary).
616
652
 
@@ -621,38 +657,101 @@ def s3_archive_open_member(
621
657
 
622
658
  s3_options = s3_options_from_s3_client(client)
623
659
 
624
- storage_opts = fsspec.utils.infer_storage_options(archive_url)
625
- protocol = storage_opts.get("protocol")
626
- if protocol != "s3":
627
- raise ValueError(f"unsupported protocol '{protocol}', only 's3' is supported")
628
-
629
- with fsspec.open(f"zip://{member}::{archive_url}", mode, s3=s3_options) as s3_fh:
660
+ with fsspec.open(f"zip://{member}::s3://{bucket}/{key}", mode, s3=s3_options) as s3_fh:
630
661
  yield s3_fh
631
662
 
632
663
 
633
- ZIPFILE_HEADER_MIN_SIZE = 30
634
- ZIPFILE_HEADER_FN_LEN_OFFSET = 26
635
- ZIPFILE_HEADER_EX_LEN_OFFSET = 28
664
+ ZIP_CENTRAL_DIR_ESTIMATED_SIZE = 64 * 1024
665
+ ZIP_INFO_HDR_MIN_SIZE = 30
666
+ ZIP_INFO_HDR_ESTIMATED_SIZE = 128
667
+ ZIP_INFO_HDR_FN_LEN_OFFSET = 26
668
+ ZIP_INFO_HDR_EX_LEN_OFFSET = 28
669
+
670
+
671
+ @dataclasses.dataclass(frozen=True)
672
+ class ArchiveMemberChunk(object):
673
+ name: str
674
+ header_offset: int
675
+ compress_size: int
676
+ compress_type: int
677
+ header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE
678
+
679
+ @property
680
+ def begin(self) -> int:
681
+ return self.header_offset
682
+
683
+ @property
684
+ def end(self) -> int:
685
+ return self.header_offset + self.header_overhead + self.compress_size
686
+
687
+
688
+ def s3_archive_use_ranged_requests(
689
+ threshold: float = 0.5,
690
+ central_directory_overhead: int = 64 * ZIP_CENTRAL_DIR_ESTIMATED_SIZE,
691
+ zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
692
+ ) -> Callable[[int, list[zipfile.ZipInfo]], bool]:
693
+ """
694
+ Decide whether to use ranged requests for accessing members of a ZIP archive in S3
695
+ based on estimated transfer size ratio.
696
+
697
+ :param threshold: If (estimated ranged transfer bytes / archive bytes) <= threshold, use ranged per-member access;
698
+ otherwise download the whole archive.
699
+ :param central_directory_overhead: Estimated overhead size for the central directory in bytes.
700
+ :param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
701
+ :return: A callable that takes (``archive_size``, ``member_zip_infos``) and returns a boolean
702
+ indicating whether to use ranged requests.
703
+ """
704
+
705
+ if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
706
+ raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
707
+
708
+ def use_ranged_requests(
709
+ archive_size: int,
710
+ member_zip_infos: list[zipfile.ZipInfo],
711
+ ) -> bool:
712
+ estimated_ranged_total_size = central_directory_overhead + sum(info.compress_size + zip_info_header_overhead
713
+ for info in member_zip_infos)
714
+
715
+ # Avoid division by zero; prefer ranged if archive size is zero (degenerate case)
716
+ return (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
717
+
718
+ return use_ranged_requests
719
+
720
+
721
+ def s3_archive_use_chunked_reads(
722
+ zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
723
+ ) -> Callable[[zipfile.ZipInfo], ArchiveMemberChunk]:
724
+ """
725
+ Map each ``ZipInfo`` to an ``ArchiveMemberChunk`` for grouping adjacent members into single ranged reads.
636
726
 
727
+ :param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
728
+ :return: A callable that takes a ``ZipInfo`` and returns an ``ArchiveMemberChunk``.
729
+ """
637
730
 
638
- def zipfile_hdr_fn_len_slice(offset: int) -> slice:
639
- return slice(offset + ZIPFILE_HEADER_FN_LEN_OFFSET, offset + ZIPFILE_HEADER_FN_LEN_OFFSET + 2)
731
+ if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
732
+ raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
640
733
 
734
+ def use_chunked_reads(
735
+ zip_info: zipfile.ZipInfo,
736
+ ) -> ArchiveMemberChunk:
737
+ return ArchiveMemberChunk(zip_info.filename,
738
+ zip_info.header_offset,
739
+ zip_info.compress_size,
740
+ zip_info.compress_type,
741
+ header_overhead=zip_info_header_overhead)
641
742
 
642
- def zipfile_hdr_ex_len_slice(offset: int) -> slice:
643
- return slice(offset + ZIPFILE_HEADER_EX_LEN_OFFSET, offset + ZIPFILE_HEADER_EX_LEN_OFFSET + 2)
743
+ return use_chunked_reads
644
744
 
645
745
 
646
746
  def s3_archive_open_members(
647
747
  client: S3Client,
648
- archive_url: str,
748
+ bucket: str,
749
+ key: str,
649
750
  members: list[str] | None = None,
650
751
  mode: Literal["r", "rb"] = "r",
651
752
  *,
652
- threshold: float = 0.5,
653
- central_directory_overhead: int = 64 * 1024,
654
- member_header_overhead: int = 128,
655
- use_chunked_reads: bool = False,
753
+ use_ranged_requests: bool | Callable[[int, list[zipfile.ZipInfo]], bool] = True,
754
+ use_chunked_reads: bool | Callable[[zipfile.ZipInfo], ArchiveMemberChunk] = False,
656
755
  ) -> Generator[tuple[str, Callable[[], typing.IO]], None, None]:
657
756
  """
658
757
  Choose the best transfer strategy (ranged requests per-member vs full archive transfer)
@@ -665,71 +764,72 @@ def s3_archive_open_members(
665
764
 
666
765
  Example usage:
667
766
 
668
- >>> for member, opener in s3_archive_open_members(client, archive_url, members):
767
+ >>> for member, opener in s3_archive_open_members(client, bucket, key, members):
669
768
  ... with opener() as fh:
670
769
  ... data = fh.read()
671
770
 
672
771
  Incorrect usage that may lead to errors due to temporary file cleanup:
673
772
 
674
773
  >>> openers = []
675
- >>> for member, opener in s3_archive_open_members(client, archive_url, members):
774
+ >>> for member, opener in s3_archive_open_members(client, bucket, key, members):
676
775
  ... openers.append((member, opener))
677
776
  >>> for member, opener in openers:
678
777
  ... with opener() as fh: # May fail if temporary files have been cleaned up
679
778
  ... data = fh.read()
680
779
 
681
780
  :param client: An instance of ``S3Client``.
682
- :param archive_url: S3 URL to the ZIP archive.
781
+ :param bucket: Bucket name.
782
+ :param key: Object key of the ZIP archive.
683
783
  :param members: List of member names to stream.
684
784
  :param mode: File mode for opening members ("r" for text, "rb" for binary).
685
- :param threshold: If (estimated ranged transfer bytes / archive bytes) <= threshold,
686
- use ranged per-member access; otherwise download the whole archive.
687
- :param central_directory_overhead: Passed to s3_estimate_archive_ranged_requests.
688
- :param member_header_overhead: Passed to s3_estimate_archive_ranged_requests.
689
- :param use_chunked_reads: If ``True`` and ranged access is chosen, group adjacent members into single ranged reads.
785
+ :param use_ranged_requests: If ``True``, always use ranged requests to access the archive; if callable, use it as
786
+ custom logic to decide based on the archive size and member infos; if ``False``, always
787
+ download the whole archive.
788
+ :param use_chunked_reads: If ``True``, group adjacent members into single ranged reads using default ``ZipInfo``
789
+ to ``ArchiveMemberChunk`` mapping; if callable, use it as custom mapping from
790
+ ``ZipInfo`` to ``ArchiveMemberChunk`` for grouping adjacent members; if ``False``,
791
+ read each member with individual ranged requests. If ``use_ranged_requests`` is ``False``,
792
+ this parameter is ignored.
690
793
 
691
794
  :return: An iterable of callables that return file-like objects for each requested member.
692
795
  """
693
796
  if mode not in ("r", "rb"):
694
797
  raise ValueError("mode must be either 'r' or 'rb'")
695
- if member_header_overhead < ZIPFILE_HEADER_MIN_SIZE:
696
- raise ValueError(f"member_header_overhead must be at least {ZIPFILE_HEADER_MIN_SIZE} bytes")
697
798
 
698
799
  s3_options = s3_options_from_s3_client(client)
699
800
 
700
- archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, archive_url, members)
801
+ archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, bucket, key, members)
802
+ if missed_members:
803
+ raise FileNotFoundError(f"Archive members not found: {', '.join(missed_members)}")
701
804
 
702
- if len(missed_members) > 0:
703
- raise FileNotFoundError(f"members not found in archive '{missed_members}'")
805
+ if callable(use_ranged_requests):
806
+ use_ranged_requests = use_ranged_requests(archive_size, member_zip_infos)
704
807
 
705
- estimated_ranged_total_size = central_directory_overhead + sum(info.compress_size + member_header_overhead
706
- for info in member_zip_infos)
808
+ if use_ranged_requests and not use_chunked_reads:
809
+ for info in member_zip_infos:
810
+ opener = functools.partial(s3_archive_open_member, client, bucket, key, info.filename, mode)
811
+ yield info.filename, opener
812
+ return
707
813
 
708
- # Avoid division by zero; prefer ranged if archive size is zero (degenerate case)
709
- use_ranged = (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
814
+ archive_url = f"s3://{bucket}/{key}"
710
815
 
711
- @dataclasses.dataclass(frozen=True)
712
- class MemberChunk(object):
713
- name: str
714
- header_offset: int
715
- compress_size: int
716
- compress_type: int
816
+ if use_ranged_requests and use_chunked_reads:
717
817
 
718
- @property
719
- def end(self) -> int:
720
- return self.header_offset + self.compress_size + member_header_overhead
818
+ fn_len_slice = lambda index: slice(index + ZIP_INFO_HDR_FN_LEN_OFFSET, index + ZIP_INFO_HDR_FN_LEN_OFFSET + 2)
819
+ ex_len_slice = lambda index: slice(index + ZIP_INFO_HDR_EX_LEN_OFFSET, index + ZIP_INFO_HDR_EX_LEN_OFFSET + 2)
721
820
 
722
- if use_ranged and use_chunked_reads:
723
821
  # Open archive once to read central directory and gather ZipInfo for requested members.
724
822
  # We will group adjacent members (by local header offsets) and issue one ranged read per group,
725
823
  # then extract each member from the group's bytes to avoid many small ranged requests.
726
824
  with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
825
+ if callable(use_chunked_reads):
826
+ chunks = [use_chunked_reads(info) for info in member_zip_infos]
827
+ else:
828
+ chunks = [ArchiveMemberChunk(info.filename, info.header_offset, info.compress_size, info.compress_type)
829
+ for info in member_zip_infos]
727
830
 
728
- chunks = [MemberChunk(info.filename, info.header_offset, info.compress_size, info.compress_type)
729
- for info in member_zip_infos]
730
-
731
- chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.header_offset),
732
- chunk_func=lambda x, y: y.header_offset > x.end + member_header_overhead)
831
+ chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.begin),
832
+ chunk_func=lambda x, y: y.begin > x.end)
733
833
 
734
834
  # For each group, create openers for members inside the group.
735
835
  for group in chunks_groups:
@@ -740,18 +840,18 @@ def s3_archive_open_members(
740
840
  s3_fh.seek(group_offset)
741
841
  group_bytes = s3_fh.read(group_size)
742
842
 
743
- def make_opener(chunk: MemberChunk) -> Callable[[], typing.IO]:
843
+ def make_opener(chunk: ArchiveMemberChunk) -> Callable[[], typing.IO]:
744
844
 
745
845
  def opener() -> typing.IO:
746
846
  index = chunk.header_offset - group_offset
747
847
 
748
- if index + ZIPFILE_HEADER_MIN_SIZE > len(group_bytes):
848
+ if index + ZIP_INFO_HDR_MIN_SIZE > len(group_bytes):
749
849
  raise IOError("unexpected short read of member header")
750
850
 
751
- fn_len = int.from_bytes(group_bytes[zipfile_hdr_fn_len_slice(index)], "little")
752
- ex_len = int.from_bytes(group_bytes[zipfile_hdr_ex_len_slice(index)], "little")
851
+ fn_len = int.from_bytes(group_bytes[fn_len_slice(index)], "little")
852
+ ex_len = int.from_bytes(group_bytes[ex_len_slice(index)], "little")
753
853
 
754
- raw_data_begin = index + ZIPFILE_HEADER_MIN_SIZE + fn_len + ex_len
854
+ raw_data_begin = index + ZIP_INFO_HDR_MIN_SIZE + fn_len + ex_len
755
855
  raw_data_end = raw_data_begin + chunk.compress_size
756
856
 
757
857
  if raw_data_end > len(group_bytes):
@@ -776,12 +876,6 @@ def s3_archive_open_members(
776
876
  yield from ((chunk.name, make_opener(chunk)) for chunk in group)
777
877
  return
778
878
 
779
- if use_ranged:
780
- for info in member_zip_infos:
781
- opener = functools.partial(s3_archive_open_member, client, archive_url, info.filename, mode)
782
- yield info.filename, opener
783
- return
784
-
785
879
  # Download full archive once and serve members from it (read member bytes into memory)
786
880
  with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, tempfile.TemporaryFile() as temp_fh:
787
881
  shutil.copyfileobj(s3_fh, temp_fh)
@@ -22,6 +22,10 @@ __all__ = [
22
22
  "strict_relpath_parser",
23
23
  "strict_abspath_pattern",
24
24
  "strict_abspath_parser",
25
+ "strict_archive_relpath_pattern",
26
+ "strict_archive_relpath_parser",
27
+ "strict_archive_abspath_pattern",
28
+ "strict_archive_abspath_parser",
25
29
  "email_address_pattern",
26
30
  "email_address_parser",
27
31
  "semver_pattern",
@@ -82,6 +86,7 @@ period_token: pp.ParserElement = pp.Char(".")
82
86
  colon_token: pp.ParserElement = pp.Char(":")
83
87
  slash_token: pp.ParserElement = pp.Char("/")
84
88
  plus_token: pp.ParserElement = pp.Char("+")
89
+ sharp_token: pp.ParserElement = pp.Char("#")
85
90
 
86
91
  lower_regexp: re.Pattern[str] = re.compile(r"[a-z]")
87
92
  upper_regexp: re.Pattern[str] = re.compile(r"[A-Z]")
@@ -155,6 +160,10 @@ strict_relpath_regexp: re.Pattern[str] = re.compile(
155
160
  rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
156
161
  strict_abspath_regexp: re.Pattern[str] = re.compile(
157
162
  rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
163
+ strict_archive_relpath_regexp: re.Pattern[str] = re.compile(
164
+ rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
165
+ strict_archive_abspath_regexp: re.Pattern[str] = re.compile(
166
+ rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
158
167
 
159
168
  strict_path_chars_element = strict_chars_element.copy()
160
169
  strict_path_chars_element.add_condition(token_reparse(period_token[1, ...], negate=True),
@@ -164,6 +173,12 @@ strict_relpath_element: pp.ParserElement = pp.Combine(
164
173
  (strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
165
174
  strict_abspath_element: pp.ParserElement = pp.Combine(
166
175
  slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
176
+ strict_archive_relpath_element: pp.ParserElement = pp.Combine(
177
+ (strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
178
+ (sharp_token + strict_relpath_element)[0, 1])
179
+ strict_archive_abspath_element: pp.ParserElement = pp.Combine(
180
+ slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
181
+ (sharp_token + strict_relpath_element)[0, 1])
167
182
 
168
183
  email_address_regexp: re.Pattern[str] = re.compile(
169
184
  rf"(({lower_digit_regexp.pattern}|[_-])+)(?:\.({lower_digit_regexp.pattern}|[_-])+)*@(?:{kebab_case_regexp.pattern}\.)+({lower_digit_regexp.pattern}{{2,63}})")
@@ -206,6 +221,10 @@ strict_relpath_pattern = make_string_pattern(strict_relpath_regexp)
206
221
  strict_relpath_parser = make_string_parser(strict_relpath_element)
207
222
  strict_abspath_pattern = make_string_pattern(strict_abspath_regexp)
208
223
  strict_abspath_parser = make_string_parser(strict_abspath_element)
224
+ strict_archive_relpath_pattern = make_string_pattern(strict_archive_relpath_regexp)
225
+ strict_archive_relpath_parser = make_string_parser(strict_archive_relpath_element)
226
+ strict_archive_abspath_pattern = make_string_pattern(strict_archive_abspath_regexp)
227
+ strict_archive_abspath_parser = make_string_parser(strict_archive_abspath_element)
209
228
 
210
229
  email_address_pattern = make_string_pattern(email_address_regexp)
211
230
  email_address_parser = make_string_parser(email_address_element)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plexus-python-common
3
- Version: 1.0.31
3
+ Version: 1.0.33
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.12
6
6
  Classifier: Programming Language :: Python :: 3.13
@@ -488,21 +488,21 @@ class S3UtilsTest(unittest.TestCase):
488
488
  local_root = os.path.join(resources_directory, "unittest", "s3utils")
489
489
  local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
490
490
 
491
- for archive_url, members in itertools.product(
492
- ["s3://dummy-bucket/s3utils_archive/archive.uncompressed.zip",
493
- "s3://dummy-bucket/s3utils_archive/archive.compressed.zip"],
491
+ for archive_key, members in itertools.product(
492
+ ["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
494
493
  [local_members, None],
495
494
  ):
496
- archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, archive_url, members)
495
+ archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
496
+ "dummy-bucket",
497
+ archive_key,
498
+ members)
497
499
 
498
500
  self.assertEqual(archive_size - sum(info.compress_size for info in member_zip_infos), 2470)
499
501
  self.assertEqual(set(info.filename for info in member_zip_infos), set(local_members))
500
502
  self.assertEqual(len(missed_members), 0)
501
503
 
502
- for archive_url, (members, members_expect) in itertools.product(
503
- ["s3://dummy-bucket/s3utils_archive/archive.uncompressed.zip",
504
- "s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
505
- ],
504
+ for archive_key, (members, members_expect) in itertools.product(
505
+ ["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
506
506
  [(local_members, local_members),
507
507
  (None, local_members),
508
508
  (["dir.baz/", "dir.foo/"],
@@ -524,7 +524,10 @@ class S3UtilsTest(unittest.TestCase):
524
524
  ),
525
525
  ],
526
526
  ):
527
- archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, archive_url, members)
527
+ archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
528
+ "dummy-bucket",
529
+ archive_key,
530
+ members)
528
531
 
529
532
  self.assertEqual(sorted(list(info.filename for info in member_zip_infos)), sorted(members_expect))
530
533
  self.assertEqual(len(missed_members), 0)
@@ -545,16 +548,14 @@ class S3UtilsTest(unittest.TestCase):
545
548
  local_root = os.path.join(resources_directory, "unittest", "s3utils")
546
549
  local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
547
550
 
548
- for archive_url, mode in itertools.product(
549
- ["s3://dummy-bucket/s3utils_archive/archive.uncompressed.zip",
550
- "s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
551
- ],
551
+ for archive_key, mode in itertools.product(
552
+ ["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
552
553
  ["r", "rb"],
553
554
  ):
554
555
  for local_member in local_members:
555
556
  with (
556
557
  open(os.path.join(local_root, local_member), mode) as local_fh,
557
- s3_archive_open_member(client, archive_url, local_member, mode) as s3_fh,
558
+ s3_archive_open_member(client, "dummy-bucket", archive_key, local_member, mode) as s3_fh,
558
559
  ):
559
560
  self.assertEqual(local_fh.read(), s3_fh.read())
560
561
 
@@ -574,22 +575,20 @@ class S3UtilsTest(unittest.TestCase):
574
575
  local_root = os.path.join(resources_directory, "unittest", "s3utils")
575
576
  local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
576
577
 
577
- for archive_url, members, mode, threshold, use_chunked_reads in itertools.product(
578
- ["s3://dummy-bucket/s3utils_archive/archive.uncompressed.zip",
579
- "s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
580
- ],
578
+ for archive_key, members, mode, use_ranged_requests, use_chunked_reads in itertools.product(
579
+ ["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
581
580
  [local_members, None],
582
581
  ["r", "rb"],
583
- [1.0, 0.0],
584
- [False, True],
582
+ [False, True, s3_archive_use_ranged_requests()],
583
+ [False, True, s3_archive_use_chunked_reads()],
585
584
  ):
586
585
  for member, opener in s3_archive_open_members(
587
586
  client,
588
- archive_url,
587
+ "dummy-bucket",
588
+ archive_key,
589
589
  members,
590
590
  mode,
591
- threshold=threshold,
592
- central_directory_overhead=0,
591
+ use_ranged_requests=use_ranged_requests,
593
592
  use_chunked_reads=use_chunked_reads,
594
593
  ):
595
594
  with open(os.path.join(local_root, member), mode) as local_fh, opener() as s3_fh:
@@ -13,6 +13,8 @@ from plexus.common.utils.strutils import kebab_case_parser, kebab_case_pattern
13
13
  from plexus.common.utils.strutils import parse_bag_name, parse_user_email, parse_user_name, parse_vehicle_name
14
14
  from plexus.common.utils.strutils import snake_case_parser, snake_case_pattern
15
15
  from plexus.common.utils.strutils import strict_abspath_parser, strict_abspath_pattern
16
+ from plexus.common.utils.strutils import strict_archive_abspath_parser, strict_archive_abspath_pattern
17
+ from plexus.common.utils.strutils import strict_archive_relpath_parser, strict_archive_relpath_pattern
16
18
  from plexus.common.utils.strutils import strict_relpath_parser, strict_relpath_pattern
17
19
  from plexus.common.utils.strutils import topic_parser, topic_pattern
18
20
  from plexus.common.utils.strutils import vin_code_parser, vin_code_pattern
@@ -377,6 +379,154 @@ class StrUtilsTest(unittest.TestCase):
377
379
  with self.assertRaises(pp.ParseException):
378
380
  strict_abspath_parser.parse_string(data, parse_all=True)
379
381
 
382
+ data_strict_archive_relpath_pattern = [
383
+ ("archive.zip",),
384
+ ("directory/archive.zip",),
385
+ ("directory/dummy/archive.zip",),
386
+ ("directory/dummy/path/archive.zip",),
387
+ ("directory/dummy/path/to/archive.zip",),
388
+ ("directory/dummy/path_to-archive.zip",),
389
+ ("directory/dummy/.path_to-archive.zip",),
390
+ ("archive.zip#",),
391
+ ("archive.zip#directory",),
392
+ ("archive.zip#directory/",),
393
+ ("directory/archive.zip#directory/dummy",),
394
+ ("directory/archive.zip#directory/dummy/",),
395
+ ("directory/dummy/archive.zip#directory/dummy/path",),
396
+ ("directory/dummy/path/archive.zip#directory/dummy/path/to",),
397
+ ("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
398
+ ("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
399
+ ("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
400
+ ]
401
+
402
+ @ddt.idata(data_strict_archive_relpath_pattern)
403
+ @ddt.unpack
404
+ def test_strict_archive_relpath_pattern(self, data):
405
+ self.assertIsNotNone(strict_archive_relpath_pattern.match(data))
406
+ self.assertIsNotNone(strict_archive_relpath_parser.parse_string(data, parse_all=True))
407
+
408
+ data_strict_archive_relpath_pattern__bad_cases = [
409
+ ("/archive.zip",),
410
+ ("/directory/archive.zip",),
411
+ ("/directory/dummy/archive.zip",),
412
+ ("/directory/dummy/path/archive.zip",),
413
+ ("/directory/dummy/path/to/archive.zip",),
414
+ ("/directory/dummy/path_to-archive.zip",),
415
+ ("/directory/dummy/.path_to-archive.zip",),
416
+ ("/archive.zip#directory",),
417
+ ("/archive.zip#directory/",),
418
+ ("/directory/archive.zip#directory/dummy",),
419
+ ("/directory/archive.zip#directory/dummy/",),
420
+ ("/directory/dummy/archive.zip#directory/dummy/path",),
421
+ ("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
422
+ ("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
423
+ ("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
424
+ ("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
425
+ ("archive.zip/",),
426
+ ("directory/archive.zip/",),
427
+ ("directory/dummy/archive.zip/",),
428
+ ("directory/dummy/path/archive.zip/",),
429
+ ("directory/dummy/path/to/archive.zip/",),
430
+ ("directory/dummy/path_to-archive.zip/",),
431
+ ("directory/dummy/.path_to-archive.zip/",),
432
+ ("archive.zip#/directory",),
433
+ ("archive.zip#/directory/",),
434
+ ("directory/archive.zip#/directory/dummy",),
435
+ ("directory/archive.zip#/directory/dummy/",),
436
+ ("directory/dummy/archive.zip#/directory/dummy/path",),
437
+ ("directory/dummy/path/archive.zip#/directory/dummy/path/to",),
438
+ ("directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
439
+ ("directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
440
+ ("directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
441
+ ("directory/archive.zip#directory/./dummy",),
442
+ ("directory/archive.zip#directory/../dummy",),
443
+ ("directory/archive.zip#directory/.../dummy",),
444
+ ("directory/archive.zip#directory/\t/dummy",),
445
+ ("directory/archive.zip#directory/\r/dummy",),
446
+ ("directory/archive.zip#directory/\n/dummy",),
447
+ ]
448
+
449
+ @ddt.idata(data_strict_archive_relpath_pattern__bad_cases)
450
+ @ddt.unpack
451
+ def test_strict_archive_relpath_pattern__bad_cases(self, data):
452
+ self.assertIsNone(strict_archive_relpath_pattern.match(data))
453
+ with self.assertRaises(pp.ParseException):
454
+ strict_archive_relpath_parser.parse_string(data, parse_all=True)
455
+
456
+ data_strict_archive_abspath_pattern = [
457
+ ("/archive.zip",),
458
+ ("/directory/archive.zip",),
459
+ ("/directory/dummy/archive.zip",),
460
+ ("/directory/dummy/path/archive.zip",),
461
+ ("/directory/dummy/path/to/archive.zip",),
462
+ ("/directory/dummy/path_to-archive.zip",),
463
+ ("/directory/dummy/.path_to-archive.zip",),
464
+ ("/archive.zip#",),
465
+ ("/archive.zip#directory",),
466
+ ("/archive.zip#directory/",),
467
+ ("/directory/archive.zip#directory/dummy",),
468
+ ("/directory/archive.zip#directory/dummy/",),
469
+ ("/directory/dummy/archive.zip#directory/dummy/path",),
470
+ ("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
471
+ ("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
472
+ ("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
473
+ ("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
474
+ ]
475
+
476
+ @ddt.idata(data_strict_archive_abspath_pattern)
477
+ @ddt.unpack
478
+ def test_strict_archive_abspath_pattern(self, data):
479
+ self.assertIsNotNone(strict_archive_abspath_pattern.match(data))
480
+ self.assertIsNotNone(strict_archive_abspath_parser.parse_string(data, parse_all=True))
481
+
482
+ data_strict_archive_abspath_pattern__bad_cases = [
483
+ ("archive.zip",),
484
+ ("directory/archive.zip",),
485
+ ("directory/dummy/archive.zip",),
486
+ ("directory/dummy/path/archive.zip",),
487
+ ("directory/dummy/path/to/archive.zip",),
488
+ ("directory/dummy/path_to-archive.zip",),
489
+ ("directory/dummy/.path_to-archive.zip",),
490
+ ("archive.zip#directory",),
491
+ ("archive.zip#directory/",),
492
+ ("directory/archive.zip#directory/dummy",),
493
+ ("directory/archive.zip#directory/dummy/",),
494
+ ("directory/dummy/archive.zip#directory/dummy/path",),
495
+ ("directory/dummy/path/archive.zip#directory/dummy/path/to",),
496
+ ("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
497
+ ("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
498
+ ("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
499
+ ("/archive.zip/",),
500
+ ("/directory/archive.zip/",),
501
+ ("/directory/dummy/archive.zip/",),
502
+ ("/directory/dummy/path/archive.zip/",),
503
+ ("/directory/dummy/path/to/archive.zip/",),
504
+ ("/directory/dummy/path_to-archive.zip/",),
505
+ ("/directory/dummy/.path_to-archive.zip/",),
506
+ ("/archive.zip#/directory",),
507
+ ("/archive.zip#/directory/",),
508
+ ("/directory/archive.zip#/directory/dummy",),
509
+ ("/directory/archive.zip#/directory/dummy/",),
510
+ ("/directory/dummy/archive.zip#/directory/dummy/path",),
511
+ ("/directory/dummy/path/archive.zip#/directory/dummy/path/to",),
512
+ ("/directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
513
+ ("/directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
514
+ ("/directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
515
+ ("/directory/archive.zip#directory/./dummy",),
516
+ ("/directory/archive.zip#directory/../dummy",),
517
+ ("/directory/archive.zip#directory/.../dummy",),
518
+ ("/directory/archive.zip#directory/\t/dummy",),
519
+ ("/directory/archive.zip#directory/\r/dummy",),
520
+ ("/directory/archive.zip#directory/\n/dummy",),
521
+ ]
522
+
523
+ @ddt.idata(data_strict_archive_abspath_pattern__bad_cases)
524
+ @ddt.unpack
525
+ def test_strict_archive_abspath_pattern__bad_cases(self, data):
526
+ self.assertIsNone(strict_archive_abspath_pattern.match(data))
527
+ with self.assertRaises(pp.ParseException):
528
+ strict_archive_abspath_parser.parse_string(data, parse_all=True)
529
+
380
530
  data_email_address_pattern = [
381
531
  ("someone@dummy.com",),
382
532
  ("some.one@dummy.com",),