plexus-python-common 1.0.31__tar.gz → 1.0.33__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/PKG-INFO +1 -1
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/s3utils.py +219 -125
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/strutils.py +19 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/PKG-INFO +1 -1
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/s3utils_test.py +22 -23
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/strutils_test.py +150 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.editorconfig +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.github/workflows/pr.yml +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.github/workflows/push.yml +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/.gitignore +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/MANIFEST.in +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/README.md +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/VERSION +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/pyproject.toml +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.0.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.1.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/jsonutils/dummy.2.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.bar.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.bar +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.baz/file.foo.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/dir.foo.bar.baz/file.foo.bar.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.bar.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.bar +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.bar +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.baz +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils/dir.foo/file.foo +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.compressed.zip +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/s3utils_archive/archive.uncompressed.zip +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/0-dummy +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/1-dummy +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/2-dummy +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.0.vol-0.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.1.vol-1.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.2.vol-2.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.jsonl +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part0 +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part1 +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.csv.part2 +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.txt +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/setup.cfg +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/setup.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/__init__.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMFile.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMNode.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMTags.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMWay.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/__init__.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/pose.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/proj.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/__init__.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/apiutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/bagutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/config.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/datautils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/jsonutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/ormutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/shutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/sqlutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/testutils.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/SOURCES.txt +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/dependency_links.txt +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/not-zip-safe +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/requires.txt +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus_python_common.egg-info/top_level.txt +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/__init__.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_file_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/carto/osm_tags_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/pose_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/proj_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/bagutils_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/datautils_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/jsonutils_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/ormutils_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/shutils_test.py +0 -0
- {plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/utils/testutils_test.py +0 -0
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/s3utils.py
RENAMED
|
@@ -37,10 +37,14 @@ __all__ = [
|
|
|
37
37
|
"s3_pull_text",
|
|
38
38
|
"s3_push_text",
|
|
39
39
|
"S3TransferCallbackClient",
|
|
40
|
+
"ArchiveMemberChunk",
|
|
40
41
|
"s3_make_progress_callback",
|
|
41
42
|
"s3_make_progressed_client",
|
|
43
|
+
"s3_archive_member_tree",
|
|
42
44
|
"s3_archive_listfile",
|
|
43
45
|
"s3_archive_open_member",
|
|
46
|
+
"s3_archive_use_ranged_requests",
|
|
47
|
+
"s3_archive_use_chunked_reads",
|
|
44
48
|
"s3_archive_open_members",
|
|
45
49
|
]
|
|
46
50
|
|
|
@@ -88,7 +92,12 @@ def s3_make_client(
|
|
|
88
92
|
client.close()
|
|
89
93
|
|
|
90
94
|
|
|
91
|
-
def s3_list_objects(
|
|
95
|
+
def s3_list_objects(
|
|
96
|
+
client: S3Client,
|
|
97
|
+
bucket: str,
|
|
98
|
+
prefix: str,
|
|
99
|
+
limit: int = None,
|
|
100
|
+
) -> Generator[S3ObjectMeta, None, None]:
|
|
92
101
|
"""
|
|
93
102
|
Lists all objects from the given S3 ``bucket`` and ``prefix``.
|
|
94
103
|
|
|
@@ -130,7 +139,7 @@ def s3_listfile(
|
|
|
130
139
|
include_patterns: list[str] | None = None,
|
|
131
140
|
exclude_patterns: list[str] | None = None,
|
|
132
141
|
depth: int = 0,
|
|
133
|
-
) -> Generator[S3ObjectMeta]:
|
|
142
|
+
) -> Generator[S3ObjectMeta, None, None]:
|
|
134
143
|
"""
|
|
135
144
|
Lists all objects from the given S3 ``bucket`` and ``prefix``, filtered by patterns and directory depth.
|
|
136
145
|
|
|
@@ -426,7 +435,7 @@ def s3_make_progressed_client(
|
|
|
426
435
|
secret_access_key: str = None,
|
|
427
436
|
region_name: str = None,
|
|
428
437
|
endpoint_url: str = None,
|
|
429
|
-
) -> Generator[S3Client]:
|
|
438
|
+
) -> Generator[S3Client, None, None]:
|
|
430
439
|
"""
|
|
431
440
|
Creates an S3 client with progress callback as a context manager for safe resource handling.
|
|
432
441
|
|
|
@@ -483,9 +492,65 @@ def s3_options_from_s3_client(client: S3Client) -> dict[str, Any]:
|
|
|
483
492
|
return s3_options
|
|
484
493
|
|
|
485
494
|
|
|
495
|
+
type ArchiveMemberTree = dict[str, tuple[zipfile.ZipInfo, ArchiveMemberTree | None]]
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def s3_archive_member_tree(
|
|
499
|
+
client: S3Client,
|
|
500
|
+
bucket: str,
|
|
501
|
+
key: str,
|
|
502
|
+
) -> ArchiveMemberTree:
|
|
503
|
+
"""
|
|
504
|
+
Builds a tree structure of members in a ZIP archive stored in S3 for efficient lookup.
|
|
505
|
+
Directories have ZipInfo and a nested dict; files have ZipInfo and None.
|
|
506
|
+
Directory members are recognized by names ending with a trailing slash ("/").
|
|
507
|
+
|
|
508
|
+
Example:
|
|
509
|
+
{
|
|
510
|
+
"dir1/": (ZipInfo, {
|
|
511
|
+
"file1.txt": (ZipInfo, None),
|
|
512
|
+
"subdir/": (ZipInfo, {
|
|
513
|
+
"file2.txt": (ZipInfo, None)
|
|
514
|
+
})
|
|
515
|
+
}),
|
|
516
|
+
"file3.txt": (ZipInfo, None)
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
:param client: An instance of ``S3Client``.
|
|
520
|
+
:param bucket: Bucket name.
|
|
521
|
+
:param key: Object key of the ZIP archive.
|
|
522
|
+
:return: A tree structure of members in the ZIP archive.
|
|
523
|
+
"""
|
|
524
|
+
s3_options = s3_options_from_s3_client(client)
|
|
525
|
+
|
|
526
|
+
archive_url = f"s3://{bucket}/{key}"
|
|
527
|
+
|
|
528
|
+
with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
|
|
529
|
+
member_zip_infos = archive.infolist()
|
|
530
|
+
|
|
531
|
+
root_member_tree: ArchiveMemberTree = {}
|
|
532
|
+
|
|
533
|
+
def build_member_tree(info: zipfile.ZipInfo):
|
|
534
|
+
*parts, last_part = info.filename.rstrip("/").split("/")
|
|
535
|
+
current_member_tree = root_member_tree
|
|
536
|
+
for part in parts:
|
|
537
|
+
_, current_member_tree = current_member_tree.setdefault(part + "/", (None, {}))
|
|
538
|
+
if info.is_dir():
|
|
539
|
+
current_member_tree[last_part + "/"] = info, {}
|
|
540
|
+
else:
|
|
541
|
+
current_member_tree[last_part] = info, None
|
|
542
|
+
|
|
543
|
+
# Sort by filename to ensure directories are created before their contents
|
|
544
|
+
for info in sorted(member_zip_infos, key=lambda x: x.filename):
|
|
545
|
+
build_member_tree(info)
|
|
546
|
+
|
|
547
|
+
return root_member_tree
|
|
548
|
+
|
|
549
|
+
|
|
486
550
|
def s3_archive_listfile(
|
|
487
551
|
client: S3Client,
|
|
488
|
-
|
|
552
|
+
bucket: str,
|
|
553
|
+
key: str,
|
|
489
554
|
members: list[str] | None = None,
|
|
490
555
|
) -> tuple[int, list[zipfile.ZipInfo], list[str]]:
|
|
491
556
|
"""
|
|
@@ -494,14 +559,15 @@ def s3_archive_listfile(
|
|
|
494
559
|
under that directory will be included in the results.
|
|
495
560
|
|
|
496
561
|
Example usage:
|
|
497
|
-
>>> archive_size,
|
|
498
|
-
>>> for info in
|
|
562
|
+
>>> archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, bucket, key, members=["file1.txt", "dir1/"])
|
|
563
|
+
>>> for info in member_zip_infos:
|
|
499
564
|
... print(info.filename, info.file_size)
|
|
500
565
|
>>> if missed_members:
|
|
501
566
|
... print("Members not found:", missed_members)
|
|
502
567
|
|
|
503
568
|
:param client: An instance of ``S3Client``.
|
|
504
|
-
:param
|
|
569
|
+
:param bucket: Bucket name.
|
|
570
|
+
:param key: Object key of the ZIP archive.
|
|
505
571
|
:param members: Optional list of member names to filter; if ``None``, all members are returned.
|
|
506
572
|
:return: A tuple containing:
|
|
507
573
|
- The size of the archive in bytes.
|
|
@@ -510,82 +576,49 @@ def s3_archive_listfile(
|
|
|
510
576
|
"""
|
|
511
577
|
s3_options = s3_options_from_s3_client(client)
|
|
512
578
|
|
|
513
|
-
|
|
514
|
-
protocol = storage_opts.get("protocol")
|
|
515
|
-
if protocol != "s3":
|
|
516
|
-
raise ValueError(f"unsupported protocol '{protocol}', only 's3' is supported")
|
|
579
|
+
archive_url = f"s3://{bucket}/{key}"
|
|
517
580
|
|
|
518
581
|
fs = fsspec.filesystem("s3", **s3_options)
|
|
519
582
|
archive_size = fs.size(archive_url)
|
|
520
583
|
|
|
521
|
-
|
|
522
|
-
member_zip_infos = archive.infolist()
|
|
523
|
-
|
|
524
|
-
if members is None:
|
|
525
|
-
return archive_size, [info for info in member_zip_infos if not info.is_dir()], []
|
|
526
|
-
|
|
527
|
-
# Build a tree structure of members for efficient lookup
|
|
528
|
-
# Directories have ZipInfo and a nested dict; files have ZipInfo and None
|
|
529
|
-
# Directory members are recognized by names ending with a trailing slash ("/")
|
|
530
|
-
# Example:
|
|
531
|
-
# {
|
|
532
|
-
# "dir1/": (ZipInfo, {
|
|
533
|
-
# "file1.txt": (ZipInfo, None),
|
|
534
|
-
# "subdir/": (ZipInfo, {
|
|
535
|
-
# "file2.txt": (ZipInfo, None)
|
|
536
|
-
# })
|
|
537
|
-
# }),
|
|
538
|
-
# "file3.txt": (ZipInfo, None)
|
|
539
|
-
# }
|
|
540
|
-
members_tree: dict[str, tuple[zipfile.ZipInfo, dict | None]] = {}
|
|
541
|
-
|
|
542
|
-
def build_members_tree(info: zipfile.ZipInfo):
|
|
543
|
-
*parts, last_part = info.filename.rstrip("/").split("/")
|
|
544
|
-
current = members_tree
|
|
545
|
-
for part in parts:
|
|
546
|
-
_, current = current.setdefault(part + "/", (None, {}))
|
|
547
|
-
if info.is_dir():
|
|
548
|
-
current[last_part + "/"] = info, {}
|
|
549
|
-
else:
|
|
550
|
-
current[last_part] = info, None
|
|
551
|
-
|
|
552
|
-
# Sort by filename to ensure directories are created before their contents
|
|
553
|
-
for info in sorted(member_zip_infos, key=lambda x: x.filename):
|
|
554
|
-
build_members_tree(info)
|
|
584
|
+
root_member_tree = s3_archive_member_tree(client, bucket, key)
|
|
555
585
|
|
|
556
586
|
def search_members_tree(member: str) -> tuple[zipfile.ZipInfo | None, dict | None]:
|
|
557
587
|
*parts, last_part = member.rstrip("/").split("/")
|
|
558
|
-
|
|
588
|
+
current_member_tree = root_member_tree
|
|
559
589
|
for part in parts:
|
|
560
|
-
_,
|
|
561
|
-
if
|
|
590
|
+
_, current_member_tree = current_member_tree.get(part + "/", (None, None))
|
|
591
|
+
if current_member_tree is None:
|
|
562
592
|
return None, None
|
|
563
593
|
if member.endswith("/"): # Directory member recognized by trailing slash
|
|
564
|
-
return
|
|
594
|
+
return current_member_tree.get(last_part + "/", (None, None))
|
|
565
595
|
else:
|
|
566
|
-
return
|
|
596
|
+
return current_member_tree.get(last_part, (None, None))
|
|
567
597
|
|
|
568
|
-
def
|
|
569
|
-
for
|
|
570
|
-
if
|
|
598
|
+
def collect_member_zip_infos(tree: ArchiveMemberTree) -> Generator[zipfile.ZipInfo, None, None]:
|
|
599
|
+
for member_zip_info, member_tree in tree.values():
|
|
600
|
+
if member_zip_info is None:
|
|
571
601
|
continue
|
|
572
|
-
if
|
|
573
|
-
yield from
|
|
602
|
+
if member_zip_info.is_dir():
|
|
603
|
+
yield from collect_member_zip_infos(member_tree)
|
|
574
604
|
else:
|
|
575
|
-
yield
|
|
605
|
+
yield member_zip_info
|
|
606
|
+
|
|
607
|
+
if members is None:
|
|
608
|
+
return archive_size, list(collect_member_zip_infos(root_member_tree)), []
|
|
576
609
|
|
|
577
610
|
included_member_zip_infos = []
|
|
578
611
|
missed_members = []
|
|
579
612
|
|
|
580
613
|
for member in members:
|
|
581
|
-
|
|
582
|
-
if
|
|
614
|
+
member_zip_info, member_tree = search_members_tree(member)
|
|
615
|
+
if member_zip_info is None:
|
|
583
616
|
missed_members.append(member)
|
|
584
617
|
continue
|
|
585
|
-
if not
|
|
586
|
-
included_member_zip_infos.append(
|
|
618
|
+
if not member_zip_info.is_dir():
|
|
619
|
+
included_member_zip_infos.append(member_zip_info)
|
|
587
620
|
else:
|
|
588
|
-
included_member_zip_infos.extend(
|
|
621
|
+
included_member_zip_infos.extend(collect_member_zip_infos(member_tree or {}))
|
|
589
622
|
|
|
590
623
|
return archive_size, included_member_zip_infos, missed_members
|
|
591
624
|
|
|
@@ -593,7 +626,8 @@ def s3_archive_listfile(
|
|
|
593
626
|
if typing.TYPE_CHECKING:
|
|
594
627
|
def s3_archive_open_member(
|
|
595
628
|
client: S3Client,
|
|
596
|
-
|
|
629
|
+
bucket: str,
|
|
630
|
+
key: str,
|
|
597
631
|
member: str,
|
|
598
632
|
mode: Literal["r", "rb"] = "r",
|
|
599
633
|
) -> contextlib.AbstractContextManager[typing.IO]: ...
|
|
@@ -602,7 +636,8 @@ if typing.TYPE_CHECKING:
|
|
|
602
636
|
@contextlib.contextmanager
|
|
603
637
|
def s3_archive_open_member(
|
|
604
638
|
client: S3Client,
|
|
605
|
-
|
|
639
|
+
bucket: str,
|
|
640
|
+
key: str,
|
|
606
641
|
member: str,
|
|
607
642
|
mode: Literal["r", "rb"] = "r",
|
|
608
643
|
) -> Generator[typing.IO, None, None]:
|
|
@@ -610,7 +645,8 @@ def s3_archive_open_member(
|
|
|
610
645
|
Opens a specific member file from a ZIP archive stored in S3.
|
|
611
646
|
|
|
612
647
|
:param client: An instance of ``S3Client``.
|
|
613
|
-
:param
|
|
648
|
+
:param bucket: Bucket name.
|
|
649
|
+
:param key: Object key of the ZIP archive.
|
|
614
650
|
:param member: The member file name to open from the archive.
|
|
615
651
|
:param mode: File mode for opening the member ("r" for text, "rb" for binary).
|
|
616
652
|
|
|
@@ -621,38 +657,101 @@ def s3_archive_open_member(
|
|
|
621
657
|
|
|
622
658
|
s3_options = s3_options_from_s3_client(client)
|
|
623
659
|
|
|
624
|
-
|
|
625
|
-
protocol = storage_opts.get("protocol")
|
|
626
|
-
if protocol != "s3":
|
|
627
|
-
raise ValueError(f"unsupported protocol '{protocol}', only 's3' is supported")
|
|
628
|
-
|
|
629
|
-
with fsspec.open(f"zip://{member}::{archive_url}", mode, s3=s3_options) as s3_fh:
|
|
660
|
+
with fsspec.open(f"zip://{member}::s3://{bucket}/{key}", mode, s3=s3_options) as s3_fh:
|
|
630
661
|
yield s3_fh
|
|
631
662
|
|
|
632
663
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
664
|
+
ZIP_CENTRAL_DIR_ESTIMATED_SIZE = 64 * 1024
|
|
665
|
+
ZIP_INFO_HDR_MIN_SIZE = 30
|
|
666
|
+
ZIP_INFO_HDR_ESTIMATED_SIZE = 128
|
|
667
|
+
ZIP_INFO_HDR_FN_LEN_OFFSET = 26
|
|
668
|
+
ZIP_INFO_HDR_EX_LEN_OFFSET = 28
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
@dataclasses.dataclass(frozen=True)
|
|
672
|
+
class ArchiveMemberChunk(object):
|
|
673
|
+
name: str
|
|
674
|
+
header_offset: int
|
|
675
|
+
compress_size: int
|
|
676
|
+
compress_type: int
|
|
677
|
+
header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE
|
|
678
|
+
|
|
679
|
+
@property
|
|
680
|
+
def begin(self) -> int:
|
|
681
|
+
return self.header_offset
|
|
682
|
+
|
|
683
|
+
@property
|
|
684
|
+
def end(self) -> int:
|
|
685
|
+
return self.header_offset + self.header_overhead + self.compress_size
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def s3_archive_use_ranged_requests(
|
|
689
|
+
threshold: float = 0.5,
|
|
690
|
+
central_directory_overhead: int = 64 * ZIP_CENTRAL_DIR_ESTIMATED_SIZE,
|
|
691
|
+
zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
|
|
692
|
+
) -> Callable[[int, list[zipfile.ZipInfo]], bool]:
|
|
693
|
+
"""
|
|
694
|
+
Decide whether to use ranged requests for accessing members of a ZIP archive in S3
|
|
695
|
+
based on estimated transfer size ratio.
|
|
696
|
+
|
|
697
|
+
:param threshold: If (estimated ranged transfer bytes / archive bytes) <= threshold, use ranged per-member access;
|
|
698
|
+
otherwise download the whole archive.
|
|
699
|
+
:param central_directory_overhead: Estimated overhead size for the central directory in bytes.
|
|
700
|
+
:param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
|
|
701
|
+
:return: A callable that takes (``archive_size``, ``member_zip_infos``) and returns a boolean
|
|
702
|
+
indicating whether to use ranged requests.
|
|
703
|
+
"""
|
|
704
|
+
|
|
705
|
+
if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
|
|
706
|
+
raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
|
|
707
|
+
|
|
708
|
+
def use_ranged_requests(
|
|
709
|
+
archive_size: int,
|
|
710
|
+
member_zip_infos: list[zipfile.ZipInfo],
|
|
711
|
+
) -> bool:
|
|
712
|
+
estimated_ranged_total_size = central_directory_overhead + sum(info.compress_size + zip_info_header_overhead
|
|
713
|
+
for info in member_zip_infos)
|
|
714
|
+
|
|
715
|
+
# Avoid division by zero; prefer ranged if archive size is zero (degenerate case)
|
|
716
|
+
return (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
|
|
717
|
+
|
|
718
|
+
return use_ranged_requests
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
def s3_archive_use_chunked_reads(
|
|
722
|
+
zip_info_header_overhead: int = ZIP_INFO_HDR_ESTIMATED_SIZE,
|
|
723
|
+
) -> Callable[[zipfile.ZipInfo], ArchiveMemberChunk]:
|
|
724
|
+
"""
|
|
725
|
+
Map each ``ZipInfo`` to an ``ArchiveMemberChunk`` for grouping adjacent members into single ranged reads.
|
|
636
726
|
|
|
727
|
+
:param zip_info_header_overhead: Estimated overhead size for each member's ``ZipInfo`` header in bytes.
|
|
728
|
+
:return: A callable that takes a ``ZipInfo`` and returns an ``ArchiveMemberChunk``.
|
|
729
|
+
"""
|
|
637
730
|
|
|
638
|
-
|
|
639
|
-
|
|
731
|
+
if zip_info_header_overhead < ZIP_INFO_HDR_MIN_SIZE:
|
|
732
|
+
raise ValueError(f"zip_info_header_overhead must be at least {ZIP_INFO_HDR_MIN_SIZE} bytes")
|
|
640
733
|
|
|
734
|
+
def use_chunked_reads(
|
|
735
|
+
zip_info: zipfile.ZipInfo,
|
|
736
|
+
) -> ArchiveMemberChunk:
|
|
737
|
+
return ArchiveMemberChunk(zip_info.filename,
|
|
738
|
+
zip_info.header_offset,
|
|
739
|
+
zip_info.compress_size,
|
|
740
|
+
zip_info.compress_type,
|
|
741
|
+
header_overhead=zip_info_header_overhead)
|
|
641
742
|
|
|
642
|
-
|
|
643
|
-
return slice(offset + ZIPFILE_HEADER_EX_LEN_OFFSET, offset + ZIPFILE_HEADER_EX_LEN_OFFSET + 2)
|
|
743
|
+
return use_chunked_reads
|
|
644
744
|
|
|
645
745
|
|
|
646
746
|
def s3_archive_open_members(
|
|
647
747
|
client: S3Client,
|
|
648
|
-
|
|
748
|
+
bucket: str,
|
|
749
|
+
key: str,
|
|
649
750
|
members: list[str] | None = None,
|
|
650
751
|
mode: Literal["r", "rb"] = "r",
|
|
651
752
|
*,
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
member_header_overhead: int = 128,
|
|
655
|
-
use_chunked_reads: bool = False,
|
|
753
|
+
use_ranged_requests: bool | Callable[[int, list[zipfile.ZipInfo]], bool] = True,
|
|
754
|
+
use_chunked_reads: bool | Callable[[zipfile.ZipInfo], ArchiveMemberChunk] = False,
|
|
656
755
|
) -> Generator[tuple[str, Callable[[], typing.IO]], None, None]:
|
|
657
756
|
"""
|
|
658
757
|
Choose the best transfer strategy (ranged requests per-member vs full archive transfer)
|
|
@@ -665,71 +764,72 @@ def s3_archive_open_members(
|
|
|
665
764
|
|
|
666
765
|
Example usage:
|
|
667
766
|
|
|
668
|
-
>>> for member, opener in s3_archive_open_members(client,
|
|
767
|
+
>>> for member, opener in s3_archive_open_members(client, bucket, key, members):
|
|
669
768
|
... with opener() as fh:
|
|
670
769
|
... data = fh.read()
|
|
671
770
|
|
|
672
771
|
Incorrect usage that may lead to errors due to temporary file cleanup:
|
|
673
772
|
|
|
674
773
|
>>> openers = []
|
|
675
|
-
>>> for member, opener in s3_archive_open_members(client,
|
|
774
|
+
>>> for member, opener in s3_archive_open_members(client, bucket, key, members):
|
|
676
775
|
... openers.append((member, opener))
|
|
677
776
|
>>> for member, opener in openers:
|
|
678
777
|
... with opener() as fh: # May fail if temporary files have been cleaned up
|
|
679
778
|
... data = fh.read()
|
|
680
779
|
|
|
681
780
|
:param client: An instance of ``S3Client``.
|
|
682
|
-
:param
|
|
781
|
+
:param bucket: Bucket name.
|
|
782
|
+
:param key: Object key of the ZIP archive.
|
|
683
783
|
:param members: List of member names to stream.
|
|
684
784
|
:param mode: File mode for opening members ("r" for text, "rb" for binary).
|
|
685
|
-
:param
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
:param
|
|
689
|
-
|
|
785
|
+
:param use_ranged_requests: If ``True``, always use ranged requests to access the archive; if callable, use it as
|
|
786
|
+
custom logic to decide based on the archive size and member infos; if ``False``, always
|
|
787
|
+
download the whole archive.
|
|
788
|
+
:param use_chunked_reads: If ``True``, group adjacent members into single ranged reads using default ``ZipInfo``
|
|
789
|
+
to ``ArchiveMemberChunk`` mapping; if callable, use it as custom mapping from
|
|
790
|
+
``ZipInfo`` to ``ArchiveMemberChunk`` for grouping adjacent members; if ``False``,
|
|
791
|
+
read each member with individual ranged requests. If ``use_ranged_requests`` is ``False``,
|
|
792
|
+
this parameter is ignored.
|
|
690
793
|
|
|
691
794
|
:return: An iterable of callables that return file-like objects for each requested member.
|
|
692
795
|
"""
|
|
693
796
|
if mode not in ("r", "rb"):
|
|
694
797
|
raise ValueError("mode must be either 'r' or 'rb'")
|
|
695
|
-
if member_header_overhead < ZIPFILE_HEADER_MIN_SIZE:
|
|
696
|
-
raise ValueError(f"member_header_overhead must be at least {ZIPFILE_HEADER_MIN_SIZE} bytes")
|
|
697
798
|
|
|
698
799
|
s3_options = s3_options_from_s3_client(client)
|
|
699
800
|
|
|
700
|
-
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
|
|
801
|
+
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client, bucket, key, members)
|
|
802
|
+
if missed_members:
|
|
803
|
+
raise FileNotFoundError(f"Archive members not found: {', '.join(missed_members)}")
|
|
701
804
|
|
|
702
|
-
if
|
|
703
|
-
|
|
805
|
+
if callable(use_ranged_requests):
|
|
806
|
+
use_ranged_requests = use_ranged_requests(archive_size, member_zip_infos)
|
|
704
807
|
|
|
705
|
-
|
|
706
|
-
|
|
808
|
+
if use_ranged_requests and not use_chunked_reads:
|
|
809
|
+
for info in member_zip_infos:
|
|
810
|
+
opener = functools.partial(s3_archive_open_member, client, bucket, key, info.filename, mode)
|
|
811
|
+
yield info.filename, opener
|
|
812
|
+
return
|
|
707
813
|
|
|
708
|
-
|
|
709
|
-
use_ranged = (estimated_ranged_total_size / archive_size) <= threshold if archive_size > 0 else True
|
|
814
|
+
archive_url = f"s3://{bucket}/{key}"
|
|
710
815
|
|
|
711
|
-
|
|
712
|
-
class MemberChunk(object):
|
|
713
|
-
name: str
|
|
714
|
-
header_offset: int
|
|
715
|
-
compress_size: int
|
|
716
|
-
compress_type: int
|
|
816
|
+
if use_ranged_requests and use_chunked_reads:
|
|
717
817
|
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
return self.header_offset + self.compress_size + member_header_overhead
|
|
818
|
+
fn_len_slice = lambda index: slice(index + ZIP_INFO_HDR_FN_LEN_OFFSET, index + ZIP_INFO_HDR_FN_LEN_OFFSET + 2)
|
|
819
|
+
ex_len_slice = lambda index: slice(index + ZIP_INFO_HDR_EX_LEN_OFFSET, index + ZIP_INFO_HDR_EX_LEN_OFFSET + 2)
|
|
721
820
|
|
|
722
|
-
if use_ranged and use_chunked_reads:
|
|
723
821
|
# Open archive once to read central directory and gather ZipInfo for requested members.
|
|
724
822
|
# We will group adjacent members (by local header offsets) and issue one ranged read per group,
|
|
725
823
|
# then extract each member from the group's bytes to avoid many small ranged requests.
|
|
726
824
|
with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, zipfile.ZipFile(s3_fh) as archive:
|
|
825
|
+
if callable(use_chunked_reads):
|
|
826
|
+
chunks = [use_chunked_reads(info) for info in member_zip_infos]
|
|
827
|
+
else:
|
|
828
|
+
chunks = [ArchiveMemberChunk(info.filename, info.header_offset, info.compress_size, info.compress_type)
|
|
829
|
+
for info in member_zip_infos]
|
|
727
830
|
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.header_offset),
|
|
732
|
-
chunk_func=lambda x, y: y.header_offset > x.end + member_header_overhead)
|
|
831
|
+
chunks_groups = chunk_between(sorted(chunks, key=lambda x: x.begin),
|
|
832
|
+
chunk_func=lambda x, y: y.begin > x.end)
|
|
733
833
|
|
|
734
834
|
# For each group, create openers for members inside the group.
|
|
735
835
|
for group in chunks_groups:
|
|
@@ -740,18 +840,18 @@ def s3_archive_open_members(
|
|
|
740
840
|
s3_fh.seek(group_offset)
|
|
741
841
|
group_bytes = s3_fh.read(group_size)
|
|
742
842
|
|
|
743
|
-
def make_opener(chunk:
|
|
843
|
+
def make_opener(chunk: ArchiveMemberChunk) -> Callable[[], typing.IO]:
|
|
744
844
|
|
|
745
845
|
def opener() -> typing.IO:
|
|
746
846
|
index = chunk.header_offset - group_offset
|
|
747
847
|
|
|
748
|
-
if index +
|
|
848
|
+
if index + ZIP_INFO_HDR_MIN_SIZE > len(group_bytes):
|
|
749
849
|
raise IOError("unexpected short read of member header")
|
|
750
850
|
|
|
751
|
-
fn_len = int.from_bytes(group_bytes[
|
|
752
|
-
ex_len = int.from_bytes(group_bytes[
|
|
851
|
+
fn_len = int.from_bytes(group_bytes[fn_len_slice(index)], "little")
|
|
852
|
+
ex_len = int.from_bytes(group_bytes[ex_len_slice(index)], "little")
|
|
753
853
|
|
|
754
|
-
raw_data_begin = index +
|
|
854
|
+
raw_data_begin = index + ZIP_INFO_HDR_MIN_SIZE + fn_len + ex_len
|
|
755
855
|
raw_data_end = raw_data_begin + chunk.compress_size
|
|
756
856
|
|
|
757
857
|
if raw_data_end > len(group_bytes):
|
|
@@ -776,12 +876,6 @@ def s3_archive_open_members(
|
|
|
776
876
|
yield from ((chunk.name, make_opener(chunk)) for chunk in group)
|
|
777
877
|
return
|
|
778
878
|
|
|
779
|
-
if use_ranged:
|
|
780
|
-
for info in member_zip_infos:
|
|
781
|
-
opener = functools.partial(s3_archive_open_member, client, archive_url, info.filename, mode)
|
|
782
|
-
yield info.filename, opener
|
|
783
|
-
return
|
|
784
|
-
|
|
785
879
|
# Download full archive once and serve members from it (read member bytes into memory)
|
|
786
880
|
with fsspec.open(archive_url, "rb", s3=s3_options) as s3_fh, tempfile.TemporaryFile() as temp_fh:
|
|
787
881
|
shutil.copyfileobj(s3_fh, temp_fh)
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/strutils.py
RENAMED
|
@@ -22,6 +22,10 @@ __all__ = [
|
|
|
22
22
|
"strict_relpath_parser",
|
|
23
23
|
"strict_abspath_pattern",
|
|
24
24
|
"strict_abspath_parser",
|
|
25
|
+
"strict_archive_relpath_pattern",
|
|
26
|
+
"strict_archive_relpath_parser",
|
|
27
|
+
"strict_archive_abspath_pattern",
|
|
28
|
+
"strict_archive_abspath_parser",
|
|
25
29
|
"email_address_pattern",
|
|
26
30
|
"email_address_parser",
|
|
27
31
|
"semver_pattern",
|
|
@@ -82,6 +86,7 @@ period_token: pp.ParserElement = pp.Char(".")
|
|
|
82
86
|
colon_token: pp.ParserElement = pp.Char(":")
|
|
83
87
|
slash_token: pp.ParserElement = pp.Char("/")
|
|
84
88
|
plus_token: pp.ParserElement = pp.Char("+")
|
|
89
|
+
sharp_token: pp.ParserElement = pp.Char("#")
|
|
85
90
|
|
|
86
91
|
lower_regexp: re.Pattern[str] = re.compile(r"[a-z]")
|
|
87
92
|
upper_regexp: re.Pattern[str] = re.compile(r"[A-Z]")
|
|
@@ -155,6 +160,10 @@ strict_relpath_regexp: re.Pattern[str] = re.compile(
|
|
|
155
160
|
rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
|
|
156
161
|
strict_abspath_regexp: re.Pattern[str] = re.compile(
|
|
157
162
|
rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*(?:{strict_chars_regexp.pattern})?")
|
|
163
|
+
strict_archive_relpath_regexp: re.Pattern[str] = re.compile(
|
|
164
|
+
rf"(?!.*(^|/)\.+($|/))(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
|
|
165
|
+
strict_archive_abspath_regexp: re.Pattern[str] = re.compile(
|
|
166
|
+
rf"(?!.*(^|/)\.+($|/))/(?:{strict_chars_regexp.pattern}/)*({strict_chars_regexp.pattern})(?:#({strict_relpath_regexp.pattern}))?")
|
|
158
167
|
|
|
159
168
|
strict_path_chars_element = strict_chars_element.copy()
|
|
160
169
|
strict_path_chars_element.add_condition(token_reparse(period_token[1, ...], negate=True),
|
|
@@ -164,6 +173,12 @@ strict_relpath_element: pp.ParserElement = pp.Combine(
|
|
|
164
173
|
(strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
|
|
165
174
|
strict_abspath_element: pp.ParserElement = pp.Combine(
|
|
166
175
|
slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element[0, 1])
|
|
176
|
+
strict_archive_relpath_element: pp.ParserElement = pp.Combine(
|
|
177
|
+
(strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
|
|
178
|
+
(sharp_token + strict_relpath_element)[0, 1])
|
|
179
|
+
strict_archive_abspath_element: pp.ParserElement = pp.Combine(
|
|
180
|
+
slash_token + (strict_path_chars_element + slash_token)[...] + strict_path_chars_element +
|
|
181
|
+
(sharp_token + strict_relpath_element)[0, 1])
|
|
167
182
|
|
|
168
183
|
email_address_regexp: re.Pattern[str] = re.compile(
|
|
169
184
|
rf"(({lower_digit_regexp.pattern}|[_-])+)(?:\.({lower_digit_regexp.pattern}|[_-])+)*@(?:{kebab_case_regexp.pattern}\.)+({lower_digit_regexp.pattern}{{2,63}})")
|
|
@@ -206,6 +221,10 @@ strict_relpath_pattern = make_string_pattern(strict_relpath_regexp)
|
|
|
206
221
|
strict_relpath_parser = make_string_parser(strict_relpath_element)
|
|
207
222
|
strict_abspath_pattern = make_string_pattern(strict_abspath_regexp)
|
|
208
223
|
strict_abspath_parser = make_string_parser(strict_abspath_element)
|
|
224
|
+
strict_archive_relpath_pattern = make_string_pattern(strict_archive_relpath_regexp)
|
|
225
|
+
strict_archive_relpath_parser = make_string_parser(strict_archive_relpath_element)
|
|
226
|
+
strict_archive_abspath_pattern = make_string_pattern(strict_archive_abspath_regexp)
|
|
227
|
+
strict_archive_abspath_parser = make_string_parser(strict_archive_abspath_element)
|
|
209
228
|
|
|
210
229
|
email_address_pattern = make_string_pattern(email_address_regexp)
|
|
211
230
|
email_address_parser = make_string_parser(email_address_element)
|
|
@@ -488,21 +488,21 @@ class S3UtilsTest(unittest.TestCase):
|
|
|
488
488
|
local_root = os.path.join(resources_directory, "unittest", "s3utils")
|
|
489
489
|
local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
|
|
490
490
|
|
|
491
|
-
for
|
|
492
|
-
["
|
|
493
|
-
"s3://dummy-bucket/s3utils_archive/archive.compressed.zip"],
|
|
491
|
+
for archive_key, members in itertools.product(
|
|
492
|
+
["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
|
|
494
493
|
[local_members, None],
|
|
495
494
|
):
|
|
496
|
-
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
|
|
495
|
+
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
|
|
496
|
+
"dummy-bucket",
|
|
497
|
+
archive_key,
|
|
498
|
+
members)
|
|
497
499
|
|
|
498
500
|
self.assertEqual(archive_size - sum(info.compress_size for info in member_zip_infos), 2470)
|
|
499
501
|
self.assertEqual(set(info.filename for info in member_zip_infos), set(local_members))
|
|
500
502
|
self.assertEqual(len(missed_members), 0)
|
|
501
503
|
|
|
502
|
-
for
|
|
503
|
-
["
|
|
504
|
-
"s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
|
|
505
|
-
],
|
|
504
|
+
for archive_key, (members, members_expect) in itertools.product(
|
|
505
|
+
["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
|
|
506
506
|
[(local_members, local_members),
|
|
507
507
|
(None, local_members),
|
|
508
508
|
(["dir.baz/", "dir.foo/"],
|
|
@@ -524,7 +524,10 @@ class S3UtilsTest(unittest.TestCase):
|
|
|
524
524
|
),
|
|
525
525
|
],
|
|
526
526
|
):
|
|
527
|
-
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
|
|
527
|
+
archive_size, member_zip_infos, missed_members = s3_archive_listfile(client,
|
|
528
|
+
"dummy-bucket",
|
|
529
|
+
archive_key,
|
|
530
|
+
members)
|
|
528
531
|
|
|
529
532
|
self.assertEqual(sorted(list(info.filename for info in member_zip_infos)), sorted(members_expect))
|
|
530
533
|
self.assertEqual(len(missed_members), 0)
|
|
@@ -545,16 +548,14 @@ class S3UtilsTest(unittest.TestCase):
|
|
|
545
548
|
local_root = os.path.join(resources_directory, "unittest", "s3utils")
|
|
546
549
|
local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
|
|
547
550
|
|
|
548
|
-
for
|
|
549
|
-
["
|
|
550
|
-
"s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
|
|
551
|
-
],
|
|
551
|
+
for archive_key, mode in itertools.product(
|
|
552
|
+
["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
|
|
552
553
|
["r", "rb"],
|
|
553
554
|
):
|
|
554
555
|
for local_member in local_members:
|
|
555
556
|
with (
|
|
556
557
|
open(os.path.join(local_root, local_member), mode) as local_fh,
|
|
557
|
-
s3_archive_open_member(client,
|
|
558
|
+
s3_archive_open_member(client, "dummy-bucket", archive_key, local_member, mode) as s3_fh,
|
|
558
559
|
):
|
|
559
560
|
self.assertEqual(local_fh.read(), s3_fh.read())
|
|
560
561
|
|
|
@@ -574,22 +575,20 @@ class S3UtilsTest(unittest.TestCase):
|
|
|
574
575
|
local_root = os.path.join(resources_directory, "unittest", "s3utils")
|
|
575
576
|
local_members = [os.path.relpath(file_path, local_root) for file_path in listfile(local_root)]
|
|
576
577
|
|
|
577
|
-
for
|
|
578
|
-
["
|
|
579
|
-
"s3://dummy-bucket/s3utils_archive/archive.compressed.zip",
|
|
580
|
-
],
|
|
578
|
+
for archive_key, members, mode, use_ranged_requests, use_chunked_reads in itertools.product(
|
|
579
|
+
["s3utils_archive/archive.uncompressed.zip", "s3utils_archive/archive.compressed.zip"],
|
|
581
580
|
[local_members, None],
|
|
582
581
|
["r", "rb"],
|
|
583
|
-
[
|
|
584
|
-
[False, True],
|
|
582
|
+
[False, True, s3_archive_use_ranged_requests()],
|
|
583
|
+
[False, True, s3_archive_use_chunked_reads()],
|
|
585
584
|
):
|
|
586
585
|
for member, opener in s3_archive_open_members(
|
|
587
586
|
client,
|
|
588
|
-
|
|
587
|
+
"dummy-bucket",
|
|
588
|
+
archive_key,
|
|
589
589
|
members,
|
|
590
590
|
mode,
|
|
591
|
-
|
|
592
|
-
central_directory_overhead=0,
|
|
591
|
+
use_ranged_requests=use_ranged_requests,
|
|
593
592
|
use_chunked_reads=use_chunked_reads,
|
|
594
593
|
):
|
|
595
594
|
with open(os.path.join(local_root, member), mode) as local_fh, opener() as s3_fh:
|
|
@@ -13,6 +13,8 @@ from plexus.common.utils.strutils import kebab_case_parser, kebab_case_pattern
|
|
|
13
13
|
from plexus.common.utils.strutils import parse_bag_name, parse_user_email, parse_user_name, parse_vehicle_name
|
|
14
14
|
from plexus.common.utils.strutils import snake_case_parser, snake_case_pattern
|
|
15
15
|
from plexus.common.utils.strutils import strict_abspath_parser, strict_abspath_pattern
|
|
16
|
+
from plexus.common.utils.strutils import strict_archive_abspath_parser, strict_archive_abspath_pattern
|
|
17
|
+
from plexus.common.utils.strutils import strict_archive_relpath_parser, strict_archive_relpath_pattern
|
|
16
18
|
from plexus.common.utils.strutils import strict_relpath_parser, strict_relpath_pattern
|
|
17
19
|
from plexus.common.utils.strutils import topic_parser, topic_pattern
|
|
18
20
|
from plexus.common.utils.strutils import vin_code_parser, vin_code_pattern
|
|
@@ -377,6 +379,154 @@ class StrUtilsTest(unittest.TestCase):
|
|
|
377
379
|
with self.assertRaises(pp.ParseException):
|
|
378
380
|
strict_abspath_parser.parse_string(data, parse_all=True)
|
|
379
381
|
|
|
382
|
+
data_strict_archive_relpath_pattern = [
|
|
383
|
+
("archive.zip",),
|
|
384
|
+
("directory/archive.zip",),
|
|
385
|
+
("directory/dummy/archive.zip",),
|
|
386
|
+
("directory/dummy/path/archive.zip",),
|
|
387
|
+
("directory/dummy/path/to/archive.zip",),
|
|
388
|
+
("directory/dummy/path_to-archive.zip",),
|
|
389
|
+
("directory/dummy/.path_to-archive.zip",),
|
|
390
|
+
("archive.zip#",),
|
|
391
|
+
("archive.zip#directory",),
|
|
392
|
+
("archive.zip#directory/",),
|
|
393
|
+
("directory/archive.zip#directory/dummy",),
|
|
394
|
+
("directory/archive.zip#directory/dummy/",),
|
|
395
|
+
("directory/dummy/archive.zip#directory/dummy/path",),
|
|
396
|
+
("directory/dummy/path/archive.zip#directory/dummy/path/to",),
|
|
397
|
+
("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
|
|
398
|
+
("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
|
|
399
|
+
("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
|
|
400
|
+
]
|
|
401
|
+
|
|
402
|
+
@ddt.idata(data_strict_archive_relpath_pattern)
|
|
403
|
+
@ddt.unpack
|
|
404
|
+
def test_strict_archive_relpath_pattern(self, data):
|
|
405
|
+
self.assertIsNotNone(strict_archive_relpath_pattern.match(data))
|
|
406
|
+
self.assertIsNotNone(strict_archive_relpath_parser.parse_string(data, parse_all=True))
|
|
407
|
+
|
|
408
|
+
data_strict_archive_relpath_pattern__bad_cases = [
|
|
409
|
+
("/archive.zip",),
|
|
410
|
+
("/directory/archive.zip",),
|
|
411
|
+
("/directory/dummy/archive.zip",),
|
|
412
|
+
("/directory/dummy/path/archive.zip",),
|
|
413
|
+
("/directory/dummy/path/to/archive.zip",),
|
|
414
|
+
("/directory/dummy/path_to-archive.zip",),
|
|
415
|
+
("/directory/dummy/.path_to-archive.zip",),
|
|
416
|
+
("/archive.zip#directory",),
|
|
417
|
+
("/archive.zip#directory/",),
|
|
418
|
+
("/directory/archive.zip#directory/dummy",),
|
|
419
|
+
("/directory/archive.zip#directory/dummy/",),
|
|
420
|
+
("/directory/dummy/archive.zip#directory/dummy/path",),
|
|
421
|
+
("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
|
|
422
|
+
("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
|
|
423
|
+
("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
|
|
424
|
+
("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
|
|
425
|
+
("archive.zip/",),
|
|
426
|
+
("directory/archive.zip/",),
|
|
427
|
+
("directory/dummy/archive.zip/",),
|
|
428
|
+
("directory/dummy/path/archive.zip/",),
|
|
429
|
+
("directory/dummy/path/to/archive.zip/",),
|
|
430
|
+
("directory/dummy/path_to-archive.zip/",),
|
|
431
|
+
("directory/dummy/.path_to-archive.zip/",),
|
|
432
|
+
("archive.zip#/directory",),
|
|
433
|
+
("archive.zip#/directory/",),
|
|
434
|
+
("directory/archive.zip#/directory/dummy",),
|
|
435
|
+
("directory/archive.zip#/directory/dummy/",),
|
|
436
|
+
("directory/dummy/archive.zip#/directory/dummy/path",),
|
|
437
|
+
("directory/dummy/path/archive.zip#/directory/dummy/path/to",),
|
|
438
|
+
("directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
|
|
439
|
+
("directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
|
|
440
|
+
("directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
|
|
441
|
+
("directory/archive.zip#directory/./dummy",),
|
|
442
|
+
("directory/archive.zip#directory/../dummy",),
|
|
443
|
+
("directory/archive.zip#directory/.../dummy",),
|
|
444
|
+
("directory/archive.zip#directory/\t/dummy",),
|
|
445
|
+
("directory/archive.zip#directory/\r/dummy",),
|
|
446
|
+
("directory/archive.zip#directory/\n/dummy",),
|
|
447
|
+
]
|
|
448
|
+
|
|
449
|
+
@ddt.idata(data_strict_archive_relpath_pattern__bad_cases)
|
|
450
|
+
@ddt.unpack
|
|
451
|
+
def test_strict_archive_relpath_pattern__bad_cases(self, data):
|
|
452
|
+
self.assertIsNone(strict_archive_relpath_pattern.match(data))
|
|
453
|
+
with self.assertRaises(pp.ParseException):
|
|
454
|
+
strict_archive_relpath_parser.parse_string(data, parse_all=True)
|
|
455
|
+
|
|
456
|
+
data_strict_archive_abspath_pattern = [
|
|
457
|
+
("/archive.zip",),
|
|
458
|
+
("/directory/archive.zip",),
|
|
459
|
+
("/directory/dummy/archive.zip",),
|
|
460
|
+
("/directory/dummy/path/archive.zip",),
|
|
461
|
+
("/directory/dummy/path/to/archive.zip",),
|
|
462
|
+
("/directory/dummy/path_to-archive.zip",),
|
|
463
|
+
("/directory/dummy/.path_to-archive.zip",),
|
|
464
|
+
("/archive.zip#",),
|
|
465
|
+
("/archive.zip#directory",),
|
|
466
|
+
("/archive.zip#directory/",),
|
|
467
|
+
("/directory/archive.zip#directory/dummy",),
|
|
468
|
+
("/directory/archive.zip#directory/dummy/",),
|
|
469
|
+
("/directory/dummy/archive.zip#directory/dummy/path",),
|
|
470
|
+
("/directory/dummy/path/archive.zip#directory/dummy/path/to",),
|
|
471
|
+
("/directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
|
|
472
|
+
("/directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
|
|
473
|
+
("/directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
|
|
474
|
+
]
|
|
475
|
+
|
|
476
|
+
@ddt.idata(data_strict_archive_abspath_pattern)
|
|
477
|
+
@ddt.unpack
|
|
478
|
+
def test_strict_archive_abspath_pattern(self, data):
|
|
479
|
+
self.assertIsNotNone(strict_archive_abspath_pattern.match(data))
|
|
480
|
+
self.assertIsNotNone(strict_archive_abspath_parser.parse_string(data, parse_all=True))
|
|
481
|
+
|
|
482
|
+
data_strict_archive_abspath_pattern__bad_cases = [
|
|
483
|
+
("archive.zip",),
|
|
484
|
+
("directory/archive.zip",),
|
|
485
|
+
("directory/dummy/archive.zip",),
|
|
486
|
+
("directory/dummy/path/archive.zip",),
|
|
487
|
+
("directory/dummy/path/to/archive.zip",),
|
|
488
|
+
("directory/dummy/path_to-archive.zip",),
|
|
489
|
+
("directory/dummy/.path_to-archive.zip",),
|
|
490
|
+
("archive.zip#directory",),
|
|
491
|
+
("archive.zip#directory/",),
|
|
492
|
+
("directory/archive.zip#directory/dummy",),
|
|
493
|
+
("directory/archive.zip#directory/dummy/",),
|
|
494
|
+
("directory/dummy/archive.zip#directory/dummy/path",),
|
|
495
|
+
("directory/dummy/path/archive.zip#directory/dummy/path/to",),
|
|
496
|
+
("directory/dummy/path/to/archive.zip#directory/dummy/path/to/file.txt",),
|
|
497
|
+
("directory/dummy/path_to-archive.zip#directory/dummy/path_to-file.txt.",),
|
|
498
|
+
("directory/dummy/.path_to-archive.zip#directory/dummy/.path_to-file.txt",),
|
|
499
|
+
("/archive.zip/",),
|
|
500
|
+
("/directory/archive.zip/",),
|
|
501
|
+
("/directory/dummy/archive.zip/",),
|
|
502
|
+
("/directory/dummy/path/archive.zip/",),
|
|
503
|
+
("/directory/dummy/path/to/archive.zip/",),
|
|
504
|
+
("/directory/dummy/path_to-archive.zip/",),
|
|
505
|
+
("/directory/dummy/.path_to-archive.zip/",),
|
|
506
|
+
("/archive.zip#/directory",),
|
|
507
|
+
("/archive.zip#/directory/",),
|
|
508
|
+
("/directory/archive.zip#/directory/dummy",),
|
|
509
|
+
("/directory/archive.zip#/directory/dummy/",),
|
|
510
|
+
("/directory/dummy/archive.zip#/directory/dummy/path",),
|
|
511
|
+
("/directory/dummy/path/archive.zip#/directory/dummy/path/to",),
|
|
512
|
+
("/directory/dummy/path/to/archive.zip#/directory/dummy/path/to/file.txt",),
|
|
513
|
+
("/directory/dummy/path_to-archive.zip#/directory/dummy/path_to-file.txt.",),
|
|
514
|
+
("/directory/dummy/.path_to-archive.zip#/directory/dummy/.path_to-file.txt",),
|
|
515
|
+
("/directory/archive.zip#directory/./dummy",),
|
|
516
|
+
("/directory/archive.zip#directory/../dummy",),
|
|
517
|
+
("/directory/archive.zip#directory/.../dummy",),
|
|
518
|
+
("/directory/archive.zip#directory/\t/dummy",),
|
|
519
|
+
("/directory/archive.zip#directory/\r/dummy",),
|
|
520
|
+
("/directory/archive.zip#directory/\n/dummy",),
|
|
521
|
+
]
|
|
522
|
+
|
|
523
|
+
@ddt.idata(data_strict_archive_abspath_pattern__bad_cases)
|
|
524
|
+
@ddt.unpack
|
|
525
|
+
def test_strict_archive_abspath_pattern__bad_cases(self, data):
|
|
526
|
+
self.assertIsNone(strict_archive_abspath_pattern.match(data))
|
|
527
|
+
with self.assertRaises(pp.ParseException):
|
|
528
|
+
strict_archive_abspath_parser.parse_string(data, parse_all=True)
|
|
529
|
+
|
|
380
530
|
data_email_address_pattern = [
|
|
381
531
|
("someone@dummy.com",),
|
|
382
532
|
("some.one@dummy.com",),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/0-dummy
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/1-dummy
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/2-dummy
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.0.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.1.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.2.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/resources/unittest/shutils/dummy.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMFile.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMNode.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMTags.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/OSMWay.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/carto/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/__init__.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/apiutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/bagutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/config.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/datautils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/jsonutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/ormutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/shutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/sqlutils.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/src/plexus/common/utils/testutils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/pose_test.py
RENAMED
|
File without changes
|
{plexus_python_common-1.0.31 → plexus_python_common-1.0.33}/test/plexus_tests/common/proj_test.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|