plexus-python-common 1.0.20__tar.gz → 1.0.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/PKG-INFO +3 -1
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/pyproject.toml +4 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.0.0.vol-0.jsonl +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.1.1.jsonl +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.1.1.vol-1.jsonl +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.2.2.jsonl +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.2.2.vol-2.jsonl +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.csv.part0 +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.csv.part1 +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.csv.part2 +0 -0
- plexus_python_common-1.0.22/resources/unittest/shutils/dummy.txt +0 -0
- plexus_python_common-1.0.22/src/plexus/common/utils/__init__.py +0 -0
- plexus_python_common-1.0.22/src/plexus/common/utils/s3utils.py +416 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/PKG-INFO +3 -1
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/SOURCES.txt +11 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/requires.txt +2 -0
- plexus_python_common-1.0.22/test/plexus_tests/common/utils/s3utils_test.py +458 -0
- plexus_python_common-1.0.20/src/plexus/common/utils/s3utils.py +0 -118
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/.editorconfig +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/.github/workflows/pr.yml +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/.github/workflows/push.yml +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/.gitignore +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/MANIFEST.in +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/README.md +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/VERSION +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/jsonutils/dummy.0.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/jsonutils/dummy.1.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/jsonutils/dummy.2.jsonl +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/0-dummy → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.baz/file.bar.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/1-dummy → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.baz/file.foo.bar +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/2-dummy → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.baz/file.foo.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.0.0.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/dir.foo.bar/dir.foo.bar.baz/file.foo.bar.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.0.0.vol-0.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.bar.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.1.1.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.bar +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.1.1.vol-1.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.2.2.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/file.bar +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.2.2.vol-2.jsonl → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/file.baz +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.csv.part0 → /plexus_python_common-1.0.22/resources/unittest/s3utils/dir.foo/file.foo +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.csv.part1 → /plexus_python_common-1.0.22/resources/unittest/shutils/0-dummy +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.csv.part2 → /plexus_python_common-1.0.22/resources/unittest/shutils/1-dummy +0 -0
- /plexus_python_common-1.0.20/resources/unittest/shutils/dummy.txt → /plexus_python_common-1.0.22/resources/unittest/shutils/2-dummy +0 -0
- /plexus_python_common-1.0.20/src/plexus/common/utils/__init__.py → /plexus_python_common-1.0.22/resources/unittest/shutils/dummy.0.0.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/shutils/dummy.0.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/shutils/dummy.1.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/resources/unittest/shutils/dummy.2.jsonl +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/setup.cfg +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/setup.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/__init__.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/carto/OSMFile.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/carto/OSMNode.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/carto/OSMTags.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/carto/OSMWay.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/carto/__init__.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/config.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/pose.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/proj.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/bagutils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/datautils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/jsonutils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/ormutils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/shutils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus/common/utils/strutils.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/dependency_links.txt +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/not-zip-safe +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/src/plexus_python_common.egg-info/top_level.txt +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/__init__.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/carto/osm_file_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/carto/osm_tags_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/pose_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/proj_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/bagutils_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/datautils_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/jsonutils_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/ormutils_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/shutils_test.py +0 -0
- {plexus_python_common-1.0.20 → plexus_python_common-1.0.22}/test/plexus_tests/common/utils/strutils_test.py +0 -0
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: plexus-python-common
|
|
3
|
-
Version: 1.0.20
|
|
3
|
+
Version: 1.0.22
|
|
4
4
|
Classifier: Programming Language :: Python :: 3
|
|
5
5
|
Classifier: Programming Language :: Python :: 3.12
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.13
|
|
7
7
|
Classifier: Programming Language :: Python :: 3.14
|
|
8
8
|
Requires-Python: <3.15,>=3.12
|
|
9
|
+
Requires-Dist: boto3>=1.41
|
|
9
10
|
Requires-Dist: cloudpathlib>=0.21
|
|
10
11
|
Requires-Dist: lxml>=6.0
|
|
11
12
|
Requires-Dist: numpy>=2.3
|
|
@@ -23,6 +24,7 @@ Provides-Extra: all
|
|
|
23
24
|
Requires-Dist: plexus-python-common; extra == "all"
|
|
24
25
|
Provides-Extra: test
|
|
25
26
|
Requires-Dist: ddt>=1.7; extra == "test"
|
|
27
|
+
Requires-Dist: moto[all,ec2,s3]>=5.1; extra == "test"
|
|
26
28
|
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
27
29
|
Requires-Dist: pytest-order>=1.3; extra == "test"
|
|
28
30
|
Requires-Dist: pytest-postgresql>=6.1; extra == "test"
|
|
@@ -8,6 +8,7 @@ build-backend = "setuptools.build_meta"
|
|
|
8
8
|
|
|
9
9
|
[dependency-groups]
|
|
10
10
|
dev = [
|
|
11
|
+
"boto3>=1.41",
|
|
11
12
|
"cloudpathlib>=0.21",
|
|
12
13
|
"lxml>=6.0",
|
|
13
14
|
"numpy>=2.3",
|
|
@@ -24,6 +25,7 @@ dev = [
|
|
|
24
25
|
]
|
|
25
26
|
test = [
|
|
26
27
|
"ddt>=1.7",
|
|
28
|
+
"moto[ec2,s3,all]>=5.1",
|
|
27
29
|
"pytest-cov>=5.0",
|
|
28
30
|
"pytest-order>=1.3",
|
|
29
31
|
"pytest-postgresql>=6.1",
|
|
@@ -41,6 +43,7 @@ classifiers = [
|
|
|
41
43
|
"Programming Language :: Python :: 3.14",
|
|
42
44
|
]
|
|
43
45
|
dependencies = [
|
|
46
|
+
"boto3>=1.41",
|
|
44
47
|
"cloudpathlib>=0.21",
|
|
45
48
|
"lxml>=6.0",
|
|
46
49
|
"numpy>=2.3",
|
|
@@ -62,6 +65,7 @@ all = [
|
|
|
62
65
|
]
|
|
63
66
|
test = [
|
|
64
67
|
"ddt>=1.7",
|
|
68
|
+
"moto[ec2,s3,all]>=5.1",
|
|
65
69
|
"pytest-cov>=5.0",
|
|
66
70
|
"pytest-order>=1.3",
|
|
67
71
|
"pytest-postgresql>=6.1",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
import contextlib
|
|
3
|
+
import dataclasses
|
|
4
|
+
import datetime
|
|
5
|
+
import functools
|
|
6
|
+
import mimetypes
|
|
7
|
+
import os
|
|
8
|
+
import os.path
|
|
9
|
+
import tempfile
|
|
10
|
+
from collections.abc import Callable, Generator
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Literal
|
|
13
|
+
|
|
14
|
+
import boto3
|
|
15
|
+
from cloudpathlib import CloudPath, S3Client, S3Path
|
|
16
|
+
from iker.common.utils.shutils import glob_match, listfile, path_depth
|
|
17
|
+
from iker.common.utils.strutils import is_empty, trim_to_none
|
|
18
|
+
from rich.progress import BarColumn, DownloadColumn, Progress, TaskID, TextColumn, TransferSpeedColumn
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"S3ObjectMeta",
|
|
22
|
+
"s3_make_client",
|
|
23
|
+
"s3_list_objects",
|
|
24
|
+
"s3_listfile",
|
|
25
|
+
"s3_cp_download",
|
|
26
|
+
"s3_cp_upload",
|
|
27
|
+
"s3_sync_download",
|
|
28
|
+
"s3_sync_upload",
|
|
29
|
+
"s3_pull_text",
|
|
30
|
+
"s3_push_text",
|
|
31
|
+
"S3TransferCallbackClient",
|
|
32
|
+
"s3_make_progress_callback",
|
|
33
|
+
"s3_make_progressed_client",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclasses.dataclass
class S3ObjectMeta:
    """
    Metadata describing a single S3 object, as produced by the listing helpers of this module.
    """

    # Object key, taken from the ``Key`` field of a listing response entry
    key: str
    # Last modification timestamp, taken from the ``LastModified`` field of a listing response entry
    last_modified: datetime.datetime
    # Object size, taken from the ``Size`` field of a listing response entry
    size: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@contextlib.contextmanager
def s3_make_client(
    access_key_id: str | None = None,
    secret_access_key: str | None = None,
    region_name: str | None = None,
    endpoint_url: str | None = None,
) -> Generator[S3Client]:
    """
    Creates an S3 client and exposes it as a context manager.

    Every argument is passed through ``trim_to_none`` before being handed to ``boto3`` / ``cloudpathlib``
    (presumably so that blank values fall back to the library defaults -- confirm against ``trim_to_none``).
    No teardown is performed when the context exits; the client is simply yielded.

    :param access_key_id: AWS access key ID.
    :param secret_access_key: AWS secret access key.
    :param region_name: AWS service region name.
    :param endpoint_url: AWS service endpoint URL.
    :return: An instance of ``S3Client``.
    """
    session = boto3.Session(aws_access_key_id=trim_to_none(access_key_id),
                            aws_secret_access_key=trim_to_none(secret_access_key),
                            region_name=trim_to_none(region_name))
    yield S3Client(boto3_session=session, endpoint_url=trim_to_none(endpoint_url))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def s3_list_objects(
    client: S3Client,
    bucket: str,
    prefix: str,
    limit: int | None = None,
) -> Generator[S3ObjectMeta]:
    """
    Lists all objects from the given S3 ``bucket`` and ``prefix``.

    Objects are fetched page by page via ``list_objects_v2`` and yielded lazily. When ``limit`` is given,
    each page requests no more keys than are still needed and listing stops once ``limit`` objects have
    been yielded.

    :param client: An instance of ``S3Client``.
    :param bucket: Bucket name.
    :param prefix: Object keys prefix.
    :param limit: Maximum number of objects to return (``None`` for all).
    :return: An iterable of ``S3ObjectMeta`` objects representing the S3 objects.
    """
    # Nothing can be yielded for a non-positive limit, so no request is issued at all
    if limit is not None and limit <= 0:
        return

    page_size = 1000
    request = {"Bucket": bucket, "Prefix": prefix}
    remaining = limit

    while True:
        # Ask only for as many keys as are still needed
        request["MaxKeys"] = page_size if remaining is None else min(page_size, remaining)
        response = client.client.list_objects_v2(**request)

        contents = response.get("Contents", [])
        if remaining is not None:
            # Defensive truncation in case the service returns more entries than requested
            contents = contents[:remaining]
            remaining -= len(contents)

        for entry in contents:
            yield S3ObjectMeta(key=entry["Key"], last_modified=entry["LastModified"], size=entry["Size"])

        if remaining == 0 or not response.get("IsTruncated"):
            break

        continuation_token = response.get("NextContinuationToken")
        if not continuation_token:
            # Without a token the next request would restart from the first page and loop forever
            break
        request["ContinuationToken"] = continuation_token
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def s3_listfile(
    client: S3Client,
    bucket: str,
    prefix: str,
    *,
    include_patterns: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
    depth: int = 0,
) -> Generator[S3ObjectMeta]:
    """
    Lists the objects under the given S3 ``bucket`` and ``prefix`` that pass the filename patterns and the
    directory depth restriction.

    :param client: An instance of ``S3Client``.
    :param bucket: Bucket name.
    :param prefix: Object keys prefix.
    :param include_patterns: Inclusive glob patterns applied to filenames.
    :param exclude_patterns: Exclusive glob patterns applied to filenames.
    :param depth: Maximum depth of subdirectories to include in the scan (``0`` for unlimited depth).
    :return: An iterable of ``S3ObjectMeta`` objects representing the filtered S3 objects.
    """

    # The prefix is always treated as a directory: a missing trailing slash "/" is appended
    # NOTE(review): an empty prefix turns into "/" here -- confirm this is intended
    scan_prefix = prefix if prefix.endswith("/") else prefix + "/"

    def is_selected(candidate: S3ObjectMeta) -> bool:
        # The depth check only applies for a positive ``depth``; depth is measured by ``path_depth``
        # between the prefix and the directory part of the key
        if 0 < depth <= path_depth(scan_prefix, os.path.dirname(candidate.key)):
            return False
        # Patterns are matched against the filename part of the key only
        matched = glob_match([os.path.basename(candidate.key)], include_patterns, exclude_patterns)
        return len(matched) != 0

    for candidate in s3_list_objects(client, bucket, scan_prefix):
        if is_selected(candidate):
            yield candidate
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def s3_cp_download(client: S3Client, bucket: str, key: str, file_path: str | os.PathLike[str]):
    """
    Copies a single object, identified by S3 ``bucket`` and ``key``, into a local file.

    :param client: An instance of ``S3Client``.
    :param bucket: Bucket name.
    :param key: Object key.
    :param file_path: Local file path to save the object.
    """
    # Delegates to the underlying boto3 client held by the ``S3Client``
    boto3_client = client.client
    boto3_client.download_file(Bucket=bucket, Key=key, Filename=file_path)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def s3_cp_upload(client: S3Client, file_path: str | os.PathLike[str], bucket: str, key: str):
    """
    Copies a single local file into the given S3 ``bucket`` under ``key``.

    The content type of the object is guessed from the file name; ``binary/octet-stream`` is used when
    no type can be guessed.

    :param client: An instance of ``S3Client``.
    :param file_path: Local file path to upload.
    :param bucket: Bucket name.
    :param key: Object key for the uploaded file.
    """
    guessed_type, _ = mimetypes.MimeTypes().guess_type(file_path)
    if guessed_type is None:
        content_type = "binary/octet-stream"
    else:
        content_type = guessed_type

    client.client.upload_file(Filename=file_path,
                              Bucket=bucket,
                              Key=key,
                              ExtraArgs={"ContentType": content_type})
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def s3_sync_download(
    client: S3Client,
    bucket: str,
    prefix: str,
    dir_path: str | os.PathLike[str],
    *,
    max_workers: int | None = None,
    include_patterns: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
    depth: int = 0,
):
    """
    Recursively downloads all objects from the given S3 ``bucket`` and ``prefix`` to a local directory path,
    using a thread pool.

    Each object is stored under ``dir_path`` at its key relative to ``prefix``; missing local directories
    are created. Keys that denote a directory placeholder (relative key empty or ending with "/") are
    skipped, since they cannot be written as a local file.

    :param client: An instance of ``S3Client``.
    :param bucket: Bucket name.
    :param prefix: Object keys prefix.
    :param dir_path: Local directory path to save objects.
    :param max_workers: Maximum number of worker threads.
    :param include_patterns: Inclusive glob patterns applied to filenames.
    :param exclude_patterns: Exclusive glob patterns applied to filenames.
    :param depth: Maximum depth of subdirectories to include in the scan (``0`` for unlimited depth).
    :raises RuntimeError: If some downloads were left unfinished without any finished download reporting an error.
    """

    # We add trailing slash "/" to the prefix if it is absent
    if not prefix.endswith("/"):
        prefix = prefix + "/"

    objects = s3_listfile(client,
                          bucket,
                          prefix,
                          include_patterns=include_patterns,
                          exclude_patterns=exclude_patterns,
                          depth=depth)

    def download_file(key: str):
        relative_key = key.removeprefix(prefix)
        # Directory placeholder objects would map to a local directory path and make the download fail
        if not relative_key or relative_key.endswith("/"):
            return
        file_path = os.path.join(dir_path, relative_key)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        s3_cp_download(client, bucket, key, file_path)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(download_file, obj.key) for obj in objects]
        done_futures, not_done_futures = concurrent.futures.wait(futures,
                                                                 return_when=concurrent.futures.FIRST_EXCEPTION)
        # Anything still pending after the first failure is cancelled so it never starts
        for future in not_done_futures:
            future.cancel()
        # Re-raise the first error found among the finished downloads
        for future in done_futures:
            exc = future.exception()
            if exc is not None:
                raise exc
        if not_done_futures:
            raise RuntimeError("download did not complete due to errors in some threads")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def s3_sync_upload(
    client: S3Client,
    dir_path: str | os.PathLike[str],
    bucket: str,
    prefix: str,
    *,
    max_workers: int | None = None,
    include_patterns: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
    depth: int = 0,
):
    """
    Recursively uploads all files from a local directory to the given S3 ``bucket`` and ``prefix``,
    using a thread pool.

    Each file is stored under ``prefix`` at its path relative to ``dir_path``. The relative path is always
    written with "/" separators, so object keys do not depend on the local operating system.

    :param client: An instance of ``S3Client``.
    :param dir_path: Local directory path to upload from.
    :param bucket: Bucket name.
    :param prefix: Object keys prefix for uploaded files.
    :param max_workers: Maximum number of worker threads.
    :param include_patterns: Inclusive glob patterns applied to filenames.
    :param exclude_patterns: Exclusive glob patterns applied to filenames.
    :param depth: Maximum depth of subdirectories to include in the scan (``0`` for unlimited depth).
    :raises RuntimeError: If some uploads were left unfinished without any finished upload reporting an error.
    """

    # We add trailing slash "/" to the prefix if it is absent
    if not prefix.endswith("/"):
        prefix = prefix + "/"

    file_paths = listfile(dir_path,
                          include_patterns=include_patterns,
                          exclude_patterns=exclude_patterns,
                          depth=depth)

    def upload_file(file_path: str):
        # S3 keys use "/" regardless of the local path separator (a no-op where ``os.sep`` is "/")
        relative_key = os.path.relpath(file_path, dir_path).replace(os.sep, "/")
        s3_cp_upload(client, file_path, bucket, prefix + relative_key)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(upload_file, file_path) for file_path in file_paths]
        done_futures, not_done_futures = concurrent.futures.wait(futures,
                                                                 return_when=concurrent.futures.FIRST_EXCEPTION)
        # Anything still pending after the first failure is cancelled so it never starts
        for future in not_done_futures:
            future.cancel()
        # Re-raise the first error found among the finished uploads
        for future in done_futures:
            exc = future.exception()
            if exc is not None:
                raise exc
        if not_done_futures:
            raise RuntimeError("upload did not complete due to errors in some threads")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def s3_pull_text(client: S3Client, bucket: str, key: str, encoding: str = None) -> str:
    """
    Fetches the object stored at the given S3 ``bucket`` and ``key`` and returns its content as text.

    :param client: An instance of ``S3Client``.
    :param bucket: Bucket name.
    :param key: Object key storing the text.
    :param encoding: String encoding to use (defaults to UTF-8).
    :return: The decoded text content.
    """
    # The object is staged in an anonymous temporary file and read back in full
    with tempfile.TemporaryFile() as buffer:
        client.client.download_fileobj(Bucket=bucket, Key=key, Fileobj=buffer)
        buffer.seek(0)
        raw = buffer.read()
    return raw.decode(encoding or "utf-8")
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def s3_push_text(client: S3Client, text: str, bucket: str, key: str, encoding: str = None):
    """
    Stores the given text as an object at the specified S3 ``bucket`` and ``key``.

    :param client: An instance of ``S3Client``.
    :param text: Text content to upload.
    :param bucket: Bucket name.
    :param key: Object key to store the text.
    :param encoding: String encoding to use (defaults to UTF-8).
    """
    payload = text.encode(encoding or "utf-8")
    # The encoded text is staged in an anonymous temporary file and uploaded from its beginning
    with tempfile.TemporaryFile() as buffer:
        buffer.write(payload)
        buffer.seek(0)
        client.client.upload_fileobj(Fileobj=buffer, Bucket=bucket, Key=key)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Direction of a transfer as reported to transfer callbacks
TransferDirection = Literal["download", "upload"]
# Lifecycle state of a transfer as reported to transfer callbacks
TransferState = Literal["start", "update", "stop"]


@contextlib.contextmanager
def make_transfer_callback(
    callback: Callable[[Path | CloudPath, TransferDirection, TransferState, int], None] | None,
    path: Path | CloudPath,
    direction: TransferDirection,
) -> Generator[Callable[[int], None] | None]:
    """
    Wraps a transfer callback into a context manager bracketing a single transfer.

    On entry the callback is invoked with state ``"start"`` and on exit, even when the transfer raised,
    with state ``"stop"``; both calls pass ``0`` as the byte count. Inside the context a one-argument
    callable is provided which forwards its argument to the callback with state ``"update"``.

    :param callback: Callback receiving ``(path, direction, state, bytes)``, or ``None`` to disable reporting.
    :param path: Path of the file being transferred, passed to the callback unchanged.
    :param direction: Direction of the transfer, passed to the callback unchanged.
    :return: The ``"update"`` callable, or ``None`` if ``callback`` is ``None``.
    """
    if callback is None:
        # Nothing to report to: the transfer runs without a progress hook
        yield None
        return

    callback(path, direction, "start", 0)
    try:
        yield functools.partial(callback, path, direction, "update")
    finally:
        callback(path, direction, "stop", 0)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class S3TransferCallbackClient(S3Client):
    """
    ``S3Client`` variant that reports file downloads and uploads to a transfer callback.

    NOTE(review): ``_download_file`` and ``_upload_file`` presumably override the corresponding hooks of
    ``cloudpathlib``'s ``S3Client`` -- confirm against the installed ``cloudpathlib`` version.
    """

    def __init__(
        self,
        *args,
        transfer_callback: Callable[[Path | CloudPath, TransferDirection, TransferState, int], None],
        **kwargs,
    ):
        """
        Initializes the client; all other arguments are forwarded to ``S3Client``.

        :param transfer_callback: Callback receiving ``(path, direction, state, bytes)`` for each transfer.
        """
        super().__init__(*args, **kwargs)
        self.transfer_callback = transfer_callback

    def _download_file(self, cloud_path: S3Path, local_path: str | os.PathLike[str]) -> Path:
        """
        Downloads the object at ``cloud_path`` to ``local_path``, reporting the transfer under the cloud path.

        :param cloud_path: S3 path of the object to download.
        :param local_path: Local file path to save the object.
        :return: The local path as a ``Path``.
        """
        target = Path(local_path)
        s3_object = self.s3.Object(cloud_path.bucket, cloud_path.key)

        with make_transfer_callback(self.transfer_callback, cloud_path, "download") as on_bytes:
            s3_object.download_file(
                str(target),
                Config=self.boto3_transfer_config,
                ExtraArgs=self.boto3_dl_extra_args,
                Callback=on_bytes,
            )
        return target

    def _upload_file(self, local_path: str | os.PathLike[str], cloud_path: S3Path) -> S3Path:
        """
        Uploads the file at ``local_path`` to ``cloud_path``, reporting the transfer under the local path.

        :param local_path: Local file path to upload.
        :param cloud_path: S3 path to store the object.
        :return: The given ``cloud_path``.
        """
        source = Path(local_path)
        s3_object = self.s3.Object(cloud_path.bucket, cloud_path.key)

        # Work on a copy so the client-wide upload arguments are never modified
        upload_args = self.boto3_ul_extra_args.copy()

        if self.content_type_method is not None:
            content_type, content_encoding = self.content_type_method(str(source))
            # Only values that were actually determined are sent along with the upload
            for header, value in (("ContentType", content_type), ("ContentEncoding", content_encoding)):
                if value is not None:
                    upload_args[header] = value

        with make_transfer_callback(self.transfer_callback, source, "upload") as on_bytes:
            s3_object.upload_file(
                str(source),
                Config=self.boto3_transfer_config,
                ExtraArgs=upload_args,
                Callback=on_bytes,
            )
        return cloud_path
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def s3_make_progress_callback(
|
|
371
|
+
progress: Progress,
|
|
372
|
+
) -> Callable[[Path | CloudPath, TransferDirection, TransferState, int], None]:
|
|
373
|
+
task_ids: dict[Path | CloudPath, TaskID] = {}
|
|
374
|
+
|
|
375
|
+
def progress_callback(path: Path | CloudPath, direction: TransferDirection, state: TransferState, bytes_sent: int):
|
|
376
|
+
if state == "start":
|
|
377
|
+
size = path.stat().st_size
|
|
378
|
+
task_ids[path] = progress.add_task(direction, total=size, filename=path.name)
|
|
379
|
+
elif state == "stop":
|
|
380
|
+
if path in task_ids:
|
|
381
|
+
progress.remove_task(task_ids[path])
|
|
382
|
+
del task_ids[path]
|
|
383
|
+
else:
|
|
384
|
+
progress.update(task_ids[path], advance=bytes_sent)
|
|
385
|
+
|
|
386
|
+
return progress_callback
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@contextlib.contextmanager
def s3_make_progressed_client(
    access_key_id: str | None = None,
    secret_access_key: str | None = None,
    region_name: str | None = None,
    endpoint_url: str | None = None,
) -> Generator[S3TransferCallbackClient]:
    """
    Creates an S3 client with progress callback, exposed as a context manager.

    The progress display is active for the lifetime of the context and is closed when the context exits.
    Every argument is passed through ``trim_to_none`` before being handed to ``boto3`` / ``cloudpathlib``.

    :param access_key_id: AWS access key ID.
    :param secret_access_key: AWS secret access key.
    :param region_name: AWS service region name.
    :param endpoint_url: AWS service endpoint URL.
    :return: An instance of ``S3TransferCallbackClient``.
    """
    with Progress(
        TextColumn("[blue]{task.fields[filename]}"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
    ) as progress:
        session = boto3.Session(aws_access_key_id=trim_to_none(access_key_id),
                                aws_secret_access_key=trim_to_none(secret_access_key),
                                region_name=trim_to_none(region_name))
        yield S3TransferCallbackClient(boto3_session=session,
                                       endpoint_url=trim_to_none(endpoint_url),
                                       transfer_callback=s3_make_progress_callback(progress))
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: plexus-python-common
|
|
3
|
-
Version: 1.0.20
|
|
3
|
+
Version: 1.0.22
|
|
4
4
|
Classifier: Programming Language :: Python :: 3
|
|
5
5
|
Classifier: Programming Language :: Python :: 3.12
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.13
|
|
7
7
|
Classifier: Programming Language :: Python :: 3.14
|
|
8
8
|
Requires-Python: <3.15,>=3.12
|
|
9
|
+
Requires-Dist: boto3>=1.41
|
|
9
10
|
Requires-Dist: cloudpathlib>=0.21
|
|
10
11
|
Requires-Dist: lxml>=6.0
|
|
11
12
|
Requires-Dist: numpy>=2.3
|
|
@@ -23,6 +24,7 @@ Provides-Extra: all
|
|
|
23
24
|
Requires-Dist: plexus-python-common; extra == "all"
|
|
24
25
|
Provides-Extra: test
|
|
25
26
|
Requires-Dist: ddt>=1.7; extra == "test"
|
|
27
|
+
Requires-Dist: moto[all,ec2,s3]>=5.1; extra == "test"
|
|
26
28
|
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
27
29
|
Requires-Dist: pytest-order>=1.3; extra == "test"
|
|
28
30
|
Requires-Dist: pytest-postgresql>=6.1; extra == "test"
|
|
@@ -10,6 +10,16 @@ setup.py
|
|
|
10
10
|
resources/unittest/jsonutils/dummy.0.jsonl
|
|
11
11
|
resources/unittest/jsonutils/dummy.1.jsonl
|
|
12
12
|
resources/unittest/jsonutils/dummy.2.jsonl
|
|
13
|
+
resources/unittest/s3utils/dir.baz/file.bar.baz
|
|
14
|
+
resources/unittest/s3utils/dir.baz/file.foo.bar
|
|
15
|
+
resources/unittest/s3utils/dir.baz/file.foo.baz
|
|
16
|
+
resources/unittest/s3utils/dir.foo/file.bar
|
|
17
|
+
resources/unittest/s3utils/dir.foo/file.baz
|
|
18
|
+
resources/unittest/s3utils/dir.foo/file.foo
|
|
19
|
+
resources/unittest/s3utils/dir.foo/dir.foo.bar/file.bar.baz
|
|
20
|
+
resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.bar
|
|
21
|
+
resources/unittest/s3utils/dir.foo/dir.foo.bar/file.foo.baz
|
|
22
|
+
resources/unittest/s3utils/dir.foo/dir.foo.bar/dir.foo.bar.baz/file.foo.bar.baz
|
|
13
23
|
resources/unittest/shutils/0-dummy
|
|
14
24
|
resources/unittest/shutils/1-dummy
|
|
15
25
|
resources/unittest/shutils/2-dummy
|
|
@@ -59,5 +69,6 @@ test/plexus_tests/common/utils/bagutils_test.py
|
|
|
59
69
|
test/plexus_tests/common/utils/datautils_test.py
|
|
60
70
|
test/plexus_tests/common/utils/jsonutils_test.py
|
|
61
71
|
test/plexus_tests/common/utils/ormutils_test.py
|
|
72
|
+
test/plexus_tests/common/utils/s3utils_test.py
|
|
62
73
|
test/plexus_tests/common/utils/shutils_test.py
|
|
63
74
|
test/plexus_tests/common/utils/strutils_test.py
|