iker-python-common 1.0.58__py3-none-any.whl → 1.0.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iker/common/utils/funcutils.py +69 -1
- iker/common/utils/jsonutils.py +97 -40
- iker/common/utils/shutils.py +9 -5
- {iker_python_common-1.0.58.dist-info → iker_python_common-1.0.60.dist-info}/METADATA +1 -3
- {iker_python_common-1.0.58.dist-info → iker_python_common-1.0.60.dist-info}/RECORD +7 -8
- iker/common/utils/s3utils.py +0 -270
- {iker_python_common-1.0.58.dist-info → iker_python_common-1.0.60.dist-info}/WHEEL +0 -0
- {iker_python_common-1.0.58.dist-info → iker_python_common-1.0.60.dist-info}/top_level.txt +0 -0
iker/common/utils/funcutils.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
from collections.abc import Callable
|
|
3
|
-
from typing import Protocol
|
|
3
|
+
from typing import Any, Protocol
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
6
|
+
"const",
|
|
7
|
+
"first",
|
|
8
|
+
"second",
|
|
9
|
+
"packed",
|
|
6
10
|
"identity",
|
|
7
11
|
"composable",
|
|
8
12
|
"singleton",
|
|
@@ -12,6 +16,70 @@ __all__ = [
|
|
|
12
16
|
]
|
|
13
17
|
|
|
14
18
|
|
|
19
|
+
def const[T](value: T) -> Callable[..., T]:
|
|
20
|
+
"""
|
|
21
|
+
Returns a function that always returns the specified ``value``, regardless of the input arguments.
|
|
22
|
+
|
|
23
|
+
:param value: The constant value to return.
|
|
24
|
+
:return: A function that takes any arguments and returns ``value``.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def getter(*args: Any, **kwargs: Any) -> T:
|
|
28
|
+
return value
|
|
29
|
+
|
|
30
|
+
return getter
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def first[K]() -> Callable[[tuple[K, Any]], K]:
|
|
34
|
+
"""
|
|
35
|
+
Returns a function that extracts the first element (key) from a 2-tuple.
|
|
36
|
+
|
|
37
|
+
:return: A function that takes a 2-tuple and returns its first element.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def getter(item: tuple[K, Any]) -> K:
|
|
41
|
+
if not isinstance(item, tuple) or len(item) != 2:
|
|
42
|
+
raise ValueError("item must be a 2-tuple")
|
|
43
|
+
return item[0]
|
|
44
|
+
|
|
45
|
+
return getter
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def second[V]() -> Callable[[tuple[Any, V]], V]:
|
|
49
|
+
"""
|
|
50
|
+
Returns a function that extracts the second element (value) from a 2-tuple.
|
|
51
|
+
|
|
52
|
+
:return: A function that takes a 2-tuple and returns its second element.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def getter(item: tuple[Any, V]) -> V:
|
|
56
|
+
if not isinstance(item, tuple) or len(item) != 2:
|
|
57
|
+
raise ValueError("item must be a 2-tuple")
|
|
58
|
+
return item[1]
|
|
59
|
+
|
|
60
|
+
return getter
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def packed[R](func: Callable[..., R]) -> Callable[[tuple[Any, ...]], R]:
|
|
64
|
+
"""
|
|
65
|
+
Wraps a function to accept its arguments as a single tuple, unpacking them when called. This is useful for
|
|
66
|
+
scenarios where arguments are naturally grouped in tuples, such as when working with data structures like maps or
|
|
67
|
+
lists of tuples, or when interfacing with APIs that provide arguments in tuple form.
|
|
68
|
+
|
|
69
|
+
>>> data = [(1, 2), (3, 4), (5, 6)]
|
|
70
|
+
>>> sums = map(packed(lambda x, y: x + y), data)
|
|
71
|
+
|
|
72
|
+
:param func: The function to wrap.
|
|
73
|
+
:return: A function that takes a tuple of arguments and calls the original function with them unpacked.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
@functools.wraps(func)
|
|
77
|
+
def wrapper(args: tuple[Any, ...]) -> R:
|
|
78
|
+
return func(*args)
|
|
79
|
+
|
|
80
|
+
return wrapper
|
|
81
|
+
|
|
82
|
+
|
|
15
83
|
def identity[T](instance: T) -> T:
|
|
16
84
|
"""
|
|
17
85
|
Returns the input ``instance`` unchanged. This is a utility function often used as a default or placeholder.
|
iker/common/utils/jsonutils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import math
|
|
2
|
-
from collections.abc import Callable, Mapping, MutableMapping, MutableSequence, Sequence, Set
|
|
2
|
+
from collections.abc import Callable, Generator, Mapping, MutableMapping, MutableSequence, Sequence, Set
|
|
3
3
|
from typing import Any, SupportsFloat, SupportsInt
|
|
4
4
|
|
|
5
5
|
from iker.common.utils.numutils import is_normal_real
|
|
@@ -24,6 +24,8 @@ __all__ = [
|
|
|
24
24
|
"json_traverse",
|
|
25
25
|
"json_reformat",
|
|
26
26
|
"json_sanitize",
|
|
27
|
+
"json_difference",
|
|
28
|
+
"json_equals",
|
|
27
29
|
"json_compare",
|
|
28
30
|
]
|
|
29
31
|
|
|
@@ -347,79 +349,134 @@ def json_sanitize(obj: Any, *, str_inf_nan: bool = True, str_unregistered: bool
|
|
|
347
349
|
unregistered_formatter=unregistered_formatter)
|
|
348
350
|
|
|
349
351
|
|
|
350
|
-
def
|
|
352
|
+
def json_difference(
|
|
351
353
|
a: JsonTypeCompatible,
|
|
352
354
|
b: JsonTypeCompatible,
|
|
355
|
+
node_path: NodePath | None = None,
|
|
353
356
|
*,
|
|
354
357
|
int_strict: bool = False,
|
|
355
358
|
float_tol: float = 1e-5,
|
|
356
359
|
list_order: bool = True,
|
|
357
360
|
dict_extra: bool = False,
|
|
358
|
-
) ->
|
|
361
|
+
) -> Generator[tuple[NodePath, str], None, None]:
|
|
359
362
|
"""
|
|
360
|
-
Compares two JSON-like structures
|
|
361
|
-
and dictionary key matching.
|
|
363
|
+
Compares two JSON-like structures and yields differences found, with options for integer strictness, float
|
|
364
|
+
tolerance, list order, and dictionary key matching.
|
|
362
365
|
|
|
363
366
|
:param a: The first JSON-compatible object to compare.
|
|
364
367
|
:param b: The second JSON-compatible object to compare.
|
|
368
|
+
:param node_path: The current node path during recursion (used internally).
|
|
365
369
|
:param int_strict: Whether to require strict integer type matching.
|
|
366
370
|
:param float_tol: The tolerance for comparing float values.
|
|
367
371
|
:param list_order: Whether to require list order to match.
|
|
368
372
|
:param dict_extra: Whether to allow extra keys in dictionaries.
|
|
369
|
-
:return:
|
|
373
|
+
:return: Tuples of node paths and difference descriptions.
|
|
370
374
|
"""
|
|
371
375
|
if a is None or b is None:
|
|
372
|
-
|
|
376
|
+
if not (a is None and b is None):
|
|
377
|
+
yield node_path, "one value is None while the other is not"
|
|
378
|
+
return
|
|
373
379
|
|
|
374
380
|
if isinstance(a, (str, bool)):
|
|
375
381
|
if type(a) != type(b):
|
|
376
|
-
|
|
377
|
-
|
|
382
|
+
yield node_path, f"type mismatch: '{type(a)}' vs '{type(b)}'"
|
|
383
|
+
elif a != b:
|
|
384
|
+
yield node_path, f"value mismatch: '{a}' vs '{b}'"
|
|
385
|
+
return
|
|
378
386
|
|
|
379
387
|
if isinstance(a, (SupportsFloat, SupportsInt)) and isinstance(b, (SupportsFloat, SupportsInt)):
|
|
380
388
|
isint_a = isinstance(a, int) or not isinstance(a, SupportsFloat)
|
|
381
389
|
isint_b = isinstance(b, int) or not isinstance(b, SupportsFloat)
|
|
382
390
|
if isint_a and isint_b:
|
|
383
|
-
|
|
391
|
+
if int(a) != int(b):
|
|
392
|
+
yield node_path, f"integer value mismatch: '{int(a)}' vs '{int(b)}'"
|
|
393
|
+
return
|
|
384
394
|
if int_strict and (isint_a or isint_b):
|
|
385
|
-
|
|
395
|
+
yield node_path, "integer type mismatch under strict mode"
|
|
396
|
+
return
|
|
386
397
|
va = int(a) if isint_a else float(a)
|
|
387
398
|
vb = int(b) if isint_b else float(b)
|
|
388
|
-
if math.isnan(va)
|
|
389
|
-
|
|
399
|
+
if math.isnan(va) or math.isnan(vb):
|
|
400
|
+
if not (math.isnan(va) and math.isnan(vb)):
|
|
401
|
+
yield node_path, "NaN mismatch"
|
|
402
|
+
return
|
|
390
403
|
if math.isinf(va) and math.isinf(vb):
|
|
391
|
-
|
|
392
|
-
|
|
404
|
+
if va != vb:
|
|
405
|
+
yield node_path, "infinity sign mismatch"
|
|
406
|
+
return
|
|
407
|
+
if abs(va - vb) > float_tol:
|
|
408
|
+
yield node_path, f"float value mismatch: '{va}' vs '{vb}' with tolerance '{float_tol}'"
|
|
409
|
+
return
|
|
393
410
|
|
|
394
411
|
if isinstance(a, Mapping) and isinstance(b, Mapping):
|
|
395
412
|
if not dict_extra and set(a.keys()) != set(b.keys()):
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
413
|
+
yield node_path, f"dictionary key mismatch: '{set(a.keys())}' vs '{set(b.keys())}'"
|
|
414
|
+
return
|
|
415
|
+
for k in set(a.keys()) & set(b.keys()):
|
|
416
|
+
yield from json_difference(a[k],
|
|
417
|
+
b[k],
|
|
418
|
+
(node_path or []) + [k],
|
|
419
|
+
int_strict=int_strict,
|
|
420
|
+
float_tol=float_tol,
|
|
421
|
+
list_order=list_order,
|
|
422
|
+
dict_extra=dict_extra)
|
|
423
|
+
return
|
|
404
424
|
|
|
405
425
|
if isinstance(a, Sequence) and isinstance(b, Sequence):
|
|
406
426
|
if len(a) != len(b):
|
|
407
|
-
|
|
427
|
+
yield node_path, f"list length mismatch: '{len(a)}' vs '{len(b)}'"
|
|
428
|
+
return
|
|
408
429
|
if list_order:
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
430
|
+
for i, (va, vb) in enumerate(zip(a, b)):
|
|
431
|
+
yield from json_difference(va,
|
|
432
|
+
vb,
|
|
433
|
+
(node_path or []) + [i],
|
|
434
|
+
int_strict=int_strict,
|
|
435
|
+
float_tol=float_tol,
|
|
436
|
+
list_order=list_order,
|
|
437
|
+
dict_extra=dict_extra)
|
|
416
438
|
else:
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
439
|
+
for i, (va, vb) in enumerate(zip(sorted(a), sorted(b))):
|
|
440
|
+
yield from json_difference(va,
|
|
441
|
+
vb,
|
|
442
|
+
(node_path or []) + [i],
|
|
443
|
+
int_strict=int_strict,
|
|
444
|
+
float_tol=float_tol,
|
|
445
|
+
list_order=list_order,
|
|
446
|
+
dict_extra=dict_extra)
|
|
447
|
+
return
|
|
448
|
+
|
|
449
|
+
yield node_path, f"type mismatch: '{type(a)}' vs '{type(b)}'"
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def json_equals(
|
|
453
|
+
a: JsonTypeCompatible,
|
|
454
|
+
b: JsonTypeCompatible,
|
|
455
|
+
*,
|
|
456
|
+
int_strict: bool = False,
|
|
457
|
+
float_tol: float = 1e-5,
|
|
458
|
+
list_order: bool = True,
|
|
459
|
+
dict_extra: bool = False,
|
|
460
|
+
) -> bool:
|
|
461
|
+
"""
|
|
462
|
+
Compares two JSON-like structures for equality based on specified criteria.
|
|
463
|
+
|
|
464
|
+
:param a: The first JSON-compatible object to compare.
|
|
465
|
+
:param b: The second JSON-compatible object to compare.
|
|
466
|
+
:param int_strict: Whether to require strict integer type matching.
|
|
467
|
+
:param float_tol: The tolerance for comparing float values.
|
|
468
|
+
:param list_order: Whether to require list order to match.
|
|
469
|
+
:param dict_extra: Whether to allow extra keys in dictionaries.
|
|
470
|
+
:return: ``True`` if the structures are considered equal, ``False`` otherwise.
|
|
471
|
+
"""
|
|
472
|
+
return next(json_difference(a,
|
|
473
|
+
b,
|
|
474
|
+
node_path=[],
|
|
475
|
+
int_strict=int_strict,
|
|
476
|
+
float_tol=float_tol,
|
|
477
|
+
list_order=list_order,
|
|
478
|
+
dict_extra=dict_extra),
|
|
479
|
+
None) is None
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
json_compare = json_equals
|
iker/common/utils/shutils.py
CHANGED
|
@@ -95,7 +95,11 @@ def path_depth(root: str, child: str) -> int:
|
|
|
95
95
|
return child_expanded[len(root_expanded):].count(os.sep)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
def glob_match(
|
|
98
|
+
def glob_match(
|
|
99
|
+
names: list[str],
|
|
100
|
+
include_patterns: list[str] | None = None,
|
|
101
|
+
exclude_patterns: list[str] | None = None,
|
|
102
|
+
) -> list[str]:
|
|
99
103
|
"""
|
|
100
104
|
Applies the given inclusive and exclusive glob patterns to the given ``names`` and returns the filtered result.
|
|
101
105
|
|
|
@@ -121,8 +125,8 @@ class CopyFuncProtocol(Protocol):
|
|
|
121
125
|
def listfile(
|
|
122
126
|
path: str,
|
|
123
127
|
*,
|
|
124
|
-
include_patterns: list[str] = None,
|
|
125
|
-
exclude_patterns: list[str] = None,
|
|
128
|
+
include_patterns: list[str] | None = None,
|
|
129
|
+
exclude_patterns: list[str] | None = None,
|
|
126
130
|
depth: int = 0,
|
|
127
131
|
) -> list[str]:
|
|
128
132
|
"""
|
|
@@ -153,8 +157,8 @@ def copy(
|
|
|
153
157
|
src: str,
|
|
154
158
|
dst: str,
|
|
155
159
|
*,
|
|
156
|
-
include_patterns: list[str] = None,
|
|
157
|
-
exclude_patterns: list[str] = None,
|
|
160
|
+
include_patterns: list[str] | None = None,
|
|
161
|
+
exclude_patterns: list[str] | None = None,
|
|
158
162
|
depth: int = 0,
|
|
159
163
|
follow_symlinks: bool = False,
|
|
160
164
|
ignore_dangling_symlinks: bool = False,
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: iker-python-common
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.60
|
|
4
4
|
Classifier: Programming Language :: Python :: 3
|
|
5
5
|
Classifier: Programming Language :: Python :: 3.12
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.13
|
|
7
7
|
Classifier: Programming Language :: Python :: 3.14
|
|
8
8
|
Requires-Python: <3.15,>=3.12
|
|
9
|
-
Requires-Dist: boto3>=1.35
|
|
10
9
|
Requires-Dist: docker>=7.1
|
|
11
10
|
Requires-Dist: numpy>=2.3
|
|
12
11
|
Requires-Dist: psycopg>=3.2
|
|
@@ -16,7 +15,6 @@ Provides-Extra: all
|
|
|
16
15
|
Requires-Dist: iker-python-common; extra == "all"
|
|
17
16
|
Provides-Extra: test
|
|
18
17
|
Requires-Dist: ddt>=1.7; extra == "test"
|
|
19
|
-
Requires-Dist: moto[all,ec2,s3]>=5.0; extra == "test"
|
|
20
18
|
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
21
19
|
Requires-Dist: pytest-mysql>=3.0; extra == "test"
|
|
22
20
|
Requires-Dist: pytest-order>=1.3; extra == "test"
|
|
@@ -6,20 +6,19 @@ iker/common/utils/csv.py,sha256=_V9OUrKcojec2L-hWagEIVnL2uvGjyJAFTrD7tHNr48,7573
|
|
|
6
6
|
iker/common/utils/dbutils.py,sha256=zXZVJCz7HZPityFRF7sHRRMpMraegV_hyYnzApUUPhY,11852
|
|
7
7
|
iker/common/utils/dockerutils.py,sha256=n2WuzXaZB6_WocSljvPOnfExSIjIHRUbuWp2oBbaPKQ,8004
|
|
8
8
|
iker/common/utils/dtutils.py,sha256=86vbaa4pgcBWERZvTfJ92PKB3IimxP6tf0O11ho2Ffk,12554
|
|
9
|
-
iker/common/utils/funcutils.py,sha256=
|
|
10
|
-
iker/common/utils/jsonutils.py,sha256=
|
|
9
|
+
iker/common/utils/funcutils.py,sha256=4AkkvK9_Z2tgk1-Sp6-vLLVhI15cIgN9xW58QqL5QL4,7780
|
|
10
|
+
iker/common/utils/jsonutils.py,sha256=AkziMAYVQDODHRqZC-c1x7VqI2hHY3Kxrw7gmoss8mU,18527
|
|
11
11
|
iker/common/utils/logger.py,sha256=FJaai6Sbchy4wKHcUMUCrrkBcXvIxq4qByERZ_TJBps,3881
|
|
12
12
|
iker/common/utils/numutils.py,sha256=p6Rz1qyCcUru3v1zDy2PM-nds2NWJdL5A_vLmG-kswk,4294
|
|
13
13
|
iker/common/utils/randutils.py,sha256=Sxf852B18CJ-MfrEDsv1ROO_brmz79dRZ4jpJiH65v4,12843
|
|
14
14
|
iker/common/utils/retry.py,sha256=H9lR6pp_jzgOwKTM-dOWIddjTlQbK-ijcwuDmVvurZM,8938
|
|
15
|
-
iker/common/utils/s3utils.py,sha256=rb-JVCJuIbmVn4ml7MQ7qKD8Z25t8xnU_u4oY1-APe4,9368
|
|
16
15
|
iker/common/utils/sequtils.py,sha256=Wc8RcbNjVYSJYZv_07SOKWfYjhmGWz9_RXWbG2-tE1o,25060
|
|
17
|
-
iker/common/utils/shutils.py,sha256=
|
|
16
|
+
iker/common/utils/shutils.py,sha256=dUm1Y7m8u1Ri_R5598oQJsxwgQaBnVzhtpcsL7_Vzp0,7916
|
|
18
17
|
iker/common/utils/span.py,sha256=u_KuWi2U7QDMUotl4AeW2_57ItL3YhVDSeCwaOiFDvs,5963
|
|
19
18
|
iker/common/utils/strutils.py,sha256=Tu_qFeH3K-SfwvMxdrZAc9iLPV8ZmtX4ntyyFGNslf8,5094
|
|
20
19
|
iker/common/utils/testutils.py,sha256=2VieV5yeCDntSKQSpIeyqRT8BZmZYE_ArMeQz3g7fXY,5568
|
|
21
20
|
iker/common/utils/typeutils.py,sha256=RVkYkFRgDrx77OHFH7PavMV0AIB0S8ly40rs4g7JWE4,8220
|
|
22
|
-
iker_python_common-1.0.
|
|
23
|
-
iker_python_common-1.0.
|
|
24
|
-
iker_python_common-1.0.
|
|
25
|
-
iker_python_common-1.0.
|
|
21
|
+
iker_python_common-1.0.60.dist-info/METADATA,sha256=2B3f-_-H83ceea3JraxM602M47-BcdClWBtZIZLF_I0,813
|
|
22
|
+
iker_python_common-1.0.60.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
23
|
+
iker_python_common-1.0.60.dist-info/top_level.txt,sha256=4_B8Prfc_lxFafFYTQThIU1ZqOYQ4pHHHnJ_fQ_oHs8,5
|
|
24
|
+
iker_python_common-1.0.60.dist-info/RECORD,,
|
iker/common/utils/s3utils.py
DELETED
|
@@ -1,270 +0,0 @@
|
|
|
1
|
-
import concurrent.futures
|
|
2
|
-
import contextlib
|
|
3
|
-
import dataclasses
|
|
4
|
-
import datetime
|
|
5
|
-
import mimetypes
|
|
6
|
-
import os
|
|
7
|
-
import tempfile
|
|
8
|
-
|
|
9
|
-
import boto3
|
|
10
|
-
from botocore.client import BaseClient
|
|
11
|
-
|
|
12
|
-
from iker.common.utils.shutils import glob_match, listfile, path_depth
|
|
13
|
-
from iker.common.utils.strutils import is_empty, trim_to_none
|
|
14
|
-
|
|
15
|
-
__all__ = [
|
|
16
|
-
"S3ObjectMeta",
|
|
17
|
-
"s3_make_client",
|
|
18
|
-
"s3_list_objects",
|
|
19
|
-
"s3_listfile",
|
|
20
|
-
"s3_cp_download",
|
|
21
|
-
"s3_cp_upload",
|
|
22
|
-
"s3_sync_download",
|
|
23
|
-
"s3_sync_upload",
|
|
24
|
-
"s3_pull_text",
|
|
25
|
-
"s3_push_text",
|
|
26
|
-
]
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
@dataclasses.dataclass
|
|
30
|
-
class S3ObjectMeta(object):
|
|
31
|
-
key: str
|
|
32
|
-
last_modified: datetime.datetime
|
|
33
|
-
size: int
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def s3_make_client(
|
|
37
|
-
access_key_id: str = None,
|
|
38
|
-
secret_access_key: str = None,
|
|
39
|
-
region_name: str = None,
|
|
40
|
-
endpoint_url: str = None,
|
|
41
|
-
) -> contextlib.AbstractContextManager[BaseClient]:
|
|
42
|
-
"""
|
|
43
|
-
Creates an AWS S3 client as a context manager for safe resource handling.
|
|
44
|
-
|
|
45
|
-
:param access_key_id: AWS access key ID.
|
|
46
|
-
:param secret_access_key: AWS secret access key.
|
|
47
|
-
:param region_name: AWS service region name.
|
|
48
|
-
:param endpoint_url: AWS service endpoint URL.
|
|
49
|
-
:return: A context manager yielding an S3 client instance.
|
|
50
|
-
"""
|
|
51
|
-
client = boto3.client("s3",
|
|
52
|
-
region_name=trim_to_none(region_name),
|
|
53
|
-
endpoint_url=trim_to_none(endpoint_url),
|
|
54
|
-
aws_access_key_id=trim_to_none(access_key_id),
|
|
55
|
-
aws_secret_access_key=trim_to_none(secret_access_key))
|
|
56
|
-
return contextlib.closing(client)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def s3_list_objects(client: BaseClient, bucket: str, prefix: str, limit: int = None) -> list[S3ObjectMeta]:
|
|
60
|
-
"""
|
|
61
|
-
Lists all objects from the given S3 ``bucket`` and ``prefix``.
|
|
62
|
-
|
|
63
|
-
:param client: AWS S3 client instance.
|
|
64
|
-
:param bucket: Bucket name.
|
|
65
|
-
:param prefix: Object keys prefix.
|
|
66
|
-
:param limit: Maximum number of objects to return (``None`` for all).
|
|
67
|
-
:return: List of ``S3ObjectMeta`` items.
|
|
68
|
-
"""
|
|
69
|
-
entries = []
|
|
70
|
-
|
|
71
|
-
next_marker = None
|
|
72
|
-
while True:
|
|
73
|
-
if is_empty(next_marker):
|
|
74
|
-
response = client.list_objects(MaxKeys=1000, Bucket=bucket, Prefix=prefix)
|
|
75
|
-
else:
|
|
76
|
-
response = client.list_objects(MaxKeys=1000, Bucket=bucket, Prefix=prefix, Marker=next_marker)
|
|
77
|
-
|
|
78
|
-
entries.extend(response.get("Contents", []))
|
|
79
|
-
|
|
80
|
-
if limit is not None and len(entries) >= limit:
|
|
81
|
-
entries = entries[:limit]
|
|
82
|
-
|
|
83
|
-
if not response.get("IsTruncated"):
|
|
84
|
-
break
|
|
85
|
-
|
|
86
|
-
next_marker = response.get("NextMarker")
|
|
87
|
-
if is_empty(next_marker):
|
|
88
|
-
next_marker = entries[-1]["Key"]
|
|
89
|
-
|
|
90
|
-
return [S3ObjectMeta(key=e["Key"], last_modified=e["LastModified"], size=e["Size"]) for e in entries]
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def s3_listfile(
|
|
94
|
-
client: BaseClient,
|
|
95
|
-
bucket: str,
|
|
96
|
-
prefix: str,
|
|
97
|
-
*,
|
|
98
|
-
include_patterns: list[str] = None,
|
|
99
|
-
exclude_patterns: list[str] = None,
|
|
100
|
-
depth: int = 0,
|
|
101
|
-
) -> list[S3ObjectMeta]:
|
|
102
|
-
"""
|
|
103
|
-
Lists all objects from the given S3 ``bucket`` and ``prefix``, filtered by patterns and directory depth.
|
|
104
|
-
|
|
105
|
-
:param client: AWS S3 client instance.
|
|
106
|
-
:param bucket: Bucket name.
|
|
107
|
-
:param prefix: Object keys prefix.
|
|
108
|
-
:param include_patterns: Inclusive glob patterns applied to filenames.
|
|
109
|
-
:param exclude_patterns: Exclusive glob patterns applied to filenames.
|
|
110
|
-
:param depth: Maximum depth of subdirectories to include in the scan.
|
|
111
|
-
:return: List of ``S3ObjectMeta`` items.
|
|
112
|
-
"""
|
|
113
|
-
|
|
114
|
-
# We add trailing slash "/" to the prefix if it is absent
|
|
115
|
-
if not prefix.endswith("/"):
|
|
116
|
-
prefix = prefix + "/"
|
|
117
|
-
|
|
118
|
-
objects = s3_list_objects(client, bucket, prefix)
|
|
119
|
-
|
|
120
|
-
def filter_object_meta(object_meta: S3ObjectMeta) -> bool:
|
|
121
|
-
if 0 < depth <= path_depth(prefix, os.path.dirname(object_meta.key)):
|
|
122
|
-
return False
|
|
123
|
-
if len(glob_match([os.path.basename(object_meta.key)], include_patterns, exclude_patterns)) == 0:
|
|
124
|
-
return False
|
|
125
|
-
return True
|
|
126
|
-
|
|
127
|
-
return list(filter(filter_object_meta, objects))
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def s3_cp_download(client: BaseClient, bucket: str, key: str, file_path: str):
|
|
131
|
-
"""
|
|
132
|
-
Downloads an object from the given S3 ``bucket`` and ``key`` to a local file path.
|
|
133
|
-
|
|
134
|
-
:param client: AWS S3 client instance.
|
|
135
|
-
:param bucket: Bucket name.
|
|
136
|
-
:param key: Object key.
|
|
137
|
-
:param file_path: Local file path to save the object.
|
|
138
|
-
"""
|
|
139
|
-
client.download_file(bucket, key, file_path)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def s3_cp_upload(client: BaseClient, file_path: str, bucket: str, key: str):
|
|
143
|
-
"""
|
|
144
|
-
Uploads a local file to the given S3 ``bucket`` and ``key``.
|
|
145
|
-
|
|
146
|
-
:param client: AWS S3 client instance.
|
|
147
|
-
:param file_path: Local file path to upload.
|
|
148
|
-
:param bucket: Bucket name.
|
|
149
|
-
:param key: Object key for the uploaded file.
|
|
150
|
-
"""
|
|
151
|
-
t, _ = mimetypes.MimeTypes().guess_type(file_path)
|
|
152
|
-
client.upload_file(file_path, bucket, key, ExtraArgs={"ContentType": "binary/octet-stream" if t is None else t})
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def s3_sync_download(
|
|
156
|
-
client: BaseClient,
|
|
157
|
-
bucket: str,
|
|
158
|
-
prefix: str,
|
|
159
|
-
dir_path: str,
|
|
160
|
-
*,
|
|
161
|
-
max_workers: int = None,
|
|
162
|
-
include_patterns: list[str] = None,
|
|
163
|
-
exclude_patterns: list[str] = None,
|
|
164
|
-
depth: int = 0,
|
|
165
|
-
):
|
|
166
|
-
"""
|
|
167
|
-
Recursively downloads all objects from the given S3 ``bucket`` and ``prefix`` to a local directory path, using a thread pool.
|
|
168
|
-
|
|
169
|
-
:param client: AWS S3 client instance.
|
|
170
|
-
:param bucket: Bucket name.
|
|
171
|
-
:param prefix: Object keys prefix.
|
|
172
|
-
:param dir_path: Local directory path to save objects.
|
|
173
|
-
:param max_workers: Maximum number of worker threads.
|
|
174
|
-
:param include_patterns: Inclusive glob patterns applied to filenames.
|
|
175
|
-
:param exclude_patterns: Exclusive glob patterns applied to filenames.
|
|
176
|
-
:param depth: Maximum depth of subdirectories to include in the scan.
|
|
177
|
-
"""
|
|
178
|
-
|
|
179
|
-
# We add trailing slash "/" to the prefix if it is absent
|
|
180
|
-
if not prefix.endswith("/"):
|
|
181
|
-
prefix = prefix + "/"
|
|
182
|
-
|
|
183
|
-
objects = s3_listfile(client,
|
|
184
|
-
bucket,
|
|
185
|
-
prefix,
|
|
186
|
-
include_patterns=include_patterns,
|
|
187
|
-
exclude_patterns=exclude_patterns,
|
|
188
|
-
depth=depth)
|
|
189
|
-
|
|
190
|
-
def download_file(key: str):
|
|
191
|
-
file_path = os.path.join(dir_path, key[len(prefix):])
|
|
192
|
-
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
|
193
|
-
s3_cp_download(client, bucket, key, file_path)
|
|
194
|
-
|
|
195
|
-
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
196
|
-
concurrent.futures.wait([executor.submit(download_file, obj.key) for obj in objects],
|
|
197
|
-
return_when=concurrent.futures.FIRST_EXCEPTION)
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
def s3_sync_upload(
|
|
201
|
-
client: BaseClient,
|
|
202
|
-
dir_path: str,
|
|
203
|
-
bucket: str,
|
|
204
|
-
prefix: str,
|
|
205
|
-
*,
|
|
206
|
-
max_workers: int = None,
|
|
207
|
-
include_patterns: list[str] = None,
|
|
208
|
-
exclude_patterns: list[str] = None,
|
|
209
|
-
depth: int = 0,
|
|
210
|
-
):
|
|
211
|
-
"""
|
|
212
|
-
Recursively uploads all files from a local directory to the given S3 ``bucket`` and ``prefix``, using a thread pool.
|
|
213
|
-
|
|
214
|
-
:param client: AWS S3 client instance.
|
|
215
|
-
:param dir_path: Local directory path to upload from.
|
|
216
|
-
:param bucket: Bucket name.
|
|
217
|
-
:param prefix: Object keys prefix for uploaded files.
|
|
218
|
-
:param max_workers: Maximum number of worker threads.
|
|
219
|
-
:param include_patterns: Inclusive glob patterns applied to filenames.
|
|
220
|
-
:param exclude_patterns: Exclusive glob patterns applied to filenames.
|
|
221
|
-
:param depth: Maximum depth of subdirectories to include in the scan.
|
|
222
|
-
"""
|
|
223
|
-
|
|
224
|
-
# We add trailing slash "/" to the prefix if it is absent
|
|
225
|
-
if not prefix.endswith("/"):
|
|
226
|
-
prefix = prefix + "/"
|
|
227
|
-
|
|
228
|
-
file_paths = listfile(dir_path,
|
|
229
|
-
include_patterns=include_patterns,
|
|
230
|
-
exclude_patterns=exclude_patterns,
|
|
231
|
-
depth=depth)
|
|
232
|
-
|
|
233
|
-
def upload_file(file_path: str):
|
|
234
|
-
s3_cp_upload(client, file_path, bucket, prefix + os.path.relpath(file_path, dir_path))
|
|
235
|
-
|
|
236
|
-
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
237
|
-
concurrent.futures.wait([executor.submit(upload_file, file_path) for file_path in file_paths],
|
|
238
|
-
return_when=concurrent.futures.FIRST_EXCEPTION)
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def s3_pull_text(client: BaseClient, bucket: str, key: str, encoding: str = None) -> str:
|
|
242
|
-
"""
|
|
243
|
-
Downloads and decodes text content stored as an object in the given S3 ``bucket`` and ``key``.
|
|
244
|
-
|
|
245
|
-
:param client: AWS S3 client instance.
|
|
246
|
-
:param bucket: Bucket name.
|
|
247
|
-
:param key: Object key storing the text.
|
|
248
|
-
:param encoding: String encoding to use (defaults to UTF-8).
|
|
249
|
-
:return: The decoded text content.
|
|
250
|
-
"""
|
|
251
|
-
with tempfile.TemporaryFile() as fp:
|
|
252
|
-
client.download_fileobj(bucket, key, fp)
|
|
253
|
-
fp.seek(0)
|
|
254
|
-
return fp.read().decode(encoding or "utf-8")
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
def s3_push_text(client: BaseClient, text: str, bucket: str, key: str, encoding: str = None):
|
|
258
|
-
"""
|
|
259
|
-
Uploads the given text as an object to the specified S3 ``bucket`` and ``key``.
|
|
260
|
-
|
|
261
|
-
:param client: AWS S3 client instance.
|
|
262
|
-
:param text: Text content to upload.
|
|
263
|
-
:param bucket: Bucket name.
|
|
264
|
-
:param key: Object key to store the text.
|
|
265
|
-
:param encoding: String encoding to use (defaults to UTF-8).
|
|
266
|
-
"""
|
|
267
|
-
with tempfile.TemporaryFile() as fp:
|
|
268
|
-
fp.write(text.encode(encoding or "utf-8"))
|
|
269
|
-
fp.seek(0)
|
|
270
|
-
client.upload_fileobj(fp, bucket, key)
|
|
File without changes
|
|
File without changes
|