polars-runtime-compat 1.34.0b3__cp39-abi3-win_arm64.whl → 1.34.0b5__cp39-abi3-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/METADATA +6 -2
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/io/cloud/_utils.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Any, Generic, TypeVar
|
|
5
|
-
|
|
6
|
-
from polars._typing import PartitioningScheme
|
|
7
|
-
from polars._utils.various import is_path_or_str_sequence
|
|
8
|
-
|
|
9
|
-
T = TypeVar("T")


class NoPickleOption(Generic[T]):
    """
    Wrapper that does not pickle the wrapped value.

    This wrapper will unpickle to contain a None. Used for cached values.

    The wrapped value is dropped on pickling: `__getstate__` returns an empty
    tuple and `__setstate__` re-runs `__init__` with no value.
    """

    # Class-level fallback. Pickle protocols 0 and 1 go through
    # `copyreg._reduce_ex`, which discards a falsy state such as `()`; in that
    # case `__setstate__` is never called and the instance is rebuilt without
    # running `__init__`. This default keeps `get()` returning None there
    # instead of raising AttributeError.
    _opt_value: T | None = None

    def __init__(self, opt_value: T | None = None) -> None:
        self._opt_value = opt_value

    def get(self) -> T | None:
        """Return the wrapped value, or None if nothing is stored."""
        return self._opt_value

    def set(self, value: T | None) -> None:
        """Replace the wrapped value."""
        self._opt_value = value

    def __getstate__(self) -> tuple[()]:
        # Needs to return not-None for `__setstate__()` to be called
        # (protocol 2 and newer).
        return ()

    def __setstate__(self, _state: tuple[()]) -> None:
        # Ignore the pickled state entirely and reset to the empty wrapper.
        NoPickleOption.__init__(self)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _first_scan_path(
|
|
37
|
-
source: Any,
|
|
38
|
-
) -> str | Path | None:
|
|
39
|
-
if isinstance(source, (str, Path)):
|
|
40
|
-
return source
|
|
41
|
-
elif is_path_or_str_sequence(source) and source:
|
|
42
|
-
return source[0]
|
|
43
|
-
elif isinstance(source, PartitioningScheme):
|
|
44
|
-
return source._base_path
|
|
45
|
-
|
|
46
|
-
return None
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _get_path_scheme(path: str | Path) -> str | None:
|
|
50
|
-
path_str = str(path)
|
|
51
|
-
i = path_str.find("://")
|
|
52
|
-
|
|
53
|
-
return path_str[:i] if i >= 0 else None
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def _is_aws_cloud(*, scheme: str, first_scan_path: str) -> bool:
|
|
57
|
-
if any(scheme == x for x in ["s3", "s3a"]):
|
|
58
|
-
return True
|
|
59
|
-
|
|
60
|
-
if scheme == "http" or scheme == "https":
|
|
61
|
-
bucket_end = first_scan_path.find(".s3.")
|
|
62
|
-
region_end = first_scan_path.find(".amazonaws.com/", bucket_end + 4)
|
|
63
|
-
|
|
64
|
-
if (
|
|
65
|
-
first_scan_path.find("/", len(scheme) + 3, region_end) > 0
|
|
66
|
-
or "?" in first_scan_path
|
|
67
|
-
):
|
|
68
|
-
return False
|
|
69
|
-
|
|
70
|
-
return 0 < bucket_end < region_end
|
|
71
|
-
|
|
72
|
-
return False
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _is_azure_cloud(scheme: str) -> bool:
|
|
76
|
-
return any(scheme == x for x in ["az", "azure", "adl", "abfs", "abfss"])
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _is_gcp_cloud(scheme: str) -> bool:
|
|
80
|
-
return any(scheme == x for x in ["gs", "gcp", "gcs"])
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from polars.io.cloud.credential_provider._providers import (
|
|
2
|
-
CredentialProvider,
|
|
3
|
-
CredentialProviderAWS,
|
|
4
|
-
CredentialProviderAzure,
|
|
5
|
-
CredentialProviderFunction,
|
|
6
|
-
CredentialProviderFunctionReturn,
|
|
7
|
-
CredentialProviderGCP,
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
__all__ = [
|
|
11
|
-
"CredentialProvider",
|
|
12
|
-
"CredentialProviderAWS",
|
|
13
|
-
"CredentialProviderAzure",
|
|
14
|
-
"CredentialProviderFunction",
|
|
15
|
-
"CredentialProviderFunctionReturn",
|
|
16
|
-
"CredentialProviderGCP",
|
|
17
|
-
]
|
|
@@ -1,520 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import abc
|
|
4
|
-
import os
|
|
5
|
-
import threading
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Callable, Literal, Union
|
|
7
|
-
|
|
8
|
-
import polars._utils.logging
|
|
9
|
-
from polars._utils.cache import LRUCache
|
|
10
|
-
from polars._utils.logging import eprint, verbose
|
|
11
|
-
from polars._utils.unstable import issue_unstable_warning
|
|
12
|
-
from polars.io.cloud._utils import NoPickleOption
|
|
13
|
-
from polars.io.cloud.credential_provider._providers import (
|
|
14
|
-
CachedCredentialProvider,
|
|
15
|
-
CachingCredentialProvider,
|
|
16
|
-
CredentialProvider,
|
|
17
|
-
CredentialProviderAWS,
|
|
18
|
-
CredentialProviderAzure,
|
|
19
|
-
CredentialProviderFunction,
|
|
20
|
-
CredentialProviderGCP,
|
|
21
|
-
UserProvidedGCPToken,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
if TYPE_CHECKING:
|
|
25
|
-
import sys
|
|
26
|
-
|
|
27
|
-
if sys.version_info >= (3, 10):
|
|
28
|
-
from typing import TypeAlias
|
|
29
|
-
else:
|
|
30
|
-
from typing_extensions import TypeAlias
|
|
31
|
-
|
|
32
|
-
# Storage-option keys that configure the HTTP client rather than supply
# credentials; the auto-init logic in this module skips over them.
# https://docs.rs/object_store/latest/object_store/enum.ClientConfigKey.html
OBJECT_STORE_CLIENT_OPTIONS: frozenset[str] = frozenset(
    {
        # Connection security / transport
        "allow_http",
        "allow_invalid_certificates",
        "http1_only",
        "http2_only",
        # HTTP/2 tuning
        "http2_keep_alive_interval",
        "http2_keep_alive_timeout",
        "http2_keep_alive_while_idle",
        "http2_max_frame_size",
        # Timeouts and pooling
        "connect_timeout",
        "timeout",
        "pool_idle_timeout",
        "pool_max_idle_per_host",
        # Proxy
        "proxy_url",
        "proxy_ca_certificate",
        "proxy_excludes",
        # Request metadata
        "default_content_type",
        "user_agent",
    }
)
|
|
54
|
-
|
|
55
|
-
# Result type of building a provider: a `CredentialProvider` instance, a
# `CredentialProviderFunction`, or `None` when no provider was initialized.
CredentialProviderBuilderReturn: TypeAlias = Union[
    CredentialProvider, CredentialProviderFunction, None
]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class CredentialProviderBuilder:
    """
    Deferred factory for credential providers.

    Instead of creating a credential provider while a query is being
    constructed, the initializer is stored and only invoked at `collect()`.
    That keeps behavior predictable when the query is shipped to a different
    environment for execution.
    """

    def __init__(
        self,
        credential_provider_init: CredentialProviderBuilderImpl,
    ) -> None:
        """
        Store the initializer used to build the credential provider later.

        Parameters
        ----------
        credential_provider_init
            Callable object that returns a credential provider when invoked.
        """
        self.credential_provider_init = credential_provider_init

    # Note: The rust-side expects this exact function name.
    def build_credential_provider(
        self,
        clear_cached_credentials: bool = False,  # noqa: FBT001
    ) -> CredentialProviderBuilderReturn:
        """
        Run the stored initializer and return whatever it produces.

        Parameters
        ----------
        clear_cached_credentials
            When the resulting provider is a `CachingCredentialProvider`, drop
            any credentials it has cached before returning it.
        """
        is_verbose = polars._utils.logging.verbose()

        if is_verbose:
            eprint(
                "[CredentialProviderBuilder]: Begin initialize "
                f"{self.credential_provider_init!r} "
                f"{clear_cached_credentials = }"
            )

        provider = self.credential_provider_init()

        if is_verbose:
            if provider is None:
                eprint(
                    f"[CredentialProviderBuilder]: No provider initialized "
                    f"from {self.credential_provider_init!r}"
                )
            else:
                eprint(
                    f"[CredentialProviderBuilder]: Initialized {provider!r} "
                    f"from {self.credential_provider_init!r}"
                )

        should_clear = clear_cached_credentials and isinstance(
            provider, CachingCredentialProvider
        )
        if not should_clear:
            return provider

        provider.clear_cached_credentials()

        if is_verbose:
            eprint(
                f"[CredentialProviderBuilder]: Clear cached credentials for {provider!r}"
            )

        return provider

    @classmethod
    def from_initialized_provider(
        cls, credential_provider: CredentialProviderFunction
    ) -> CredentialProviderBuilder:
        """Create a builder around a provider that already exists."""
        wrapped = InitializedCredentialProvider(credential_provider)
        return cls(wrapped)

    def __getstate__(self) -> Any:
        # The initializer is the whole pickled state.
        state = self.credential_provider_init

        if verbose():
            eprint(f"[CredentialProviderBuilder]: __getstate__(): {state = !r} ")

        return state

    def __setstate__(self, state: Any) -> None:
        # Mirror of `__getstate__`: the state is the initializer itself.
        self.credential_provider_init = state

        if verbose():
            eprint(f"[CredentialProviderBuilder]: __setstate__(): {self = !r}")

    def __repr__(self) -> str:
        initializer = self.credential_provider_init
        return f"CredentialProviderBuilder({initializer!r})"
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
class CredentialProviderBuilderImpl(abc.ABC):
    """
    Abstract initializer held by `CredentialProviderBuilder`.

    Subclasses implement `__call__` to produce the provider and
    `provider_repr` to describe it in log output.
    """

    @abc.abstractmethod
    def __call__(self) -> CredentialProviderFunction | None:
        """Produce the credential provider, or None if there is none."""

    @property
    @abc.abstractmethod
    def provider_repr(self) -> str:
        """Used for logging."""

    def __repr__(self) -> str:
        # Rendered as "<provider description> @ <concrete builder class>".
        return f"{self.provider_repr} @ {type(self).__name__}"
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
# Adapter that exposes an existing credential provider through the builder
# interface. Used for e.g. user-provided credential providers.
class InitializedCredentialProvider(CredentialProviderBuilderImpl):
    """Wraps an already initialized credential provider."""

    def __init__(self, credential_provider: CredentialProviderFunction) -> None:
        self.credential_provider = credential_provider

    def __call__(self) -> CredentialProviderBuilderReturn:
        """
        Return the wrapped provider, adding a caching layer if it lacks one.

        A `CachingCredentialProvider` is handed back unchanged. Anything else
        is wrapped in a `CachedCredentialProvider`, obtained through the
        module-level cache of built providers.
        """
        if isinstance(self.credential_provider, CachingCredentialProvider):
            return self.credential_provider

        def cache_key() -> int:
            # We use the cache by keying the entry as the address of the
            # object provided by the user.
            return id(self.credential_provider)

        def build() -> CredentialProviderBuilderReturn:
            return CachedCredentialProvider(self.credential_provider)

        return _build_with_cache(cache_key, build)

    @property
    def provider_repr(self) -> str:
        provider = self.credential_provider
        return repr(provider)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
# The keys of this can be:
|
|
198
|
-
# * int: Object address of a user-passed credential provider
|
|
199
|
-
# * bytes: Hash of an AutoInit configuration
|
|
200
|
-
BUILT_PROVIDERS_LRU_CACHE: (
|
|
201
|
-
LRUCache[int | bytes, CredentialProviderBuilderReturn] | None
|
|
202
|
-
) = None
|
|
203
|
-
BUILT_PROVIDERS_LRU_CACHE_LOCK: threading.RLock = threading.RLock()
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _build_with_cache(
|
|
207
|
-
get_cache_key_func: Callable[[], int | bytes],
|
|
208
|
-
build_provider_func: Callable[[], CredentialProviderBuilderReturn],
|
|
209
|
-
) -> CredentialProviderBuilderReturn:
|
|
210
|
-
global BUILT_PROVIDERS_LRU_CACHE
|
|
211
|
-
|
|
212
|
-
if (
|
|
213
|
-
max_items := int(
|
|
214
|
-
os.getenv(
|
|
215
|
-
"POLARS_CREDENTIAL_PROVIDER_BUILDER_CACHE_SIZE",
|
|
216
|
-
8,
|
|
217
|
-
)
|
|
218
|
-
)
|
|
219
|
-
) <= 0:
|
|
220
|
-
if BUILT_PROVIDERS_LRU_CACHE_LOCK.acquire(blocking=False):
|
|
221
|
-
BUILT_PROVIDERS_LRU_CACHE = None
|
|
222
|
-
BUILT_PROVIDERS_LRU_CACHE_LOCK.release()
|
|
223
|
-
|
|
224
|
-
return build_provider_func()
|
|
225
|
-
|
|
226
|
-
verbose = polars._utils.logging.verbose()
|
|
227
|
-
|
|
228
|
-
with BUILT_PROVIDERS_LRU_CACHE_LOCK:
|
|
229
|
-
if BUILT_PROVIDERS_LRU_CACHE is None:
|
|
230
|
-
if verbose:
|
|
231
|
-
eprint(f"Create built credential providers LRU cache ({max_items = })")
|
|
232
|
-
|
|
233
|
-
BUILT_PROVIDERS_LRU_CACHE = LRUCache(max_items)
|
|
234
|
-
|
|
235
|
-
cache_key = get_cache_key_func()
|
|
236
|
-
|
|
237
|
-
try:
|
|
238
|
-
provider = BUILT_PROVIDERS_LRU_CACHE[cache_key]
|
|
239
|
-
|
|
240
|
-
if verbose:
|
|
241
|
-
eprint(
|
|
242
|
-
f"Loaded credential provider from cache: {provider!r} {cache_key = }"
|
|
243
|
-
)
|
|
244
|
-
except KeyError:
|
|
245
|
-
provider = build_provider_func()
|
|
246
|
-
BUILT_PROVIDERS_LRU_CACHE[cache_key] = provider
|
|
247
|
-
|
|
248
|
-
if verbose:
|
|
249
|
-
eprint(
|
|
250
|
-
f"Added new credential provider to cache: {provider!r} {cache_key = }"
|
|
251
|
-
)
|
|
252
|
-
|
|
253
|
-
return provider
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
# Describes an automatic-initialization configuration: which provider class to
# instantiate and with which keyword arguments. This is created for
# credential_provider="auto".
class AutoInit(CredentialProviderBuilderImpl):
    """Builds `cls(**kw)` on demand, sharing results through the provider cache."""

    def __init__(self, cls: Any, **kw: Any) -> None:
        self.cls = cls
        self.kw = kw
        # Memoized cache key; the wrapper drops its value when pickled.
        self._cache_key: NoPickleOption[bytes] = NoPickleOption()

    def __call__(self) -> CredentialProviderFunction | None:
        """Instantiate the provider, returning None if that raises ImportError."""

        def instantiate() -> Any:
            return self.cls(**self.kw)

        # This is used for credential_provider="auto", which allows for
        # ImportErrors.
        try:
            return _build_with_cache(self.get_or_init_cache_key, instantiate)
        except ImportError as e:
            if verbose():
                eprint(f"failed to auto-initialize {self.provider_repr}: {e!r}")

            return None

    def get_or_init_cache_key(self) -> bytes:
        """Return the memoized cache key, computing and storing it first if unset."""
        cache_key = self._cache_key.get()

        if cache_key is not None:
            return cache_key

        cache_key = self.get_cache_key_impl()
        self._cache_key.set(cache_key)

        if verbose():
            eprint(f"{self!r}: AutoInit cache key: {cache_key.hex()}")

        return cache_key

    def get_cache_key_impl(self) -> bytes:
        """Derive the key: first 16 bytes of the SHA-256 of this object's pickle."""
        import hashlib
        import pickle

        serialized = pickle.dumps(self)
        digest = hashlib.sha256(serialized).digest()
        return digest[:16]

    @property
    def provider_repr(self) -> str:
        return self.cls.__name__
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
# Module-wide default read by `_init_credential_provider_builder()` when a
# caller passes `credential_provider="auto"`:
# * "auto": a provider is auto-initialized for the detected cloud.
# * None: no credential provider builder is created.
# * anything else: wrapped as an already-initialized provider and used in
#   place of auto-initialization (subject to the per-cloud `storage_options`
#   checks in that function).
DEFAULT_CREDENTIAL_PROVIDER: CredentialProviderFunction | Literal["auto"] | None = (
    "auto"
)
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
def _init_credential_provider_builder(
    credential_provider: CredentialProviderFunction
    | CredentialProviderBuilder
    | Literal["auto"]
    | None,
    source: Any,
    storage_options: dict[str, Any] | None,
    caller_name: str,
) -> CredentialProviderBuilder | None:
    """
    Turn the user-facing `credential_provider` argument into a builder.

    Parameters
    ----------
    credential_provider
        `None` (no builder), an existing `CredentialProviderBuilder` (returned
        unchanged), `"auto"` (pick a provider from the scan path and
        `storage_options`), or any other value, which is wrapped as an
        already-initialized provider after an unstable-feature warning.
    source
        Scan/sink target; only its first path is inspected to find the scheme.
    storage_options
        User storage options. Keys are compared case-insensitively. Keys that
        are not recognized are assumed to be access keys of some sort.
    caller_name
        Name of the public function, used in the unstable warning message.

    Returns
    -------
    CredentialProviderBuilder | None
        `None` when no Python-side credential provider should be used.

    Raises
    ------
    ValueError
        For AWS when `aws_profile` is combined with an unrecognized key, and
        for GCP when a token is combined with an unrecognized key.
    """

    def f() -> CredentialProviderBuilder | None:
        # Note: The behavior of this function should depend only on the function
        # parameters. Any environment-specific behavior should take place inside
        # instantiated credential providers.

        from polars.io.cloud._utils import (
            _first_scan_path,
            _get_path_scheme,
            _is_aws_cloud,
            _is_azure_cloud,
            _is_gcp_cloud,
        )

        if credential_provider is None:
            return None

        if isinstance(credential_provider, CredentialProviderBuilder):
            # This happens when the catalog client auto-inits and passes it to
            # scan/write_delta, which calls us again.
            return credential_provider

        if credential_provider != "auto":
            # An explicit provider was passed: warn, then wrap it as-is.
            msg = f"the `credential_provider` parameter of `{caller_name}` is considered unstable."
            issue_unstable_warning(msg)

            return CredentialProviderBuilder.from_initialized_provider(
                credential_provider
            )

        # From here on `credential_provider == "auto"`.
        if DEFAULT_CREDENTIAL_PROVIDER is None:
            return None

        if (first_scan_path := _first_scan_path(source)) is None:
            return None

        # Paths without a "://" scheme get no credential provider.
        if (scheme := _get_path_scheme(first_scan_path)) is None:
            return None

        def get_default_credential_provider() -> CredentialProviderBuilder | None:
            # A module-level default other than "auto" is wrapped and returned;
            # with "auto" this yields None so the caller falls through to
            # AutoInit.
            return (
                CredentialProviderBuilder.from_initialized_provider(
                    DEFAULT_CREDENTIAL_PROVIDER
                )
                if DEFAULT_CREDENTIAL_PROVIDER != "auto"
                else None
            )

        if _is_azure_cloud(scheme):
            tenant_id = None
            storage_account = None

            if storage_options is not None:
                for k, v in storage_options.items():
                    k = k.lower()

                    # https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html
                    if k in {
                        "azure_storage_tenant_id",
                        "azure_storage_authority_id",
                        "azure_tenant_id",
                        "azure_authority_id",
                        "tenant_id",
                        "authority_id",
                    }:
                        tenant_id = v
                    elif k in {"azure_storage_account_name", "account_name"}:
                        storage_account = v
                    elif k in {"azure_use_azure_cli", "use_azure_cli"}:
                        continue
                    elif k in OBJECT_STORE_CLIENT_OPTIONS:
                        continue
                    else:
                        # We assume some sort of access key was given, so we
                        # just dispatch to the rust side.
                        return None

            storage_account = (
                # Prefer the one embedded in the path
                CredentialProviderAzure._extract_adls_uri_storage_account(
                    str(first_scan_path)
                )
                or storage_account
            )

            if (default := get_default_credential_provider()) is not None:
                return default

            return CredentialProviderBuilder(
                AutoInit(
                    CredentialProviderAzure,
                    tenant_id=tenant_id,
                    _storage_account=storage_account,
                )
            )

        elif _is_aws_cloud(scheme=scheme, first_scan_path=str(first_scan_path)):
            region = None
            profile = None
            default_region = None
            unhandled_key = None
            has_endpoint_url = False

            if storage_options is not None:
                for k, v in storage_options.items():
                    k = k.lower()

                    # https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
                    if k in {"aws_region", "region"}:
                        region = v
                    elif k in {"aws_default_region", "default_region"}:
                        default_region = v
                    elif k in {"aws_profile", "profile"}:
                        profile = v
                    elif k in {
                        "aws_endpoint",
                        "aws_endpoint_url",
                        "endpoint",
                        "endpoint_url",
                    }:
                        has_endpoint_url = True
                    elif k in {"aws_request_payer", "request_payer"}:
                        continue
                    elif k in OBJECT_STORE_CLIENT_OPTIONS:
                        continue
                    else:
                        # We assume this is some sort of access key
                        # (only the last such key seen is remembered).
                        unhandled_key = k

            if unhandled_key is not None:
                if profile is not None:
                    msg = (
                        "unsupported: cannot combine aws_profile with "
                        f"{unhandled_key} in storage_options"
                    )
                    raise ValueError(msg)

            # The module-level default is only used when no unrecognized key
            # was supplied; otherwise AutoInit receives the key below.
            if (
                unhandled_key is None
                and (default := get_default_credential_provider()) is not None
            ):
                return default

            return CredentialProviderBuilder(
                AutoInit(
                    CredentialProviderAWS,
                    profile_name=profile,
                    region_name=region or default_region,
                    _auto_init_unhandled_key=unhandled_key,
                    _storage_options_has_endpoint_url=has_endpoint_url,
                )
            )

        elif _is_gcp_cloud(scheme):
            token = None
            unhandled_key = None

            if storage_options is not None:
                for k, v in storage_options.items():
                    k = k.lower()

                    # https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html
                    if k in {"token", "bearer_token"}:
                        token = v
                    elif k in {
                        "google_bucket",
                        "google_bucket_name",
                        "bucket",
                        "bucket_name",
                    }:
                        continue
                    elif k in OBJECT_STORE_CLIENT_OPTIONS:
                        continue
                    else:
                        # We assume some sort of access key was given, so we
                        # just dispatch to the rust side.
                        unhandled_key = k

            if unhandled_key is not None:
                if token is not None:
                    msg = (
                        "unsupported: cannot combine token with "
                        f"{unhandled_key} in storage_options"
                    )
                    raise ValueError(msg)

                return None

            # A user-supplied token takes precedence over any default provider.
            if token is not None:
                return CredentialProviderBuilder(
                    InitializedCredentialProvider(UserProvidedGCPToken(token))
                )

            if (default := get_default_credential_provider()) is not None:
                return default

            return CredentialProviderBuilder(AutoInit(CredentialProviderGCP))

        # Scheme does not match any of the Azure, AWS or GCP checks above.
        return None

    credential_provider_init = f()

    if verbose():
        eprint(f"_init_credential_provider_builder(): {credential_provider_init = !r}")

    return credential_provider_init
|