polars-runtime-compat 1.34.0b3__cp39-abi3-win_amd64.whl → 1.34.0b4__cp39-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/catalog/unity/client.py
DELETED
|
@@ -1,733 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import contextlib
|
|
4
|
-
import importlib
|
|
5
|
-
import os
|
|
6
|
-
import sys
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Literal
|
|
8
|
-
|
|
9
|
-
from polars._utils.unstable import issue_unstable_warning
|
|
10
|
-
from polars._utils.wrap import wrap_ldf
|
|
11
|
-
from polars.catalog.unity.models import (
|
|
12
|
-
CatalogInfo,
|
|
13
|
-
ColumnInfo,
|
|
14
|
-
NamespaceInfo,
|
|
15
|
-
TableInfo,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from collections.abc import Generator
|
|
20
|
-
from datetime import datetime
|
|
21
|
-
|
|
22
|
-
import deltalake
|
|
23
|
-
|
|
24
|
-
from polars._typing import SchemaDict
|
|
25
|
-
from polars.catalog.unity.models import DataSourceFormat, TableType
|
|
26
|
-
from polars.dataframe.frame import DataFrame
|
|
27
|
-
from polars.io.cloud import (
|
|
28
|
-
CredentialProviderFunction,
|
|
29
|
-
CredentialProviderFunctionReturn,
|
|
30
|
-
)
|
|
31
|
-
from polars.io.cloud.credential_provider._builder import CredentialProviderBuilder
|
|
32
|
-
from polars.lazyframe import LazyFrame
|
|
33
|
-
|
|
34
|
-
# The native extension module may be unavailable; in that case the ImportError
# is deliberately swallowed and `PyCatalogClient` is simply left undefined.
with contextlib.suppress(ImportError):
    from polars._plr import PyCatalogClient

    # Hand the Python-side info classes to the native client (presumably so it
    # can build these types for its return values -- confirm against `_plr`).
    PyCatalogClient.init_classes(
        catalog_info_cls=CatalogInfo,
        namespace_info_cls=NamespaceInfo,
        table_info_cls=TableInfo,
        column_info_cls=ColumnInfo,
    )
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class Catalog:
|
|
46
|
-
"""
|
|
47
|
-
Unity catalog client.
|
|
48
|
-
|
|
49
|
-
.. warning::
|
|
50
|
-
This functionality is considered **unstable**. It may be changed
|
|
51
|
-
at any point without it being considered a breaking change.
|
|
52
|
-
"""
|
|
53
|
-
|
|
54
|
-
def __init__(
    self,
    workspace_url: str,
    *,
    bearer_token: str | None = "auto",
    require_https: bool = True,
) -> None:
    """
    Initialize a catalog client.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    workspace_url
        URL of the workspace, or alternatively the URL of the Unity catalog
        API endpoint.
    bearer_token
        Bearer token to authenticate with. This can also be set to:

        * "auto": Automatically retrieve bearer tokens from the environment.
          A token is only retrieved when the `DATABRICKS_RUNTIME_VERSION`
          environment variable is set and `workspace_url` is an HTTPS URL
          whose host ends with `.cloud.databricks.com`; otherwise no token
          is used.
        * "databricks-sdk": Use the Databricks SDK to retrieve and use the
          bearer token from the environment.
    require_https
        Require the `workspace_url` to use HTTPS.

    Raises
    ------
    ValueError
        If `require_https` is set and `workspace_url` does not start with
        `https://`.
    """
    from urllib.parse import urlsplit

    issue_unstable_warning("`Catalog` functionality is considered unstable.")

    if require_https and not workspace_url.startswith("https://"):
        msg = (
            f"a non-HTTPS workspace_url was given ({workspace_url}). To "
            "allow non-HTTPS URLs, pass require_https=False."
        )
        raise ValueError(msg)

    use_databricks_token = bearer_token == "databricks-sdk"

    # For security, in "auto" mode, only retrieve/use the token if:
    # * We are running inside a Databricks environment
    # * The `workspace_url` is pointing to Databricks and uses HTTPS
    if (
        bearer_token == "auto"
        and "DATABRICKS_RUNTIME_VERSION" in os.environ
        and workspace_url.startswith("https://")
    ):
        # Inspect the parsed hostname rather than the text before the first
        # "/": a URL such as "https://evil.example?.cloud.databricks.com" has
        # no "/" after the authority, so a plain suffix check on that text
        # would wrongly treat it as a Databricks host.
        try:
            host = urlsplit(workspace_url).hostname or ""
        except ValueError:
            # Malformed authority (e.g. unbalanced IPv6 brackets): not trusted.
            host = ""

        use_databricks_token = (
            host.isascii()
            # Reject hosts with characters (such as "\\") that other URL
            # parsers may interpret as a delimiter.
            and all(ch.isalnum() or ch in "-." for ch in host)
            and host.endswith(".cloud.databricks.com")
        )

    if use_databricks_token:
        bearer_token = self._get_databricks_token()

    # "auto" that did not resolve to a token means: connect without one.
    if bearer_token == "auto":
        bearer_token = None

    self._client = PyCatalogClient.new(workspace_url, bearer_token)
|
|
110
|
-
|
|
111
|
-
def list_catalogs(self) -> list[CatalogInfo]:
    """
    List the available catalogs.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Returns
    -------
    list of CatalogInfo
        Whatever the underlying client reports for `list_catalogs`.
    """
    return self._client.list_catalogs()
|
|
120
|
-
|
|
121
|
-
def list_namespaces(self, catalog_name: str) -> list[NamespaceInfo]:
    """
    List the available namespaces (unity schema) under the specified catalog.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.

    Returns
    -------
    list of NamespaceInfo
        Whatever the underlying client reports for `catalog_name`.
    """
    return self._client.list_namespaces(catalog_name)
|
|
135
|
-
|
|
136
|
-
def list_tables(self, catalog_name: str, namespace: str) -> list[TableInfo]:
    """
    List the available tables under the specified namespace (unity schema).

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).

    Returns
    -------
    list of TableInfo
        Whatever the underlying client reports for the given namespace.
    """
    return self._client.list_tables(catalog_name, namespace)
|
|
152
|
-
|
|
153
|
-
def get_table_info(
    self, catalog_name: str, namespace: str, table_name: str
) -> TableInfo:
    """
    Retrieve the metadata of the specified table.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    table_name
        Name of the table.

    Returns
    -------
    TableInfo
        Metadata as returned by the underlying client.
    """
    return self._client.get_table_info(catalog_name, namespace, table_name)
|
|
173
|
-
|
|
174
|
-
def _get_table_credentials(
    self, table_id: str, *, write: bool
) -> tuple[dict[str, str] | None, dict[str, str], int]:
    """
    Request credentials for a table from the underlying client.

    Parameters
    ----------
    table_id
        Identifier of the table, forwarded unchanged to the client.
    write
        Forwarded unchanged to the client (presumably selects write access
        rather than read-only access -- confirm against the native client).

    Returns
    -------
    tuple
        The 3-tuple produced by the client's `get_table_credentials`.
    """
    return self._client.get_table_credentials(table_id=table_id, write=write)
|
|
178
|
-
|
|
179
|
-
def scan_table(
    self,
    catalog_name: str,
    namespace: str,
    table_name: str,
    *,
    delta_table_version: int | str | datetime | None = None,
    delta_table_options: dict[str, Any] | None = None,
    storage_options: dict[str, Any] | None = None,
    credential_provider: (
        CredentialProviderFunction | Literal["auto"] | None
    ) = "auto",
    retries: int = 2,
) -> LazyFrame:
    """
    Lazily scan the specified catalog table.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    table_name
        Name of the table.
    delta_table_version
        Version of the table to scan (Deltalake only).
    delta_table_options
        Additional keyword arguments while reading a Deltalake table.
    storage_options
        Options that indicate how to connect to a cloud provider.

        The cloud providers currently supported are AWS, GCP, and Azure.
        See supported keys here:

        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

        If `storage_options` is not provided, Polars will try to infer the
        information from environment variables.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
    retries
        Number of retries if accessing a cloud instance fails.

    Returns
    -------
    LazyFrame

    Raises
    ------
    ValueError
        If `delta_table_version` or `delta_table_options` is given for a
        table whose data source format is not `DELTA` / `DELTASHARING`.
    """
    table_info = self.get_table_info(catalog_name, namespace, table_name)
    storage_location, data_source_format = _extract_location_and_data_format(
        table_info, "scan table"
    )

    credential_provider, storage_options = self._init_credentials(  # type: ignore[assignment]
        credential_provider,
        storage_options,
        table_info,
        write=False,
        caller_name="Catalog.scan_table",
    )

    # Delta tables are read through the dedicated delta scanner.
    if data_source_format in ("DELTA", "DELTASHARING"):
        from polars.io.delta import scan_delta

        return scan_delta(
            storage_location,
            version=delta_table_version,
            delta_table_options=delta_table_options,
            storage_options=storage_options,
            credential_provider=credential_provider,
        )

    # From here on the table is not a delta table, so delta-only arguments
    # cannot be honoured and are rejected explicitly.
    if delta_table_version is not None:
        msg = (
            "cannot apply delta_table_version for table of type "
            f"{data_source_format}"
        )
        raise ValueError(msg)

    if delta_table_options is not None:
        msg = (
            "cannot apply delta_table_options for table of type "
            f"{data_source_format}"
        )
        raise ValueError(msg)

    if storage_options:
        # The native client is given the options as a list of (key, value) pairs.
        storage_options = list(storage_options.items())  # type: ignore[assignment]
    else:
        # Handle empty dict input
        storage_options = None

    return wrap_ldf(
        self._client.scan_table(
            catalog_name,
            namespace,
            table_name,
            credential_provider=credential_provider,
            cloud_options=storage_options,
            retries=retries,
        )
    )
|
|
292
|
-
|
|
293
|
-
def write_table(
    self,
    df: DataFrame,
    catalog_name: str,
    namespace: str,
    table_name: str,
    *,
    delta_mode: Literal[
        "error", "append", "overwrite", "ignore", "merge"
    ] = "error",
    delta_write_options: dict[str, Any] | None = None,
    delta_merge_options: dict[str, Any] | None = None,
    storage_options: dict[str, str] | None = None,
    credential_provider: CredentialProviderFunction
    | Literal["auto"]
    | None = "auto",
) -> None | deltalake.table.TableMerger:
    """
    Write a DataFrame to a catalog table.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    df
        DataFrame to write.
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    table_name
        Name of the table.
    delta_mode : {'error', 'append', 'overwrite', 'ignore', 'merge'}
        (For delta tables) How to handle existing data.

        - If 'error', throw an error if the table already exists (default).
        - If 'append', will add new data.
        - If 'overwrite', will replace table with new data.
        - If 'ignore', will not write anything if table already exists.
        - If 'merge', return a `TableMerger` object to merge data from the DataFrame
          with the existing data.
    delta_write_options
        (For delta tables) Additional keyword arguments while writing a
        Delta lake Table.
        See a list of supported write options `here <https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake>`__.
    delta_merge_options
        (For delta tables) Keyword arguments which are required to `MERGE` a
        Delta lake Table.
        See a list of supported merge options `here <https://delta-io.github.io/delta-rs/api/delta_table/#deltalake.DeltaTable.merge>`__.
    storage_options
        Options that indicate how to connect to a cloud provider.

        The cloud providers currently supported are AWS, GCP, and Azure.
        See supported keys here:

        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

        If `storage_options` is not provided, Polars will try to infer the
        information from environment variables.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.

    Returns
    -------
    None or deltalake.table.TableMerger
        Whatever `DataFrame.write_delta` returns for the given `delta_mode`.

    Raises
    ------
    NotImplementedError
        If the table's data source format is not `DELTA` / `DELTASHARING`.
    """
    table_info = self.get_table_info(catalog_name, namespace, table_name)
    # The second argument is an operation label for the helper (presumably
    # used in its error messages); it must name this operation, not the scan.
    storage_location, data_source_format = _extract_location_and_data_format(
        table_info, "write table"
    )

    credential_provider, storage_options = self._init_credentials(  # type: ignore[assignment]
        credential_provider,
        storage_options,
        table_info,
        write=True,
        caller_name="Catalog.write_table",
    )

    # Only delta tables can be written; reject everything else up front.
    if data_source_format not in ("DELTA", "DELTASHARING"):
        msg = (
            "write_table: table format of "
            f"{catalog_name}.{namespace}.{table_name} "
            f"({data_source_format}) is unsupported."
        )
        raise NotImplementedError(msg)

    return df.write_delta(  # type: ignore[misc]
        storage_location,
        storage_options=storage_options,
        credential_provider=credential_provider,
        mode=delta_mode,
        delta_write_options=delta_write_options,
        delta_merge_options=delta_merge_options,
    )  # type: ignore[call-overload]
|
|
397
|
-
|
|
398
|
-
def create_catalog(
    self,
    catalog_name: str,
    *,
    comment: str | None = None,
    storage_root: str | None = None,
) -> CatalogInfo:
    """
    Create a catalog.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    comment
        Leaves a comment about the catalog.
    storage_root
        Base location at which to store the catalog.

    Returns
    -------
    CatalogInfo
        The value returned by the underlying client for the new catalog.
    """
    return self._client.create_catalog(
        catalog_name=catalog_name, comment=comment, storage_root=storage_root
    )
|
|
424
|
-
|
|
425
|
-
def delete_catalog(
    self,
    catalog_name: str,
    *,
    force: bool = False,
) -> None:
    """
    Delete a catalog.

    Note that depending on the table type and catalog server, this may not
    delete the actual data files from storage. For more details, please
    consult the documentation of the catalog provider you are using.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    force
        Forcibly delete the catalog even if it is not empty.
    """
    self._client.delete_catalog(catalog_name=catalog_name, force=force)
|
|
450
|
-
|
|
451
|
-
def create_namespace(
    self,
    catalog_name: str,
    namespace: str,
    *,
    comment: str | None = None,
    storage_root: str | None = None,
) -> NamespaceInfo:
    """
    Create a namespace (unity schema) in the catalog.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    comment
        Leaves a comment about the namespace.
    storage_root
        Base location at which to store the namespace.

    Returns
    -------
    NamespaceInfo
        The value returned by the underlying client for the new namespace.
    """
    return self._client.create_namespace(
        catalog_name=catalog_name,
        namespace=namespace,
        comment=comment,
        storage_root=storage_root,
    )
|
|
483
|
-
|
|
484
|
-
def delete_namespace(
    self,
    catalog_name: str,
    namespace: str,
    *,
    force: bool = False,
) -> None:
    """
    Delete a namespace (unity schema) in the catalog.

    Note that depending on the table type and catalog server, this may not
    delete the actual data files from storage. For more details, please
    consult the documentation of the catalog provider you are using.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    force
        Forcibly delete the namespace even if it is not empty.
    """
    self._client.delete_namespace(
        catalog_name=catalog_name, namespace=namespace, force=force
    )
|
|
514
|
-
|
|
515
|
-
def create_table(
    self,
    catalog_name: str,
    namespace: str,
    table_name: str,
    *,
    schema: SchemaDict | None,
    table_type: TableType,
    data_source_format: DataSourceFormat | None = None,
    comment: str | None = None,
    storage_root: str | None = None,
    properties: dict[str, str] | None = None,
) -> TableInfo:
    """
    Create a table in the catalog.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    table_name
        Name of the table.
    schema
        Schema of the table.
    table_type
        Type of the table.
    data_source_format
        Storage format of the table.
    comment
        Leaves a comment about the table.
    storage_root
        Base location at which to store the table.
    properties
        Extra key-value metadata to store.

    Returns
    -------
    TableInfo
        The value returned by the underlying client for the new table.
    """
    # The client receives properties as a list of (key, value) pairs; a missing
    # mapping is sent as an empty list.
    property_pairs = list((properties or {}).items())

    return self._client.create_table(
        catalog_name=catalog_name,
        namespace=namespace,
        table_name=table_name,
        schema=schema,
        table_type=table_type,
        data_source_format=data_source_format,
        comment=comment,
        storage_root=storage_root,
        properties=property_pairs,
    )
|
|
567
|
-
|
|
568
|
-
def delete_table(
    self,
    catalog_name: str,
    namespace: str,
    table_name: str,
) -> None:
    """
    Delete the table stored at this location.

    Note that depending on the table type and catalog server, this may not
    delete the actual data files from storage. For more details, please
    consult the documentation of the catalog provider you are using.

    If you would like to perform manual deletions, the storage location of
    the files can be found using `get_table_info`.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    catalog_name
        Name of the catalog.
    namespace
        Name of the namespace (unity schema).
    table_name
        Name of the table.
    """
    self._client.delete_table(
        catalog_name=catalog_name,
        namespace=namespace,
        table_name=table_name,
    )
|
|
602
|
-
|
|
603
|
-
def _init_credentials(
    self,
    credential_provider: CredentialProviderFunction | Literal["auto"] | None,
    storage_options: dict[str, Any] | None,
    table_info: TableInfo,
    *,
    write: bool,
    caller_name: str,
) -> tuple[
    CredentialProviderBuilder | None,
    dict[str, Any] | None,
]:
    """
    Resolve the credential provider builder and storage options for a table.

    Parameters
    ----------
    credential_provider
        A provider function, `"auto"`, or a falsy value. Anything other than
        `"auto"` is used as given: a truthy provider is wrapped in a builder,
        a falsy one yields no builder.
    storage_options
        Storage options supplied by the caller. With `"auto"`, these are
        merged with any options returned alongside the catalog credentials,
        the latter taking precedence.
    table_info
        Table whose `table_id`, `name` and `storage_location` are used.
    write
        Forwarded to `CatalogCredentialProvider`.
    caller_name
        Forwarded to the built-in credential provider resolution on fallback.

    Returns
    -------
    tuple
        The credential provider builder (or None) and the storage options.
    """
    from polars.io.cloud.credential_provider._builder import (
        CredentialProviderBuilder,
    )

    # An explicit (non-"auto") choice bypasses the catalog entirely.
    if credential_provider != "auto":
        explicit_builder = (
            CredentialProviderBuilder.from_initialized_provider(credential_provider)
            if credential_provider
            else None
        )
        return explicit_builder, storage_options

    verbose = os.getenv("POLARS_VERBOSE") == "1"

    catalog_credential_provider = CatalogCredentialProvider(
        self, table_info.table_id, write=write
    )

    try:
        credential_steps = catalog_credential_provider._credentials_iter()

        # The first item holds the storage option updates.
        storage_update_options = next(credential_steps)

        if storage_update_options:
            merged_options = dict(storage_options or {})
            merged_options.update(storage_update_options)
            storage_options = merged_options

        # Drain the iterator so that a missing-credentials error surfaces here.
        for _ in credential_steps:
            pass

    except Exception as e:
        if verbose:
            table_name = table_info.name
            table_id = table_info.table_id
            msg = (
                f"error auto-initializing CatalogCredentialProvider: {e!r} "
                f"{table_name = } ({table_id = }) ({write = })"
            )
            print(msg, file=sys.stderr)
    else:
        if verbose:
            table_name = table_info.name
            table_id = table_info.table_id
            msg = (
                "auto-selected CatalogCredentialProvider for "
                f"{table_name = } ({table_id = })"
            )
            print(msg, file=sys.stderr)

        catalog_builder = CredentialProviderBuilder.from_initialized_provider(
            catalog_credential_provider
        )
        return catalog_builder, storage_options

    # This should generally not happen, but if using the temporary
    # credentials API fails for whatever reason, we fallback to our built-in
    # credential provider resolution.

    from polars.io.cloud.credential_provider._builder import (
        _init_credential_provider_builder,
    )

    fallback_builder = _init_credential_provider_builder(
        "auto", table_info.storage_location, storage_options, caller_name
    )
    return fallback_builder, storage_options
|
|
677
|
-
|
|
678
|
-
@classmethod
def _get_databricks_token(cls) -> str:
    """
    Obtain a Databricks access token through the `databricks-sdk` package.

    Returns
    -------
    str
        The value of the `Authorization` header produced by the SDK's default
        credentials, with its first 7 characters removed.

    Raises
    ------
    ImportError
        If `databricks-sdk` is not installed.
    """
    # `find_spec` on a dotted name imports the parent package first, so it
    # raises ModuleNotFoundError (rather than returning None) when `databricks`
    # itself is absent. Treat both outcomes as "not installed".
    try:
        sdk_spec = importlib.util.find_spec("databricks.sdk")
    except ModuleNotFoundError:
        sdk_spec = None

    if sdk_spec is None:
        msg = "could not get Databricks token: databricks-sdk is not installed"
        raise ImportError(msg)

    # We code like this to bypass linting
    m = importlib.import_module("databricks.sdk.core").__dict__

    authorization = m["DefaultCredentials"]()(m["Config"]())()["Authorization"]

    # Drop the 7-character scheme prefix (presumably "Bearer ") from the header.
    return authorization[7:]
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
class CatalogCredentialProvider:
    """Credential provider backed by the Unity catalog temporary credentials API."""

    def __init__(self, catalog: Catalog, table_id: str, *, write: bool) -> None:
        self.catalog = catalog
        self.table_id = table_id
        self.write = write

    def __call__(self) -> CredentialProviderFunctionReturn:  # noqa: D102
        steps = self._credentials_iter()
        # The first item holds the storage option updates, which are not
        # needed here.
        next(steps)
        creds, expiry = next(steps)
        return creds, expiry

    def _credentials_iter(
        self,
    ) -> Generator[Any]:
        """
        Fetch table credentials from the catalog in two steps.

        Yields the storage option updates first, then a `(creds, expiry)`
        pair. Raises on the second step if no credentials were received.
        """
        fetched = self.catalog._get_table_credentials(
            self.table_id, write=self.write
        )
        creds, storage_update_options, expiry = fetched

        yield storage_update_options

        if creds:
            yield creds, expiry
            return

        table_id = self.table_id
        msg = (
            "did not receive credentials from temporary credentials API for "
            f"{table_id = }"
        )
        raise Exception(msg)  # noqa: TRY002
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def _extract_location_and_data_format(
    table_info: TableInfo, operation: str
) -> tuple[str, DataSourceFormat]:
    """
    Return the storage location and data source format of a table.

    Parameters
    ----------
    table_info
        Table to read `storage_location` and `data_source_format` from.
    operation
        Description of the attempted operation, used in error messages.

    Raises
    ------
    ValueError
        If either the storage location or the data source format is None.
    """
    location = table_info.storage_location
    if location is None:
        msg = f"cannot {operation}: no storage_location found"
        raise ValueError(msg)

    data_format = table_info.data_source_format
    if data_format is None:
        msg = f"cannot {operation}: no data_source_format found"
        raise ValueError(msg)

    return location, data_format
|