polars_runtime_compat-1.34.0b2-cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of polars-runtime-compat might be problematic. Review the file changes below.

Files changed (203)
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/catalog/unity/client.py
@@ -0,0 +1,733 @@
+from __future__ import annotations
+
+import contextlib
+import importlib
+import os
+import sys
+from typing import TYPE_CHECKING, Any, Literal
+
+from polars._utils.unstable import issue_unstable_warning
+from polars._utils.wrap import wrap_ldf
+from polars.catalog.unity.models import (
+    CatalogInfo,
+    ColumnInfo,
+    NamespaceInfo,
+    TableInfo,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+    from datetime import datetime
+
+    import deltalake
+
+    from polars._typing import SchemaDict
+    from polars.catalog.unity.models import DataSourceFormat, TableType
+    from polars.dataframe.frame import DataFrame
+    from polars.io.cloud import (
+        CredentialProviderFunction,
+        CredentialProviderFunctionReturn,
+    )
+    from polars.io.cloud.credential_provider._builder import CredentialProviderBuilder
+    from polars.lazyframe import LazyFrame
+
+with contextlib.suppress(ImportError):
+    from polars._plr import PyCatalogClient
+
+    PyCatalogClient.init_classes(
+        catalog_info_cls=CatalogInfo,
+        namespace_info_cls=NamespaceInfo,
+        table_info_cls=TableInfo,
+        column_info_cls=ColumnInfo,
+    )
+
+
+class Catalog:
+    """
+    Unity catalog client.
+
+    .. warning::
+        This functionality is considered **unstable**. It may be changed
+        at any point without it being considered a breaking change.
+    """
+
+    def __init__(
+        self,
+        workspace_url: str,
+        *,
+        bearer_token: str | None = "auto",
+        require_https: bool = True,
+    ) -> None:
+        """
+        Initialize a catalog client.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        workspace_url
+            URL of the workspace, or alternatively the URL of the Unity catalog
+            API endpoint.
+        bearer_token
+            Bearer token to authenticate with. This can also be set to:
+
+            * "auto": Automatically retrieve bearer tokens from the environment.
+            * "databricks-sdk": Use the Databricks SDK to retrieve and use the
+              bearer token from the environment.
+        require_https
+            Require the `workspace_url` to use HTTPS.
+        """
+        issue_unstable_warning("`Catalog` functionality is considered unstable.")
+
+        if require_https and not workspace_url.startswith("https://"):
+            msg = (
+                f"a non-HTTPS workspace_url was given ({workspace_url}). To "
+                "allow non-HTTPS URLs, pass require_https=False."
+            )
+            raise ValueError(msg)
+
+        if bearer_token == "databricks-sdk" or (
+            bearer_token == "auto"
+            # For security, in "auto" mode, only retrieve/use the token if:
+            # * We are running inside a Databricks environment
+            # * The `workspace_url` is pointing to Databricks and uses HTTPS
+            and "DATABRICKS_RUNTIME_VERSION" in os.environ
+            and workspace_url.startswith("https://")
+            and (
+                workspace_url.removeprefix("https://")
+                .split("/", 1)[0]
+                .endswith(".cloud.databricks.com")
+            )
+        ):
+            bearer_token = self._get_databricks_token()
+
+        if bearer_token == "auto":
+            bearer_token = None
+
+        self._client = PyCatalogClient.new(workspace_url, bearer_token)
+
+    def list_catalogs(self) -> list[CatalogInfo]:
+        """
+        List the available catalogs.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+        """
+        return self._client.list_catalogs()
+
+    def list_namespaces(self, catalog_name: str) -> list[NamespaceInfo]:
+        """
+        List the available namespaces (unity schema) under the specified catalog.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        """
+        return self._client.list_namespaces(catalog_name)
+
+    def list_tables(self, catalog_name: str, namespace: str) -> list[TableInfo]:
+        """
+        List the available tables under the specified schema.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        """
+        return self._client.list_tables(catalog_name, namespace)
+
+    def get_table_info(
+        self, catalog_name: str, namespace: str, table_name: str
+    ) -> TableInfo:
+        """
+        Retrieve the metadata of the specified table.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        table_name
+            Name of the table.
+        """
+        return self._client.get_table_info(catalog_name, namespace, table_name)
+
+    def _get_table_credentials(
+        self, table_id: str, *, write: bool
+    ) -> tuple[dict[str, str] | None, dict[str, str], int]:
+        return self._client.get_table_credentials(table_id=table_id, write=write)
+
+    def scan_table(
+        self,
+        catalog_name: str,
+        namespace: str,
+        table_name: str,
+        *,
+        delta_table_version: int | str | datetime | None = None,
+        delta_table_options: dict[str, Any] | None = None,
+        storage_options: dict[str, Any] | None = None,
+        credential_provider: (
+            CredentialProviderFunction | Literal["auto"] | None
+        ) = "auto",
+        retries: int = 2,
+    ) -> LazyFrame:
+        """
+        Scan the specified table as a LazyFrame.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        table_name
+            Name of the table.
+        delta_table_version
+            Version of the table to scan (Delta Lake only).
+        delta_table_options
+            Additional keyword arguments while reading a Delta Lake table.
+        storage_options
+            Options that indicate how to connect to a cloud provider.
+
+            The cloud providers currently supported are AWS, GCP, and Azure.
+            See supported keys here:
+
+            * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
+            * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
+            * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
+            * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
+              `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+
+            If `storage_options` is not provided, Polars will try to infer the
+            information from environment variables.
+        credential_provider
+            Provide a function that can be called to provide cloud storage
+            credentials. The function is expected to return a dictionary of
+            credential keys along with an optional credential expiry time.
+
+            .. warning::
+                This functionality is considered **unstable**. It may be changed
+                at any point without it being considered a breaking change.
+        retries
+            Number of retries if accessing a cloud instance fails.
+
+        """
+        table_info = self.get_table_info(catalog_name, namespace, table_name)
+        storage_location, data_source_format = _extract_location_and_data_format(
+            table_info, "scan table"
+        )
+
+        credential_provider, storage_options = self._init_credentials(  # type: ignore[assignment]
+            credential_provider,
+            storage_options,
+            table_info,
+            write=False,
+            caller_name="Catalog.scan_table",
+        )
+
+        if data_source_format in ["DELTA", "DELTASHARING"]:
+            from polars.io.delta import scan_delta
+
+            return scan_delta(
+                storage_location,
+                version=delta_table_version,
+                delta_table_options=delta_table_options,
+                storage_options=storage_options,
+                credential_provider=credential_provider,
+            )
+
+        if delta_table_version is not None:
+            msg = (
+                "cannot apply delta_table_version for table of type "
+                f"{data_source_format}"
+            )
+            raise ValueError(msg)
+
+        if delta_table_options is not None:
+            msg = (
+                "cannot apply delta_table_options for table of type "
+                f"{data_source_format}"
+            )
+            raise ValueError(msg)
+
+        if storage_options:
+            storage_options = list(storage_options.items())  # type: ignore[assignment]
+        else:
+            # Handle empty dict input
+            storage_options = None
+
+        return wrap_ldf(
+            self._client.scan_table(
+                catalog_name,
+                namespace,
+                table_name,
+                credential_provider=credential_provider,
+                cloud_options=storage_options,
+                retries=retries,
+            )
+        )
+
+    def write_table(
+        self,
+        df: DataFrame,
+        catalog_name: str,
+        namespace: str,
+        table_name: str,
+        *,
+        delta_mode: Literal[
+            "error", "append", "overwrite", "ignore", "merge"
+        ] = "error",
+        delta_write_options: dict[str, Any] | None = None,
+        delta_merge_options: dict[str, Any] | None = None,
+        storage_options: dict[str, str] | None = None,
+        credential_provider: CredentialProviderFunction
+        | Literal["auto"]
+        | None = "auto",
+    ) -> None | deltalake.table.TableMerger:
+        """
+        Write a DataFrame to a catalog table.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        df
+            DataFrame to write.
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        table_name
+            Name of the table.
+        delta_mode : {'error', 'append', 'overwrite', 'ignore', 'merge'}
+            (For delta tables) How to handle existing data.
+
+            - If 'error', throw an error if the table already exists (default).
+            - If 'append', will add new data.
+            - If 'overwrite', will replace table with new data.
+            - If 'ignore', will not write anything if table already exists.
+            - If 'merge', return a `TableMerger` object to merge data from the DataFrame
+              with the existing data.
+        delta_write_options
+            (For delta tables) Additional keyword arguments while writing a
+            Delta Lake table.
+            See a list of supported write options `here <https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake>`__.
+        delta_merge_options
+            (For delta tables) Keyword arguments which are required to `MERGE` a
+            Delta Lake table.
+            See a list of supported merge options `here <https://delta-io.github.io/delta-rs/api/delta_table/#deltalake.DeltaTable.merge>`__.
+        storage_options
+            Options that indicate how to connect to a cloud provider.
+
+            The cloud providers currently supported are AWS, GCP, and Azure.
+            See supported keys here:
+
+            * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
+            * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
+            * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
+            * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
+              `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+
+            If `storage_options` is not provided, Polars will try to infer the
+            information from environment variables.
+        credential_provider
+            Provide a function that can be called to provide cloud storage
+            credentials. The function is expected to return a dictionary of
+            credential keys along with an optional credential expiry time.
+
+            .. warning::
+                This functionality is considered **unstable**. It may be changed
+                at any point without it being considered a breaking change.
+        """
+        table_info = self.get_table_info(catalog_name, namespace, table_name)
+        storage_location, data_source_format = _extract_location_and_data_format(
+            table_info, "write table"
+        )
+
+        credential_provider, storage_options = self._init_credentials(  # type: ignore[assignment]
+            credential_provider,
+            storage_options,
+            table_info,
+            write=True,
+            caller_name="Catalog.write_table",
+        )
+
+        if data_source_format in ["DELTA", "DELTASHARING"]:
+            return df.write_delta(  # type: ignore[misc]
+                storage_location,
+                storage_options=storage_options,
+                credential_provider=credential_provider,
+                mode=delta_mode,
+                delta_write_options=delta_write_options,
+                delta_merge_options=delta_merge_options,
+            )  # type: ignore[call-overload]
+
+        else:
+            msg = (
+                "write_table: table format of "
+                f"{catalog_name}.{namespace}.{table_name} "
+                f"({data_source_format}) is unsupported."
+            )
+            raise NotImplementedError(msg)
+
+    def create_catalog(
+        self,
+        catalog_name: str,
+        *,
+        comment: str | None = None,
+        storage_root: str | None = None,
+    ) -> CatalogInfo:
+        """
+        Create a catalog.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        comment
+            Leaves a comment about the catalog.
+        storage_root
+            Base location at which to store the catalog.
+        """
+        return self._client.create_catalog(
+            catalog_name=catalog_name, comment=comment, storage_root=storage_root
+        )
+
+    def delete_catalog(
+        self,
+        catalog_name: str,
+        *,
+        force: bool = False,
+    ) -> None:
+        """
+        Delete a catalog.
+
+        Note that depending on the table type and catalog server, this may not
+        delete the actual data files from storage. For more details, please
+        consult the documentation of the catalog provider you are using.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        force
+            Forcibly delete the catalog even if it is not empty.
+        """
+        self._client.delete_catalog(catalog_name=catalog_name, force=force)
+
+    def create_namespace(
+        self,
+        catalog_name: str,
+        namespace: str,
+        *,
+        comment: str | None = None,
+        storage_root: str | None = None,
+    ) -> NamespaceInfo:
+        """
+        Create a namespace (unity schema) in the catalog.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        comment
+            Leaves a comment about the namespace.
+        storage_root
+            Base location at which to store the namespace.
+        """
+        return self._client.create_namespace(
+            catalog_name=catalog_name,
+            namespace=namespace,
+            comment=comment,
+            storage_root=storage_root,
+        )
+
+    def delete_namespace(
+        self,
+        catalog_name: str,
+        namespace: str,
+        *,
+        force: bool = False,
+    ) -> None:
+        """
+        Delete a namespace (unity schema) in the catalog.
+
+        Note that depending on the table type and catalog server, this may not
+        delete the actual data files from storage. For more details, please
+        consult the documentation of the catalog provider you are using.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        force
+            Forcibly delete the namespace even if it is not empty.
+        """
+        self._client.delete_namespace(
+            catalog_name=catalog_name, namespace=namespace, force=force
+        )
+
+    def create_table(
+        self,
+        catalog_name: str,
+        namespace: str,
+        table_name: str,
+        *,
+        schema: SchemaDict | None,
+        table_type: TableType,
+        data_source_format: DataSourceFormat | None = None,
+        comment: str | None = None,
+        storage_root: str | None = None,
+        properties: dict[str, str] | None = None,
+    ) -> TableInfo:
+        """
+        Create a table in the catalog.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        table_name
+            Name of the table.
+        schema
+            Schema of the table.
+        table_type
+            Type of the table.
+        data_source_format
+            Storage format of the table.
+        comment
+            Leaves a comment about the table.
+        storage_root
+            Base location at which to store the table.
+        properties
+            Extra key-value metadata to store.
+        """
+        return self._client.create_table(
+            catalog_name=catalog_name,
+            namespace=namespace,
+            table_name=table_name,
+            schema=schema,
+            table_type=table_type,
+            data_source_format=data_source_format,
+            comment=comment,
+            storage_root=storage_root,
+            properties=list((properties or {}).items()),
+        )
+
+    def delete_table(
+        self,
+        catalog_name: str,
+        namespace: str,
+        table_name: str,
+    ) -> None:
+        """
+        Delete the table stored at this location.
+
+        Note that depending on the table type and catalog server, this may not
+        delete the actual data files from storage. For more details, please
+        consult the documentation of the catalog provider you are using.
+
+        If you would like to perform manual deletions, the storage location of
+        the files can be found using `get_table_info`.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+        Parameters
+        ----------
+        catalog_name
+            Name of the catalog.
+        namespace
+            Name of the namespace (unity schema).
+        table_name
+            Name of the table.
+        """
+        self._client.delete_table(
+            catalog_name=catalog_name,
+            namespace=namespace,
+            table_name=table_name,
+        )
+
+    def _init_credentials(
+        self,
+        credential_provider: CredentialProviderFunction | Literal["auto"] | None,
+        storage_options: dict[str, Any] | None,
+        table_info: TableInfo,
+        *,
+        write: bool,
+        caller_name: str,
+    ) -> tuple[
+        CredentialProviderBuilder | None,
+        dict[str, Any] | None,
+    ]:
+        from polars.io.cloud.credential_provider._builder import (
+            CredentialProviderBuilder,
+        )
+
+        if credential_provider != "auto":
+            if credential_provider:
+                return CredentialProviderBuilder.from_initialized_provider(
+                    credential_provider
+                ), storage_options
+            else:
+                return None, storage_options
+
+        verbose = os.getenv("POLARS_VERBOSE") == "1"
+
+        catalog_credential_provider = CatalogCredentialProvider(
+            self, table_info.table_id, write=write
+        )
+
+        try:
+            v = catalog_credential_provider._credentials_iter()
+            storage_update_options = next(v)
+
+            if storage_update_options:
+                storage_options = {**(storage_options or {}), **storage_update_options}
+
+            for _ in v:
+                pass
+
+        except Exception as e:
+            if verbose:
+                table_name = table_info.name
+                table_id = table_info.table_id
+                msg = (
+                    f"error auto-initializing CatalogCredentialProvider: {e!r} "
+                    f"{table_name = } ({table_id = }) ({write = })"
+                )
+                print(msg, file=sys.stderr)
+        else:
+            if verbose:
+                table_name = table_info.name
+                table_id = table_info.table_id
+                msg = (
+                    "auto-selected CatalogCredentialProvider for "
+                    f"{table_name = } ({table_id = })"
+                )
+                print(msg, file=sys.stderr)
+
+            return CredentialProviderBuilder.from_initialized_provider(
+                catalog_credential_provider
+            ), storage_options
+
+        # This should generally not happen, but if using the temporary
+        # credentials API fails for whatever reason, we fallback to our built-in
+        # credential provider resolution.
+
+        from polars.io.cloud.credential_provider._builder import (
+            _init_credential_provider_builder,
+        )
+
+        return _init_credential_provider_builder(
+            "auto", table_info.storage_location, storage_options, caller_name
+        ), storage_options
+
+    @classmethod
+    def _get_databricks_token(cls) -> str:
+        if importlib.util.find_spec("databricks.sdk") is None:
+            msg = "could not get Databricks token: databricks-sdk is not installed"
+            raise ImportError(msg)
+
+        # We code like this to bypass linting
+        m = importlib.import_module("databricks.sdk.core").__dict__
+
+        return m["DefaultCredentials"]()(m["Config"]())()["Authorization"][7:]
+
+
+class CatalogCredentialProvider:
+    """Retrieves credentials from the Unity catalog temporary credentials API."""
+
+    def __init__(self, catalog: Catalog, table_id: str, *, write: bool) -> None:
+        self.catalog = catalog
+        self.table_id = table_id
+        self.write = write
+
+    def __call__(self) -> CredentialProviderFunctionReturn:  # noqa: D102
+        _, (creds, expiry) = self._credentials_iter()
+        return creds, expiry
+
+    def _credentials_iter(
+        self,
+    ) -> Generator[Any]:
+        creds, storage_update_options, expiry = self.catalog._get_table_credentials(
+            self.table_id, write=self.write
+        )
+
+        yield storage_update_options
+
+        if not creds:
+            table_id = self.table_id
+            msg = (
+                "did not receive credentials from temporary credentials API for "
+                f"{table_id = }"
+            )
+            raise Exception(msg)  # noqa: TRY002
+
+        yield creds, expiry
+
+
+def _extract_location_and_data_format(
+    table_info: TableInfo, operation: str
+) -> tuple[str, DataSourceFormat]:
+    if table_info.storage_location is None:
+        msg = f"cannot {operation}: no storage_location found"
+        raise ValueError(msg)
+
+    if table_info.data_source_format is None:
+        msg = f"cannot {operation}: no data_source_format found"
+        raise ValueError(msg)
+
+    return table_info.storage_location, table_info.data_source_format
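
For orientation, below is a minimal usage sketch of the `Catalog` client defined in this file. The workspace URL, catalog/namespace/table names, and credential keys are placeholders, and the class is explicitly marked unstable in the code above, so treat this as illustrative rather than definitive:

    import polars as pl

    # Connect to a Unity catalog endpoint (placeholder URL). With the default
    # bearer_token="auto", a token is only picked up automatically inside a
    # Databricks runtime; otherwise pass one explicitly.
    catalog = pl.Catalog("https://my-workspace.cloud.databricks.com")

    # Enumerate catalogs, namespaces (unity schemas), and tables.
    for catalog_info in catalog.list_catalogs():
        print(catalog_info.name)
    tables = catalog.list_tables("my_catalog", "my_schema")

    # Lazily scan a Delta table. With credential_provider="auto" (the default),
    # short-lived credentials are fetched via the temporary-credentials API,
    # falling back to the built-in provider resolution on failure.
    lf = catalog.scan_table("my_catalog", "my_schema", "my_table")
    print(lf.head(10).collect())

    # A user-supplied credential provider is also accepted. Per the
    # CredentialProviderFunctionReturn type imported above, it should return
    # (credentials_dict, optional_expiry_unix_timestamp); the keys shown here
    # are placeholder AWS values.
    def my_provider():
        return {
            "aws_access_key_id": "...",
            "aws_secret_access_key": "...",
        }, None

    lf2 = catalog.scan_table(
        "my_catalog", "my_schema", "my_table", credential_provider=my_provider
    )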