polars-runtime-compat 1.34.0b3__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b4__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic; see the registry advisory for more details.

Files changed (203)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,733 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
- import importlib
5
- import os
6
- import sys
7
- from typing import TYPE_CHECKING, Any, Literal
8
-
9
- from polars._utils.unstable import issue_unstable_warning
10
- from polars._utils.wrap import wrap_ldf
11
- from polars.catalog.unity.models import (
12
- CatalogInfo,
13
- ColumnInfo,
14
- NamespaceInfo,
15
- TableInfo,
16
- )
17
-
18
- if TYPE_CHECKING:
19
- from collections.abc import Generator
20
- from datetime import datetime
21
-
22
- import deltalake
23
-
24
- from polars._typing import SchemaDict
25
- from polars.catalog.unity.models import DataSourceFormat, TableType
26
- from polars.dataframe.frame import DataFrame
27
- from polars.io.cloud import (
28
- CredentialProviderFunction,
29
- CredentialProviderFunctionReturn,
30
- )
31
- from polars.io.cloud.credential_provider._builder import CredentialProviderBuilder
32
- from polars.lazyframe import LazyFrame
33
-
34
- with contextlib.suppress(ImportError):
35
- from polars._plr import PyCatalogClient
36
-
37
- PyCatalogClient.init_classes(
38
- catalog_info_cls=CatalogInfo,
39
- namespace_info_cls=NamespaceInfo,
40
- table_info_cls=TableInfo,
41
- column_info_cls=ColumnInfo,
42
- )
43
-
44
-
45
- class Catalog:
46
- """
47
- Unity catalog client.
48
-
49
- .. warning::
50
- This functionality is considered **unstable**. It may be changed
51
- at any point without it being considered a breaking change.
52
- """
53
-
54
- def __init__(
55
- self,
56
- workspace_url: str,
57
- *,
58
- bearer_token: str | None = "auto",
59
- require_https: bool = True,
60
- ) -> None:
61
- """
62
- Initialize a catalog client.
63
-
64
- .. warning::
65
- This functionality is considered **unstable**. It may be changed
66
- at any point without it being considered a breaking change.
67
-
68
- Parameters
69
- ----------
70
- workspace_url
71
- URL of the workspace, or alternatively the URL of the Unity catalog
72
- API endpoint.
73
- bearer_token
74
- Bearer token to authenticate with. This can also be set to:
75
-
76
- * "auto": Automatically retrieve bearer tokens from the environment.
77
- * "databricks-sdk": Use the Databricks SDK to retrieve and use the
78
- bearer token from the environment.
79
- require_https
80
- Require the `workspace_url` to use HTTPS.
81
- """
82
- issue_unstable_warning("`Catalog` functionality is considered unstable.")
83
-
84
- if require_https and not workspace_url.startswith("https://"):
85
- msg = (
86
- f"a non-HTTPS workspace_url was given ({workspace_url}). To "
87
- "allow non-HTTPS URLs, pass require_https=False."
88
- )
89
- raise ValueError(msg)
90
-
91
- if bearer_token == "databricks-sdk" or (
92
- bearer_token == "auto"
93
- # For security, in "auto" mode, only retrieve/use the token if:
94
- # * We are running inside a Databricks environment
95
- # * The `workspace_url` is pointing to Databricks and uses HTTPS
96
- and "DATABRICKS_RUNTIME_VERSION" in os.environ
97
- and workspace_url.startswith("https://")
98
- and (
99
- workspace_url.removeprefix("https://")
100
- .split("/", 1)[0]
101
- .endswith(".cloud.databricks.com")
102
- )
103
- ):
104
- bearer_token = self._get_databricks_token()
105
-
106
- if bearer_token == "auto":
107
- bearer_token = None
108
-
109
- self._client = PyCatalogClient.new(workspace_url, bearer_token)
110
-
111
- def list_catalogs(self) -> list[CatalogInfo]:
112
- """
113
- List the available catalogs.
114
-
115
- .. warning::
116
- This functionality is considered **unstable**. It may be changed
117
- at any point without it being considered a breaking change.
118
- """
119
- return self._client.list_catalogs()
120
-
121
- def list_namespaces(self, catalog_name: str) -> list[NamespaceInfo]:
122
- """
123
- List the available namespaces (unity schema) under the specified catalog.
124
-
125
- .. warning::
126
- This functionality is considered **unstable**. It may be changed
127
- at any point without it being considered a breaking change.
128
-
129
- Parameters
130
- ----------
131
- catalog_name
132
- Name of the catalog.
133
- """
134
- return self._client.list_namespaces(catalog_name)
135
-
136
- def list_tables(self, catalog_name: str, namespace: str) -> list[TableInfo]:
137
- """
138
- List the available tables under the specified schema.
139
-
140
- .. warning::
141
- This functionality is considered **unstable**. It may be changed
142
- at any point without it being considered a breaking change.
143
-
144
- Parameters
145
- ----------
146
- catalog_name
147
- Name of the catalog.
148
- namespace
149
- Name of the namespace (unity schema).
150
- """
151
- return self._client.list_tables(catalog_name, namespace)
152
-
153
- def get_table_info(
154
- self, catalog_name: str, namespace: str, table_name: str
155
- ) -> TableInfo:
156
- """
157
- Retrieve the metadata of the specified table.
158
-
159
- .. warning::
160
- This functionality is considered **unstable**. It may be changed
161
- at any point without it being considered a breaking change.
162
-
163
- Parameters
164
- ----------
165
- catalog_name
166
- Name of the catalog.
167
- namespace
168
- Name of the namespace (unity schema).
169
- table_name
170
- Name of the table.
171
- """
172
- return self._client.get_table_info(catalog_name, namespace, table_name)
173
-
174
- def _get_table_credentials(
175
- self, table_id: str, *, write: bool
176
- ) -> tuple[dict[str, str] | None, dict[str, str], int]:
177
- return self._client.get_table_credentials(table_id=table_id, write=write)
178
-
179
- def scan_table(
180
- self,
181
- catalog_name: str,
182
- namespace: str,
183
- table_name: str,
184
- *,
185
- delta_table_version: int | str | datetime | None = None,
186
- delta_table_options: dict[str, Any] | None = None,
187
- storage_options: dict[str, Any] | None = None,
188
- credential_provider: (
189
- CredentialProviderFunction | Literal["auto"] | None
190
- ) = "auto",
191
- retries: int = 2,
192
- ) -> LazyFrame:
193
- """
194
- Retrieve the metadata of the specified table.
195
-
196
- .. warning::
197
- This functionality is considered **unstable**. It may be changed
198
- at any point without it being considered a breaking change.
199
-
200
- Parameters
201
- ----------
202
- catalog_name
203
- Name of the catalog.
204
- namespace
205
- Name of the namespace (unity schema).
206
- table_name
207
- Name of the table.
208
- delta_table_version
209
- Version of the table to scan (Deltalake only).
210
- delta_table_options
211
- Additional keyword arguments while reading a Deltalake table.
212
- storage_options
213
- Options that indicate how to connect to a cloud provider.
214
-
215
- The cloud providers currently supported are AWS, GCP, and Azure.
216
- See supported keys here:
217
-
218
- * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
219
- * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
220
- * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
221
- * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
222
- `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
223
-
224
- If `storage_options` is not provided, Polars will try to infer the
225
- information from environment variables.
226
- credential_provider
227
- Provide a function that can be called to provide cloud storage
228
- credentials. The function is expected to return a dictionary of
229
- credential keys along with an optional credential expiry time.
230
-
231
- .. warning::
232
- This functionality is considered **unstable**. It may be changed
233
- at any point without it being considered a breaking change.
234
- retries
235
- Number of retries if accessing a cloud instance fails.
236
-
237
- """
238
- table_info = self.get_table_info(catalog_name, namespace, table_name)
239
- storage_location, data_source_format = _extract_location_and_data_format(
240
- table_info, "scan table"
241
- )
242
-
243
- credential_provider, storage_options = self._init_credentials( # type: ignore[assignment]
244
- credential_provider,
245
- storage_options,
246
- table_info,
247
- write=False,
248
- caller_name="Catalog.scan_table",
249
- )
250
-
251
- if data_source_format in ["DELTA", "DELTASHARING"]:
252
- from polars.io.delta import scan_delta
253
-
254
- return scan_delta(
255
- storage_location,
256
- version=delta_table_version,
257
- delta_table_options=delta_table_options,
258
- storage_options=storage_options,
259
- credential_provider=credential_provider,
260
- )
261
-
262
- if delta_table_version is not None:
263
- msg = (
264
- "cannot apply delta_table_version for table of type "
265
- f"{data_source_format}"
266
- )
267
- raise ValueError(msg)
268
-
269
- if delta_table_options is not None:
270
- msg = (
271
- "cannot apply delta_table_options for table of type "
272
- f"{data_source_format}"
273
- )
274
- raise ValueError(msg)
275
-
276
- if storage_options:
277
- storage_options = list(storage_options.items()) # type: ignore[assignment]
278
- else:
279
- # Handle empty dict input
280
- storage_options = None
281
-
282
- return wrap_ldf(
283
- self._client.scan_table(
284
- catalog_name,
285
- namespace,
286
- table_name,
287
- credential_provider=credential_provider,
288
- cloud_options=storage_options,
289
- retries=retries,
290
- )
291
- )
292
-
293
- def write_table(
294
- self,
295
- df: DataFrame,
296
- catalog_name: str,
297
- namespace: str,
298
- table_name: str,
299
- *,
300
- delta_mode: Literal[
301
- "error", "append", "overwrite", "ignore", "merge"
302
- ] = "error",
303
- delta_write_options: dict[str, Any] | None = None,
304
- delta_merge_options: dict[str, Any] | None = None,
305
- storage_options: dict[str, str] | None = None,
306
- credential_provider: CredentialProviderFunction
307
- | Literal["auto"]
308
- | None = "auto",
309
- ) -> None | deltalake.table.TableMerger:
310
- """
311
- Write a DataFrame to a catalog table.
312
-
313
- .. warning::
314
- This functionality is considered **unstable**. It may be changed
315
- at any point without it being considered a breaking change.
316
-
317
- Parameters
318
- ----------
319
- df
320
- DataFrame to write.
321
- catalog_name
322
- Name of the catalog.
323
- namespace
324
- Name of the namespace (unity schema).
325
- table_name
326
- Name of the table.
327
- delta_mode : {'error', 'append', 'overwrite', 'ignore', 'merge'}
328
- (For delta tables) How to handle existing data.
329
-
330
- - If 'error', throw an error if the table already exists (default).
331
- - If 'append', will add new data.
332
- - If 'overwrite', will replace table with new data.
333
- - If 'ignore', will not write anything if table already exists.
334
- - If 'merge', return a `TableMerger` object to merge data from the DataFrame
335
- with the existing data.
336
- delta_write_options
337
- (For delta tables) Additional keyword arguments while writing a
338
- Delta lake Table.
339
- See a list of supported write options `here <https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake>`__.
340
- delta_merge_options
341
- (For delta tables) Keyword arguments which are required to `MERGE` a
342
- Delta lake Table.
343
- See a list of supported merge options `here <https://delta-io.github.io/delta-rs/api/delta_table/#deltalake.DeltaTable.merge>`__.
344
- storage_options
345
- Options that indicate how to connect to a cloud provider.
346
-
347
- The cloud providers currently supported are AWS, GCP, and Azure.
348
- See supported keys here:
349
-
350
- * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
351
- * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
352
- * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
353
- * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
354
- `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
355
-
356
- If `storage_options` is not provided, Polars will try to infer the
357
- information from environment variables.
358
- credential_provider
359
- Provide a function that can be called to provide cloud storage
360
- credentials. The function is expected to return a dictionary of
361
- credential keys along with an optional credential expiry time.
362
-
363
- .. warning::
364
- This functionality is considered **unstable**. It may be changed
365
- at any point without it being considered a breaking change.
366
- """
367
- table_info = self.get_table_info(catalog_name, namespace, table_name)
368
- storage_location, data_source_format = _extract_location_and_data_format(
369
- table_info, "scan table"
370
- )
371
-
372
- credential_provider, storage_options = self._init_credentials( # type: ignore[assignment]
373
- credential_provider,
374
- storage_options,
375
- table_info,
376
- write=True,
377
- caller_name="Catalog.write_table",
378
- )
379
-
380
- if data_source_format in ["DELTA", "DELTASHARING"]:
381
- return df.write_delta( # type: ignore[misc]
382
- storage_location,
383
- storage_options=storage_options,
384
- credential_provider=credential_provider,
385
- mode=delta_mode,
386
- delta_write_options=delta_write_options,
387
- delta_merge_options=delta_merge_options,
388
- ) # type: ignore[call-overload]
389
-
390
- else:
391
- msg = (
392
- "write_table: table format of "
393
- f"{catalog_name}.{namespace}.{table_name} "
394
- f"({data_source_format}) is unsupported."
395
- )
396
- raise NotImplementedError(msg)
397
-
398
- def create_catalog(
399
- self,
400
- catalog_name: str,
401
- *,
402
- comment: str | None = None,
403
- storage_root: str | None = None,
404
- ) -> CatalogInfo:
405
- """
406
- Create a catalog.
407
-
408
- .. warning::
409
- This functionality is considered **unstable**. It may be changed
410
- at any point without it being considered a breaking change.
411
-
412
- Parameters
413
- ----------
414
- catalog_name
415
- Name of the catalog.
416
- comment
417
- Leaves a comment about the catalog.
418
- storage_root
419
- Base location at which to store the catalog.
420
- """
421
- return self._client.create_catalog(
422
- catalog_name=catalog_name, comment=comment, storage_root=storage_root
423
- )
424
-
425
- def delete_catalog(
426
- self,
427
- catalog_name: str,
428
- *,
429
- force: bool = False,
430
- ) -> None:
431
- """
432
- Delete a catalog.
433
-
434
- Note that depending on the table type and catalog server, this may not
435
- delete the actual data files from storage. For more details, please
436
- consult the documentation of the catalog provider you are using.
437
-
438
- .. warning::
439
- This functionality is considered **unstable**. It may be changed
440
- at any point without it being considered a breaking change.
441
-
442
- Parameters
443
- ----------
444
- catalog_name
445
- Name of the catalog.
446
- force
447
- Forcibly delete the catalog even if it is not empty.
448
- """
449
- self._client.delete_catalog(catalog_name=catalog_name, force=force)
450
-
451
- def create_namespace(
452
- self,
453
- catalog_name: str,
454
- namespace: str,
455
- *,
456
- comment: str | None = None,
457
- storage_root: str | None = None,
458
- ) -> NamespaceInfo:
459
- """
460
- Create a namespace (unity schema) in the catalog.
461
-
462
- .. warning::
463
- This functionality is considered **unstable**. It may be changed
464
- at any point without it being considered a breaking change.
465
-
466
- Parameters
467
- ----------
468
- catalog_name
469
- Name of the catalog.
470
- namespace
471
- Name of the namespace (unity schema).
472
- comment
473
- Leaves a comment about the table.
474
- storage_root
475
- Base location at which to store the namespace.
476
- """
477
- return self._client.create_namespace(
478
- catalog_name=catalog_name,
479
- namespace=namespace,
480
- comment=comment,
481
- storage_root=storage_root,
482
- )
483
-
484
- def delete_namespace(
485
- self,
486
- catalog_name: str,
487
- namespace: str,
488
- *,
489
- force: bool = False,
490
- ) -> None:
491
- """
492
- Delete a namespace (unity schema) in the catalog.
493
-
494
- Note that depending on the table type and catalog server, this may not
495
- delete the actual data files from storage. For more details, please
496
- consult the documentation of the catalog provider you are using.
497
-
498
- .. warning::
499
- This functionality is considered **unstable**. It may be changed
500
- at any point without it being considered a breaking change.
501
-
502
- Parameters
503
- ----------
504
- catalog_name
505
- Name of the catalog.
506
- namespace
507
- Name of the namespace (unity schema).
508
- force
509
- Forcibly delete the namespace even if it is not empty.
510
- """
511
- self._client.delete_namespace(
512
- catalog_name=catalog_name, namespace=namespace, force=force
513
- )
514
-
515
- def create_table(
516
- self,
517
- catalog_name: str,
518
- namespace: str,
519
- table_name: str,
520
- *,
521
- schema: SchemaDict | None,
522
- table_type: TableType,
523
- data_source_format: DataSourceFormat | None = None,
524
- comment: str | None = None,
525
- storage_root: str | None = None,
526
- properties: dict[str, str] | None = None,
527
- ) -> TableInfo:
528
- """
529
- Create a table in the catalog.
530
-
531
- .. warning::
532
- This functionality is considered **unstable**. It may be changed
533
- at any point without it being considered a breaking change.
534
-
535
- Parameters
536
- ----------
537
- catalog_name
538
- Name of the catalog.
539
- namespace
540
- Name of the namespace (unity schema).
541
- table_name
542
- Name of the table.
543
- schema
544
- Schema of the table.
545
- table_type
546
- Type of the table
547
- data_source_format
548
- Storage format of the table.
549
- comment
550
- Leaves a comment about the table.
551
- storage_root
552
- Base location at which to store the table.
553
- properties
554
- Extra key-value metadata to store.
555
- """
556
- return self._client.create_table(
557
- catalog_name=catalog_name,
558
- namespace=namespace,
559
- table_name=table_name,
560
- schema=schema,
561
- table_type=table_type,
562
- data_source_format=data_source_format,
563
- comment=comment,
564
- storage_root=storage_root,
565
- properties=list((properties or {}).items()),
566
- )
567
-
568
- def delete_table(
569
- self,
570
- catalog_name: str,
571
- namespace: str,
572
- table_name: str,
573
- ) -> None:
574
- """
575
- Delete the table stored at this location.
576
-
577
- Note that depending on the table type and catalog server, this may not
578
- delete the actual data files from storage. For more details, please
579
- consult the documentation of the catalog provider you are using.
580
-
581
- If you would like to perform manual deletions, the storage location of
582
- the files can be found using `get_table_info`.
583
-
584
- .. warning::
585
- This functionality is considered **unstable**. It may be changed
586
- at any point without it being considered a breaking change.
587
-
588
- Parameters
589
- ----------
590
- catalog_name
591
- Name of the catalog.
592
- namespace
593
- Name of the namespace (unity schema).
594
- table_name
595
- Name of the table.
596
- """
597
- self._client.delete_table(
598
- catalog_name=catalog_name,
599
- namespace=namespace,
600
- table_name=table_name,
601
- )
602
-
603
- def _init_credentials(
604
- self,
605
- credential_provider: CredentialProviderFunction | Literal["auto"] | None,
606
- storage_options: dict[str, Any] | None,
607
- table_info: TableInfo,
608
- *,
609
- write: bool,
610
- caller_name: str,
611
- ) -> tuple[
612
- CredentialProviderBuilder | None,
613
- dict[str, Any] | None,
614
- ]:
615
- from polars.io.cloud.credential_provider._builder import (
616
- CredentialProviderBuilder,
617
- )
618
-
619
- if credential_provider != "auto":
620
- if credential_provider:
621
- return CredentialProviderBuilder.from_initialized_provider(
622
- credential_provider
623
- ), storage_options
624
- else:
625
- return None, storage_options
626
-
627
- verbose = os.getenv("POLARS_VERBOSE") == "1"
628
-
629
- catalog_credential_provider = CatalogCredentialProvider(
630
- self, table_info.table_id, write=write
631
- )
632
-
633
- try:
634
- v = catalog_credential_provider._credentials_iter()
635
- storage_update_options = next(v)
636
-
637
- if storage_update_options:
638
- storage_options = {**(storage_options or {}), **storage_update_options}
639
-
640
- for _ in v:
641
- pass
642
-
643
- except Exception as e:
644
- if verbose:
645
- table_name = table_info.name
646
- table_id = table_info.table_id
647
- msg = (
648
- f"error auto-initializing CatalogCredentialProvider: {e!r} "
649
- f"{table_name = } ({table_id = }) ({write = })"
650
- )
651
- print(msg, file=sys.stderr)
652
- else:
653
- if verbose:
654
- table_name = table_info.name
655
- table_id = table_info.table_id
656
- msg = (
657
- "auto-selected CatalogCredentialProvider for "
658
- f"{table_name = } ({table_id = })"
659
- )
660
- print(msg, file=sys.stderr)
661
-
662
- return CredentialProviderBuilder.from_initialized_provider(
663
- catalog_credential_provider
664
- ), storage_options
665
-
666
- # This should generally not happen, but if using the temporary
667
- # credentials API fails for whatever reason, we fallback to our built-in
668
- # credential provider resolution.
669
-
670
- from polars.io.cloud.credential_provider._builder import (
671
- _init_credential_provider_builder,
672
- )
673
-
674
- return _init_credential_provider_builder(
675
- "auto", table_info.storage_location, storage_options, caller_name
676
- ), storage_options
677
-
678
- @classmethod
679
- def _get_databricks_token(cls) -> str:
680
- if importlib.util.find_spec("databricks.sdk") is None:
681
- msg = "could not get Databricks token: databricks-sdk is not installed"
682
- raise ImportError(msg)
683
-
684
- # We code like this to bypass linting
685
- m = importlib.import_module("databricks.sdk.core").__dict__
686
-
687
- return m["DefaultCredentials"]()(m["Config"]())()["Authorization"][7:]
688
-
689
-
690
- class CatalogCredentialProvider:
691
- """Retrieves credentials from the Unity catalog temporary credentials API."""
692
-
693
- def __init__(self, catalog: Catalog, table_id: str, *, write: bool) -> None:
694
- self.catalog = catalog
695
- self.table_id = table_id
696
- self.write = write
697
-
698
- def __call__(self) -> CredentialProviderFunctionReturn: # noqa: D102
699
- _, (creds, expiry) = self._credentials_iter()
700
- return creds, expiry
701
-
702
- def _credentials_iter(
703
- self,
704
- ) -> Generator[Any]:
705
- creds, storage_update_options, expiry = self.catalog._get_table_credentials(
706
- self.table_id, write=self.write
707
- )
708
-
709
- yield storage_update_options
710
-
711
- if not creds:
712
- table_id = self.table_id
713
- msg = (
714
- "did not receive credentials from temporary credentials API for "
715
- f"{table_id = }"
716
- )
717
- raise Exception(msg) # noqa: TRY002
718
-
719
- yield creds, expiry
720
-
721
-
722
- def _extract_location_and_data_format(
723
- table_info: TableInfo, operation: str
724
- ) -> tuple[str, DataSourceFormat]:
725
- if table_info.storage_location is None:
726
- msg = f"cannot {operation}: no storage_location found"
727
- raise ValueError(msg)
728
-
729
- if table_info.data_source_format is None:
730
- msg = f"cannot {operation}: no data_source_format found"
731
- raise ValueError(msg)
732
-
733
- return table_info.storage_location, table_info.data_source_format