polars-runtime-compat 1.34.0b2-cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b4-cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of polars-runtime-compat has been flagged for review.
Files changed (203)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -96
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
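Every entry under polars/ in this listing is a pure deletion (+0 lines added), and the new RECORD is only 6 lines long, which indicates the b4 wheel ships little more than the recompiled _polars_runtime_compat.abi3.so plus packaging metadata, with the entire pure-Python polars/ tree removed. A listing like the one above can be reproduced locally with the standard library alone, since a wheel is an ordinary zip archive. Below is a minimal sketch (not part of this diff); the wheel filenames are assumptions, adjust them to the files you actually downloaded:

import zipfile

# Hypothetical local paths to the two wheels pulled from the registry.
OLD_WHEEL = "polars_runtime_compat-1.34.0b2-cp39-abi3-macosx_11_0_arm64.whl"
NEW_WHEEL = "polars_runtime_compat-1.34.0b4-cp39-abi3-macosx_11_0_arm64.whl"


def wheel_members(path: str) -> set[str]:
    # A wheel is a plain zip archive; namelist() returns every packaged file.
    with zipfile.ZipFile(path) as wheel:
        return set(wheel.namelist())


old_files = wheel_members(OLD_WHEEL)
new_files = wheel_members(NEW_WHEEL)

print("removed:", len(old_files - new_files))  # expect the polars/ tree here
print("added:  ", len(new_files - old_files))
for name in sorted(old_files - new_files):
    print("-", name)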
polars/io/delta.py DELETED
@@ -1,499 +0,0 @@
-from __future__ import annotations
-
-import warnings
-from datetime import datetime
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
-
-from polars._dependencies import _DELTALAKE_AVAILABLE, deltalake
-from polars.datatypes import Null, Time
-from polars.datatypes.convert import unpack_dtypes
-from polars.io.cloud._utils import _get_path_scheme
-from polars.io.parquet import scan_parquet
-from polars.io.pyarrow_dataset.functions import scan_pyarrow_dataset
-from polars.io.scan_options.cast_options import ScanCastOptions
-from polars.schema import Schema
-
-if TYPE_CHECKING:
-    from typing import Literal
-
-    from deltalake import DeltaTable
-
-    from polars import DataFrame, DataType, LazyFrame
-    from polars.io.cloud import CredentialProviderFunction
-
-
-def read_delta(
-    source: str | Path | DeltaTable,
-    *,
-    version: int | str | datetime | None = None,
-    columns: list[str] | None = None,
-    rechunk: bool | None = None,
-    storage_options: dict[str, Any] | None = None,
-    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
-    delta_table_options: dict[str, Any] | None = None,
-    use_pyarrow: bool = False,
-    pyarrow_options: dict[str, Any] | None = None,
-) -> DataFrame:
-    """
-    Reads into a DataFrame from a Delta lake table.
-
-    Parameters
-    ----------
-    source
-        DeltaTable or a Path or URI to the root of the Delta lake table.
-
-        Note: For Local filesystem, absolute and relative paths are supported but
-        for the supported object storages - GCS, Azure and S3 full URI must be provided.
-    version
-        Numerical version or timestamp version of the Delta lake table.
-
-        Note: If `version` is not provided, the latest version of delta lake
-        table is read.
-    columns
-        Columns to select. Accepts a list of column names.
-    rechunk
-        Make sure that all columns are contiguous in memory by
-        aggregating the chunks into a single array.
-    storage_options
-        Extra options for the storage backends supported by `deltalake`.
-        For cloud storages, this may include configurations for authentication etc.
-
-        More info is available `here
-        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
-    credential_provider
-        Provide a function that can be called to provide cloud storage
-        credentials. The function is expected to return a dictionary of
-        credential keys along with an optional credential expiry time.
-
-        .. warning::
-            This functionality is considered **unstable**. It may be changed
-            at any point without it being considered a breaking change.
-    delta_table_options
-        Additional keyword arguments while reading a Delta lake Table.
-    use_pyarrow
-        Flag to enable pyarrow dataset reads.
-    pyarrow_options
-        Keyword arguments while converting a Delta lake Table to pyarrow table.
-
-    Returns
-    -------
-    DataFrame
-
-    Examples
-    --------
-    Reads a Delta table from local filesystem.
-    Note: Since version is not provided, the latest version of the delta table is read.
-
-    >>> table_path = "/path/to/delta-table/"
-    >>> pl.read_delta(table_path)  # doctest: +SKIP
-
-    Reads a specific version of the Delta table from local filesystem.
-    Note: This will fail if the provided version of the delta table does not exist.
-
-    >>> pl.read_delta(table_path, version=1)  # doctest: +SKIP
-
-    Time travel a delta table from local filesystem using a timestamp version.
-
-    >>> pl.read_delta(
-    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
-    ... )  # doctest: +SKIP
-
-    Reads a Delta table from AWS S3.
-    See a list of supported storage options for S3 `here
-    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.
-
-    >>> table_path = "s3://bucket/path/to/delta-table/"
-    >>> storage_options = {
-    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
-    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
-    ... }
-    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP
-
-    Reads a Delta table from Google Cloud storage (GCS).
-    See a list of supported storage options for GCS `here
-    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.
-
-    >>> table_path = "gs://bucket/path/to/delta-table/"
-    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
-    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP
-
-    Reads a Delta table from Azure.
-
-    Following type of table paths are supported,
-
-    * az://<container>/<path>
-    * adl://<container>/<path>
-    * abfs://<container>/<path>
-
-    See a list of supported storage options for Azure `here
-    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.
-
-    >>> table_path = "az://container/path/to/delta-table/"
-    >>> storage_options = {
-    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
-    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
-    ... }
-    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP
-
-    Reads a Delta table with additional delta specific options. In the below example,
-    `without_files` option is used which loads the table without file tracking
-    information.
-
-    >>> table_path = "/path/to/delta-table/"
-    >>> delta_table_options = {"without_files": True}
-    >>> pl.read_delta(
-    ...     table_path, delta_table_options=delta_table_options
-    ... )  # doctest: +SKIP
-    """
-    df = scan_delta(
-        source=source,
-        version=version,
-        storage_options=storage_options,
-        credential_provider=credential_provider,
-        delta_table_options=delta_table_options,
-        use_pyarrow=use_pyarrow,
-        pyarrow_options=pyarrow_options,
-        rechunk=rechunk,
-    )
-
-    if columns is not None:
-        df = df.select(columns)
-    return df.collect()
-
-
-def scan_delta(
-    source: str | Path | DeltaTable,
-    *,
-    version: int | str | datetime | None = None,
-    storage_options: dict[str, Any] | None = None,
-    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
-    delta_table_options: dict[str, Any] | None = None,
-    use_pyarrow: bool = False,
-    pyarrow_options: dict[str, Any] | None = None,
-    rechunk: bool | None = None,
-) -> LazyFrame:
-    """
-    Lazily read from a Delta lake table.
-
-    Parameters
-    ----------
-    source
-        DeltaTable or a Path or URI to the root of the Delta lake table.
-
-        Note: For Local filesystem, absolute and relative paths are supported but
-        for the supported object storages - GCS, Azure and S3 full URI must be provided.
-    version
-        Numerical version or timestamp version of the Delta lake table.
-
-        Note: If `version` is not provided, the latest version of delta lake
-        table is read.
-    storage_options
-        Extra options for the storage backends supported by `deltalake`.
-        For cloud storages, this may include configurations for authentication etc.
-
-        More info is available `here
-        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
-    credential_provider
-        Provide a function that can be called to provide cloud storage
-        credentials. The function is expected to return a dictionary of
-        credential keys along with an optional credential expiry time.
-
-        .. warning::
-            This functionality is considered **unstable**. It may be changed
-            at any point without it being considered a breaking change.
-    delta_table_options
-        Additional keyword arguments while reading a Delta lake Table.
-    use_pyarrow
-        Flag to enable pyarrow dataset reads.
-    pyarrow_options
-        Keyword arguments while converting a Delta lake Table to pyarrow table.
-        Use this parameter when filtering on partitioned columns or to read
-        from a 'fsspec' supported filesystem.
-    rechunk
-        Make sure that all columns are contiguous in memory by
-        aggregating the chunks into a single array.
-
-    Returns
-    -------
-    LazyFrame
-
-    Examples
-    --------
-    Creates a scan for a Delta table from local filesystem.
-    Note: Since version is not provided, the latest version of the delta table is read.
-
-    >>> table_path = "/path/to/delta-table/"
-    >>> pl.scan_delta(table_path).collect()  # doctest: +SKIP
-
-    Creates a scan for a specific version of the Delta table from local filesystem.
-    Note: This will fail if the provided version of the delta table does not exist.
-
-    >>> pl.scan_delta(table_path, version=1).collect()  # doctest: +SKIP
-
-    Time travel a delta table from local filesystem using a timestamp version.
-
-    >>> pl.scan_delta(
-    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
-    ... ).collect()  # doctest: +SKIP
-
-    Creates a scan for a Delta table from AWS S3.
-    See a list of supported storage options for S3 `here
-    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.
-
-    >>> table_path = "s3://bucket/path/to/delta-table/"
-    >>> storage_options = {
-    ...     "AWS_REGION": "eu-central-1",
-    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
-    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
-    ... }
-    >>> pl.scan_delta(
-    ...     table_path, storage_options=storage_options
-    ... ).collect()  # doctest: +SKIP
-
-    Creates a scan for a Delta table from Google Cloud storage (GCS).
-    See a list of supported storage options for GCS `here
-    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.
-
-    >>> table_path = "gs://bucket/path/to/delta-table/"
-    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
-    >>> pl.scan_delta(
-    ...     table_path, storage_options=storage_options
-    ... ).collect()  # doctest: +SKIP
-
-    Creates a scan for a Delta table from Azure.
-    Supported options for Azure are available `here
-    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.
-
-    Following type of table paths are supported,
-
-    * az://<container>/<path>
-    * adl://<container>/<path>
-    * abfs[s]://<container>/<path>
-
-    >>> table_path = "az://container/path/to/delta-table/"
-    >>> storage_options = {
-    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
-    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
-    ... }
-    >>> pl.scan_delta(
-    ...     table_path, storage_options=storage_options
-    ... ).collect()  # doctest: +SKIP
-
-    Creates a scan for a Delta table with additional delta specific options.
-    In the below example, `without_files` option is used which loads the table without
-    file tracking information.
-
-    >>> table_path = "/path/to/delta-table/"
-    >>> delta_table_options = {"without_files": True}
-    >>> pl.scan_delta(
-    ...     table_path, delta_table_options=delta_table_options
-    ... ).collect()  # doctest: +SKIP
-    """
-    _check_if_delta_available()
-
-    credential_provider_creds = {}
-
-    from deltalake import DeltaTable
-
-    from polars.io.cloud.credential_provider._builder import (
-        _init_credential_provider_builder,
-    )
-    from polars.io.cloud.credential_provider._providers import (
-        _get_credentials_from_provider_expiry_aware,
-    )
-
-    if not isinstance(source, DeltaTable):
-        credential_provider_builder = _init_credential_provider_builder(
-            credential_provider, source, storage_options, "scan_delta"
-        )
-    elif credential_provider is not None and credential_provider != "auto":
-        msg = "cannot use credential_provider when passing a DeltaTable object"
-        raise ValueError(msg)
-    else:
-        credential_provider_builder = None
-
-    del credential_provider
-
-    if credential_provider_builder and (
-        provider := credential_provider_builder.build_credential_provider()
-    ):
-        credential_provider_creds = (
-            _get_credentials_from_provider_expiry_aware(provider) or {}
-        )
-
-    dl_tbl = _get_delta_lake_table(
-        table_path=source,
-        version=version,
-        storage_options=(
-            {**(storage_options or {}), **credential_provider_creds}
-            if storage_options is not None or credential_provider_builder is not None
-            else None
-        ),
-        delta_table_options=delta_table_options,
-    )
-
-    if isinstance(source, DeltaTable) and (
-        source._storage_options is not None or storage_options is not None
-    ):
-        storage_options = {**(source._storage_options or {}), **(storage_options or {})}
-
-    if use_pyarrow:
-        pyarrow_options = pyarrow_options or {}
-        pa_ds = dl_tbl.to_pyarrow_dataset(**pyarrow_options)
-        return scan_pyarrow_dataset(pa_ds)
-
-    if pyarrow_options is not None:
-        msg = "To make use of pyarrow_options, set use_pyarrow to True"
-        raise ValueError(msg)
-
-    from deltalake.exceptions import DeltaProtocolError
-    from deltalake.table import (
-        MAX_SUPPORTED_READER_VERSION,
-        NOT_SUPPORTED_READER_VERSION,
-        SUPPORTED_READER_FEATURES,
-    )
-
-    table_protocol = dl_tbl.protocol()
-    if (
-        table_protocol.min_reader_version > MAX_SUPPORTED_READER_VERSION
-        or table_protocol.min_reader_version == NOT_SUPPORTED_READER_VERSION
-    ):
-        msg = (
-            f"The table's minimum reader version is {table_protocol.min_reader_version} "
-            f"but polars delta scanner only supports version 1 or {MAX_SUPPORTED_READER_VERSION} with these reader features: {SUPPORTED_READER_FEATURES}"
-        )
-        raise DeltaProtocolError(msg)
-    if (
-        table_protocol.min_reader_version >= 3
-        and table_protocol.reader_features is not None
-    ):
-        missing_features = {*table_protocol.reader_features}.difference(
-            SUPPORTED_READER_FEATURES
-        )
-        if len(missing_features) > 0:
-            msg = f"The table has set these reader features: {missing_features} but these are not yet supported by the polars delta scanner."
-            raise DeltaProtocolError(msg)
-
-    delta_schema = dl_tbl.schema()
-    polars_schema = Schema(delta_schema)
-    partition_columns = dl_tbl.metadata().partition_columns
-
-    def _split_schema(
-        schema: Schema, partition_columns: list[str]
-    ) -> tuple[Schema, Schema]:
-        if len(partition_columns) == 0:
-            return schema, Schema([])
-        main_schema = []
-        hive_schema = []
-
-        for name, dtype in schema.items():
-            if name in partition_columns:
-                hive_schema.append((name, dtype))
-            else:
-                main_schema.append((name, dtype))
-
-        return Schema(main_schema), Schema(hive_schema)
-
-    # Required because main_schema cannot contain hive columns currently
-    main_schema, hive_schema = _split_schema(polars_schema, partition_columns)
-
-    file_uris = dl_tbl.file_uris()
-
-    # LakeFS has an S3 compatible API, for reading therefore it's safe to do this.
-    # Deltalake internally has an integration for writing commits
-    if dl_tbl.table_uri.startswith("lakefs://"):
-        file_uris = [file_uri.replace("lakefs://", "s3://") for file_uri in file_uris]
-
-    return scan_parquet(
-        file_uris,
-        schema=main_schema,
-        hive_schema=hive_schema if len(partition_columns) > 0 else None,
-        cast_options=ScanCastOptions._default_iceberg(),
-        missing_columns="insert",
-        extra_columns="ignore",
-        hive_partitioning=len(partition_columns) > 0,
-        storage_options=storage_options,
-        credential_provider=credential_provider_builder,  # type: ignore[arg-type]
-        rechunk=rechunk or False,
-    )
-
-
-def _resolve_delta_lake_uri(table_uri: str | Path, *, strict: bool = True) -> str:
-    resolved_uri = str(
-        Path(table_uri).expanduser().resolve(strict)
-        if _get_path_scheme(table_uri) is None
-        else table_uri
-    )
-
-    return resolved_uri
-
-
-def _get_delta_lake_table(
-    table_path: str | Path | DeltaTable,
-    version: int | str | datetime | None = None,
-    storage_options: dict[str, Any] | None = None,
-    delta_table_options: dict[str, Any] | None = None,
-) -> deltalake.DeltaTable:
-    """
-    Initialize a Delta lake table for use in read and scan operations.
-
-    Notes
-    -----
-    Make sure to install deltalake>=0.8.0. Read the documentation
-    `here <https://delta-io.github.io/delta-rs/usage/installation/>`_.
-    """
-    _check_if_delta_available()
-
-    if isinstance(table_path, deltalake.DeltaTable):
-        if any(
-            [
-                version is not None,
-                storage_options is not None,
-                delta_table_options is not None,
-            ]
-        ):
-            warnings.warn(
-                """When supplying a DeltaTable directly, `version`, `storage_options`, and `delta_table_options` are ignored.
-                To silence this warning, don't supply those parameters.""",
-                RuntimeWarning,
-                stacklevel=1,
-            )
-        return table_path
-    if delta_table_options is None:
-        delta_table_options = {}
-    resolved_uri = _resolve_delta_lake_uri(table_path)
-    if not isinstance(version, (str, datetime)):
-        dl_tbl = deltalake.DeltaTable(
-            resolved_uri,
-            version=version,
-            storage_options=storage_options,
-            **delta_table_options,
-        )
-    else:
-        dl_tbl = deltalake.DeltaTable(
-            table_path,
-            storage_options=storage_options,
-            **delta_table_options,
-        )
-        dl_tbl.load_as_version(version)
-
-    return dl_tbl
-
-
-def _check_if_delta_available() -> None:
-    if not _DELTALAKE_AVAILABLE:
-        msg = "deltalake is not installed\n\nPlease run: pip install deltalake"
-        raise ModuleNotFoundError(msg)
-
-
-def _check_for_unsupported_types(dtypes: list[DataType]) -> None:
-    schema_dtypes = unpack_dtypes(*dtypes)
-    unsupported_types = {Time, Null}
-    # Note that this overlap check does NOT work correctly for Categorical, so
-    # if Categorical is added back to unsupported_types a different check will
-    # need to be used.
-
-    if overlap := schema_dtypes & unsupported_types:
-        msg = f"dataframe contains unsupported data types: {overlap!r}"
-        raise TypeError(msg)
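The deleted scan_delta above boils down to one technique: list the Delta table's parquet data files via deltalake, split the partition columns out into a hive schema, and delegate the actual scan to scan_parquet. Below is a minimal sketch of that core path (not part of this diff), assuming deltalake and polars are installed and that table_path points at a real local table; the credential handling, schema casting, and reader-protocol checks shown above are deliberately omitted:

from deltalake import DeltaTable

import polars as pl

table_path = "/path/to/delta-table/"  # hypothetical local table
dl_tbl = DeltaTable(table_path)

# Partition values live in the file paths rather than in the parquet files
# themselves, so they are surfaced to polars as hive partition columns.
partition_columns = dl_tbl.metadata().partition_columns

lf = pl.scan_parquet(
    dl_tbl.file_uris(),  # parquet data files of the current table snapshot
    hive_partitioning=len(partition_columns) > 0,
)
print(lf.collect_schema())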
polars/io/iceberg/__init__.py DELETED
@@ -1,3 +0,0 @@
-from polars.io.iceberg.functions import scan_iceberg
-
-__all__ = ["scan_iceberg"]