polars-runtime-compat 1.34.0b2 (cp39-abi3-macosx_10_12_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,722 @@
+from __future__ import annotations
+
+import contextlib
+import io
+from collections.abc import Sequence
+from pathlib import Path
+from typing import IO, TYPE_CHECKING, Any
+
+import polars.functions as F
+from polars import concat as plconcat
+from polars._dependencies import import_optional
+from polars._utils.deprecation import (
+    deprecate_renamed_parameter,
+    issue_deprecation_warning,
+)
+from polars._utils.unstable import issue_unstable_warning
+from polars._utils.various import (
+    is_int_sequence,
+    is_path_or_str_sequence,
+    normalize_filepath,
+)
+from polars._utils.wrap import wrap_ldf
+from polars.convert import from_arrow
+from polars.io._utils import (
+    prepare_file_arg,
+)
+from polars.io.cloud.credential_provider._builder import (
+    _init_credential_provider_builder,
+)
+from polars.io.scan_options._options import ScanOptions
+
+with contextlib.suppress(ImportError):
+    from polars._plr import PyLazyFrame
+    from polars._plr import read_parquet_metadata as _read_parquet_metadata
+
+if TYPE_CHECKING:
+    from typing import Literal
+
+    from polars import DataFrame, DataType, LazyFrame
+    from polars._typing import (
+        ColumnMapping,
+        DefaultFieldValues,
+        DeletionFiles,
+        FileSource,
+        ParallelStrategy,
+        SchemaDict,
+    )
+    from polars.io.cloud import CredentialProviderFunction
+    from polars.io.scan_options import ScanCastOptions
+
+
+@deprecate_renamed_parameter("row_count_name", "row_index_name", version="0.20.4")
+@deprecate_renamed_parameter("row_count_offset", "row_index_offset", version="0.20.4")
+def read_parquet(
+    source: FileSource,
+    *,
+    columns: list[int] | list[str] | None = None,
+    n_rows: int | None = None,
+    row_index_name: str | None = None,
+    row_index_offset: int = 0,
+    parallel: ParallelStrategy = "auto",
+    use_statistics: bool = True,
+    hive_partitioning: bool | None = None,
+    glob: bool = True,
+    schema: SchemaDict | None = None,
+    hive_schema: SchemaDict | None = None,
+    try_parse_hive_dates: bool = True,
+    rechunk: bool = False,
+    low_memory: bool = False,
+    storage_options: dict[str, Any] | None = None,
+    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
+    retries: int = 2,
+    use_pyarrow: bool = False,
+    pyarrow_options: dict[str, Any] | None = None,
+    memory_map: bool = True,
+    include_file_paths: str | None = None,
+    missing_columns: Literal["insert", "raise"] = "raise",
+    allow_missing_columns: bool | None = None,
+) -> DataFrame:
+    """
+    Read into a DataFrame from a parquet file.
+
+    .. versionchanged:: 0.20.4
+        * The `row_count_name` parameter was renamed `row_index_name`.
+        * The `row_count_offset` parameter was renamed `row_index_offset`.
+
+    Parameters
+    ----------
+    source
+        Path(s) to a file or directory
+        When needing to authenticate for scanning cloud locations, see the
+        `storage_options` parameter.
+
+        File-like objects are supported (by "file-like object" we refer to objects
+        that have a `read()` method, such as a file handler like the builtin `open`
+        function, or a `BytesIO` instance). For file-like objects, the stream position
+        may not be updated accordingly after reading.
+    columns
+        Columns to select. Accepts a list of column indices (starting at zero) or a list
+        of column names.
+    n_rows
+        Stop reading from parquet file after reading `n_rows`.
+        Only valid when `use_pyarrow=False`.
+    row_index_name
+        Insert a row index column with the given name into the DataFrame as the first
+        column. If set to `None` (default), no row index column is created.
+    row_index_offset
+        Start the row index at this offset. Cannot be negative.
+        Only used if `row_index_name` is set.
+    parallel : {'auto', 'columns', 'row_groups', 'none'}
+        This determines the direction of parallelism. 'auto' will try to determine the
+        optimal direction.
+    use_statistics
+        Use statistics in the parquet to determine if pages
+        can be skipped from reading.
+    hive_partitioning
+        Infer statistics and schema from Hive partitioned URL and use them
+        to prune reads. This is unset by default (i.e. `None`), meaning it is
+        automatically enabled when a single directory is passed, and otherwise
+        disabled.
+    glob
+        Expand path given via globbing rules.
+    schema
+        Specify the datatypes of the columns. The datatypes must match the
+        datatypes in the file(s). If there are extra columns that are not in the
+        file(s), consider also passing `missing_columns='insert'`.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    hive_schema
+        The column names and data types of the columns by which the data is partitioned.
+        If set to `None` (default), the schema of the Hive partitions is inferred.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    try_parse_hive_dates
+        Whether to try parsing hive values as date/datetime types.
+    rechunk
+        Make sure that all columns are contiguous in memory by
+        aggregating the chunks into a single array.
+    low_memory
+        Reduce memory pressure at the expense of performance.
+    storage_options
+        Options that indicate how to connect to a cloud provider.
+
+        The cloud providers currently supported are AWS, GCP, and Azure.
+        See supported keys here:
+
+        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
+        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
+        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
+        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
+          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+
+        If `storage_options` is not provided, Polars will try to infer the information
+        from environment variables.
+    credential_provider
+        Provide a function that can be called to provide cloud storage
+        credentials. The function is expected to return a dictionary of
+        credential keys along with an optional credential expiry time.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    retries
+        Number of retries if accessing a cloud instance fails.
+    use_pyarrow
+        Use PyArrow instead of the Rust-native Parquet reader. The PyArrow reader is
+        more stable.
+    pyarrow_options
+        Keyword arguments for `pyarrow.parquet.read_table
+        <https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html>`_.
+    memory_map
+        Memory map underlying file. This will likely increase performance.
+        Only used when `use_pyarrow=True`.
+    include_file_paths
+        Include the path of the source file(s) as a column with this name.
+        Only valid when `use_pyarrow=False`.
+    missing_columns
+        Configuration for behavior when columns defined in the schema
+        are missing from the data:
+
+        * `insert`: Inserts the missing columns using NULLs as the row values.
+        * `raise`: Raises an error.
+
+    allow_missing_columns
+        When reading a list of parquet files, if a column existing in the first
+        file cannot be found in subsequent files, the default behavior is to
+        raise an error. However, if `allow_missing_columns` is set to
+        `True`, a full-NULL column is returned instead of erroring for the files
+        that do not contain the column.
+
+        .. deprecated:: 1.30.0
+            Use the parameter `missing_columns` instead and pass one of
+            `('insert', 'raise')`.
+
+    Returns
+    -------
+    DataFrame
+
+    See Also
+    --------
+    scan_parquet: Lazily read from a parquet file or multiple files via glob patterns.
+    scan_pyarrow_dataset
+
+    Warnings
+    --------
+    Calling `read_parquet().lazy()` is an antipattern as this forces Polars to
+    materialize a full parquet file and therefore cannot push any optimizations
+    into the reader. Therefore always prefer `scan_parquet` if you want to work
+    with `LazyFrame` s.
+
+    """
+    if schema is not None:
+        msg = "the `schema` parameter of `read_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    if hive_schema is not None:
+        msg = "the `hive_schema` parameter of `read_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    # Dispatch to pyarrow if requested
+    if use_pyarrow:
+        if n_rows is not None:
+            msg = "`n_rows` cannot be used with `use_pyarrow=True`"
+            raise ValueError(msg)
+        if include_file_paths is not None:
+            msg = "`include_file_paths` cannot be used with `use_pyarrow=True`"
+            raise ValueError(msg)
+        if schema is not None:
+            msg = "`schema` cannot be used with `use_pyarrow=True`"
+            raise ValueError(msg)
+        if hive_schema is not None:
+            msg = (
+                "cannot use `hive_partitions` with `use_pyarrow=True`"
+                "\n\nHint: Pass `pyarrow_options` instead with a 'partitioning' entry."
+            )
+            raise TypeError(msg)
+        return _read_parquet_with_pyarrow(
+            source,
+            columns=columns,
+            storage_options=storage_options,
+            pyarrow_options=pyarrow_options,
+            memory_map=memory_map,
+            rechunk=rechunk,
+        )
+
+    if allow_missing_columns is not None:
+        issue_deprecation_warning(
+            "the parameter `allow_missing_columns` for `read_parquet` is deprecated. "
+            "Use the parameter `missing_columns` instead and pass one of "
+            "`('insert', 'raise')`.",
+            version="1.30.0",
+        )
+
+        missing_columns = "insert" if allow_missing_columns else "raise"
+
+    # For other inputs, defer to `scan_parquet`
+    lf = scan_parquet(
+        source,
+        n_rows=n_rows,
+        row_index_name=row_index_name,
+        row_index_offset=row_index_offset,
+        parallel=parallel,
+        use_statistics=use_statistics,
+        hive_partitioning=hive_partitioning,
+        schema=schema,
+        hive_schema=hive_schema,
+        try_parse_hive_dates=try_parse_hive_dates,
+        rechunk=rechunk,
+        low_memory=low_memory,
+        cache=False,
+        storage_options=storage_options,
+        credential_provider=credential_provider,
+        retries=retries,
+        glob=glob,
+        include_file_paths=include_file_paths,
+        missing_columns=missing_columns,
+    )
+
+    if columns is not None:
+        if is_int_sequence(columns):
+            lf = lf.select(F.nth(columns))
+        else:
+            lf = lf.select(columns)
+
+    return lf.collect()
+
+
+def _read_parquet_with_pyarrow(
+    source: str
+    | Path
+    | IO[bytes]
+    | bytes
+    | list[str]
+    | list[Path]
+    | list[IO[bytes]]
+    | list[bytes],
+    *,
+    columns: list[int] | list[str] | None = None,
+    storage_options: dict[str, Any] | None = None,
+    pyarrow_options: dict[str, Any] | None = None,
+    memory_map: bool = True,
+    rechunk: bool = True,
+) -> DataFrame:
+    pyarrow_parquet = import_optional(
+        "pyarrow.parquet",
+        err_prefix="",
+        err_suffix="is required when using `read_parquet(..., use_pyarrow=True)`",
+    )
+    pyarrow_options = pyarrow_options or {}
+
+    sources: list[str | Path | IO[bytes] | bytes | list[str] | list[Path]] = []
+    if isinstance(source, list):
+        if len(source) > 0 and isinstance(source[0], (bytes, io.IOBase)):
+            sources = source  # type: ignore[assignment]
+        else:
+            sources = [source]  # type: ignore[list-item]
+    else:
+        sources = [source]
+
+    results: list[DataFrame] = []
+    for source in sources:
+        with prepare_file_arg(
+            source,  # type: ignore[arg-type]
+            use_pyarrow=True,
+            storage_options=storage_options,
+        ) as source_prep:
+            pa_table = pyarrow_parquet.read_table(
+                source_prep,
+                memory_map=memory_map,
+                columns=columns,
+                **pyarrow_options,
+            )
+            result = from_arrow(pa_table, rechunk=rechunk)
+            results.append(result)  # type: ignore[arg-type]
+
+    if len(results) == 1:
+        return results[0]
+    else:
+        return plconcat(results)
+
+
+def read_parquet_schema(source: str | Path | IO[bytes] | bytes) -> dict[str, DataType]:
+    """
+    Get the schema of a Parquet file without reading data.
+
+    If you would like to read the schema of a cloud file with authentication
+    configuration, it is recommended use `scan_parquet` - e.g.
+    `scan_parquet(..., storage_options=...).collect_schema()`.
+
+    Parameters
+    ----------
+    source
+        Path to a file or a file-like object (by "file-like object" we refer to objects
+        that have a `read()` method, such as a file handler like the builtin `open`
+        function, or a `BytesIO` instance). For file-like objects, the stream position
+        may not be updated accordingly after reading.
+
+    Returns
+    -------
+    dict
+        Dictionary mapping column names to datatypes
+
+    See Also
+    --------
+    scan_parquet
+    """
+    return scan_parquet(source).collect_schema()
+
+
+def read_parquet_metadata(
+    source: str | Path | IO[bytes] | bytes,
+    storage_options: dict[str, Any] | None = None,
+    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
+    retries: int = 2,
+) -> dict[str, str]:
+    """
+    Get file-level custom metadata of a Parquet file without reading data.
+
+    .. warning::
+        This functionality is considered **experimental**. It may be removed or
+        changed at any point without it being considered a breaking change.
+
+    Parameters
+    ----------
+    source
+        Path to a file or a file-like object (by "file-like object" we refer to objects
+        that have a `read()` method, such as a file handler like the builtin `open`
+        function, or a `BytesIO` instance). For file-like objects, the stream position
+        may not be updated accordingly after reading.
+    storage_options
+        Options that indicate how to connect to a cloud provider.
+
+        The cloud providers currently supported are AWS, GCP, and Azure.
+        See supported keys here:
+
+        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
+        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
+        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
+        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
+          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+
+        If `storage_options` is not provided, Polars will try to infer the information
+        from environment variables.
+    credential_provider
+        Provide a function that can be called to provide cloud storage
+        credentials. The function is expected to return a dictionary of
+        credential keys along with an optional credential expiry time.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    retries
+        Number of retries if accessing a cloud instance fails.
+
+    Returns
+    -------
+    dict
+        Dictionary with the metadata. Empty if no custom metadata is available.
+    """
+    if isinstance(source, (str, Path)):
+        source = normalize_filepath(source, check_not_directory=False)
+
+    credential_provider_builder = _init_credential_provider_builder(
+        credential_provider, source, storage_options, "scan_parquet"
+    )
+    del credential_provider
+
+    return _read_parquet_metadata(
+        source,
+        storage_options=(
+            list(storage_options.items()) if storage_options is not None else None
+        ),
+        credential_provider=credential_provider_builder,
+        retries=retries,
+    )
+
+
+@deprecate_renamed_parameter("row_count_name", "row_index_name", version="0.20.4")
+@deprecate_renamed_parameter("row_count_offset", "row_index_offset", version="0.20.4")
+def scan_parquet(
+    source: FileSource,
+    *,
+    n_rows: int | None = None,
+    row_index_name: str | None = None,
+    row_index_offset: int = 0,
+    parallel: ParallelStrategy = "auto",
+    use_statistics: bool = True,
+    hive_partitioning: bool | None = None,
+    glob: bool = True,
+    hidden_file_prefix: str | Sequence[str] | None = None,
+    schema: SchemaDict | None = None,
+    hive_schema: SchemaDict | None = None,
+    try_parse_hive_dates: bool = True,
+    rechunk: bool = False,
+    low_memory: bool = False,
+    cache: bool = True,
+    storage_options: dict[str, Any] | None = None,
+    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
+    retries: int = 2,
+    include_file_paths: str | None = None,
+    missing_columns: Literal["insert", "raise"] = "raise",
+    allow_missing_columns: bool | None = None,
+    extra_columns: Literal["ignore", "raise"] = "raise",
+    cast_options: ScanCastOptions | None = None,
+    _column_mapping: ColumnMapping | None = None,
+    _default_values: DefaultFieldValues | None = None,
+    _deletion_files: DeletionFiles | None = None,
+    _table_statistics: DataFrame | None = None,
+) -> LazyFrame:
+    """
+    Lazily read from a local or cloud-hosted parquet file (or files).
+
+    This function allows the query optimizer to push down predicates and projections to
+    the scan level, typically increasing performance and reducing memory overhead.
+
+    .. versionchanged:: 0.20.4
+        * The `row_count_name` parameter was renamed `row_index_name`.
+        * The `row_count_offset` parameter was renamed `row_index_offset`.
+
+    .. versionchanged:: 1.30.0
+        * The `allow_missing_columns` is deprecated in favor of `missing_columns`.
+
+    Parameters
+    ----------
+    source
+        Path(s) to a file or directory
+        When needing to authenticate for scanning cloud locations, see the
+        `storage_options` parameter.
+    n_rows
+        Stop reading from parquet file after reading `n_rows`.
+    row_index_name
+        If not None, this will insert a row index column with the given name into the
+        DataFrame
+    row_index_offset
+        Offset to start the row index column (only used if the name is set)
+    parallel : {'auto', 'columns', 'row_groups', 'prefiltered', 'none'}
+        This determines the direction and strategy of parallelism. 'auto' will
+        try to determine the optimal direction.
+
+        The `prefiltered` strategy first evaluates the pushed-down predicates in
+        parallel and determines a mask of which rows to read. Then, it
+        parallelizes over both the columns and the row groups while filtering
+        out rows that do not need to be read. This can provide significant
+        speedups for large files (i.e. many row-groups) with a predicate that
+        filters clustered rows or filters heavily. In other cases,
+        `prefiltered` may slow down the scan compared other strategies.
+
+        The `prefiltered` settings falls back to `auto` if no predicate is
+        given.
+
+        .. warning::
+            The `prefiltered` strategy is considered **unstable**. It may be
+            changed at any point without it being considered a breaking change.
+
+    use_statistics
+        Use statistics in the parquet to determine if pages
+        can be skipped from reading.
+    hive_partitioning
+        Infer statistics and schema from hive partitioned URL and use them
+        to prune reads.
+    glob
+        Expand path given via globbing rules.
+    hidden_file_prefix
+        Skip reading files whose names begin with the specified prefixes.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    schema
+        Specify the datatypes of the columns. The datatypes must match the
+        datatypes in the file(s). If there are extra columns that are not in the
+        file(s), consider also passing `missing_columns='insert'`.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    hive_schema
+        The column names and data types of the columns by which the data is partitioned.
+        If set to `None` (default), the schema of the Hive partitions is inferred.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    try_parse_hive_dates
+        Whether to try parsing hive values as date/datetime types.
+    rechunk
+        In case of reading multiple files via a glob pattern rechunk the final DataFrame
+        into contiguous memory chunks.
+    low_memory
+        Reduce memory pressure at the expense of performance.
+    cache
+        Cache the result after reading.
+    storage_options
+        Options that indicate how to connect to a cloud provider.
+
+        The cloud providers currently supported are AWS, GCP, and Azure.
+        See supported keys here:
+
+        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
+        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
+        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
+        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
+          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
+
+        If `storage_options` is not provided, Polars will try to infer the information
+        from environment variables.
+    credential_provider
+        Provide a function that can be called to provide cloud storage
+        credentials. The function is expected to return a dictionary of
+        credential keys along with an optional credential expiry time.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+    retries
+        Number of retries if accessing a cloud instance fails.
+    include_file_paths
+        Include the path of the source file(s) as a column with this name.
+    missing_columns
+        Configuration for behavior when columns defined in the schema
+        are missing from the data:
+
+        * `insert`: Inserts the missing columns using NULLs as the row values.
+        * `raise`: Raises an error.
+
+    allow_missing_columns
+        When reading a list of parquet files, if a column existing in the first
+        file cannot be found in subsequent files, the default behavior is to
+        raise an error. However, if `allow_missing_columns` is set to
+        `True`, a full-NULL column is returned instead of erroring for the files
+        that do not contain the column.
+
+        .. deprecated:: 1.30.0
+            Use the parameter `missing_columns` instead and pass one of
+            `('insert', 'raise')`.
+    extra_columns
+        Configuration for behavior when extra columns outside of the
+        defined schema are encountered in the data:
+
+        * `ignore`: Silently ignores.
+        * `raise`: Raises an error.
+
+    cast_options
+        Configuration for column type-casting during scans. Useful for datasets
+        containing files that have differing schemas.
+
+        .. warning::
+            This functionality is considered **unstable**. It may be changed
+            at any point without it being considered a breaking change.
+
+    See Also
+    --------
+    read_parquet
+    scan_pyarrow_dataset
+
+    Examples
+    --------
+    Scan a local Parquet file.
+
+    >>> pl.scan_parquet("path/to/file.parquet")  # doctest: +SKIP
+
+    Scan a file on AWS S3.
+
+    >>> source = "s3://bucket/*.parquet"
+    >>> pl.scan_parquet(source)  # doctest: +SKIP
+    >>> storage_options = {
+    ...     "aws_access_key_id": "<secret>",
+    ...     "aws_secret_access_key": "<secret>",
+    ...     "aws_region": "us-east-1",
+    ... }
+    >>> pl.scan_parquet(source, storage_options=storage_options)  # doctest: +SKIP
+    """
+    if schema is not None:
+        msg = "the `schema` parameter of `scan_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    if hive_schema is not None:
+        msg = "the `hive_schema` parameter of `scan_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    if cast_options is not None:
+        msg = "The `cast_options` parameter of `scan_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    if hidden_file_prefix is not None:
+        msg = "The `hidden_file_prefix` parameter of `scan_parquet` is considered unstable."
+        issue_unstable_warning(msg)
+
+    if allow_missing_columns is not None:
+        issue_deprecation_warning(
+            "the parameter `allow_missing_columns` for `scan_parquet` is deprecated. "
+            "Use the parameter `missing_columns` instead and pass one of "
+            "`('insert', 'raise')`.",
+            version="1.30.0",
+        )
+
+        missing_columns = "insert" if allow_missing_columns else "raise"
+
+    if isinstance(source, (str, Path)):
+        source = normalize_filepath(source, check_not_directory=False)
+    elif is_path_or_str_sequence(source):
+        source = [
+            normalize_filepath(source, check_not_directory=False) for source in source
+        ]
+
+    credential_provider_builder = _init_credential_provider_builder(
+        credential_provider, source, storage_options, "scan_parquet"
+    )
+
+    del credential_provider
+
+    sources = (
+        [source]
+        if not isinstance(source, Sequence) or isinstance(source, (str, bytes))
+        else source
+    )
+
+    pylf = PyLazyFrame.new_from_parquet(
+        sources=sources,
+        schema=schema,
+        parallel=parallel,
+        low_memory=low_memory,
+        use_statistics=use_statistics,
+        scan_options=ScanOptions(
+            row_index=(
+                (row_index_name, row_index_offset)
+                if row_index_name is not None
+                else None
+            ),
+            pre_slice=(0, n_rows) if n_rows is not None else None,
+            cast_options=cast_options,
+            extra_columns=extra_columns,
+            missing_columns=missing_columns,
+            include_file_paths=include_file_paths,
+            glob=glob,
+            hidden_file_prefix=(
+                [hidden_file_prefix]
+                if isinstance(hidden_file_prefix, str)
+                else hidden_file_prefix
+            ),
+            hive_partitioning=hive_partitioning,
+            hive_schema=hive_schema,
+            try_parse_hive_dates=try_parse_hive_dates,
+            rechunk=rechunk,
+            cache=cache,
+            storage_options=(
+                list(storage_options.items()) if storage_options is not None else None
+            ),
+            credential_provider=credential_provider_builder,
+            retries=retries,
+            column_mapping=_column_mapping,
+            default_values=_default_values,
+            deletion_files=_deletion_files,
+            table_statistics=_table_statistics,
+        ),
+    )
+
+    return wrap_ldf(pylf)
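
Usage note: the hunk above defines the public Parquet entry points `read_parquet`, `read_parquet_schema`, `read_parquet_metadata`, and `scan_parquet`. A minimal sketch of how they are typically called is shown below; it assumes the wheel is importable as `polars`, and the file paths and column names are hypothetical placeholders.

import polars as pl

# Eager read of two columns; missing_columns="insert" null-fills columns that
# are absent from some files (path and column names are hypothetical).
df = pl.read_parquet(
    "data/example.parquet",
    columns=["a", "b"],
    missing_columns="insert",
)

# Lazy scan, so predicate and projection pushdown reach the reader; the
# docstring above recommends this over read_parquet().lazy().
lf = pl.scan_parquet("data/*.parquet", parallel="auto")
out = lf.filter(pl.col("a") > 0).select("a", "b").collect()

# Schema and file-level custom metadata without reading the data.
schema = pl.read_parquet_schema("data/example.parquet")
metadata = pl.read_parquet_metadata("data/example.parquet")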