polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from polars._utils.various import (
|
|
8
|
+
_process_null_values,
|
|
9
|
+
normalize_filepath,
|
|
10
|
+
)
|
|
11
|
+
from polars._utils.wrap import wrap_df
|
|
12
|
+
from polars.datatypes import N_INFER_DEFAULT, parse_into_dtype
|
|
13
|
+
from polars.io._utils import parse_columns_arg, parse_row_index_args
|
|
14
|
+
from polars.io.csv._utils import _update_columns
|
|
15
|
+
|
|
16
|
+
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
17
|
+
from polars._plr import PyBatchedCsv
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from polars import DataFrame
|
|
23
|
+
from polars._typing import CsvEncoding, PolarsDataType, SchemaDict
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BatchedCsvReader:
    """
    Read a CSV file in batches.

    The constructor normalizes its arguments and hands them to
    `PyBatchedCsv.new`; `next_batches` then pulls batches from that reader
    and wraps each one with `wrap_df`.
    """

    def __init__(
        self,
        source: str | Path,
        *,
        has_header: bool = True,
        columns: Sequence[int] | Sequence[str] | None = None,
        separator: str = ",",
        comment_prefix: str | None = None,
        quote_char: str | None = '"',
        skip_rows: int = 0,
        skip_lines: int = 0,
        schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
        null_values: str | Sequence[str] | dict[str, str] | None = None,
        missing_utf8_is_empty_string: bool = False,
        ignore_errors: bool = False,
        try_parse_dates: bool = False,
        n_threads: int | None = None,
        infer_schema_length: int | None = N_INFER_DEFAULT,
        batch_size: int = 50_000,
        n_rows: int | None = None,
        encoding: CsvEncoding = "utf8",
        low_memory: bool = False,
        rechunk: bool = True,
        skip_rows_after_header: int = 0,
        row_index_name: str | None = None,
        row_index_offset: int = 0,
        eol_char: str = "\n",
        new_columns: Sequence[str] | None = None,
        raise_if_empty: bool = True,
        truncate_ragged_lines: bool = False,
        decimal_comma: bool = False,
    ) -> None:
        """
        Create the underlying batched reader for `source`.

        Most arguments are forwarded unchanged to `PyBatchedCsv.new`. The
        ones that are transformed first are:

        - `source`: passed through `normalize_filepath`.
        - `schema_overrides`: a dict becomes a list of `(name, dtype)` pairs
          (each dtype run through `parse_into_dtype`); any other sequence is
          forwarded as-is as a positional dtype list.
        - `null_values`: passed through `_process_null_values`.
        - `columns`: split by `parse_columns_arg` into a projection and a
          column list.
        - `row_index_name` / `row_index_offset`: combined by
          `parse_row_index_args`.
        - `batch_size`: forwarded under the name `chunk_size`.

        `new_columns` is not given to the reader; it is stored and applied
        to every batch returned by `next_batches`.

        Raises
        ------
        TypeError
            If `schema_overrides` is neither None, a dict, nor a Sequence.
        """
        path = normalize_filepath(source, check_not_directory=False)

        # Overrides reach the reader through one of two separate arguments:
        # name-keyed pairs (from a dict) or a positional sequence of dtypes.
        named_overrides: Sequence[tuple[str, PolarsDataType]] | None = None
        positional_overrides: Sequence[PolarsDataType] | None = None
        if schema_overrides is None:
            pass
        elif isinstance(schema_overrides, dict):
            named_overrides = [
                (name, parse_into_dtype(dtype))
                for name, dtype in schema_overrides.items()
            ]
        elif isinstance(schema_overrides, Sequence):
            # Sequence entries are forwarded without going through
            # `parse_into_dtype`.
            positional_overrides = schema_overrides
        else:
            msg = "`schema_overrides` arg should be list or dict"
            raise TypeError(msg)

        processed_null_values = _process_null_values(null_values)
        projection, column_names = parse_columns_arg(columns)

        self._reader = PyBatchedCsv.new(
            # input and column selection
            path=path,
            encoding=encoding,
            projection=projection,
            columns=column_names,
            # dialect
            separator=separator,
            quote_char=quote_char,
            comment_prefix=comment_prefix,
            eol_char=eol_char,
            decimal_comma=decimal_comma,
            # header / row skipping and limits
            has_header=has_header,
            skip_rows=skip_rows,
            skip_lines=skip_lines,
            skip_rows_after_header=skip_rows_after_header,
            n_rows=n_rows,
            row_index=parse_row_index_args(row_index_name, row_index_offset),
            # schema and value parsing
            infer_schema_length=infer_schema_length,
            schema_overrides=named_overrides,
            overwrite_dtype_slice=positional_overrides,
            null_values=processed_null_values,
            missing_utf8_is_empty_string=missing_utf8_is_empty_string,
            try_parse_dates=try_parse_dates,
            # error handling
            ignore_errors=ignore_errors,
            raise_if_empty=raise_if_empty,
            truncate_ragged_lines=truncate_ragged_lines,
            # execution
            chunk_size=batch_size,
            n_threads=n_threads,
            low_memory=low_memory,
            rechunk=rechunk,
        )
        self.new_columns = new_columns

    def next_batches(self, n: int) -> list[DataFrame] | None:
        """
        Read `n` batches from the reader.

        These batches will be parallelized over the available threads.

        Parameters
        ----------
        n
            Number of chunks to fetch; ideally this is >= number of threads.

        Returns
        -------
        list of DataFrames
            One wrapped DataFrame per batch, with `new_columns` applied via
            `_update_columns` when it was given and is non-empty. None is
            returned when the underlying reader itself returns None
            (presumably once the input is exhausted - confirm against the
            reader implementation).

        Examples
        --------
        >>> reader = pl.read_csv_batched(
        ...     "./pdsh/tables_scale_100/lineitem.tbl",
        ...     separator="|",
        ...     try_parse_dates=True,
        ... )  # doctest: +SKIP
        >>> reader.next_batches(5)  # doctest: +SKIP
        """
        raw_batches = self._reader.next_batches(n)
        if raw_batches is None:
            return None

        if not self.new_columns:
            return [wrap_df(raw) for raw in raw_batches]

        return [
            _update_columns(wrap_df(raw), self.new_columns) for raw in raw_batches
        ]
|