polars-runtime-compat 1.34.0b2__cp39-abi3-win_amd64.whl → 1.34.0b4__cp39-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -96
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/io/csv/batched_reader.py
DELETED
@@ -1,142 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from collections.abc import Sequence
-from typing import TYPE_CHECKING
-
-from polars._utils.various import (
-    _process_null_values,
-    normalize_filepath,
-)
-from polars._utils.wrap import wrap_df
-from polars.datatypes import N_INFER_DEFAULT, parse_into_dtype
-from polars.io._utils import parse_columns_arg, parse_row_index_args
-from polars.io.csv._utils import _update_columns
-
-with contextlib.suppress(ImportError): # Module not available when building docs
-    from polars._plr import PyBatchedCsv
-
-if TYPE_CHECKING:
-    from pathlib import Path
-
-    from polars import DataFrame
-    from polars._typing import CsvEncoding, PolarsDataType, SchemaDict
-
-
-class BatchedCsvReader:
-    """Read a CSV file in batches."""
-
-    def __init__(
-        self,
-        source: str | Path,
-        *,
-        has_header: bool = True,
-        columns: Sequence[int] | Sequence[str] | None = None,
-        separator: str = ",",
-        comment_prefix: str | None = None,
-        quote_char: str | None = '"',
-        skip_rows: int = 0,
-        skip_lines: int = 0,
-        schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
-        null_values: str | Sequence[str] | dict[str, str] | None = None,
-        missing_utf8_is_empty_string: bool = False,
-        ignore_errors: bool = False,
-        try_parse_dates: bool = False,
-        n_threads: int | None = None,
-        infer_schema_length: int | None = N_INFER_DEFAULT,
-        batch_size: int = 50_000,
-        n_rows: int | None = None,
-        encoding: CsvEncoding = "utf8",
-        low_memory: bool = False,
-        rechunk: bool = True,
-        skip_rows_after_header: int = 0,
-        row_index_name: str | None = None,
-        row_index_offset: int = 0,
-        eol_char: str = "\n",
-        new_columns: Sequence[str] | None = None,
-        raise_if_empty: bool = True,
-        truncate_ragged_lines: bool = False,
-        decimal_comma: bool = False,
-    ) -> None:
-        path = normalize_filepath(source, check_not_directory=False)
-
-        dtype_list: Sequence[tuple[str, PolarsDataType]] | None = None
-        dtype_slice: Sequence[PolarsDataType] | None = None
-        if schema_overrides is not None:
-            if isinstance(schema_overrides, dict):
-                dtype_list = []
-                for k, v in schema_overrides.items():
-                    dtype_list.append((k, parse_into_dtype(v)))
-            elif isinstance(schema_overrides, Sequence):
-                dtype_slice = schema_overrides
-            else:
-                msg = "`schema_overrides` arg should be list or dict"
-                raise TypeError(msg)
-
-        processed_null_values = _process_null_values(null_values)
-        projection, columns = parse_columns_arg(columns)
-
-        self._reader = PyBatchedCsv.new(
-            infer_schema_length=infer_schema_length,
-            chunk_size=batch_size,
-            has_header=has_header,
-            ignore_errors=ignore_errors,
-            n_rows=n_rows,
-            skip_rows=skip_rows,
-            skip_lines=skip_lines,
-            projection=projection,
-            separator=separator,
-            rechunk=rechunk,
-            columns=columns,
-            encoding=encoding,
-            n_threads=n_threads,
-            path=path,
-            schema_overrides=dtype_list,
-            overwrite_dtype_slice=dtype_slice,
-            low_memory=low_memory,
-            comment_prefix=comment_prefix,
-            quote_char=quote_char,
-            null_values=processed_null_values,
-            missing_utf8_is_empty_string=missing_utf8_is_empty_string,
-            try_parse_dates=try_parse_dates,
-            skip_rows_after_header=skip_rows_after_header,
-            row_index=parse_row_index_args(row_index_name, row_index_offset),
-            eol_char=eol_char,
-            raise_if_empty=raise_if_empty,
-            truncate_ragged_lines=truncate_ragged_lines,
-            decimal_comma=decimal_comma,
-        )
-        self.new_columns = new_columns
-
-    def next_batches(self, n: int) -> list[DataFrame] | None:
-        """
-        Read `n` batches from the reader.
-
-        These batches will be parallelized over the available threads.
-
-        Parameters
-        ----------
-        n
-            Number of chunks to fetch; ideally this is >= number of threads.
-
-        Examples
-        --------
-        >>> reader = pl.read_csv_batched(
-        ... "./pdsh/tables_scale_100/lineitem.tbl",
-        ... separator="|",
-        ... try_parse_dates=True,
-        ... ) # doctest: +SKIP
-        >>> reader.next_batches(5) # doctest: +SKIP
-
-        Returns
-        -------
-        list of DataFrames
-        """
-        if (batches := self._reader.next_batches(n)) is not None:
-            if self.new_columns:
-                return [
-                    _update_columns(wrap_df(df), self.new_columns) for df in batches
-                ]
-            else:
-                return [wrap_df(df) for df in batches]
-        return None