polars-runtime-compat 1.34.0b3-cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b5-cp39-abi3-manylinux_2_24_aarch64.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of polars-runtime-compat might be problematic.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/interchange/utils.py
DELETED
@@ -1,170 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import TYPE_CHECKING
-
-from polars.datatypes import (
-    Boolean,
-    Categorical,
-    Date,
-    Datetime,
-    Duration,
-    Enum,
-    Float32,
-    Float64,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    String,
-    Time,
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-)
-from polars.interchange.protocol import DtypeKind, Endianness
-
-if TYPE_CHECKING:
-    from polars._typing import PolarsDataType
-    from polars.datatypes import DataTypeClass
-    from polars.interchange.protocol import Dtype
-
-NE = Endianness.NATIVE
-
-polars_dtype_to_dtype_map: dict[DataTypeClass, Dtype] = {
-    Int8: (DtypeKind.INT, 8, "c", NE),
-    Int16: (DtypeKind.INT, 16, "s", NE),
-    Int32: (DtypeKind.INT, 32, "i", NE),
-    Int64: (DtypeKind.INT, 64, "l", NE),
-    UInt8: (DtypeKind.UINT, 8, "C", NE),
-    UInt16: (DtypeKind.UINT, 16, "S", NE),
-    UInt32: (DtypeKind.UINT, 32, "I", NE),
-    UInt64: (DtypeKind.UINT, 64, "L", NE),
-    Float32: (DtypeKind.FLOAT, 32, "f", NE),
-    Float64: (DtypeKind.FLOAT, 64, "g", NE),
-    Boolean: (DtypeKind.BOOL, 1, "b", NE),
-    String: (DtypeKind.STRING, 8, "U", NE),
-    Date: (DtypeKind.DATETIME, 32, "tdD", NE),
-    Time: (DtypeKind.DATETIME, 64, "ttu", NE),
-    Datetime: (DtypeKind.DATETIME, 64, "tsu:", NE),
-    Duration: (DtypeKind.DATETIME, 64, "tDu", NE),
-    Categorical: (DtypeKind.CATEGORICAL, 32, "I", NE),
-    Enum: (DtypeKind.CATEGORICAL, 32, "I", NE),
-}
-
-
-def polars_dtype_to_dtype(dtype: PolarsDataType) -> Dtype:
-    """Convert Polars data type to interchange protocol data type."""
-    try:
-        result = polars_dtype_to_dtype_map[dtype.base_type()]
-    except KeyError as exc:
-        msg = f"data type {dtype!r} not supported by the interchange protocol"
-        raise ValueError(msg) from exc
-
-    # Handle instantiated data types
-    if isinstance(dtype, Datetime):
-        return _datetime_to_dtype(dtype)
-    elif isinstance(dtype, Duration):
-        return _duration_to_dtype(dtype)
-
-    return result
-
-
-def _datetime_to_dtype(dtype: Datetime) -> Dtype:
-    tu = dtype.time_unit[0]
-    tz = dtype.time_zone if dtype.time_zone is not None else ""
-    arrow_c_type = f"ts{tu}:{tz}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-def _duration_to_dtype(dtype: Duration) -> Dtype:
-    tu = dtype.time_unit[0]
-    arrow_c_type = f"tD{tu}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-dtype_to_polars_dtype_map: dict[DtypeKind, dict[int, PolarsDataType]] = {
-    DtypeKind.INT: {
-        8: Int8,
-        16: Int16,
-        32: Int32,
-        64: Int64,
-    },
-    DtypeKind.UINT: {
-        8: UInt8,
-        16: UInt16,
-        32: UInt32,
-        64: UInt64,
-    },
-    DtypeKind.FLOAT: {
-        32: Float32,
-        64: Float64,
-    },
-    DtypeKind.BOOL: {
-        1: Boolean,
-        8: Boolean,
-    },
-    DtypeKind.STRING: {8: String},
-}
-
-
-def dtype_to_polars_dtype(dtype: Dtype) -> PolarsDataType:
-    """Convert interchange protocol data type to Polars data type."""
-    kind, bit_width, format_str, _ = dtype
-
-    if kind == DtypeKind.DATETIME:
-        return _temporal_dtype_to_polars_dtype(format_str, dtype)
-    elif kind == DtypeKind.CATEGORICAL:
-        return Enum
-
-    try:
-        return dtype_to_polars_dtype_map[kind][bit_width]
-    except KeyError as exc:
-        msg = f"unsupported data type: {dtype!r}"
-        raise NotImplementedError(msg) from exc
-
-
-def _temporal_dtype_to_polars_dtype(format_str: str, dtype: Dtype) -> PolarsDataType:
-    if (match := re.fullmatch(r"ts([mun]):(.*)", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        time_zone = match.group(2) or None
-        return Datetime(
-            time_unit=time_unit,  # type: ignore[arg-type]
-            time_zone=time_zone,
-        )
-    elif format_str == "tdD":
-        return Date
-    elif format_str == "ttu":
-        return Time
-    elif (match := re.fullmatch(r"tD([mun])", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        return Duration(time_unit=time_unit)  # type: ignore[arg-type]
-
-    msg = f"unsupported temporal data type: {dtype!r}"
-    raise NotImplementedError(msg)
-
-
-def get_buffer_length_in_elements(buffer_size: int, dtype: Dtype) -> int:
-    """Get the length of a buffer in elements."""
-    bits_per_element = dtype[1]
-    bytes_per_element, rest = divmod(bits_per_element, 8)
-    if rest > 0:
-        msg = f"cannot get buffer length for buffer with dtype {dtype!r}"
-        raise ValueError(msg)
-    return buffer_size // bytes_per_element
-
-
-def polars_dtype_to_data_buffer_dtype(dtype: PolarsDataType) -> PolarsDataType:
-    """Get the data type of the data buffer."""
-    if dtype.is_integer() or dtype.is_float() or dtype == Boolean:
-        return dtype
-    elif dtype.is_temporal():
-        return Int32 if dtype == Date else Int64
-    elif dtype == String:
-        return UInt8
-    elif dtype in (Enum, Categorical):
-        return UInt32
-
-    msg = f"unsupported data type: {dtype}"
-    raise NotImplementedError(msg)
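
The deleted module above was the bridge between Polars dtypes and DataFrame Interchange Protocol descriptors. A minimal sketch of the round trip it supported in 1.34.0b3 (these imports no longer resolve in 1.34.0b5):

    import polars as pl
    from polars.interchange.utils import (  # removed in 1.34.0b5
        dtype_to_polars_dtype,
        polars_dtype_to_dtype,
    )

    # Produces a (DtypeKind.DATETIME, 64, "tsu:UTC", NE) descriptor via _datetime_to_dtype
    desc = polars_dtype_to_dtype(pl.Datetime("us", "UTC"))
    # ...and the reverse mapping recovers the dtype via the ts([mun]):(.*) pattern
    assert dtype_to_polars_dtype(desc) == pl.Datetime("us", "UTC")
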
polars/io/__init__.py
DELETED
@@ -1,64 +0,0 @@
-"""Functions for reading data."""
-
-from polars.io.avro import read_avro
-from polars.io.clipboard import read_clipboard
-from polars.io.csv import read_csv, read_csv_batched, scan_csv
-from polars.io.database import read_database, read_database_uri
-from polars.io.delta import read_delta, scan_delta
-from polars.io.iceberg import scan_iceberg
-from polars.io.ipc import read_ipc, read_ipc_schema, read_ipc_stream, scan_ipc
-from polars.io.json import read_json
-from polars.io.ndjson import read_ndjson, scan_ndjson
-from polars.io.parquet import (
-    read_parquet,
-    read_parquet_metadata,
-    read_parquet_schema,
-    scan_parquet,
-)
-from polars.io.partition import (
-    BasePartitionContext,
-    KeyedPartition,
-    KeyedPartitionContext,
-    PartitionByKey,
-    PartitionMaxSize,
-    PartitionParted,
-)
-from polars.io.plugins import _defer as defer
-from polars.io.pyarrow_dataset import scan_pyarrow_dataset
-from polars.io.scan_options import ScanCastOptions
-from polars.io.spreadsheet import read_excel, read_ods
-
-__all__ = [
-    "defer",
-    "PartitionByKey",
-    "PartitionMaxSize",
-    "PartitionParted",
-    "KeyedPartition",
-    "BasePartitionContext",
-    "KeyedPartitionContext",
-    "read_avro",
-    "read_clipboard",
-    "read_csv",
-    "read_csv_batched",
-    "read_database",
-    "read_database_uri",
-    "read_delta",
-    "read_excel",
-    "read_ipc",
-    "read_ipc_schema",
-    "read_ipc_stream",
-    "read_json",
-    "read_ndjson",
-    "read_ods",
-    "read_parquet",
-    "read_parquet_metadata",
-    "read_parquet_schema",
-    "scan_csv",
-    "scan_delta",
-    "scan_iceberg",
-    "scan_ipc",
-    "scan_ndjson",
-    "scan_parquet",
-    "scan_pyarrow_dataset",
-    "ScanCastOptions",
-]
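
This `__init__.py` is what re-exported every reader and scanner under `polars.io`. A sketch of what held in 1.34.0b3, assuming the usual top-level re-export of the same names from the `polars` namespace:

    import polars as pl
    from polars.io import read_csv  # module deleted in 1.34.0b5

    # both spellings referred to the same function object
    assert read_csv is pl.read_csv
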
polars/io/_utils.py
DELETED
@@ -1,317 +0,0 @@
-from __future__ import annotations
-
-import glob
-import re
-from contextlib import contextmanager
-from io import BytesIO, StringIO
-from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, overload
-
-from polars._dependencies import _FSSPEC_AVAILABLE, fsspec
-from polars._utils.various import (
-    is_int_sequence,
-    is_str_sequence,
-    normalize_filepath,
-)
-from polars.exceptions import NoDataError
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator, Sequence
-    from contextlib import AbstractContextManager as ContextManager
-
-
-def parse_columns_arg(
-    columns: Sequence[str] | Sequence[int] | str | int | None,
-) -> tuple[Sequence[int] | None, Sequence[str] | None]:
-    """
-    Parse the `columns` argument of an I/O function.
-
-    Disambiguates between column names and column indices input.
-
-    Returns
-    -------
-    tuple
-        A tuple containing the columns as a projection and a list of column names.
-        Only one will be specified, the other will be `None`.
-    """
-    if columns is None:
-        return None, None
-
-    projection: Sequence[int] | None = None
-    column_names: Sequence[str] | None = None
-
-    if isinstance(columns, str):
-        column_names = [columns]
-    elif isinstance(columns, int):
-        projection = [columns]
-    elif is_str_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        column_names = columns
-    elif is_int_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        projection = columns
-    else:
-        msg = "the `columns` argument should contain a list of all integers or all string values"
-        raise TypeError(msg)
-
-    return projection, column_names
-
-
-def _ensure_columns_are_unique(columns: Sequence[str] | Sequence[int]) -> None:
-    if len(columns) != len(set(columns)):
-        msg = f"`columns` arg should only have unique values, got {columns!r}"
-        raise ValueError(msg)
-
-
-def parse_row_index_args(
-    row_index_name: str | None = None,
-    row_index_offset: int = 0,
-) -> tuple[str, int] | None:
-    """
-    Parse the `row_index_name` and `row_index_offset` arguments of an I/O function.
-
-    The Rust functions take a single tuple rather than two separate arguments.
-    """
-    if row_index_name is None:
-        return None
-    else:
-        return (row_index_name, row_index_offset)
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]: ...
-
-
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = None,
-    *,
-    use_pyarrow: bool = False,
-    raise_if_empty: bool = True,
-    storage_options: dict[str, Any] | None = None,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]:
-    """
-    Prepare file argument.
-
-    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
-    Returned value is always usable as a context.
-
-    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
-    A local path is returned as a string.
-    An http URL is read into a buffer and returned as a `BytesIO`.
-
-    When `encoding` is not `utf8` or `utf8-lossy`, the whole file is
-    first read in Python and decoded using the specified encoding and
-    returned as a `BytesIO` (for usage with `read_csv`). If encoding
-    ends with "-lossy", characters that can't be decoded are replaced
-    with `�`.
-
-    A `bytes` file is returned as a `BytesIO` if `use_pyarrow=True`.
-
-    When fsspec is installed, remote file(s) is (are) opened with
-    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
-    If encoding is not `utf8` or `utf8-lossy`, decoding is handled by
-    fsspec too.
-    """
-    storage_options = storage_options.copy() if storage_options else {}
-    if storage_options and not _FSSPEC_AVAILABLE:
-        msg = "`fsspec` is required for `storage_options` argument"
-        raise ImportError(msg)
-
-    # Small helper to use a variable as context
-    @contextmanager
-    def managed_file(file: Any) -> Iterator[Any]:
-        try:
-            yield file
-        finally:
-            pass
-
-    has_utf8_utf8_lossy_encoding = (
-        encoding in {"utf8", "utf8-lossy"} if encoding else True
-    )
-    encoding_str = encoding if encoding else "utf8"
-    encoding_str, encoding_errors = (
-        (encoding_str[:-6], "replace")
-        if encoding_str.endswith("-lossy")
-        else (encoding_str, "strict")
-    )
-
-    # PyArrow allows directories, so we only check that something is not
-    # a dir if we are not using PyArrow
-    check_not_dir = not use_pyarrow
-
-    if isinstance(file, bytes):
-        if not has_utf8_utf8_lossy_encoding:
-            file = file.decode(encoding_str, errors=encoding_errors).encode("utf8")
-        return _check_empty(
-            BytesIO(file), context="bytes", raise_if_empty=raise_if_empty
-        )
-
-    if isinstance(file, StringIO):
-        return _check_empty(
-            BytesIO(file.read().encode("utf8")),
-            context="StringIO",
-            read_position=file.tell(),
-            raise_if_empty=raise_if_empty,
-        )
-
-    if isinstance(file, BytesIO):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(
-            _check_empty(
-                b=file,
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        )
-
-    if isinstance(file, Path):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read_bytes()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context=f"Path ({file!r})",
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(normalize_filepath(file, check_not_directory=check_not_dir))
-
-    if isinstance(file, str):
-        # make sure that this is before fsspec
-        # as fsspec needs requests to be installed
-        # to read from http
-        if looks_like_url(file):
-            return process_file_url(file, encoding_str)
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            # check if it is a local file
-            if infer_storage_options(file)["protocol"] == "file":
-                # (lossy) utf8
-                if has_utf8_utf8_lossy_encoding:
-                    return managed_file(
-                        normalize_filepath(file, check_not_directory=check_not_dir)
-                    )
-                # decode first
-                with Path(file).open(
-                    encoding=encoding_str, errors=encoding_errors
-                ) as f:
-                    return _check_empty(
-                        BytesIO(f.read().encode("utf8")),
-                        context=f"{file!r}",
-                        raise_if_empty=raise_if_empty,
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open(file, **storage_options)
-
-    if isinstance(file, list) and bool(file) and all(isinstance(f, str) for f in file):
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            if has_utf8_utf8_lossy_encoding:
-                if all(infer_storage_options(f)["protocol"] == "file" for f in file):
-                    return managed_file(
-                        [
-                            normalize_filepath(f, check_not_directory=check_not_dir)
-                            for f in file
-                        ]
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open_files(file, **storage_options)
-
-    if isinstance(file, str):
-        file = normalize_filepath(file, check_not_directory=check_not_dir)
-        if not has_utf8_utf8_lossy_encoding:
-            with Path(file).open(encoding=encoding_str, errors=encoding_errors) as f:
-                return _check_empty(
-                    BytesIO(f.read().encode("utf8")),
-                    context=f"{file!r}",
-                    raise_if_empty=raise_if_empty,
-                )
-
-    return managed_file(file)
-
-
-def _check_empty(
-    b: BytesIO, *, context: str, raise_if_empty: bool, read_position: int | None = None
-) -> BytesIO:
-    if raise_if_empty and b.getbuffer().nbytes == 0:
-        hint = (
-            f" (buffer position = {read_position}; try seek(0) before reading?)"
-            if context in ("StringIO", "BytesIO") and read_position
-            else ""
-        )
-        msg = f"empty data from {context}{hint}"
-        raise NoDataError(msg)
-    return b
-
-
-def looks_like_url(path: str) -> bool:
-    return re.match("^(ht|f)tps?://", path, re.IGNORECASE) is not None
-
-
-def process_file_url(path: str, encoding: str | None = None) -> BytesIO:
-    from urllib.request import urlopen
-
-    with urlopen(path) as f:
-        if not encoding or encoding in {"utf8", "utf8-lossy"}:
-            return BytesIO(f.read())
-        else:
-            return BytesIO(f.read().decode(encoding).encode("utf8"))
-
-
-def is_glob_pattern(file: str) -> bool:
-    return any(char in file for char in ["*", "?", "["])
-
-
-def is_local_file(file: str) -> bool:
-    try:
-        next(glob.iglob(file, recursive=True))  # noqa: PTH207
-    except StopIteration:
-        return False
-    else:
-        return True
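
Most of this module is plumbing for the eager readers; `parse_columns_arg` is the piece the deleted reader below relies on. A minimal sketch of its contract, taken directly from the code above:

    from polars.io._utils import parse_columns_arg  # removed in 1.34.0b5

    parse_columns_arg(["a", "b"])  # -> (None, ["a", "b"]): names, no projection
    parse_columns_arg([0, 2])      # -> ([0, 2], None): projection, no names
    parse_columns_arg(None)        # -> (None, None)
    parse_columns_arg(["a", "a"])  # raises ValueError: values must be unique
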
polars/io/avro.py
DELETED
@@ -1,49 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from pathlib import Path
-from typing import IO, TYPE_CHECKING
-
-from polars._utils.various import normalize_filepath
-from polars._utils.wrap import wrap_df
-from polars.io._utils import parse_columns_arg
-
-with contextlib.suppress(ImportError):  # Module not available when building docs
-    from polars._plr import PyDataFrame
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_avro(
-    source: str | Path | IO[bytes] | bytes,
-    *,
-    columns: list[int] | list[str] | None = None,
-    n_rows: int | None = None,
-) -> DataFrame:
-    """
-    Read into a DataFrame from Apache Avro format.
-
-    Parameters
-    ----------
-    source
-        Path to a file or a file-like object (by "file-like object" we refer to objects
-        that have a `read()` method, such as a file handler like the builtin `open`
-        function, or a `BytesIO` instance). For file-like objects, the stream position
-        may not be updated accordingly after reading.
-    columns
-        Columns to select. Accepts a list of column indices (starting at zero) or a list
-        of column names.
-    n_rows
-        Stop reading from Apache Avro file after reading `n_rows`.
-
-    Returns
-    -------
-    DataFrame
-    """
-    if isinstance(source, (str, Path)):
-        source = normalize_filepath(source)
-    projection, column_names = parse_columns_arg(columns)
-
-    pydf = PyDataFrame.read_avro(source, column_names, projection, n_rows)
-    return wrap_df(pydf)
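
For reference, a usage sketch of the reader defined above, which was public as `pl.read_avro` in 1.34.0b3; the path and column names are placeholders:

    import polars as pl

    # select two columns by name and cap the row count
    df = pl.read_avro("data.avro", columns=["id", "name"], n_rows=1_000)
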
polars/io/clipboard.py
DELETED
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from io import StringIO
-from typing import TYPE_CHECKING, Any
-
-from polars.io.csv.functions import read_csv
-
-with contextlib.suppress(ImportError):
-    from polars._plr import read_clipboard_string as _read_clipboard_string
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_clipboard(separator: str = "\t", **kwargs: Any) -> DataFrame:
-    """
-    Read text from clipboard and pass to `read_csv`.
-
-    Useful for reading data copied from Excel or other similar spreadsheet software.
-
-    Parameters
-    ----------
-    separator
-        Single byte character to use as separator parsing csv from clipboard.
-    kwargs
-        Additional arguments passed to `read_csv`.
-
-    See Also
-    --------
-    read_csv : Read a csv file into a DataFrame.
-    DataFrame.write_clipboard : Write a DataFrame to the clipboard.
-    """
-    csv_string: str = _read_clipboard_string()
-    io_string = StringIO(csv_string)
-    return read_csv(source=io_string, separator=separator, **kwargs)
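
A usage sketch: the function reads whatever is on the clipboard as tab-separated text, and any extra keyword arguments (e.g. `has_header`) are forwarded to `read_csv`:

    import polars as pl

    # copy a range from a spreadsheet first, then:
    df = pl.read_clipboard(separator="\t", has_header=True)
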
polars/io/cloud/__init__.py
DELETED
@@ -1,17 +0,0 @@
-from polars.io.cloud.credential_provider._providers import (
-    CredentialProvider,
-    CredentialProviderAWS,
-    CredentialProviderAzure,
-    CredentialProviderFunction,
-    CredentialProviderFunctionReturn,
-    CredentialProviderGCP,
-)
-
-__all__ = [
-    "CredentialProvider",
-    "CredentialProviderAWS",
-    "CredentialProviderAzure",
-    "CredentialProviderFunction",
-    "CredentialProviderFunctionReturn",
-    "CredentialProviderGCP",
-]
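
These re-exports were the public entry point for cloud credential providers. A hedged sketch of how they were typically passed to a cloud scan in 1.34.0b3; the bucket URI and profile name are placeholders:

    import polars as pl
    from polars.io.cloud import CredentialProviderAWS  # removed in 1.34.0b5

    # resolve AWS credentials from a named profile for the scan
    lf = pl.scan_parquet(
        "s3://my-bucket/data/*.parquet",
        credential_provider=CredentialProviderAWS(profile_name="default"),
    )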