polars-runtime-compat 1.34.0b2 (cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl)
This diff shows the content of publicly available package versions released to a supported registry, exactly as they appear in that registry. It is provided for informational purposes only.
Potentially problematic release: this version of polars-runtime-compat might be problematic.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
--- /dev/null
+++ b/polars/_utils/construction/other.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from polars._dependencies import pyarrow as pa
+from polars._utils.construction.utils import get_first_non_none
+
+if TYPE_CHECKING:
+    from polars._dependencies import pandas as pd
+
+
+def pandas_series_to_arrow(
+    values: pd.Series[Any] | pd.Index[Any],
+    *,
+    length: int | None = None,
+    nan_to_null: bool = True,
+) -> pa.Array:
+    """
+    Convert a pandas Series to an Arrow Array.
+
+    Parameters
+    ----------
+    values : :class:`pandas.Series` or :class:`pandas.Index`.
+        Series to convert to arrow
+    nan_to_null : bool, default = True
+        Interpret `NaN` as missing values.
+    length : int, optional
+        in case all values are null, create a null array of this length.
+        if unset, length is inferred from values.
+
+    Returns
+    -------
+    :class:`pyarrow.Array`
+    """
+    dtype = getattr(values, "dtype", None)
+    if dtype == "object":
+        first_non_none = get_first_non_none(values.values)  # type: ignore[arg-type]
+        if isinstance(first_non_none, str):
+            return pa.array(values, pa.large_utf8(), from_pandas=nan_to_null)
+        elif first_non_none is None:
+            return pa.nulls(length or len(values), pa.large_utf8())
+        return pa.array(values, from_pandas=nan_to_null)
+    elif dtype:
+        return pa.array(values, from_pandas=nan_to_null)
+    else:
+        # Pandas Series is actually a Pandas DataFrame when the original DataFrame
+        # contains duplicated columns and a duplicated column is requested with df["a"].
+        msg = "duplicate column names found: "
+        raise ValueError(
+            msg,
+            f"{values.columns.tolist()!s}",  # type: ignore[union-attr]
+        )
+
+
+def coerce_arrow(array: pa.Array) -> pa.Array:
+    """..."""
+    import pyarrow.compute as pc
+
+    if hasattr(array, "num_chunks") and array.num_chunks > 1:
+        # small integer keys can often not be combined, so let's already cast
+        # to the uint32 used by polars
+        if pa.types.is_dictionary(array.type) and (
+            pa.types.is_int8(array.type.index_type)
+            or pa.types.is_uint8(array.type.index_type)
+            or pa.types.is_int16(array.type.index_type)
+            or pa.types.is_uint16(array.type.index_type)
+            or pa.types.is_int32(array.type.index_type)
+        ):
+            array = pc.cast(
+                array, pa.dictionary(pa.uint32(), pa.large_string())
+            ).combine_chunks()
+    return array
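For orientation, the following is a minimal sketch (not part of the diff) of the pyarrow behavior the two helpers above depend on, written against the public pandas/pyarrow APIs only; it assumes pandas and pyarrow are installed and skips the wheel-internal polars._dependencies shims and get_first_non_none helper.

import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc

# pandas_series_to_arrow: object-dtype strings go to large_utf8, and
# from_pandas=True maps NaN/None to Arrow nulls
s = pd.Series(["a", None, "c"], dtype="object")
arr = pa.array(s, pa.large_utf8(), from_pandas=True)
assert arr.type == pa.large_utf8() and arr.null_count == 1

# an all-null object column becomes a typed null array of the requested length
assert pa.nulls(3, pa.large_utf8()).null_count == 3

# coerce_arrow: a multi-chunk dictionary array with small integer keys is
# re-cast to uint32 keys over large_string values and combined into one chunk
chunked = pa.chunked_array([pa.array(["x", "y"]).dictionary_encode()] * 2)
combined = pc.cast(
    chunked, pa.dictionary(pa.uint32(), pa.large_string())
).combine_chunks()
assert combined.type.index_type == pa.uint32()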
--- /dev/null
+++ b/polars/_utils/construction/series.py
@@ -0,0 +1,560 @@
+from __future__ import annotations
+
+import contextlib
+from collections.abc import Generator, Iterator, Mapping
+from datetime import date, datetime, time, timedelta
+from enum import Enum as PyEnum
+from itertools import islice
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+)
+
+import polars._reexport as pl
+import polars._utils.construction as plc
+from polars._dependencies import (
+    _PYARROW_AVAILABLE,
+    _check_for_numpy,
+    dataclasses,
+)
+from polars._dependencies import numpy as np
+from polars._dependencies import pandas as pd
+from polars._dependencies import pyarrow as pa
+from polars._utils.construction.dataframe import _sequence_of_dict_to_pydf
+from polars._utils.construction.utils import (
+    get_first_non_none,
+    is_namedtuple,
+    is_pydantic_model,
+    is_simple_numpy_backed_pandas_series,
+    is_sqlalchemy_row,
+)
+from polars._utils.various import (
+    range_to_series,
+)
+from polars._utils.wrap import wrap_s
+from polars.datatypes import (
+    Array,
+    Boolean,
+    Categorical,
+    Date,
+    Datetime,
+    Decimal,
+    Duration,
+    Enum,
+    List,
+    Null,
+    Object,
+    String,
+    Struct,
+    Time,
+    Unknown,
+    dtype_to_py_type,
+    is_polars_dtype,
+    numpy_char_code_to_dtype,
+    parse_into_dtype,
+    try_parse_into_dtype,
+)
+from polars.datatypes.constructor import (
+    numpy_type_to_constructor,
+    numpy_values_and_dtype,
+    polars_type_to_constructor,
+    py_type_to_constructor,
+)
+
+with contextlib.suppress(ImportError):  # Module not available when building docs
+    from polars._plr import PySeries
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from polars import DataFrame, Series
+    from polars._dependencies import pandas as pd
+    from polars._typing import PolarsDataType
+
+
+def sequence_to_pyseries(
+    name: str,
+    values: Sequence[Any],
+    dtype: PolarsDataType | None = None,
+    *,
+    strict: bool = True,
+    nan_to_null: bool = False,
+) -> PySeries:
+    """Construct a PySeries from a sequence."""
+    python_dtype: type | None = None
+
+    if isinstance(values, range):
+        return range_to_series(name, values, dtype=dtype)._s
+
+    # empty sequence
+    if len(values) == 0 and dtype is None:
+        # if dtype for empty sequence could be guessed
+        # (e.g comparisons between self and other), default to Null
+        dtype = Null
+
+    # lists defer to subsequent handling; identify nested type
+    elif dtype in (List, Array):
+        python_dtype = list
+
+    # infer temporal type handling
+    py_temporal_types = {date, datetime, timedelta, time}
+    pl_temporal_types = {Date, Datetime, Duration, Time}
+
+    value = get_first_non_none(values)
+    if value is not None:
+        if (
+            dataclasses.is_dataclass(value)
+            or is_pydantic_model(value)
+            or is_namedtuple(value.__class__)
+            or is_sqlalchemy_row(value)
+        ) and dtype != Object:
+            return pl.DataFrame(values).to_struct(name)._s
+        elif (
+            not isinstance(value, dict) and isinstance(value, Mapping)
+        ) and dtype != Object:
+            return _sequence_of_dict_to_pydf(
+                value,
+                data=values,
+                strict=strict,
+                schema_overrides=None,
+                infer_schema_length=None,
+                schema=None,
+            ).to_struct(name, [])
+        elif isinstance(value, range) and dtype is None:
+            values = [range_to_series("", v) for v in values]
+        else:
+            # for temporal dtypes:
+            # * if the values are integer, we take the physical branch.
+            # * if the values are python types, take the temporal branch.
+            # * if the values are ISO-8601 strings, init then convert via strptime.
+            # * if the values are floats/other dtypes, this is an error.
+            if dtype in py_temporal_types and isinstance(value, int):
+                dtype = parse_into_dtype(dtype)  # construct from integer
+            elif (
+                dtype in pl_temporal_types or type(dtype) in pl_temporal_types
+            ) and not isinstance(value, int):
+                python_dtype = dtype_to_py_type(dtype)  # type: ignore[arg-type]
+
+    # if values are enums, infer and load the appropriate dtype/values
+    if issubclass(type(value), PyEnum):
+        if dtype is None and python_dtype is None:
+            with contextlib.suppress(TypeError):
+                dtype = Enum(type(value))
+        if not isinstance(value, (str, int)):
+            values = [v.value for v in values]
+
+    # physical branch
+    # flat data
+    if (
+        dtype is not None
+        and is_polars_dtype(dtype)
+        and not dtype.is_nested()
+        and dtype != Unknown
+        and (python_dtype is None)
+    ):
+        constructor = polars_type_to_constructor(dtype)
+        pyseries = _construct_series_with_fallbacks(
+            constructor, name, values, dtype, strict=strict
+        )
+        if dtype in (
+            Date,
+            Datetime,
+            Duration,
+            Time,
+            Boolean,
+            Categorical,
+            Enum,
+        ) or isinstance(dtype, (Categorical, Decimal)):
+            if pyseries.dtype() != dtype:
+                pyseries = pyseries.cast(dtype, strict=strict, wrap_numerical=False)
+
+        # Uninstanced Decimal is a bit special and has various inference paths
+        if dtype == Decimal:
+            if pyseries.dtype() == String:
+                pyseries = pyseries.str_to_decimal_infer(inference_length=0)
+            elif pyseries.dtype().is_float():
+                # Go through string so we infer an appropriate scale.
+                pyseries = pyseries.cast(
+                    String, strict=strict, wrap_numerical=False
+                ).str_to_decimal_infer(inference_length=0)
+            elif pyseries.dtype().is_integer() or pyseries.dtype() == Null:
+                pyseries = pyseries.cast(
+                    Decimal(scale=0), strict=strict, wrap_numerical=False
+                )
+            elif not isinstance(pyseries.dtype(), Decimal):
+                msg = f"can't convert {pyseries.dtype()} to Decimal"
+                raise TypeError(msg)
+
+        return pyseries
+
+    elif dtype == Struct:
+        # This is very bad. Goes via rows? And needs to do outer nullability separate.
+        # It also has two data passes.
+        # TODO: eventually go into struct builder
+        struct_schema = dtype.to_schema() if isinstance(dtype, Struct) else None
+        empty = {}  # type: ignore[var-annotated]
+
+        data = []
+        invalid = []
+        for i, v in enumerate(values):
+            if v is None:
+                invalid.append(i)
+                data.append(empty)
+            else:
+                data.append(v)
+
+        return plc.sequence_to_pydf(
+            data=data,
+            schema=struct_schema,
+            orient="row",
+        ).to_struct(name, invalid)
+
+    if python_dtype is None:
+        if value is None:
+            constructor = polars_type_to_constructor(Null)
+            return constructor(name, values, strict)
+
+        # generic default dtype
+        python_dtype = type(value)
+
+    # temporal branch
+    if issubclass(python_dtype, tuple(py_temporal_types)):
+        if dtype is None:
+            dtype = parse_into_dtype(python_dtype)  # construct from integer
+        elif dtype in py_temporal_types:
+            dtype = parse_into_dtype(dtype)
+
+        values_dtype = None if value is None else try_parse_into_dtype(type(value))
+        if values_dtype is not None and values_dtype.is_float():
+            msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}"
+            raise TypeError(
+                # we do not accept float values as temporal; if this is
+                # required, the caller should explicitly cast to int first.
+                msg
+            )
+
+        # We use the AnyValue builder to create the datetime array
+        # We store the values internally as UTC and set the timezone
+        py_series = PySeries.new_from_any_values(name, values, strict)
+
+        time_unit = getattr(dtype, "time_unit", None)
+        time_zone = getattr(dtype, "time_zone", None)
+
+        if time_unit is None or values_dtype == Date:
+            s = wrap_s(py_series)
+        else:
+            s = wrap_s(py_series).dt.cast_time_unit(time_unit)
+
+        if (values_dtype == Date) & (dtype == Datetime):
+            s = s.cast(Datetime(time_unit or "us"))
+
+        if dtype == Datetime and time_zone is not None:
+            return s.dt.convert_time_zone(time_zone)._s
+        return s._s
+
+    elif (
+        _check_for_numpy(value)
+        and isinstance(value, np.ndarray)
+        and len(value.shape) == 1
+    ):
+        n_elems = len(value)
+        if all(len(v) == n_elems for v in values):
+            # can take (much) faster path if all lists are the same length
+            return numpy_to_pyseries(
+                name,
+                np.vstack(values),
+                strict=strict,
+                nan_to_null=nan_to_null,
+            )
+        else:
+            return PySeries.new_series_list(
+                name,
+                [
+                    numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null)
+                    for v in values
+                ],
+                strict,
+            )
+
+    elif python_dtype in (list, tuple):
+        if dtype is None:
+            return PySeries.new_from_any_values(name, values, strict=strict)
+        elif dtype == Object:
+            return PySeries.new_object(name, values, strict)
+        else:
+            if (inner_dtype := getattr(dtype, "inner", None)) is not None:
+                pyseries_list = [
+                    None
+                    if value is None
+                    else sequence_to_pyseries(
+                        "",
+                        value,
+                        inner_dtype,
+                        strict=strict,
+                        nan_to_null=nan_to_null,
+                    )
+                    for value in values
+                ]
+                pyseries = PySeries.new_series_list(name, pyseries_list, strict)
+            else:
+                pyseries = PySeries.new_from_any_values_and_dtype(
+                    name, values, dtype, strict=strict
+                )
+            if dtype != pyseries.dtype():
+                pyseries = pyseries.cast(dtype, strict=False, wrap_numerical=False)
+            return pyseries
+
+    elif python_dtype == pl.Series:
+        return PySeries.new_series_list(
+            name, [v._s if v is not None else None for v in values], strict
+        )
+
+    elif python_dtype == PySeries:
+        return PySeries.new_series_list(name, values, strict)
+    else:
+        constructor = py_type_to_constructor(python_dtype)
+        if constructor == PySeries.new_object:
+            try:
+                srs = PySeries.new_from_any_values(name, values, strict)
+                if _check_for_numpy(python_dtype, check_type=False) and isinstance(
+                    np.bool_(True), np.generic
+                ):
+                    dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char)
+                    return srs.cast(dtype, strict=strict, wrap_numerical=False)
+                else:
+                    return srs
+
+            except RuntimeError:
+                return PySeries.new_from_any_values(name, values, strict=strict)
+
+        return _construct_series_with_fallbacks(
+            constructor, name, values, dtype, strict=strict
+        )
+
+
+def _construct_series_with_fallbacks(
+    constructor: Callable[[str, Sequence[Any], bool], PySeries],
+    name: str,
+    values: Sequence[Any],
+    dtype: PolarsDataType | None,
+    *,
+    strict: bool,
+) -> PySeries:
+    """Construct Series, with fallbacks for basic type mismatch (eg: bool/int)."""
+    try:
+        return constructor(name, values, strict)
+    except (TypeError, OverflowError) as e:
+        # This retry with i64 is related to https://github.com/pola-rs/polars/issues/17231
+        # Essentially, when given a [0, u64::MAX] then it would Overflow.
+        if (
+            isinstance(e, OverflowError)
+            and dtype is None
+            and constructor == PySeries.new_opt_i64
+        ):
+            return _construct_series_with_fallbacks(
+                PySeries.new_opt_u64, name, values, dtype, strict=strict
+            )
+        elif dtype is None:
+            return PySeries.new_from_any_values(name, values, strict=strict)
+        else:
+            return PySeries.new_from_any_values_and_dtype(
+                name, values, dtype, strict=strict
+            )
+
+
+def iterable_to_pyseries(
+    name: str,
+    values: Iterable[Any],
+    dtype: PolarsDataType | None = None,
+    *,
+    chunk_size: int = 1_000_000,
+    strict: bool = True,
+) -> PySeries:
+    """Construct a PySeries from an iterable/generator."""
+    if not isinstance(values, (Generator, Iterator)):
+        values = iter(values)
+
+    def to_series_chunk(values: list[Any], dtype: PolarsDataType | None) -> Series:
+        return pl.Series(
+            name=name,
+            values=values,
+            dtype=dtype,
+            strict=strict,
+        )
+
+    n_chunks = 0
+    series: Series = None  # type: ignore[assignment]
+    while True:
+        slice_values = list(islice(values, chunk_size))
+        if not slice_values:
+            break
+        schunk = to_series_chunk(slice_values, dtype)
+        if series is None:
+            series = schunk
+            dtype = series.dtype
+        else:
+            series.append(schunk)
+            n_chunks += 1
+
+    if series is None:
+        series = to_series_chunk([], dtype)
+    if n_chunks > 0:
+        series.rechunk(in_place=True)
+
+    return series._s
+
+
+def pandas_to_pyseries(
+    name: str,
+    values: pd.Series[Any] | pd.Index[Any] | pd.DatetimeIndex,
+    dtype: PolarsDataType | None = None,
+    *,
+    strict: bool = True,
+    nan_to_null: bool = True,
+) -> PySeries:
+    """Construct a PySeries from a pandas Series or DatetimeIndex."""
+    if not name and values.name is not None:
+        name = str(values.name)
+    if is_simple_numpy_backed_pandas_series(values):
+        return pl.Series(
+            name, values.to_numpy(), dtype=dtype, nan_to_null=nan_to_null, strict=strict
+        )._s
+    if not _PYARROW_AVAILABLE:
+        msg = (
+            "pyarrow is required for converting a pandas series to Polars, "
+            "unless it is a simple numpy-backed one "
+            "(e.g. 'int64', 'bool', 'float32' - not 'Int64')"
+        )
+        raise ImportError(msg)
+    return arrow_to_pyseries(
+        name,
+        plc.pandas_series_to_arrow(values, nan_to_null=nan_to_null),
+        dtype=dtype,
+        strict=strict,
+    )
+
+
+def arrow_to_pyseries(
+    name: str,
+    values: pa.Array,
+    dtype: PolarsDataType | None = None,
+    *,
+    strict: bool = True,
+    rechunk: bool = True,
+) -> PySeries:
+    """Construct a PySeries from an Arrow array."""
+    array = plc.coerce_arrow(values)
+
+    # special handling of empty categorical arrays
+    if (
+        len(array) == 0
+        and isinstance(array.type, pa.DictionaryType)
+        and array.type.value_type
+        in (
+            pa.utf8(),
+            pa.large_utf8(),
+        )
+    ):
+        pys = pl.Series(name, [], dtype=Categorical)._s
+
+    elif not hasattr(array, "num_chunks"):
+        pys = PySeries.from_arrow(name, array)
+    else:
+        if array.num_chunks > 1:
+            # somehow going through ffi with a structarray
+            # returns the first chunk every time
+            if isinstance(array.type, pa.StructType):
+                pys = PySeries.from_arrow(name, array.combine_chunks())
+            else:
+                it = array.iterchunks()
+                pys = PySeries.from_arrow(name, next(it))
+                for a in it:
+                    pys.append(PySeries.from_arrow(name, a))
+        elif array.num_chunks == 0:
+            pys = PySeries.from_arrow(name, pa.nulls(0, type=array.type))
+        else:
+            pys = PySeries.from_arrow(name, array.chunks[0])
+
+        if rechunk:
+            pys.rechunk(in_place=True)
+
+    return (
+        pys.cast(dtype, strict=strict, wrap_numerical=False)
+        if dtype is not None
+        else pys
+    )
+
+
+def numpy_to_pyseries(
+    name: str,
+    values: np.ndarray[Any, Any],
+    *,
+    strict: bool = True,
+    nan_to_null: bool = False,
+) -> PySeries:
+    """Construct a PySeries from a numpy array."""
+    values = np.ascontiguousarray(values)
+
+    if values.ndim == 1:
+        values, dtype = numpy_values_and_dtype(values)
+        constructor = numpy_type_to_constructor(values, dtype)
+        return constructor(
+            name, values, nan_to_null if dtype in (np.float32, np.float64) else strict
+        )
+    else:
+        original_shape = values.shape
+        values_1d = values.reshape(-1)
+
+        from polars.series.utils import _with_no_check_length
+
+        py_s = _with_no_check_length(
+            lambda: numpy_to_pyseries(
+                name,
+                values_1d,
+                strict=strict,
+                nan_to_null=nan_to_null,
+            )
+        )
+        return wrap_s(py_s).reshape(original_shape)._s
+
+
+def series_to_pyseries(
+    name: str | None,
+    values: Series,
+    *,
+    dtype: PolarsDataType | None = None,
+    strict: bool = True,
+) -> PySeries:
+    """Construct a new PySeries from a Polars Series."""
+    s = values.clone()
+    if dtype is not None and dtype != s.dtype:
+        s = s.cast(dtype, strict=strict)
+    if name is not None:
+        s = s.alias(name)
+    return s._s
+
+
+def dataframe_to_pyseries(
+    name: str | None,
+    values: DataFrame,
+    *,
+    dtype: PolarsDataType | None = None,
+    strict: bool = True,
+) -> PySeries:
+    """Construct a new PySeries from a Polars DataFrame."""
+    if values.width > 1:
+        name = name or ""
+        s = values.to_struct(name)
+    elif values.width == 1:
+        s = values.to_series()
+        if name is not None:
+            s = s.alias(name)
+    else:
+        msg = "cannot initialize Series from DataFrame without any columns"
+        raise TypeError(msg)
+
+    if dtype is not None and dtype != s.dtype:
+        s = s.cast(dtype, strict=strict)
+
+    return s._s
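The constructors in this module are internal; users reach them through the public pl.Series and pl.from_pandas entry points. As a rough illustration of the dispatch described above, here is a minimal sketch (not part of the diff, assuming polars 1.x behavior with numpy and pandas installed):

from datetime import date

import numpy as np
import pandas as pd
import polars as pl

# plain sequences dispatch to sequence_to_pyseries
seq = pl.Series("seq", [1, 2, None])
assert seq.null_count() == 1

# the temporal branch: date values with a Datetime dtype get the
# time-unit cast shown in sequence_to_pyseries
dt = pl.Series("dt", [date(2020, 1, 1)], dtype=pl.Datetime("ms"))
assert dt.dtype == pl.Datetime("ms")

# 2D numpy input dispatches to numpy_to_pyseries, which flattens,
# builds a 1D series, then reshapes to an Array dtype
arr = pl.Series("np", np.array([[1, 2], [3, 4]]))
assert arr.dtype == pl.Array(pl.Int64, 2)

# pandas input dispatches to pandas_to_pyseries; with nan_to_null,
# NaN becomes a missing value
pds = pl.Series("pd", pd.Series([1.0, float("nan")]), nan_to_null=True)
assert pds.null_count() == 1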