polars-runtime-compat 1.34.0b3__cp39-abi3-win_arm64.whl → 1.34.0b4__cp39-abi3-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic; see the release advisory for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,72 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, Any
4
-
5
- from polars._dependencies import pyarrow as pa
6
- from polars._utils.construction.utils import get_first_non_none
7
-
8
- if TYPE_CHECKING:
9
- from polars._dependencies import pandas as pd
10
-
11
-
12
def pandas_series_to_arrow(
    values: pd.Series[Any] | pd.Index[Any],
    *,
    length: int | None = None,
    nan_to_null: bool = True,
) -> pa.Array:
    """
    Convert a pandas Series to an Arrow Array.

    Parameters
    ----------
    values : :class:`pandas.Series` or :class:`pandas.Index`.
        Series to convert to arrow
    nan_to_null : bool, default = True
        Interpret `NaN` as missing values.
    length : int, optional
        in case all values are null, create a null array of this length.
        if unset, length is inferred from values.

    Returns
    -------
    :class:`pyarrow.Array`
    """
    pd_dtype = getattr(values, "dtype", None)

    if not pd_dtype:
        # A "Series" without a usable dtype is really a DataFrame: pandas
        # returns one when a duplicated column is selected via df["a"].
        msg = "duplicate column names found: "
        raise ValueError(
            msg,
            f"{values.columns.tolist()!s}",  # type: ignore[union-attr]
        )

    if pd_dtype == "object":
        head = get_first_non_none(values.values)  # type: ignore[arg-type]
        if isinstance(head, str):
            # object column of strings -> force large_utf8 up front
            return pa.array(values, pa.large_utf8(), from_pandas=nan_to_null)
        if head is None:
            # all-null object column: build an explicit null array
            return pa.nulls(length or len(values), pa.large_utf8())

    # any other dtype (or mixed objects): let pyarrow infer the type
    return pa.array(values, from_pandas=nan_to_null)
53
-
54
-
55
def coerce_arrow(array: pa.Array) -> pa.Array:
    """..."""
    import pyarrow.compute as pc

    is_multi_chunk = hasattr(array, "num_chunks") and array.num_chunks > 1
    if is_multi_chunk and pa.types.is_dictionary(array.type):
        # small integer keys can often not be combined, so let's already cast
        # to the uint32 used by polars
        narrow_key_checks = (
            pa.types.is_int8,
            pa.types.is_uint8,
            pa.types.is_int16,
            pa.types.is_uint16,
            pa.types.is_int32,
        )
        if any(check(array.type.index_type) for check in narrow_key_checks):
            target = pa.dictionary(pa.uint32(), pa.large_string())
            array = pc.cast(array, target).combine_chunks()
    return array
@@ -1,560 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
- from collections.abc import Generator, Iterator, Mapping
5
- from datetime import date, datetime, time, timedelta
6
- from enum import Enum as PyEnum
7
- from itertools import islice
8
- from typing import (
9
- TYPE_CHECKING,
10
- Any,
11
- Callable,
12
- )
13
-
14
- import polars._reexport as pl
15
- import polars._utils.construction as plc
16
- from polars._dependencies import (
17
- _PYARROW_AVAILABLE,
18
- _check_for_numpy,
19
- dataclasses,
20
- )
21
- from polars._dependencies import numpy as np
22
- from polars._dependencies import pandas as pd
23
- from polars._dependencies import pyarrow as pa
24
- from polars._utils.construction.dataframe import _sequence_of_dict_to_pydf
25
- from polars._utils.construction.utils import (
26
- get_first_non_none,
27
- is_namedtuple,
28
- is_pydantic_model,
29
- is_simple_numpy_backed_pandas_series,
30
- is_sqlalchemy_row,
31
- )
32
- from polars._utils.various import (
33
- range_to_series,
34
- )
35
- from polars._utils.wrap import wrap_s
36
- from polars.datatypes import (
37
- Array,
38
- Boolean,
39
- Categorical,
40
- Date,
41
- Datetime,
42
- Decimal,
43
- Duration,
44
- Enum,
45
- List,
46
- Null,
47
- Object,
48
- String,
49
- Struct,
50
- Time,
51
- Unknown,
52
- dtype_to_py_type,
53
- is_polars_dtype,
54
- numpy_char_code_to_dtype,
55
- parse_into_dtype,
56
- try_parse_into_dtype,
57
- )
58
- from polars.datatypes.constructor import (
59
- numpy_type_to_constructor,
60
- numpy_values_and_dtype,
61
- polars_type_to_constructor,
62
- py_type_to_constructor,
63
- )
64
-
65
- with contextlib.suppress(ImportError): # Module not available when building docs
66
- from polars._plr import PySeries
67
-
68
- if TYPE_CHECKING:
69
- from collections.abc import Iterable, Sequence
70
-
71
- from polars import DataFrame, Series
72
- from polars._dependencies import pandas as pd
73
- from polars._typing import PolarsDataType
74
-
75
-
76
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: PolarsDataType | None = None,
    *,
    strict: bool = True,
    nan_to_null: bool = False,
) -> PySeries:
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name of the resulting series.
    values
        Python sequence holding the data.
    dtype
        Target Polars dtype; if None, the dtype is inferred from the values.
    strict
        Raise instead of coercing on type mismatches during construction.
    nan_to_null
        Interpret float NaN values as missing (nulls) where applicable.
    """
    python_dtype: type | None = None

    # ranges have a dedicated fast constructor
    if isinstance(values, range):
        return range_to_series(name, values, dtype=dtype)._s

    # empty sequence
    if len(values) == 0 and dtype is None:
        # if dtype for empty sequence could be guessed
        # (e.g comparisons between self and other), default to Null
        dtype = Null

    # lists defer to subsequent handling; identify nested type
    elif dtype in (List, Array):
        python_dtype = list

    # infer temporal type handling
    py_temporal_types = {date, datetime, timedelta, time}
    pl_temporal_types = {Date, Datetime, Duration, Time}

    # first non-None element drives most of the dtype inference below
    value = get_first_non_none(values)
    if value is not None:
        if (
            dataclasses.is_dataclass(value)
            or is_pydantic_model(value)
            or is_namedtuple(value.__class__)
            or is_sqlalchemy_row(value)
        ) and dtype != Object:
            # record-like rows: build a DataFrame, then collapse to a Struct series
            return pl.DataFrame(values).to_struct(name)._s
        elif (
            not isinstance(value, dict) and isinstance(value, Mapping)
        ) and dtype != Object:
            # non-dict Mapping (e.g. a custom mapping type): go via row-wise pydf
            return _sequence_of_dict_to_pydf(
                value,
                data=values,
                strict=strict,
                schema_overrides=None,
                infer_schema_length=None,
                schema=None,
            ).to_struct(name, [])
        elif isinstance(value, range) and dtype is None:
            values = [range_to_series("", v) for v in values]
        else:
            # for temporal dtypes:
            # * if the values are integer, we take the physical branch.
            # * if the values are python types, take the temporal branch.
            # * if the values are ISO-8601 strings, init then convert via strptime.
            # * if the values are floats/other dtypes, this is an error.
            if dtype in py_temporal_types and isinstance(value, int):
                dtype = parse_into_dtype(dtype)  # construct from integer
            elif (
                dtype in pl_temporal_types or type(dtype) in pl_temporal_types
            ) and not isinstance(value, int):
                python_dtype = dtype_to_py_type(dtype)  # type: ignore[arg-type]

        # if values are enums, infer and load the appropriate dtype/values
        if issubclass(type(value), PyEnum):
            if dtype is None and python_dtype is None:
                with contextlib.suppress(TypeError):
                    dtype = Enum(type(value))
            if not isinstance(value, (str, int)):
                # extract the underlying payload of non-str/int enum members
                values = [v.value for v in values]

    # physical branch
    # flat data
    if (
        dtype is not None
        and is_polars_dtype(dtype)
        and not dtype.is_nested()
        and dtype != Unknown
        and (python_dtype is None)
    ):
        constructor = polars_type_to_constructor(dtype)
        pyseries = _construct_series_with_fallbacks(
            constructor, name, values, dtype, strict=strict
        )
        if dtype in (
            Date,
            Datetime,
            Duration,
            Time,
            Boolean,
            Categorical,
            Enum,
        ) or isinstance(dtype, (Categorical, Decimal)):
            # constructor may have produced the physical repr; cast to logical dtype
            if pyseries.dtype() != dtype:
                pyseries = pyseries.cast(dtype, strict=strict, wrap_numerical=False)

        # Uninstanced Decimal is a bit special and has various inference paths
        if dtype == Decimal:
            if pyseries.dtype() == String:
                pyseries = pyseries.str_to_decimal_infer(inference_length=0)
            elif pyseries.dtype().is_float():
                # Go through string so we infer an appropriate scale.
                pyseries = pyseries.cast(
                    String, strict=strict, wrap_numerical=False
                ).str_to_decimal_infer(inference_length=0)
            elif pyseries.dtype().is_integer() or pyseries.dtype() == Null:
                pyseries = pyseries.cast(
                    Decimal(scale=0), strict=strict, wrap_numerical=False
                )
            elif not isinstance(pyseries.dtype(), Decimal):
                msg = f"can't convert {pyseries.dtype()} to Decimal"
                raise TypeError(msg)

        return pyseries

    elif dtype == Struct:
        # This is very bad. Goes via rows? And needs to do outer nullability separate.
        # It also has two data passes.
        # TODO: eventually go into struct builder
        struct_schema = dtype.to_schema() if isinstance(dtype, Struct) else None
        empty = {}  # type: ignore[var-annotated]

        # replace outer nulls with empty rows, remembering their positions
        data = []
        invalid = []
        for i, v in enumerate(values):
            if v is None:
                invalid.append(i)
                data.append(empty)
            else:
                data.append(v)

        return plc.sequence_to_pydf(
            data=data,
            schema=struct_schema,
            orient="row",
        ).to_struct(name, invalid)

    if python_dtype is None:
        if value is None:
            # all-null sequence with no target dtype: build a Null series
            constructor = polars_type_to_constructor(Null)
            return constructor(name, values, strict)

        # generic default dtype
        python_dtype = type(value)

    # temporal branch
    if issubclass(python_dtype, tuple(py_temporal_types)):
        if dtype is None:
            dtype = parse_into_dtype(python_dtype)  # construct from integer
        elif dtype in py_temporal_types:
            dtype = parse_into_dtype(dtype)

        values_dtype = None if value is None else try_parse_into_dtype(type(value))
        if values_dtype is not None and values_dtype.is_float():
            msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}"
            raise TypeError(
                # we do not accept float values as temporal; if this is
                # required, the caller should explicitly cast to int first.
                msg
            )

        # We use the AnyValue builder to create the datetime array
        # We store the values internally as UTC and set the timezone
        py_series = PySeries.new_from_any_values(name, values, strict)

        time_unit = getattr(dtype, "time_unit", None)
        time_zone = getattr(dtype, "time_zone", None)

        if time_unit is None or values_dtype == Date:
            s = wrap_s(py_series)
        else:
            s = wrap_s(py_series).dt.cast_time_unit(time_unit)

        if (values_dtype == Date) & (dtype == Datetime):
            s = s.cast(Datetime(time_unit or "us"))

        if dtype == Datetime and time_zone is not None:
            return s.dt.convert_time_zone(time_zone)._s
        return s._s

    elif (
        _check_for_numpy(value)
        and isinstance(value, np.ndarray)
        and len(value.shape) == 1
    ):
        n_elems = len(value)
        if all(len(v) == n_elems for v in values):
            # can take (much) faster path if all lists are the same length
            return numpy_to_pyseries(
                name,
                np.vstack(values),
                strict=strict,
                nan_to_null=nan_to_null,
            )
        else:
            # ragged arrays: build each row as its own series, then a list series
            return PySeries.new_series_list(
                name,
                [
                    numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null)
                    for v in values
                ],
                strict,
            )

    elif python_dtype in (list, tuple):
        if dtype is None:
            return PySeries.new_from_any_values(name, values, strict=strict)
        elif dtype == Object:
            return PySeries.new_object(name, values, strict)
        else:
            if (inner_dtype := getattr(dtype, "inner", None)) is not None:
                # nested dtype with known inner type: recurse per element
                pyseries_list = [
                    None
                    if value is None
                    else sequence_to_pyseries(
                        "",
                        value,
                        inner_dtype,
                        strict=strict,
                        nan_to_null=nan_to_null,
                    )
                    for value in values
                ]
                pyseries = PySeries.new_series_list(name, pyseries_list, strict)
            else:
                pyseries = PySeries.new_from_any_values_and_dtype(
                    name, values, dtype, strict=strict
                )
            if dtype != pyseries.dtype():
                pyseries = pyseries.cast(dtype, strict=False, wrap_numerical=False)
            return pyseries

    elif python_dtype == pl.Series:
        return PySeries.new_series_list(
            name, [v._s if v is not None else None for v in values], strict
        )

    elif python_dtype == PySeries:
        return PySeries.new_series_list(name, values, strict)
    else:
        constructor = py_type_to_constructor(python_dtype)
        if constructor == PySeries.new_object:
            try:
                srs = PySeries.new_from_any_values(name, values, strict)
                if _check_for_numpy(python_dtype, check_type=False) and isinstance(
                    np.bool_(True), np.generic
                ):
                    # numpy scalar type: map its char code back to a polars dtype
                    dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char)
                    return srs.cast(dtype, strict=strict, wrap_numerical=False)
                else:
                    return srs

            except RuntimeError:
                return PySeries.new_from_any_values(name, values, strict=strict)

        return _construct_series_with_fallbacks(
            constructor, name, values, dtype, strict=strict
        )
334
-
335
-
336
def _construct_series_with_fallbacks(
    constructor: Callable[[str, Sequence[Any], bool], PySeries],
    name: str,
    values: Sequence[Any],
    dtype: PolarsDataType | None,
    *,
    strict: bool,
) -> PySeries:
    """Construct Series, with fallbacks for basic type mismatch (eg: bool/int)."""
    try:
        return constructor(name, values, strict)
    except (TypeError, OverflowError) as exc:
        # Retrying with u64 relates to https://github.com/pola-rs/polars/issues/17231:
        # a sequence like [0, u64::MAX] overflows the default i64 constructor.
        overflowed_i64 = (
            isinstance(exc, OverflowError)
            and dtype is None
            and constructor == PySeries.new_opt_i64
        )
        if overflowed_i64:
            return _construct_series_with_fallbacks(
                PySeries.new_opt_u64, name, values, dtype, strict=strict
            )
        if dtype is None:
            return PySeries.new_from_any_values(name, values, strict=strict)
        return PySeries.new_from_any_values_and_dtype(
            name, values, dtype, strict=strict
        )
364
-
365
-
366
def iterable_to_pyseries(
    name: str,
    values: Iterable[Any],
    dtype: PolarsDataType | None = None,
    *,
    chunk_size: int = 1_000_000,
    strict: bool = True,
) -> PySeries:
    """Construct a PySeries from an iterable/generator."""
    if not isinstance(values, (Generator, Iterator)):
        values = iter(values)

    def _build_chunk(chunk: list[Any], chunk_dtype: PolarsDataType | None) -> Series:
        # materialise one slice of the iterable as its own Series
        return pl.Series(
            name=name,
            values=chunk,
            dtype=chunk_dtype,
            strict=strict,
        )

    result: Series = None  # type: ignore[assignment]
    total_chunks = 0
    while chunk := list(islice(values, chunk_size)):
        piece = _build_chunk(chunk, dtype)
        if result is None:
            result = piece
            # lock in the inferred dtype so later chunks agree with the first
            dtype = result.dtype
        else:
            result.append(piece)
        total_chunks += 1

    if result is None:
        # iterable was empty: produce an empty series of the requested dtype
        result = _build_chunk([], dtype)
    if total_chunks > 0:
        result.rechunk(in_place=True)

    return result._s
406
-
407
-
408
def pandas_to_pyseries(
    name: str,
    values: pd.Series[Any] | pd.Index[Any] | pd.DatetimeIndex,
    dtype: PolarsDataType | None = None,
    *,
    strict: bool = True,
    nan_to_null: bool = True,
) -> PySeries:
    """Construct a PySeries from a pandas Series or DatetimeIndex."""
    if not name and values.name is not None:
        name = str(values.name)

    # fast path: plain numpy-backed pandas data converts without pyarrow
    if is_simple_numpy_backed_pandas_series(values):
        return pl.Series(
            name, values.to_numpy(), dtype=dtype, nan_to_null=nan_to_null, strict=strict
        )._s

    if not _PYARROW_AVAILABLE:
        msg = (
            "pyarrow is required for converting a pandas series to Polars, "
            "unless it is a simple numpy-backed one "
            "(e.g. 'int64', 'bool', 'float32' - not 'Int64')"
        )
        raise ImportError(msg)

    arrow_array = plc.pandas_series_to_arrow(values, nan_to_null=nan_to_null)
    return arrow_to_pyseries(name, arrow_array, dtype=dtype, strict=strict)
436
-
437
-
438
def arrow_to_pyseries(
    name: str,
    values: pa.Array,
    dtype: PolarsDataType | None = None,
    *,
    strict: bool = True,
    rechunk: bool = True,
) -> PySeries:
    """Construct a PySeries from an Arrow array."""
    arr = plc.coerce_arrow(values)

    # special handling of empty categorical arrays
    is_empty_string_dict = (
        len(arr) == 0
        and isinstance(arr.type, pa.DictionaryType)
        and arr.type.value_type
        in (
            pa.utf8(),
            pa.large_utf8(),
        )
    )

    if is_empty_string_dict:
        pys = pl.Series(name, [], dtype=Categorical)._s
    elif not hasattr(arr, "num_chunks"):
        # contiguous (non-chunked) array: hand it straight to the Rust side
        pys = PySeries.from_arrow(name, arr)
    else:
        n_chunks = arr.num_chunks
        if n_chunks == 0:
            pys = PySeries.from_arrow(name, pa.nulls(0, type=arr.type))
        elif n_chunks == 1:
            pys = PySeries.from_arrow(name, arr.chunks[0])
        elif isinstance(arr.type, pa.StructType):
            # somehow going through ffi with a structarray
            # returns the first chunk every time
            pys = PySeries.from_arrow(name, arr.combine_chunks())
        else:
            chunk_iter = arr.iterchunks()
            pys = PySeries.from_arrow(name, next(chunk_iter))
            for chunk in chunk_iter:
                pys.append(PySeries.from_arrow(name, chunk))

        if rechunk:
            pys.rechunk(in_place=True)

    if dtype is None:
        return pys
    return pys.cast(dtype, strict=strict, wrap_numerical=False)
487
-
488
-
489
def numpy_to_pyseries(
    name: str,
    values: np.ndarray[Any, Any],
    *,
    strict: bool = True,
    nan_to_null: bool = False,
) -> PySeries:
    """Construct a PySeries from a numpy array."""
    values = np.ascontiguousarray(values)

    if values.ndim != 1:
        # multi-dimensional input: flatten, build 1D, then restore the shape
        original_shape = values.shape
        flat = values.reshape(-1)

        from polars.series.utils import _with_no_check_length

        py_s = _with_no_check_length(
            lambda: numpy_to_pyseries(
                name,
                flat,
                strict=strict,
                nan_to_null=nan_to_null,
            )
        )
        return wrap_s(py_s).reshape(original_shape)._s

    values, np_dtype = numpy_values_and_dtype(values)
    constructor = numpy_type_to_constructor(values, np_dtype)
    # for float arrays the third constructor argument carries nan_to_null
    is_float = np_dtype in (np.float32, np.float64)
    return constructor(name, values, nan_to_null if is_float else strict)
520
-
521
-
522
def series_to_pyseries(
    name: str | None,
    values: Series,
    *,
    dtype: PolarsDataType | None = None,
    strict: bool = True,
) -> PySeries:
    """Construct a new PySeries from a Polars Series."""
    out = values.clone()
    needs_cast = dtype is not None and dtype != out.dtype
    if needs_cast:
        out = out.cast(dtype, strict=strict)
    if name is not None:
        out = out.alias(name)
    return out._s
536
-
537
-
538
def dataframe_to_pyseries(
    name: str | None,
    values: DataFrame,
    *,
    dtype: PolarsDataType | None = None,
    strict: bool = True,
) -> PySeries:
    """Construct a new PySeries from a Polars DataFrame."""
    width = values.width
    if width == 0:
        msg = "cannot initialize Series from DataFrame without any columns"
        raise TypeError(msg)

    if width == 1:
        out = values.to_series()
        if name is not None:
            out = out.alias(name)
    else:
        # multiple columns collapse into a single Struct series
        out = values.to_struct(name or "")

    if dtype is not None and dtype != out.dtype:
        out = out.cast(dtype, strict=strict)

    return out._s