polars-runtime-compat 1.34.0b3-cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b5-cp39-abi3-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of polars-runtime-compat has been flagged as potentially problematic in the registry.
Files changed (204)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
  202. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  204. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/interchange/utils.py DELETED
@@ -1,170 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import TYPE_CHECKING
-
-from polars.datatypes import (
-    Boolean,
-    Categorical,
-    Date,
-    Datetime,
-    Duration,
-    Enum,
-    Float32,
-    Float64,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    String,
-    Time,
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-)
-from polars.interchange.protocol import DtypeKind, Endianness
-
-if TYPE_CHECKING:
-    from polars._typing import PolarsDataType
-    from polars.datatypes import DataTypeClass
-    from polars.interchange.protocol import Dtype
-
-NE = Endianness.NATIVE
-
-polars_dtype_to_dtype_map: dict[DataTypeClass, Dtype] = {
-    Int8: (DtypeKind.INT, 8, "c", NE),
-    Int16: (DtypeKind.INT, 16, "s", NE),
-    Int32: (DtypeKind.INT, 32, "i", NE),
-    Int64: (DtypeKind.INT, 64, "l", NE),
-    UInt8: (DtypeKind.UINT, 8, "C", NE),
-    UInt16: (DtypeKind.UINT, 16, "S", NE),
-    UInt32: (DtypeKind.UINT, 32, "I", NE),
-    UInt64: (DtypeKind.UINT, 64, "L", NE),
-    Float32: (DtypeKind.FLOAT, 32, "f", NE),
-    Float64: (DtypeKind.FLOAT, 64, "g", NE),
-    Boolean: (DtypeKind.BOOL, 1, "b", NE),
-    String: (DtypeKind.STRING, 8, "U", NE),
-    Date: (DtypeKind.DATETIME, 32, "tdD", NE),
-    Time: (DtypeKind.DATETIME, 64, "ttu", NE),
-    Datetime: (DtypeKind.DATETIME, 64, "tsu:", NE),
-    Duration: (DtypeKind.DATETIME, 64, "tDu", NE),
-    Categorical: (DtypeKind.CATEGORICAL, 32, "I", NE),
-    Enum: (DtypeKind.CATEGORICAL, 32, "I", NE),
-}
-
-
-def polars_dtype_to_dtype(dtype: PolarsDataType) -> Dtype:
-    """Convert Polars data type to interchange protocol data type."""
-    try:
-        result = polars_dtype_to_dtype_map[dtype.base_type()]
-    except KeyError as exc:
-        msg = f"data type {dtype!r} not supported by the interchange protocol"
-        raise ValueError(msg) from exc
-
-    # Handle instantiated data types
-    if isinstance(dtype, Datetime):
-        return _datetime_to_dtype(dtype)
-    elif isinstance(dtype, Duration):
-        return _duration_to_dtype(dtype)
-
-    return result
-
-
-def _datetime_to_dtype(dtype: Datetime) -> Dtype:
-    tu = dtype.time_unit[0]
-    tz = dtype.time_zone if dtype.time_zone is not None else ""
-    arrow_c_type = f"ts{tu}:{tz}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-def _duration_to_dtype(dtype: Duration) -> Dtype:
-    tu = dtype.time_unit[0]
-    arrow_c_type = f"tD{tu}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-dtype_to_polars_dtype_map: dict[DtypeKind, dict[int, PolarsDataType]] = {
-    DtypeKind.INT: {
-        8: Int8,
-        16: Int16,
-        32: Int32,
-        64: Int64,
-    },
-    DtypeKind.UINT: {
-        8: UInt8,
-        16: UInt16,
-        32: UInt32,
-        64: UInt64,
-    },
-    DtypeKind.FLOAT: {
-        32: Float32,
-        64: Float64,
-    },
-    DtypeKind.BOOL: {
-        1: Boolean,
-        8: Boolean,
-    },
-    DtypeKind.STRING: {8: String},
-}
-
-
-def dtype_to_polars_dtype(dtype: Dtype) -> PolarsDataType:
-    """Convert interchange protocol data type to Polars data type."""
-    kind, bit_width, format_str, _ = dtype
-
-    if kind == DtypeKind.DATETIME:
-        return _temporal_dtype_to_polars_dtype(format_str, dtype)
-    elif kind == DtypeKind.CATEGORICAL:
-        return Enum
-
-    try:
-        return dtype_to_polars_dtype_map[kind][bit_width]
-    except KeyError as exc:
-        msg = f"unsupported data type: {dtype!r}"
-        raise NotImplementedError(msg) from exc
-
-
-def _temporal_dtype_to_polars_dtype(format_str: str, dtype: Dtype) -> PolarsDataType:
-    if (match := re.fullmatch(r"ts([mun]):(.*)", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        time_zone = match.group(2) or None
-        return Datetime(
-            time_unit=time_unit,  # type: ignore[arg-type]
-            time_zone=time_zone,
-        )
-    elif format_str == "tdD":
-        return Date
-    elif format_str == "ttu":
-        return Time
-    elif (match := re.fullmatch(r"tD([mun])", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        return Duration(time_unit=time_unit)  # type: ignore[arg-type]
-
-    msg = f"unsupported temporal data type: {dtype!r}"
-    raise NotImplementedError(msg)
-
-
-def get_buffer_length_in_elements(buffer_size: int, dtype: Dtype) -> int:
-    """Get the length of a buffer in elements."""
-    bits_per_element = dtype[1]
-    bytes_per_element, rest = divmod(bits_per_element, 8)
-    if rest > 0:
-        msg = f"cannot get buffer length for buffer with dtype {dtype!r}"
-        raise ValueError(msg)
-    return buffer_size // bytes_per_element
-
-
-def polars_dtype_to_data_buffer_dtype(dtype: PolarsDataType) -> PolarsDataType:
-    """Get the data type of the data buffer."""
-    if dtype.is_integer() or dtype.is_float() or dtype == Boolean:
-        return dtype
-    elif dtype.is_temporal():
-        return Int32 if dtype == Date else Int64
-    elif dtype == String:
-        return UInt8
-    elif dtype in (Enum, Categorical):
-        return UInt32
-
-    msg = f"unsupported data type: {dtype}"
-    raise NotImplementedError(msg)
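For context on what the b5 wheel drops here: a minimal sketch of how the deleted interchange helpers behaved. It runs against 1.34.0b3 (or mainline polars, which ships this module at the same path), not b5; the outputs in the comments are illustrative.

    # Round-trip a Polars dtype through the deleted interchange helpers.
    import polars as pl
    from polars.interchange.utils import (
        dtype_to_polars_dtype,
        polars_dtype_to_dtype,
    )

    # A zoned datetime is parameterized, so it maps to an Arrow-style
    # "ts<unit>:<tz>" format string rather than the bare map entry.
    dtype = polars_dtype_to_dtype(pl.Datetime("us", "UTC"))
    print(dtype)  # (DtypeKind.DATETIME, 64, 'tsu:UTC', Endianness.NATIVE)

    # The reverse mapping parses the format string back into a dtype.
    print(dtype_to_polars_dtype(dtype))  # Datetime(time_unit='us', time_zone='UTC')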
polars/io/__init__.py DELETED
@@ -1,64 +0,0 @@
-"""Functions for reading data."""
-
-from polars.io.avro import read_avro
-from polars.io.clipboard import read_clipboard
-from polars.io.csv import read_csv, read_csv_batched, scan_csv
-from polars.io.database import read_database, read_database_uri
-from polars.io.delta import read_delta, scan_delta
-from polars.io.iceberg import scan_iceberg
-from polars.io.ipc import read_ipc, read_ipc_schema, read_ipc_stream, scan_ipc
-from polars.io.json import read_json
-from polars.io.ndjson import read_ndjson, scan_ndjson
-from polars.io.parquet import (
-    read_parquet,
-    read_parquet_metadata,
-    read_parquet_schema,
-    scan_parquet,
-)
-from polars.io.partition import (
-    BasePartitionContext,
-    KeyedPartition,
-    KeyedPartitionContext,
-    PartitionByKey,
-    PartitionMaxSize,
-    PartitionParted,
-)
-from polars.io.plugins import _defer as defer
-from polars.io.pyarrow_dataset import scan_pyarrow_dataset
-from polars.io.scan_options import ScanCastOptions
-from polars.io.spreadsheet import read_excel, read_ods
-
-__all__ = [
-    "defer",
-    "PartitionByKey",
-    "PartitionMaxSize",
-    "PartitionParted",
-    "KeyedPartition",
-    "BasePartitionContext",
-    "KeyedPartitionContext",
-    "read_avro",
-    "read_clipboard",
-    "read_csv",
-    "read_csv_batched",
-    "read_database",
-    "read_database_uri",
-    "read_delta",
-    "read_excel",
-    "read_ipc",
-    "read_ipc_schema",
-    "read_ipc_stream",
-    "read_json",
-    "read_ndjson",
-    "read_ods",
-    "read_parquet",
-    "read_parquet_metadata",
-    "read_parquet_schema",
-    "scan_csv",
-    "scan_delta",
-    "scan_iceberg",
-    "scan_ipc",
-    "scan_ndjson",
-    "scan_parquet",
-    "scan_pyarrow_dataset",
-    "ScanCastOptions",
-]
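As a usage note: this deleted `__init__.py` is what exposed the readers and scanners as a flat `polars.io` namespace in b3. A minimal sketch under b3 (file paths hypothetical):

    # Names resolved through the deleted polars/io/__init__.py re-exports.
    from polars.io import read_csv, scan_parquet

    df = read_csv("data.csv")            # eager read into a DataFrame
    lf = scan_parquet("data/*.parquet")  # lazy scan, evaluated on .collect()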
polars/io/_utils.py DELETED
@@ -1,317 +0,0 @@
-from __future__ import annotations
-
-import glob
-import re
-from contextlib import contextmanager
-from io import BytesIO, StringIO
-from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, overload
-
-from polars._dependencies import _FSSPEC_AVAILABLE, fsspec
-from polars._utils.various import (
-    is_int_sequence,
-    is_str_sequence,
-    normalize_filepath,
-)
-from polars.exceptions import NoDataError
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator, Sequence
-    from contextlib import AbstractContextManager as ContextManager
-
-
-def parse_columns_arg(
-    columns: Sequence[str] | Sequence[int] | str | int | None,
-) -> tuple[Sequence[int] | None, Sequence[str] | None]:
-    """
-    Parse the `columns` argument of an I/O function.
-
-    Disambiguates between column names and column indices input.
-
-    Returns
-    -------
-    tuple
-        A tuple containing the columns as a projection and a list of column names.
-        Only one will be specified, the other will be `None`.
-    """
-    if columns is None:
-        return None, None
-
-    projection: Sequence[int] | None = None
-    column_names: Sequence[str] | None = None
-
-    if isinstance(columns, str):
-        column_names = [columns]
-    elif isinstance(columns, int):
-        projection = [columns]
-    elif is_str_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        column_names = columns
-    elif is_int_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        projection = columns
-    else:
-        msg = "the `columns` argument should contain a list of all integers or all string values"
-        raise TypeError(msg)
-
-    return projection, column_names
-
-
-def _ensure_columns_are_unique(columns: Sequence[str] | Sequence[int]) -> None:
-    if len(columns) != len(set(columns)):
-        msg = f"`columns` arg should only have unique values, got {columns!r}"
-        raise ValueError(msg)
-
-
-def parse_row_index_args(
-    row_index_name: str | None = None,
-    row_index_offset: int = 0,
-) -> tuple[str, int] | None:
-    """
-    Parse the `row_index_name` and `row_index_offset` arguments of an I/O function.
-
-    The Rust functions take a single tuple rather than two separate arguments.
-    """
-    if row_index_name is None:
-        return None
-    else:
-        return (row_index_name, row_index_offset)
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]: ...
-
-
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = None,
-    *,
-    use_pyarrow: bool = False,
-    raise_if_empty: bool = True,
-    storage_options: dict[str, Any] | None = None,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]:
-    """
-    Prepare file argument.
-
-    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
-    Returned value is always usable as a context.
-
-    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
-    A local path is returned as a string.
-    An http URL is read into a buffer and returned as a `BytesIO`.
-
-    When `encoding` is not `utf8` or `utf8-lossy`, the whole file is
-    first read in Python and decoded using the specified encoding and
-    returned as a `BytesIO` (for usage with `read_csv`). If encoding
-    ends with "-lossy", characters that can't be decoded are replaced
-    with `�`.
-
-    A `bytes` file is returned as a `BytesIO` if `use_pyarrow=True`.
-
-    When fsspec is installed, remote file(s) is (are) opened with
-    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
-    If encoding is not `utf8` or `utf8-lossy`, decoding is handled by
-    fsspec too.
-    """
-    storage_options = storage_options.copy() if storage_options else {}
-    if storage_options and not _FSSPEC_AVAILABLE:
-        msg = "`fsspec` is required for `storage_options` argument"
-        raise ImportError(msg)
-
-    # Small helper to use a variable as context
-    @contextmanager
-    def managed_file(file: Any) -> Iterator[Any]:
-        try:
-            yield file
-        finally:
-            pass
-
-    has_utf8_utf8_lossy_encoding = (
-        encoding in {"utf8", "utf8-lossy"} if encoding else True
-    )
-    encoding_str = encoding if encoding else "utf8"
-    encoding_str, encoding_errors = (
-        (encoding_str[:-6], "replace")
-        if encoding_str.endswith("-lossy")
-        else (encoding_str, "strict")
-    )
-
-    # PyArrow allows directories, so we only check that something is not
-    # a dir if we are not using PyArrow
-    check_not_dir = not use_pyarrow
-
-    if isinstance(file, bytes):
-        if not has_utf8_utf8_lossy_encoding:
-            file = file.decode(encoding_str, errors=encoding_errors).encode("utf8")
-        return _check_empty(
-            BytesIO(file), context="bytes", raise_if_empty=raise_if_empty
-        )
-
-    if isinstance(file, StringIO):
-        return _check_empty(
-            BytesIO(file.read().encode("utf8")),
-            context="StringIO",
-            read_position=file.tell(),
-            raise_if_empty=raise_if_empty,
-        )
-
-    if isinstance(file, BytesIO):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(
-            _check_empty(
-                b=file,
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        )
-
-    if isinstance(file, Path):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read_bytes()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context=f"Path ({file!r})",
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(normalize_filepath(file, check_not_directory=check_not_dir))
-
-    if isinstance(file, str):
-        # make sure that this is before fsspec
-        # as fsspec needs requests to be installed
-        # to read from http
-        if looks_like_url(file):
-            return process_file_url(file, encoding_str)
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            # check if it is a local file
-            if infer_storage_options(file)["protocol"] == "file":
-                # (lossy) utf8
-                if has_utf8_utf8_lossy_encoding:
-                    return managed_file(
-                        normalize_filepath(file, check_not_directory=check_not_dir)
-                    )
-                # decode first
-                with Path(file).open(
-                    encoding=encoding_str, errors=encoding_errors
-                ) as f:
-                    return _check_empty(
-                        BytesIO(f.read().encode("utf8")),
-                        context=f"{file!r}",
-                        raise_if_empty=raise_if_empty,
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open(file, **storage_options)
-
-    if isinstance(file, list) and bool(file) and all(isinstance(f, str) for f in file):
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            if has_utf8_utf8_lossy_encoding:
-                if all(infer_storage_options(f)["protocol"] == "file" for f in file):
-                    return managed_file(
-                        [
-                            normalize_filepath(f, check_not_directory=check_not_dir)
-                            for f in file
-                        ]
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open_files(file, **storage_options)
-
-    if isinstance(file, str):
-        file = normalize_filepath(file, check_not_directory=check_not_dir)
-        if not has_utf8_utf8_lossy_encoding:
-            with Path(file).open(encoding=encoding_str, errors=encoding_errors) as f:
-                return _check_empty(
-                    BytesIO(f.read().encode("utf8")),
-                    context=f"{file!r}",
-                    raise_if_empty=raise_if_empty,
-                )
-
-    return managed_file(file)
-
-
-def _check_empty(
-    b: BytesIO, *, context: str, raise_if_empty: bool, read_position: int | None = None
-) -> BytesIO:
-    if raise_if_empty and b.getbuffer().nbytes == 0:
-        hint = (
-            f" (buffer position = {read_position}; try seek(0) before reading?)"
-            if context in ("StringIO", "BytesIO") and read_position
-            else ""
-        )
-        msg = f"empty data from {context}{hint}"
-        raise NoDataError(msg)
-    return b
-
-
-def looks_like_url(path: str) -> bool:
-    return re.match("^(ht|f)tps?://", path, re.IGNORECASE) is not None
-
-
-def process_file_url(path: str, encoding: str | None = None) -> BytesIO:
-    from urllib.request import urlopen
-
-    with urlopen(path) as f:
-        if not encoding or encoding in {"utf8", "utf8-lossy"}:
-            return BytesIO(f.read())
-        else:
-            return BytesIO(f.read().decode(encoding).encode("utf8"))
-
-
-def is_glob_pattern(file: str) -> bool:
-    return any(char in file for char in ["*", "?", "["])
-
-
-def is_local_file(file: str) -> bool:
-    try:
-        next(glob.iglob(file, recursive=True))  # noqa: PTH207
-    except StopIteration:
-        return False
-    else:
-        return True
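The docstrings above describe the dispatch behavior; a minimal sketch of the deleted helpers in action under b3 (the outputs in the comments are illustrative):

    # Column-argument disambiguation and path-classification helpers.
    from polars.io._utils import is_glob_pattern, looks_like_url, parse_columns_arg

    print(parse_columns_arg(["a", "b"]))  # (None, ['a', 'b']) -> column names
    print(parse_columns_arg([0, 2]))      # ([0, 2], None)     -> projection
    print(parse_columns_arg("a"))         # (None, ['a'])
    print(looks_like_url("https://example.com/data.csv"))  # True
    print(is_glob_pattern("data/*.csv"))                   # True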
polars/io/avro.py DELETED
@@ -1,49 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from pathlib import Path
-from typing import IO, TYPE_CHECKING
-
-from polars._utils.various import normalize_filepath
-from polars._utils.wrap import wrap_df
-from polars.io._utils import parse_columns_arg
-
-with contextlib.suppress(ImportError):  # Module not available when building docs
-    from polars._plr import PyDataFrame
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_avro(
-    source: str | Path | IO[bytes] | bytes,
-    *,
-    columns: list[int] | list[str] | None = None,
-    n_rows: int | None = None,
-) -> DataFrame:
-    """
-    Read into a DataFrame from Apache Avro format.
-
-    Parameters
-    ----------
-    source
-        Path to a file or a file-like object (by "file-like object" we refer to objects
-        that have a `read()` method, such as a file handler like the builtin `open`
-        function, or a `BytesIO` instance). For file-like objects, the stream position
-        may not be updated accordingly after reading.
-    columns
-        Columns to select. Accepts a list of column indices (starting at zero) or a list
-        of column names.
-    n_rows
-        Stop reading from Apache Avro file after reading `n_rows`.
-
-    Returns
-    -------
-    DataFrame
-    """
-    if isinstance(source, (str, Path)):
-        source = normalize_filepath(source)
-    projection, column_names = parse_columns_arg(columns)
-
-    pydf = PyDataFrame.read_avro(source, column_names, projection, n_rows)
-    return wrap_df(pydf)
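Per the docstring just deleted, `source` may be a path or any object with a `read()` method. A minimal usage sketch against b3 (the file path is hypothetical):

    # Eager Avro read; column selection accepts names or zero-based indices.
    import io
    import polars as pl

    df = pl.read_avro("data.avro", columns=["a", "b"], n_rows=100)

    # File-like sources work too; note the caveat above that the stream
    # position may not advance as expected after the read.
    with open("data.avro", "rb") as f:
        df = pl.read_avro(io.BytesIO(f.read()))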
polars/io/clipboard.py DELETED
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from io import StringIO
-from typing import TYPE_CHECKING, Any
-
-from polars.io.csv.functions import read_csv
-
-with contextlib.suppress(ImportError):
-    from polars._plr import read_clipboard_string as _read_clipboard_string
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_clipboard(separator: str = "\t", **kwargs: Any) -> DataFrame:
-    """
-    Read text from clipboard and pass to `read_csv`.
-
-    Useful for reading data copied from Excel or other similar spreadsheet software.
-
-    Parameters
-    ----------
-    separator
-        Single byte character to use as separator parsing csv from clipboard.
-    kwargs
-        Additional arguments passed to `read_csv`.
-
-    See Also
-    --------
-    read_csv : Read a csv file into a DataFrame.
-    DataFrame.write_clipboard : Write a DataFrame to the clipboard.
-    """
-    csv_string: str = _read_clipboard_string()
-    io_string = StringIO(csv_string)
-    return read_csv(source=io_string, separator=separator, **kwargs)
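As the docstring notes, this is a thin wrapper that feeds clipboard text to `read_csv`. A minimal sketch under b3 (assumes a tab-separated cell range was just copied from a spreadsheet):

    import polars as pl

    # Extra keyword arguments are forwarded verbatim to read_csv.
    df = pl.read_clipboard(separator="\t", try_parse_dates=True)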
polars/io/cloud/credential_provider/__init__.py DELETED
@@ -1,17 +0,0 @@
-from polars.io.cloud.credential_provider._providers import (
-    CredentialProvider,
-    CredentialProviderAWS,
-    CredentialProviderAzure,
-    CredentialProviderFunction,
-    CredentialProviderFunctionReturn,
-    CredentialProviderGCP,
-)
-
-__all__ = [
-    "CredentialProvider",
-    "CredentialProviderAWS",
-    "CredentialProviderAzure",
-    "CredentialProviderFunction",
-    "CredentialProviderFunctionReturn",
-    "CredentialProviderGCP",
-]
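These re-exports were the public entry point for cloud credential providers in b3. A minimal sketch of how they were wired into a scan (the bucket and AWS profile names are hypothetical):

    import polars as pl
    from polars.io.cloud.credential_provider import CredentialProviderAWS

    # The provider object is passed to the scan; it resolves credentials
    # (here from a named AWS profile) when the query actually runs.
    lf = pl.scan_parquet(
        "s3://my-bucket/data/*.parquet",
        credential_provider=CredentialProviderAWS(profile_name="dev"),
    )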