polars-runtime-compat 1.34.0b3-cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b4-cp39-abi3-manylinux_2_24_aarch64.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of polars-runtime-compat might be problematic.

Files changed (203)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/io/ndjson.py DELETED
@@ -1,332 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, Literal
-
-from polars._utils.deprecation import deprecate_renamed_parameter
-from polars._utils.various import is_path_or_str_sequence, normalize_filepath
-from polars._utils.wrap import wrap_ldf
-from polars.datatypes import N_INFER_DEFAULT
-from polars.io._utils import parse_row_index_args
-from polars.io.cloud.credential_provider._builder import (
-    _init_credential_provider_builder,
-)
-
-with contextlib.suppress(ImportError):  # Module not available when building docs
-    from polars._plr import PyLazyFrame
-
-if TYPE_CHECKING:
-    from polars import DataFrame, LazyFrame
-    from polars._typing import SchemaDefinition
-    from polars.io.cloud import CredentialProviderFunction
-
-
-def read_ndjson(
-    source: str
-    | Path
-    | IO[str]
-    | IO[bytes]
-    | bytes
-    | list[str]
-    | list[Path]
-    | list[IO[str]]
-    | list[IO[bytes]],
-    *,
-    schema: SchemaDefinition | None = None,
-    schema_overrides: SchemaDefinition | None = None,
-    infer_schema_length: int | None = N_INFER_DEFAULT,
-    batch_size: int | None = 1024,
-    n_rows: int | None = None,
-    low_memory: bool = False,
-    rechunk: bool = False,
-    row_index_name: str | None = None,
-    row_index_offset: int = 0,
-    ignore_errors: bool = False,
-    storage_options: dict[str, Any] | None = None,
-    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
-    retries: int = 2,
-    file_cache_ttl: int | None = None,
-    include_file_paths: str | None = None,
-) -> DataFrame:
-    r"""
-    Read into a DataFrame from a newline delimited JSON file.
-
-    Parameters
-    ----------
-    source
-        Path to a file or a file-like object (by "file-like object" we refer to objects
-        that have a `read()` method, such as a file handler like the builtin `open`
-        function, or a `BytesIO` instance). For file-like objects, the stream position
-        may not be updated accordingly after reading.
-    schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
-        The DataFrame schema may be declared in several ways:
-
-        * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
-        * As a list of column names; in this case types are automatically inferred.
-        * As a list of (name,type) pairs; this is equivalent to the dictionary form.
-
-        If you supply a list of column names that does not match the names in the
-        underlying data, the names given here will overwrite them. The number
-        of names given in the schema should match the underlying data dimensions.
-    schema_overrides : dict, default None
-        Support type specification or override of one or more columns; note that
-        any dtypes inferred from the schema param will be overridden.
-    infer_schema_length
-        The maximum number of rows to scan for schema inference.
-        If set to `None`, the full data may be scanned *(this is slow)*.
-    batch_size
-        Number of rows to read in each batch.
-    n_rows
-        Stop reading from JSON file after reading `n_rows`.
-    low_memory
-        Reduce memory pressure at the expense of performance.
-    rechunk
-        Reallocate to contiguous memory when all chunks/files are parsed.
-    row_index_name
-        If not None, this will insert a row index column with the given name into
-        the DataFrame.
-    row_index_offset
-        Offset to start the row index column (only used if the name is set).
-    ignore_errors
-        Return `Null` if parsing fails because of schema mismatches.
-    storage_options
-        Options that indicate how to connect to a cloud provider.
-
-        The cloud providers currently supported are AWS, GCP, and Azure.
-        See supported keys here:
-
-        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
-        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
-        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
-        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
-          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
-
-        If `storage_options` is not provided, Polars will try to infer the information
-        from environment variables.
-    credential_provider
-        Provide a function that can be called to provide cloud storage
-        credentials. The function is expected to return a dictionary of
-        credential keys along with an optional credential expiry time.
-
-        .. warning::
-            This functionality is considered **unstable**. It may be changed
-            at any point without it being considered a breaking change.
-    retries
-        Number of retries if accessing a cloud instance fails.
-    file_cache_ttl
-        Amount of time to keep downloaded cloud files since their last access time,
-        in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
-        (which defaults to 1 hour) if not given.
-    include_file_paths
-        Include the path of the source file(s) as a column with this name.
-
-    See Also
-    --------
-    scan_ndjson : Lazily read from an NDJSON file or multiple files via glob patterns.
-
-    Warnings
-    --------
-    Calling `read_ndjson().lazy()` is an antipattern as this forces Polars to
-    materialize a full ndjson file and therefore cannot push any optimizations into
-    the reader. Therefore always prefer `scan_ndjson` if you want to work with
-    `LazyFrame` s.
-
-    Examples
-    --------
-    >>> from io import StringIO
-    >>> json_str = '{"foo":1,"bar":6}\n{"foo":2,"bar":7}\n{"foo":3,"bar":8}\n'
-    >>> pl.read_ndjson(StringIO(json_str))
-    shape: (3, 2)
-    ┌─────┬─────┐
-    │ foo ┆ bar │
-    │ --- ┆ --- │
-    │ i64 ┆ i64 │
-    ╞═════╪═════╡
-    │ 1   ┆ 6   │
-    │ 2   ┆ 7   │
-    │ 3   ┆ 8   │
-    └─────┴─────┘
-    """
-    credential_provider_builder = _init_credential_provider_builder(
-        credential_provider, source, storage_options, "read_ndjson"
-    )
-
-    del credential_provider
-
-    return scan_ndjson(
-        source,
-        schema=schema,
-        schema_overrides=schema_overrides,
-        infer_schema_length=infer_schema_length,
-        batch_size=batch_size,
-        n_rows=n_rows,
-        low_memory=low_memory,
-        rechunk=rechunk,
-        row_index_name=row_index_name,
-        row_index_offset=row_index_offset,
-        ignore_errors=ignore_errors,
-        include_file_paths=include_file_paths,
-        retries=retries,
-        storage_options=storage_options,
-        credential_provider=credential_provider_builder,  # type: ignore[arg-type]
-        file_cache_ttl=file_cache_ttl,
-    ).collect()
-
-
-@deprecate_renamed_parameter("row_count_name", "row_index_name", version="0.20.4")
-@deprecate_renamed_parameter("row_count_offset", "row_index_offset", version="0.20.4")
-def scan_ndjson(
-    source: (
-        str
-        | Path
-        | IO[str]
-        | IO[bytes]
-        | bytes
-        | list[str]
-        | list[Path]
-        | list[IO[str]]
-        | list[IO[bytes]]
-    ),
-    *,
-    schema: SchemaDefinition | None = None,
-    schema_overrides: SchemaDefinition | None = None,
-    infer_schema_length: int | None = N_INFER_DEFAULT,
-    batch_size: int | None = 1024,
-    n_rows: int | None = None,
-    low_memory: bool = False,
-    rechunk: bool = False,
-    row_index_name: str | None = None,
-    row_index_offset: int = 0,
-    ignore_errors: bool = False,
-    storage_options: dict[str, Any] | None = None,
-    credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto",
-    retries: int = 2,
-    file_cache_ttl: int | None = None,
-    include_file_paths: str | None = None,
-) -> LazyFrame:
-    """
-    Lazily read from a newline delimited JSON file or multiple files via glob patterns.
-
-    This allows the query optimizer to push down predicates and projections to the scan
-    level, thereby potentially reducing memory overhead.
-
-    .. versionchanged:: 0.20.4
-        * The `row_count_name` parameter was renamed `row_index_name`.
-        * The `row_count_offset` parameter was renamed `row_index_offset`.
-
-    Parameters
-    ----------
-    source
-        Path to a file.
-    schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
-        The DataFrame schema may be declared in several ways:
-
-        * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
-        * As a list of column names; in this case types are automatically inferred.
-        * As a list of (name,type) pairs; this is equivalent to the dictionary form.
-
-        If you supply a list of column names that does not match the names in the
-        underlying data, the names given here will overwrite them. The number
-        of names given in the schema should match the underlying data dimensions.
-    schema_overrides : dict, default None
-        Support type specification or override of one or more columns; note that
-        any dtypes inferred from the schema param will be overridden.
-    infer_schema_length
-        The maximum number of rows to scan for schema inference.
-        If set to `None`, the full data may be scanned *(this is slow)*.
-    batch_size
-        Number of rows to read in each batch.
-    n_rows
-        Stop reading from JSON file after reading `n_rows`.
-    low_memory
-        Reduce memory pressure at the expense of performance.
-    rechunk
-        Reallocate to contiguous memory when all chunks/files are parsed.
-    row_index_name
-        If not None, this will insert a row index column with the given name into
-        the DataFrame.
-    row_index_offset
-        Offset to start the row index column (only used if the name is set).
-    ignore_errors
-        Return `Null` if parsing fails because of schema mismatches.
-    storage_options
-        Options that indicate how to connect to a cloud provider.
-
-        The cloud providers currently supported are AWS, GCP, and Azure.
-        See supported keys here:
-
-        * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
-        * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
-        * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
-        * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
-          `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.
-
-        If `storage_options` is not provided, Polars will try to infer the information
-        from environment variables.
-    credential_provider
-        Provide a function that can be called to provide cloud storage
-        credentials. The function is expected to return a dictionary of
-        credential keys along with an optional credential expiry time.
-
-        .. warning::
-            This functionality is considered **unstable**. It may be changed
-            at any point without it being considered a breaking change.
-    retries
-        Number of retries if accessing a cloud instance fails.
-    file_cache_ttl
-        Amount of time to keep downloaded cloud files since their last access time,
-        in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
-        (which defaults to 1 hour) if not given.
-    include_file_paths
-        Include the path of the source file(s) as a column with this name.
-    """
-    sources: list[str] | list[Path] | list[IO[str]] | list[IO[bytes]] = []
-    if isinstance(source, (str, Path)):
-        source = normalize_filepath(source, check_not_directory=False)
-    elif isinstance(source, list):
-        if is_path_or_str_sequence(source):
-            sources = [
-                normalize_filepath(source, check_not_directory=False)
-                for source in source
-            ]
-        else:
-            sources = source
-
-        source = None  # type: ignore[assignment]
-
-    if infer_schema_length == 0:
-        msg = "'infer_schema_length' should be positive"
-        raise ValueError(msg)
-
-    credential_provider_builder = _init_credential_provider_builder(
-        credential_provider, source, storage_options, "scan_ndjson"
-    )
-
-    del credential_provider
-
-    if storage_options:
-        storage_options = list(storage_options.items())  # type: ignore[assignment]
-    else:
-        # Handle empty dict input
-        storage_options = None
-
-    pylf = PyLazyFrame.new_from_ndjson(
-        source,
-        sources,
-        infer_schema_length=infer_schema_length,
-        schema=schema,
-        schema_overrides=schema_overrides,
-        batch_size=batch_size,
-        n_rows=n_rows,
-        low_memory=low_memory,
-        rechunk=rechunk,
-        row_index=parse_row_index_args(row_index_name, row_index_offset),
-        ignore_errors=ignore_errors,
-        include_file_paths=include_file_paths,
-        retries=retries,
-        cloud_options=storage_options,
-        credential_provider=credential_provider_builder,
-        file_cache_ttl=file_cache_ttl,
-    )
-    return wrap_ldf(pylf)
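
The docstrings above note that `read_ndjson` simply collects `scan_ndjson`, and that `read_ndjson().lazy()` defeats predicate and projection pushdown. A minimal sketch of the preferred lazy pattern, assuming a hypothetical glob path and column names:

    import polars as pl

    # Filters and column selections are pushed into the NDJSON scan itself,
    # so only matching rows and the two requested columns are materialized.
    lf = pl.scan_ndjson("logs/*.ndjson", infer_schema_length=100)
    result = (
        lf.filter(pl.col("status") == 200)
        .select("path", "latency_ms")
        .collect()
    )
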
polars/io/parquet/__init__.py DELETED
@@ -1,17 +0,0 @@
-from polars.io.parquet.field_overwrites import (
-    ParquetFieldOverwrites,
-)
-from polars.io.parquet.functions import (
-    read_parquet,
-    read_parquet_metadata,
-    read_parquet_schema,
-    scan_parquet,
-)
-
-__all__ = [
-    "ParquetFieldOverwrites",
-    "read_parquet",
-    "read_parquet_metadata",
-    "read_parquet_schema",
-    "scan_parquet",
-]
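
For context, this removed `__init__.py` only re-exported the public Parquet API; in the upstream polars package, `read_parquet` and `scan_parquet` are also reachable from the top-level namespace. A minimal sketch with hypothetical file paths:

    import polars as pl

    # The same functions listed in __all__ above, via the top-level namespace.
    df = pl.read_parquet("data/example.parquet")  # eager read
    lf = pl.scan_parquet("data/*.parquet")        # lazy scan with glob support
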
polars/io/parquet/field_overwrites.py DELETED
@@ -1,140 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Mapping, Sequence
-from typing import Any
-
-
-def _parquet_field_overwrites_dict_to_dict_list(
-    pqo: dict[str, ParquetFieldOverwrites],
-) -> list[dict[str, Any]]:
-    children = []
-    for name, child in pqo.items():
-        if child.name is not None:
-            msg = "ParquetFieldOverwrites has both a name in the dictionary and in the overwrites"
-            raise ValueError(msg)
-        child.name = name
-        children.append(_parquet_field_overwrites_to_dict(child))
-    return children
-
-
-def _parquet_field_overwrites_to_dict(pqo: ParquetFieldOverwrites) -> dict[str, Any]:
-    d: dict[str, Any] = {}
-
-    # Name
-    if pqo.name is not None:
-        d["name"] = pqo.name
-
-    # Children
-    if pqo.children is not None:
-        if isinstance(pqo.children, ParquetFieldOverwrites):
-            d["children"] = _parquet_field_overwrites_to_dict(pqo.children)
-        elif isinstance(pqo.children, dict):
-            d["children"] = _parquet_field_overwrites_dict_to_dict_list(pqo.children)
-        elif isinstance(pqo.children, list):
-            d["children"] = [_parquet_field_overwrites_to_dict(c) for c in pqo.children]
-        else:
-            msg = "invalid ParquetFieldOverwrites children type"
-            raise TypeError(msg)
-
-    if pqo.field_id is not None:
-        d["field_id"] = pqo.field_id
-
-    # Metadata
-    if pqo.metadata is not None:
-        d["metadata"] = list(pqo.metadata.items())
-
-    if pqo.required is not None:
-        d["required"] = pqo.required
-
-    return d
-
-
-class ParquetFieldOverwrites:
-    """
-    Write-option overwrites for individual Parquet fields.
-
-    .. warning::
-        This functionality is considered **unstable**. It may be changed
-        at any point without it being considered a breaking change.
-
-
-    Examples
-    --------
-    >>> lf = pl.LazyFrame(
-    ...     {
-    ...         "a": [None, 2, 3, 4],
-    ...         "b": [[1, 2, 3], [42], [13], [37]],
-    ...         "c": [
-    ...             {"x": "a", "y": 42},
-    ...             {"x": "b", "y": 13},
-    ...             {"x": "X", "y": 37},
-    ...             {"x": "Y", "y": 15},
-    ...         ],
-    ...     }
-    ... )  # doctest: +SKIP
-    >>> lf.sink_parquet(
-    ...     "./out/parquet",
-    ...     field_overwrites={
-    ...         "a": ParquetFieldOverwrites(metadata={"flat_from_polars": "yes"}),
-    ...         "b": ParquetFieldOverwrites(
-    ...             children=ParquetFieldOverwrites(metadata={"listitem": "yes"}),
-    ...             metadata={"list": "true"},
-    ...         ),
-    ...         "c": ParquetFieldOverwrites(
-    ...             children=[
-    ...                 ParquetFieldOverwrites(name="x", metadata={"md": "yes"}),
-    ...                 ParquetFieldOverwrites(name="y", metadata={"md2": "Yes!"}),
-    ...             ],
-    ...             metadata={"struct": "true"},
-    ...         ),
-    ...     },
-    ... )  # doctest: +SKIP
-    """
-
-    name: None | str  #: Name of the column or field
-    children: (
-        None
-        | ParquetFieldOverwrites
-        | list[ParquetFieldOverwrites]
-        | dict[str, ParquetFieldOverwrites]
-    )  #: Children of the column or field.
-    #
-    # For flat types (e.g. `Int32`), this should be `None`. For lists, this can be an
-    # unnamed `ParquetFieldOverwrites`. For structs, this can be a dict or list of
-    # named overwrites.
-
-    field_id: int | None = None  #: The field ID used in the Parquet schema
-    metadata: (
-        dict[str, None | str] | None
-    )  #: Arrow metadata added to the field before writing
-    required: bool | None = None  #: Is the field not allowed to have missing values
-
-    def __init__(
-        self,
-        *,
-        name: str | None = None,
-        children: (
-            None
-            | ParquetFieldOverwrites
-            | Sequence[ParquetFieldOverwrites]
-            | Mapping[str, ParquetFieldOverwrites]
-        ) = None,
-        field_id: int | None = None,
-        metadata: Mapping[str, None | str] | None = None,
-        required: bool | None = None,
-    ) -> None:
-        self.name = name
-
-        if isinstance(children, Mapping):
-            self.children = dict(children)
-        elif isinstance(children, Sequence):
-            self.children = list(children)
-        else:
-            self.children = children
-
-        self.field_id = field_id
-        if isinstance(metadata, Mapping):
-            self.metadata = dict(metadata)
-        else:
-            self.metadata = metadata
-        self.required = required
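
To make the serialization above concrete, here is a small illustrative sketch of what the private `_parquet_field_overwrites_to_dict` helper produces for a nested overwrite; the field names are invented, and since the helper is internal this is for illustration only:

    # A struct column "c" with one named child "x" carrying Arrow metadata.
    overwrites = ParquetFieldOverwrites(
        name="c",
        children={"x": ParquetFieldOverwrites(metadata={"md": "yes"})},
        metadata={"struct": "true"},
    )

    d = _parquet_field_overwrites_to_dict(overwrites)
    # d == {
    #     "name": "c",
    #     "children": [{"name": "x", "metadata": [("md", "yes")]}],
    #     "metadata": [("struct", "true")],
    # }

Note that metadata mappings are flattened into lists of key/value tuples, and that dictionary-style children have their dict keys written back into each child's `name` field before conversion.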