polars-runtime-compat 1.34.0b3__cp39-abi3-win_arm64.whl → 1.34.0b4__cp39-abi3-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/convert/normalize.py
DELETED
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
# This code is partially forked and adapted from pandas.
|
|
2
|
-
# Some parts are distributed under: https://github.com/pandas-dev/pandas/blob/main/LICENSE
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
from collections.abc import Iterable, Mapping, Sequence
|
|
7
|
-
from typing import TYPE_CHECKING, Any
|
|
8
|
-
|
|
9
|
-
from polars._utils.unstable import unstable
|
|
10
|
-
from polars.dataframe import DataFrame
|
|
11
|
-
from polars.datatypes.constants import N_INFER_DEFAULT
|
|
12
|
-
|
|
13
|
-
if TYPE_CHECKING:
|
|
14
|
-
from polars._typing import JSONEncoder
|
|
15
|
-
from polars.schema import Schema
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def _simple_json_normalize(
|
|
19
|
-
data: dict[Any, Any] | Sequence[dict[Any, Any] | Any],
|
|
20
|
-
separator: str,
|
|
21
|
-
max_level: int,
|
|
22
|
-
encoder: JSONEncoder,
|
|
23
|
-
) -> dict[Any, Any] | list[dict[Any, Any]] | Any:
|
|
24
|
-
if max_level > 0:
|
|
25
|
-
# expect dict or list (both are valid JSON objects)
|
|
26
|
-
normalized_json_object = {}
|
|
27
|
-
if isinstance(data, dict):
|
|
28
|
-
normalized_json_object = _normalize_json_ordered(
|
|
29
|
-
data=data,
|
|
30
|
-
separator=separator,
|
|
31
|
-
max_level=max_level,
|
|
32
|
-
encoder=encoder,
|
|
33
|
-
)
|
|
34
|
-
elif isinstance(data, list):
|
|
35
|
-
normalized_json_list = [
|
|
36
|
-
_simple_json_normalize(
|
|
37
|
-
row,
|
|
38
|
-
separator=separator,
|
|
39
|
-
max_level=max_level,
|
|
40
|
-
encoder=encoder,
|
|
41
|
-
)
|
|
42
|
-
for row in data
|
|
43
|
-
]
|
|
44
|
-
return normalized_json_list
|
|
45
|
-
return normalized_json_object
|
|
46
|
-
else:
|
|
47
|
-
return data
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _normalize_json(
|
|
51
|
-
data: Any,
|
|
52
|
-
key_string: str,
|
|
53
|
-
normalized_dict: dict[str, Any],
|
|
54
|
-
separator: str,
|
|
55
|
-
max_level: int,
|
|
56
|
-
encoder: JSONEncoder,
|
|
57
|
-
) -> dict[str, Any]:
|
|
58
|
-
"""
|
|
59
|
-
Main recursive function.
|
|
60
|
-
|
|
61
|
-
Designed for the most basic use case of `pl.json_normalize(data)`,
|
|
62
|
-
intended as a performance improvement.
|
|
63
|
-
|
|
64
|
-
Parameters
|
|
65
|
-
----------
|
|
66
|
-
data : Any
|
|
67
|
-
Type dependent on types contained within nested Json
|
|
68
|
-
key_string : str
|
|
69
|
-
New key (with separator(s) in) for data
|
|
70
|
-
normalized_dict : dict
|
|
71
|
-
The new normalized/flattened Json dict
|
|
72
|
-
separator : str, default '.'
|
|
73
|
-
Nested records will generate names separated by sep,
|
|
74
|
-
e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
|
|
75
|
-
max_level
|
|
76
|
-
recursion depth
|
|
77
|
-
encoder
|
|
78
|
-
Custom JSON encoder; if not given, `json.dumps` is used.
|
|
79
|
-
"""
|
|
80
|
-
if isinstance(data, dict):
|
|
81
|
-
if max_level > 0:
|
|
82
|
-
key_root = f"{key_string}{separator}" if key_string else ""
|
|
83
|
-
nested_max_level = max_level - 1
|
|
84
|
-
|
|
85
|
-
for key, value in data.items():
|
|
86
|
-
new_key = f"{key_root}{key}" if key_root else key
|
|
87
|
-
_normalize_json(
|
|
88
|
-
data=value,
|
|
89
|
-
key_string=new_key,
|
|
90
|
-
normalized_dict=normalized_dict,
|
|
91
|
-
separator=separator,
|
|
92
|
-
max_level=nested_max_level,
|
|
93
|
-
encoder=encoder,
|
|
94
|
-
)
|
|
95
|
-
else:
|
|
96
|
-
normalized_dict[key_string] = encoder(data)
|
|
97
|
-
return normalized_dict
|
|
98
|
-
else:
|
|
99
|
-
normalized_dict[key_string] = data
|
|
100
|
-
return normalized_dict
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def _normalize_json_ordered(
|
|
104
|
-
data: dict[str, Any],
|
|
105
|
-
separator: str,
|
|
106
|
-
max_level: int,
|
|
107
|
-
encoder: JSONEncoder,
|
|
108
|
-
) -> dict[str, Any]:
|
|
109
|
-
"""
|
|
110
|
-
Order the top level keys and then recursively go to depth.
|
|
111
|
-
|
|
112
|
-
Parameters
|
|
113
|
-
----------
|
|
114
|
-
data
|
|
115
|
-
Deserialized JSON objects (dict or list of dicts)
|
|
116
|
-
separator
|
|
117
|
-
Nested records will generate names separated by sep. e.g.,
|
|
118
|
-
for `separator=".", {"foo": {"bar": 0}}` -> foo.bar.
|
|
119
|
-
max_level
|
|
120
|
-
Max number of levels(depth of dict) to normalize.
|
|
121
|
-
encoder
|
|
122
|
-
Custom JSON encoder; if not given, `json.dumps` is used.
|
|
123
|
-
|
|
124
|
-
Returns
|
|
125
|
-
-------
|
|
126
|
-
dict or list of dicts, matching `normalized_json_object`
|
|
127
|
-
"""
|
|
128
|
-
top_, nested_data = {}, {}
|
|
129
|
-
for k, v in data.items():
|
|
130
|
-
if isinstance(v, dict):
|
|
131
|
-
nested_data[k] = v
|
|
132
|
-
else:
|
|
133
|
-
top_[k] = v
|
|
134
|
-
|
|
135
|
-
nested_ = _normalize_json(
|
|
136
|
-
data=nested_data,
|
|
137
|
-
key_string="",
|
|
138
|
-
normalized_dict={},
|
|
139
|
-
separator=separator,
|
|
140
|
-
max_level=max_level,
|
|
141
|
-
encoder=encoder,
|
|
142
|
-
)
|
|
143
|
-
return {**top_, **nested_}
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
@unstable()
|
|
147
|
-
def json_normalize(
|
|
148
|
-
data: dict[Any, Any] | Sequence[dict[Any, Any] | Any],
|
|
149
|
-
*,
|
|
150
|
-
separator: str = ".",
|
|
151
|
-
max_level: int | None = None,
|
|
152
|
-
schema: Schema | None = None,
|
|
153
|
-
strict: bool = True,
|
|
154
|
-
infer_schema_length: int | None = N_INFER_DEFAULT,
|
|
155
|
-
encoder: JSONEncoder | None = None,
|
|
156
|
-
) -> DataFrame:
|
|
157
|
-
"""
|
|
158
|
-
Normalize semi-structured deserialized JSON data into a flat table.
|
|
159
|
-
|
|
160
|
-
Dictionary objects that will not be unnested/normalized are encoded
|
|
161
|
-
as json string data. Unlike its pandas counterpart, this function will
|
|
162
|
-
not encode dictionaries as objects at any level.
|
|
163
|
-
|
|
164
|
-
.. warning::
|
|
165
|
-
This functionality is considered **unstable**. It may be changed
|
|
166
|
-
at any point without it being considered a breaking change.
|
|
167
|
-
|
|
168
|
-
Parameters
|
|
169
|
-
----------
|
|
170
|
-
data
|
|
171
|
-
Deserialized JSON objects.
|
|
172
|
-
separator
|
|
173
|
-
Nested records will generate names separated by sep. e.g.,
|
|
174
|
-
for `separator=".", {"foo": {"bar": 0}}` -> foo.bar.
|
|
175
|
-
max_level
|
|
176
|
-
Max number of levels(depth of dict) to normalize.
|
|
177
|
-
If None, normalizes all levels.
|
|
178
|
-
schema
|
|
179
|
-
Overwrite the `Schema` when the normalized data is passed to
|
|
180
|
-
the `DataFrame` constructor.
|
|
181
|
-
strict
|
|
182
|
-
Whether Polars should be strict when constructing the DataFrame.
|
|
183
|
-
infer_schema_length
|
|
184
|
-
Number of rows to take into consideration to determine the schema.
|
|
185
|
-
encoder
|
|
186
|
-
Custom JSON encoder function; if not given, `json.dumps` is used.
|
|
187
|
-
|
|
188
|
-
Examples
|
|
189
|
-
--------
|
|
190
|
-
>>> data = [
|
|
191
|
-
... {
|
|
192
|
-
... "id": 1,
|
|
193
|
-
... "name": "Cole Volk",
|
|
194
|
-
... "fitness": {"height": 180, "weight": 85},
|
|
195
|
-
... },
|
|
196
|
-
... {
|
|
197
|
-
... "id": 2,
|
|
198
|
-
... "name": "Faye Raker",
|
|
199
|
-
... "fitness": {"height": 155, "weight": 58},
|
|
200
|
-
... },
|
|
201
|
-
... {
|
|
202
|
-
... "name": "Mark Reg",
|
|
203
|
-
... "fitness": {"height": 170, "weight": 78},
|
|
204
|
-
... },
|
|
205
|
-
... ]
|
|
206
|
-
>>> pl.json_normalize(data, max_level=1)
|
|
207
|
-
shape: (3, 4)
|
|
208
|
-
┌──────┬────────────┬────────────────┬────────────────┐
|
|
209
|
-
│ id   ┆ name       ┆ fitness.height ┆ fitness.weight │
|
|
210
|
-
│ ---  ┆ ---        ┆ ---            ┆ ---            │
|
|
211
|
-
│ i64  ┆ str        ┆ i64            ┆ i64            │
|
|
212
|
-
╞══════╪════════════╪════════════════╪════════════════╡
|
|
213
|
-
│ 1    ┆ Cole Volk  ┆ 180            ┆ 85             │
|
|
214
|
-
│ 2    ┆ Faye Raker ┆ 155            ┆ 58             │
|
|
215
|
-
│ null ┆ Mark Reg   ┆ 170            ┆ 78             │
|
|
216
|
-
└──────┴────────────┴────────────────┴────────────────┘
|
|
217
|
-
|
|
218
|
-
Normalize to a specific depth, using a custom JSON encoder
|
|
219
|
-
(note that `orjson.dumps` encodes to bytes, not str).
|
|
220
|
-
|
|
221
|
-
>>> import orjson
|
|
222
|
-
>>> pl.json_normalize(data, max_level=0, encoder=orjson.dumps)
|
|
223
|
-
shape: (3, 3)
|
|
224
|
-
┌──────┬────────────┬───────────────────────────────┐
|
|
225
|
-
│ id   ┆ name       ┆ fitness                       │
|
|
226
|
-
│ ---  ┆ ---        ┆ ---                           │
|
|
227
|
-
│ i64  ┆ str        ┆ binary                        │
|
|
228
|
-
╞══════╪════════════╪═══════════════════════════════╡
|
|
229
|
-
│ 1    ┆ Cole Volk  ┆ b"{"height":180,"weight":85}" │
|
|
230
|
-
│ 2    ┆ Faye Raker ┆ b"{"height":155,"weight":58}" │
|
|
231
|
-
│ null ┆ Mark Reg   ┆ b"{"height":170,"weight":78}" │
|
|
232
|
-
└──────┴────────────┴───────────────────────────────┘
|
|
233
|
-
"""
|
|
234
|
-
if max_level is None:
|
|
235
|
-
max_level = 1 << 32 # eg: u32
|
|
236
|
-
max_level += 1
|
|
237
|
-
|
|
238
|
-
if isinstance(data, Sequence) and len(data) == 0:
|
|
239
|
-
return DataFrame(schema=schema)
|
|
240
|
-
elif isinstance(data, Mapping):
|
|
241
|
-
data = [data]
|
|
242
|
-
elif isinstance(data, Iterable) and not isinstance(data, str): # type: ignore[redundant-expr]
|
|
243
|
-
data = list(data)
|
|
244
|
-
else:
|
|
245
|
-
msg = "expected list or dict of objects"
|
|
246
|
-
raise ValueError(msg)
|
|
247
|
-
|
|
248
|
-
if encoder is None:
|
|
249
|
-
encoder = json.dumps
|
|
250
|
-
|
|
251
|
-
return DataFrame(
|
|
252
|
-
_simple_json_normalize(
|
|
253
|
-
data,
|
|
254
|
-
separator=separator,
|
|
255
|
-
max_level=max_level,
|
|
256
|
-
encoder=encoder,
|
|
257
|
-
),
|
|
258
|
-
schema=schema,
|
|
259
|
-
strict=strict,
|
|
260
|
-
infer_schema_length=infer_schema_length,
|
|
261
|
-
)
|
polars/dataframe/__init__.py
DELETED
polars/dataframe/_html.py
DELETED
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
"""Module for formatting output data in HTML."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import os
|
|
6
|
-
import re
|
|
7
|
-
from textwrap import dedent
|
|
8
|
-
from typing import TYPE_CHECKING
|
|
9
|
-
|
|
10
|
-
from polars._dependencies import html
|
|
11
|
-
|
|
12
|
-
if TYPE_CHECKING:
|
|
13
|
-
from collections.abc import Iterable
|
|
14
|
-
from types import TracebackType
|
|
15
|
-
|
|
16
|
-
from polars import DataFrame
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def replace_consecutive_spaces(s: str) -> str:
|
|
20
|
-
"""Replace consecutive spaces with HTML non-breaking spaces."""
|
|
21
|
-
return re.sub(r"( {2,})", lambda match: "&nbsp;" * len(match.group(0)), s)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class Tag:
|
|
25
|
-
"""Class for representing an HTML tag."""
|
|
26
|
-
|
|
27
|
-
def __init__(
|
|
28
|
-
self,
|
|
29
|
-
elements: list[str],
|
|
30
|
-
tag: str,
|
|
31
|
-
attributes: dict[str, str] | None = None,
|
|
32
|
-
) -> None:
|
|
33
|
-
self.tag = tag
|
|
34
|
-
self.elements = elements
|
|
35
|
-
self.attributes = attributes
|
|
36
|
-
|
|
37
|
-
def __enter__(self) -> None:
|
|
38
|
-
if self.attributes is not None:
|
|
39
|
-
s = f"<{self.tag} "
|
|
40
|
-
for k, v in self.attributes.items():
|
|
41
|
-
s += f'{k}="{v}" '
|
|
42
|
-
s = f"{s.rstrip()}>"
|
|
43
|
-
self.elements.append(s)
|
|
44
|
-
else:
|
|
45
|
-
self.elements.append(f"<{self.tag}>")
|
|
46
|
-
|
|
47
|
-
def __exit__(
|
|
48
|
-
self,
|
|
49
|
-
exc_type: type[BaseException] | None,
|
|
50
|
-
exc_val: BaseException | None,
|
|
51
|
-
exc_tb: TracebackType | None,
|
|
52
|
-
) -> None:
|
|
53
|
-
self.elements.append(f"</{self.tag}>")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class HTMLFormatter:
|
|
57
|
-
def __init__(
|
|
58
|
-
self,
|
|
59
|
-
df: DataFrame,
|
|
60
|
-
*,
|
|
61
|
-
max_cols: int = 75,
|
|
62
|
-
max_rows: int = 40,
|
|
63
|
-
from_series: bool = False,
|
|
64
|
-
) -> None:
|
|
65
|
-
self.df = df
|
|
66
|
-
self.elements: list[str] = []
|
|
67
|
-
self.max_cols = max_cols
|
|
68
|
-
self.max_rows = max_rows
|
|
69
|
-
self.from_series = from_series
|
|
70
|
-
self.row_idx: Iterable[int]
|
|
71
|
-
self.col_idx: Iterable[int]
|
|
72
|
-
|
|
73
|
-
if max_rows < df.height:
|
|
74
|
-
half, rest = divmod(max_rows, 2)
|
|
75
|
-
self.row_idx = [
|
|
76
|
-
*list(range(half + rest)),
|
|
77
|
-
-1,
|
|
78
|
-
*list(range(df.height - half, df.height)),
|
|
79
|
-
]
|
|
80
|
-
else:
|
|
81
|
-
self.row_idx = range(df.height)
|
|
82
|
-
if max_cols < df.width:
|
|
83
|
-
self.col_idx = [
|
|
84
|
-
*list(range(max_cols // 2)),
|
|
85
|
-
-1,
|
|
86
|
-
*list(range(df.width - max_cols // 2, df.width)),
|
|
87
|
-
]
|
|
88
|
-
else:
|
|
89
|
-
self.col_idx = range(df.width)
|
|
90
|
-
|
|
91
|
-
def write_header(self) -> None:
|
|
92
|
-
"""Write the header of an HTML table."""
|
|
93
|
-
with Tag(self.elements, "thead"):
|
|
94
|
-
if not bool(int(os.environ.get("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES", "0"))):
|
|
95
|
-
with Tag(self.elements, "tr"):
|
|
96
|
-
columns = self.df.columns
|
|
97
|
-
for c in self.col_idx:
|
|
98
|
-
with Tag(self.elements, "th"):
|
|
99
|
-
if c == -1:
|
|
100
|
-
self.elements.append("…")
|
|
101
|
-
else:
|
|
102
|
-
self.elements.append(html.escape(columns[c]))
|
|
103
|
-
if not bool(
|
|
104
|
-
int(os.environ.get("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES", "0"))
|
|
105
|
-
):
|
|
106
|
-
with Tag(self.elements, "tr"):
|
|
107
|
-
dtypes = self.df._df.dtype_strings()
|
|
108
|
-
for c in self.col_idx:
|
|
109
|
-
with Tag(self.elements, "td"):
|
|
110
|
-
if c == -1:
|
|
111
|
-
self.elements.append("…")
|
|
112
|
-
else:
|
|
113
|
-
self.elements.append(dtypes[c])
|
|
114
|
-
|
|
115
|
-
def write_body(self) -> None:
|
|
116
|
-
"""Write the body of an HTML table."""
|
|
117
|
-
str_len_limit = int(os.environ.get("POLARS_FMT_STR_LEN", default=30))
|
|
118
|
-
with Tag(self.elements, "tbody"):
|
|
119
|
-
for r in self.row_idx:
|
|
120
|
-
with Tag(self.elements, "tr"):
|
|
121
|
-
for c in self.col_idx:
|
|
122
|
-
with Tag(self.elements, "td"):
|
|
123
|
-
if r == -1 or c == -1:
|
|
124
|
-
self.elements.append("…")
|
|
125
|
-
else:
|
|
126
|
-
series = self.df[:, c]
|
|
127
|
-
self.elements.append(
|
|
128
|
-
replace_consecutive_spaces(
|
|
129
|
-
html.escape(series._s.get_fmt(r, str_len_limit))
|
|
130
|
-
)
|
|
131
|
-
)
|
|
132
|
-
|
|
133
|
-
def write(self, inner: str) -> None:
|
|
134
|
-
"""Append a raw string to the inner HTML."""
|
|
135
|
-
self.elements.append(inner)
|
|
136
|
-
|
|
137
|
-
def render(self) -> list[str]:
|
|
138
|
-
"""Return the lines needed to render a HTML table."""
|
|
139
|
-
if not bool(
|
|
140
|
-
int(
|
|
141
|
-
os.environ.get("POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION", "0")
|
|
142
|
-
)
|
|
143
|
-
):
|
|
144
|
-
# format frame/series shape with '_' thousand-separators
|
|
145
|
-
s = self.df.shape
|
|
146
|
-
shape = f"({s[0]:_},)" if self.from_series else f"({s[0]:_}, {s[1]:_})"
|
|
147
|
-
|
|
148
|
-
self.elements.append(f"<small>shape: {shape}</small>")
|
|
149
|
-
|
|
150
|
-
with Tag(
|
|
151
|
-
# be careful changing the CSS class ref here...
|
|
152
|
-
# ref: https://github.com/pola-rs/polars/issues/7443
|
|
153
|
-
self.elements,
|
|
154
|
-
"table",
|
|
155
|
-
{"border": "1", "class": "dataframe"},
|
|
156
|
-
):
|
|
157
|
-
self.write_header()
|
|
158
|
-
self.write_body()
|
|
159
|
-
return self.elements
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
class NotebookFormatter(HTMLFormatter):
|
|
163
|
-
"""
|
|
164
|
-
Class for formatting output data in HTML for display in Jupyter Notebooks.
|
|
165
|
-
|
|
166
|
-
This class is intended for functionality specific to DataFrame._repr_html_().
|
|
167
|
-
"""
|
|
168
|
-
|
|
169
|
-
def write_style(self) -> None:
|
|
170
|
-
style = """\
|
|
171
|
-
<style>
|
|
172
|
-
.dataframe > thead > tr,
|
|
173
|
-
.dataframe > tbody > tr {
|
|
174
|
-
text-align: right;
|
|
175
|
-
white-space: pre-wrap;
|
|
176
|
-
}
|
|
177
|
-
</style>
|
|
178
|
-
"""
|
|
179
|
-
self.write(dedent(style))
|
|
180
|
-
|
|
181
|
-
def render(self) -> list[str]:
|
|
182
|
-
"""Return the lines needed to render a HTML table."""
|
|
183
|
-
with Tag(self.elements, "div"):
|
|
184
|
-
self.write_style()
|
|
185
|
-
super().render()
|
|
186
|
-
return self.elements
|