pandas-plots 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/tbl.py +107 -69
- {pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/METADATA +1 -1
- {pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/RECORD +6 -6
- {pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/LICENSE +0 -0
- {pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/WHEEL +0 -0
- {pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/top_level.txt +0 -0
pandas_plots/tbl.py
CHANGED
@@ -1,24 +1,25 @@
|
|
1
1
|
import warnings
|
2
|
-
warnings.filterwarnings('ignore')
|
3
2
|
|
4
|
-
|
5
|
-
|
3
|
+
warnings.filterwarnings("ignore")
|
4
|
+
|
5
|
+
import math
|
6
|
+
import os
|
7
|
+
from typing import Literal, get_args
|
8
|
+
|
6
9
|
import numpy as np
|
7
10
|
import pandas as pd
|
8
11
|
import plotly.express as px
|
9
|
-
import pandas as pd
|
10
|
-
import math
|
11
|
-
import os
|
12
12
|
from plotly.subplots import make_subplots
|
13
|
+
from scipy import stats
|
13
14
|
# pd.options.mode.chained_assignment = None
|
15
|
+
|
14
16
|
from . import txt
|
15
17
|
|
16
|
-
|
17
|
-
assert pd.__version__ > '2.0.0', 'pandas version must be >= 2.0.0'
|
18
|
+
AGG_FUNC=Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
|
18
19
|
|
19
20
|
def describe_df(
|
20
21
|
df: pd.DataFrame,
|
21
|
-
caption: str,
|
22
|
+
caption: str,
|
22
23
|
use_plot: bool = True,
|
23
24
|
use_columns: bool = True,
|
24
25
|
renderer: Literal["png", "svg", None] = "png",
|
@@ -44,7 +45,7 @@ def describe_df(
|
|
44
45
|
sort_mode (Literal["value", "index"]): sort by value or index
|
45
46
|
top_n_uniques (int): number of uniques to display
|
46
47
|
top_n_chars_in_index (int): number of characters to display on plot axis
|
47
|
-
|
48
|
+
|
48
49
|
usage:
|
49
50
|
describe_df(
|
50
51
|
df=df,
|
@@ -56,12 +57,12 @@ def describe_df(
|
|
56
57
|
fig_offset=None,
|
57
58
|
sort_mode="value",
|
58
59
|
)
|
59
|
-
|
60
|
+
|
60
61
|
hint: skewness may not work properly if the column is float and/or has only 1 value
|
61
62
|
"""
|
62
63
|
# * copy df, df col types are modified
|
63
64
|
df = df.copy()
|
64
|
-
|
65
|
+
|
65
66
|
# * check if df is empty
|
66
67
|
if len(df) == 0:
|
67
68
|
print(f"DataFrame is empty!")
|
@@ -73,10 +74,11 @@ def describe_df(
|
|
73
74
|
print(f"🟣 missings: {dict(df.isna().sum())}")
|
74
75
|
print("--- column uniques (all)")
|
75
76
|
print(f"🟠 index {txt.wrap(df.index.tolist()[:top_n_uniques])}")
|
77
|
+
|
76
78
|
def get_uniques_header(col: str):
|
77
79
|
# * sorting has issues when col is of mixed type (object)
|
78
|
-
if df[col].dtype==
|
79
|
-
df[col]=df[col].astype(str)
|
80
|
+
if df[col].dtype == "object":
|
81
|
+
df[col] = df[col].astype(str)
|
80
82
|
# * get unique values
|
81
83
|
# unis = df[col].sort_values().unique()
|
82
84
|
unis = list(df[col].value_counts().sort_index().index)
|
@@ -89,16 +91,18 @@ def describe_df(
|
|
89
91
|
_u, _h = get_uniques_header(col)
|
90
92
|
if use_columns:
|
91
93
|
# * check col type
|
92
|
-
is_str=df.loc[:,col].dtype.kind ==
|
94
|
+
is_str = df.loc[:, col].dtype.kind == "O"
|
93
95
|
# * wrap output
|
94
|
-
print(
|
96
|
+
print(
|
97
|
+
f"{_h} {txt.wrap(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}"
|
98
|
+
)
|
95
99
|
# print(f"{_h} {_u[:top_n_uniques]}")
|
96
100
|
else:
|
97
101
|
print(f"{_h}")
|
98
102
|
|
99
103
|
print("--- column stats (numeric)")
|
100
104
|
# * only show numerics
|
101
|
-
for col in df.select_dtypes(
|
105
|
+
for col in df.select_dtypes("number").columns:
|
102
106
|
_u, _h = get_uniques_header(col)
|
103
107
|
|
104
108
|
# * extra care for scipy metrics, these are very vulnerable to nan
|
@@ -117,7 +121,7 @@ def describe_df(
|
|
117
121
|
cols = df.iloc[:, :fig_offset].columns
|
118
122
|
cols_num = df.select_dtypes(np.number).columns.tolist()
|
119
123
|
# cols_str = list(set(df.columns) - set(cols_num))
|
120
|
-
|
124
|
+
|
121
125
|
# * set constant column count, calc rows
|
122
126
|
fig_rows = math.ceil(len(cols) / fig_cols)
|
123
127
|
|
@@ -145,17 +149,21 @@ def describe_df(
|
|
145
149
|
figsub = px.box(df, x=col, points="outliers")
|
146
150
|
else:
|
147
151
|
# * only respect 100 items (fixed value)
|
148
|
-
x=span.iloc[:100].index
|
149
|
-
y=span.iloc[:100].values
|
152
|
+
x = span.iloc[:100].index
|
153
|
+
y = span.iloc[:100].values
|
150
154
|
# * cut long strings
|
151
|
-
if x.dtype==
|
152
|
-
x=x.astype(str).tolist()
|
153
|
-
_cut = lambda s:
|
154
|
-
|
155
|
+
if x.dtype == "object" and top_n_chars_in_index > 0:
|
156
|
+
x = x.astype(str).tolist()
|
157
|
+
_cut = lambda s: (
|
158
|
+
s[:top_n_chars_in_index] + ".."
|
159
|
+
if len(s) > top_n_chars_in_index
|
160
|
+
else s[:top_n_chars_in_index]
|
161
|
+
)
|
162
|
+
x = [_cut(item) for item in x]
|
155
163
|
figsub = px.bar(
|
156
164
|
x=x,
|
157
165
|
y=y,
|
158
|
-
|
166
|
+
)
|
159
167
|
# * grid position
|
160
168
|
_row = math.floor((i) / fig_cols) + 1
|
161
169
|
_col = i % fig_cols + 1
|
@@ -164,7 +172,9 @@ def describe_df(
|
|
164
172
|
fig.add_trace(figsub["data"][0], row=_row, col=_col)
|
165
173
|
|
166
174
|
# * set template
|
167
|
-
fig.update_layout(
|
175
|
+
fig.update_layout(
|
176
|
+
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
177
|
+
)
|
168
178
|
fig.show(renderer)
|
169
179
|
|
170
180
|
|
@@ -177,12 +187,13 @@ def pivot_df(
|
|
177
187
|
data_bar_axis: Literal["x", "y", "xy", None] = "xy",
|
178
188
|
pct_axis: Literal["x", "xy", None] = "xy",
|
179
189
|
precision: int = 0,
|
180
|
-
|
181
|
-
|
190
|
+
heatmap_axis: Literal["x", "y", "xy", None] = None,
|
191
|
+
total_mode: AGG_FUNC = "sum",
|
192
|
+
total_axis: Literal["x", "y", "xy", None] = "xy",
|
182
193
|
) -> pd.DataFrame:
|
183
194
|
"""
|
184
195
|
A function to pivot a DataFrame based on specified parameters and return the result as a new DataFrame.
|
185
|
-
|
196
|
+
|
186
197
|
Args:
|
187
198
|
df (pd.DataFrame): The input DataFrame to be pivoted.
|
188
199
|
dropna (bool, optional): Whether to drop NaN values. Defaults to False.
|
@@ -192,14 +203,17 @@ def pivot_df(
|
|
192
203
|
data_bar_axis (Literal["x", "y", "xy", None], optional): The axis for displaying data bars. Defaults to "xy".
|
193
204
|
pct_axis (Literal["x", "xy", None], optional): The axis for displaying percentages. Defaults to "xy".
|
194
205
|
precision (int, optional): The precision for displaying values. Defaults to 0.
|
195
|
-
show_totals (bool, optional): Whether to show totals in the result. Defaults to False.
|
196
206
|
heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
|
197
|
-
|
207
|
+
total_mode (Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"], optional): The aggregation mode for displaying totals. Defaults to "sum".
|
208
|
+
total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
|
209
|
+
|
198
210
|
Returns:
|
199
211
|
pd.DataFrame: The pivoted DataFrame.
|
200
212
|
"""
|
201
213
|
# * ensure arguments match parameter definition
|
202
|
-
if (pct_axis and pct_axis not in ["x", "xy"]) or (
|
214
|
+
if (pct_axis and pct_axis not in ["x", "xy"]) or (
|
215
|
+
data_bar_axis and data_bar_axis not in ["x", "y", "xy"]
|
216
|
+
):
|
203
217
|
print(f"❌ axis not supported")
|
204
218
|
return
|
205
219
|
|
@@ -210,6 +224,10 @@ def pivot_df(
|
|
210
224
|
if not pd.api.types.is_numeric_dtype(df.iloc[:, 2]):
|
211
225
|
print("❌ 3rd column must be numeric")
|
212
226
|
return
|
227
|
+
|
228
|
+
if total_mode and total_mode not in get_args(AGG_FUNC):
|
229
|
+
print(f"❌ total_mode '{total_mode}' not supported")
|
230
|
+
return
|
213
231
|
|
214
232
|
df = df.copy()
|
215
233
|
|
@@ -257,25 +275,36 @@ def pivot_df(
|
|
257
275
|
)
|
258
276
|
df = df.fillna(0) # .astype(_type)
|
259
277
|
|
260
|
-
return show_num_df(
|
278
|
+
return show_num_df(
|
279
|
+
df,
|
280
|
+
total_mode=total_mode,
|
281
|
+
total_axis=total_axis,
|
282
|
+
data_bar_axis=data_bar_axis,
|
283
|
+
pct_axis=pct_axis,
|
284
|
+
swap=swap,
|
285
|
+
precision=precision,
|
286
|
+
heatmap_axis=heatmap_axis,
|
287
|
+
)
|
288
|
+
|
261
289
|
|
262
290
|
def show_num_df(
|
263
291
|
df,
|
264
|
-
|
265
|
-
|
266
|
-
heatmap_axis: Literal["x","y","xy", None] = None,
|
267
|
-
data_bar_axis: Literal["x","y","xy", None] = None,
|
292
|
+
total_mode: AGG_FUNC = "sum",
|
293
|
+
total_axis: Literal["x", "y", "xy", None] = "xy",
|
294
|
+
heatmap_axis: Literal["x", "y", "xy", None] = None,
|
295
|
+
data_bar_axis: Literal["x", "y", "xy", None] = None,
|
268
296
|
pct_axis: Literal["x", "xy", None] = None,
|
269
297
|
swap: bool = False,
|
270
|
-
precision: int=0,
|
298
|
+
precision: int = 0,
|
271
299
|
):
|
272
300
|
"""
|
273
|
-
A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
|
301
|
+
A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
|
274
302
|
|
275
303
|
Parameters:
|
276
304
|
- df: the DataFrame to display
|
277
|
-
- show_total: a boolean indicating whether to show totals
|
278
305
|
- total_mode: a Literal indicating the mode for aggregating totals ["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
|
306
|
+
- total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
|
307
|
+
|
279
308
|
- heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
|
280
309
|
- data_bar_axis: a Literal indicating the axis for applying data bar coloring ["x","y","xy", None]
|
281
310
|
- pct_axis: a Literal indicating the directions for displaying percentages ["x","xy", None]. "x" means sum up pct per column
|
@@ -285,43 +314,48 @@ def show_num_df(
|
|
285
314
|
The function returns a styled representation of the DataFrame.
|
286
315
|
"""
|
287
316
|
# * ensure arguments match parameter definition
|
288
|
-
if any([df[col].dtype.kind not in [
|
317
|
+
if any([df[col].dtype.kind not in ["i", "u", "f"] for col in df.columns]) == True:
|
289
318
|
print(f"❌ table must contain numeric data only")
|
290
319
|
return
|
291
|
-
|
292
|
-
if (
|
320
|
+
|
321
|
+
if (
|
322
|
+
(pct_axis and pct_axis not in ["x", "xy"])
|
323
|
+
or (data_bar_axis and data_bar_axis not in ["x", "y", "xy"])
|
324
|
+
or (heatmap_axis and heatmap_axis not in ["x", "y", "xy"])
|
325
|
+
):
|
293
326
|
print(f"❌ axis not supported")
|
294
327
|
return
|
295
328
|
|
296
|
-
if
|
297
|
-
print(f"❌
|
329
|
+
if total_mode and total_mode not in get_args(AGG_FUNC):
|
330
|
+
print(f"❌ total_mode '{total_mode}' not supported")
|
298
331
|
return
|
299
332
|
|
300
333
|
theme = os.getenv("THEME") or "light"
|
301
|
-
|
334
|
+
|
302
335
|
# * copy df, do not reference original
|
303
336
|
df_ = df.copy() if not swap else df.T.copy()
|
304
|
-
|
305
|
-
# * alter
|
306
|
-
if
|
307
|
-
df_.loc["Total"] = df_.agg(total_mode,axis=0)
|
337
|
+
|
338
|
+
# * alter df_, add totals
|
339
|
+
if total_mode and total_axis in ['x','xy']:
|
340
|
+
df_.loc["Total"] = df_.agg(total_mode, axis=0)
|
341
|
+
if total_mode and total_axis in ['y','xy']:
|
308
342
|
df_.loc[:, "Total"] = df_.agg(total_mode, axis=1)
|
309
343
|
|
310
344
|
# * derive style
|
311
345
|
out = df_.style
|
312
346
|
|
313
|
-
color_highlight = "lightblue" if theme == "light" else "
|
347
|
+
color_highlight = "lightblue" if theme == "light" else "#666666"
|
314
348
|
color_zeros = "grey" if theme == "light" else "grey"
|
315
349
|
color_pct = "grey" if theme == "light" else "yellow"
|
316
350
|
color_values = "black" if theme == "light" else "white"
|
317
351
|
color_minus = "red" if theme == "light" else "red"
|
318
|
-
cmap_heat="Blues" if theme == "light" else "copper"
|
352
|
+
cmap_heat = "Blues" if theme == "light" else "copper"
|
319
353
|
|
320
354
|
# * apply data bar coloring
|
321
355
|
if data_bar_axis:
|
322
356
|
out.bar(
|
323
357
|
color=f"{color_highlight}",
|
324
|
-
axis=
|
358
|
+
axis=0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
|
325
359
|
)
|
326
360
|
|
327
361
|
# * all cell formatting in one place
|
@@ -334,17 +368,18 @@ def show_num_df(
|
|
334
368
|
# * here cell > 0
|
335
369
|
if show_pct:
|
336
370
|
return f'{cell:_.{precision}f} <span style="color: {color_pct}">({(cell /sum):.1%})</span>'
|
337
|
-
return f
|
371
|
+
return f"{cell:_.{precision}f}"
|
338
372
|
|
339
373
|
# * build pct formatting
|
340
|
-
if pct_axis ==
|
374
|
+
if pct_axis == "x":
|
341
375
|
# * totals on either axis influence the sum
|
342
|
-
divider = 2 if
|
376
|
+
divider = 2 if total_axis in ['x','xy'] else 1
|
343
377
|
# * cell formatting to each column instead of altering values w/ df.apply
|
344
378
|
# * uses dictionary comprehension, and a lambda function with two input variables
|
345
379
|
col_sums = df_.sum() / divider
|
346
380
|
formatter = {
|
347
|
-
col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis)
|
381
|
+
col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis)
|
382
|
+
for col in df_.columns
|
348
383
|
}
|
349
384
|
|
350
385
|
# ? y is not implemented, needs row wise formatting
|
@@ -354,14 +389,14 @@ def show_num_df(
|
|
354
389
|
# row: lambda x, row=row: format_cell(x, row_sums[row]) for row in _df.index
|
355
390
|
# }
|
356
391
|
|
357
|
-
elif pct_axis==
|
358
|
-
divider = 4 if
|
392
|
+
elif pct_axis == "xy":
|
393
|
+
divider = 4 if total_axis == 'xy' else 2 if total_axis in ['x','y'] else 1
|
359
394
|
n = df_.sum().sum() / divider
|
360
395
|
formatter = {
|
361
396
|
col: lambda x, col=col: format_cell(x, n, pct_axis) for col in df_.columns
|
362
397
|
}
|
363
398
|
else:
|
364
|
-
# *
|
399
|
+
# *
|
365
400
|
formatter = {
|
366
401
|
col: lambda x, col=col: format_cell(x, x, False) for col in df_.columns
|
367
402
|
}
|
@@ -369,15 +404,15 @@ def show_num_df(
|
|
369
404
|
out.format(formatter=formatter)
|
370
405
|
|
371
406
|
# * apply fonts for cells
|
372
|
-
out.set_properties(**{
|
407
|
+
out.set_properties(**{"font-family": "Courier"})
|
373
408
|
|
374
409
|
# * apply fonts for th (incl. index)
|
375
|
-
_props=[
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
410
|
+
_props = [
|
411
|
+
# ("font-size", "10pt"),
|
412
|
+
# ("font-weight", "bold"),
|
413
|
+
# ("font-family", "Courier"),
|
414
|
+
("text-align", "right")
|
415
|
+
]
|
381
416
|
out.set_table_styles(
|
382
417
|
[
|
383
418
|
dict(selector="th", props=_props),
|
@@ -386,6 +421,9 @@ def show_num_df(
|
|
386
421
|
)
|
387
422
|
|
388
423
|
if heatmap_axis:
|
389
|
-
out.background_gradient(
|
424
|
+
out.background_gradient(
|
425
|
+
cmap=cmap_heat,
|
426
|
+
axis=None if heatmap_axis == "xy" else 0 if heatmap_axis == "y" else 1,
|
427
|
+
)
|
390
428
|
|
391
|
-
return out
|
429
|
+
return out
|
@@ -1,11 +1,11 @@
|
|
1
1
|
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
pandas_plots/pls.py,sha256=CUh2lskJ6eLO_ccAg_UXXpRoWvZ7-Q3xKcUSEnKhK9U,23349
|
3
3
|
pandas_plots/sql.py,sha256=SHrmwhmzq0QYygvaoKwv7neiwf_Rv87VmdUkADYPdR8,2485
|
4
|
-
pandas_plots/tbl.py,sha256
|
4
|
+
pandas_plots/tbl.py,sha256=-Clf01gUetNw3KieqjpFRL0-2MJpIB3mfKU36Tzeij0,16027
|
5
5
|
pandas_plots/txt.py,sha256=LnW9OF3mSX2fp9JajefF3Mz3LuCA8MaqlFZYjT_jaQw,1537
|
6
6
|
pandas_plots/ven.py,sha256=nDKS7cTIHOJhIXKnAxAkEoqPgVZCUPJld5CvSiB2JC4,11721
|
7
|
-
pandas_plots-0.8.
|
8
|
-
pandas_plots-0.8.
|
9
|
-
pandas_plots-0.8.
|
10
|
-
pandas_plots-0.8.
|
11
|
-
pandas_plots-0.8.
|
7
|
+
pandas_plots-0.8.8.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
8
|
+
pandas_plots-0.8.8.dist-info/METADATA,sha256=1r7P6xc2coVUrwDz8b0e5cEwJVw7T38jFnxEnrmZwJE,5478
|
9
|
+
pandas_plots-0.8.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
10
|
+
pandas_plots-0.8.8.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
11
|
+
pandas_plots-0.8.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|