pandas-plots 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/pls.py +368 -192
- pandas_plots/tbl.py +6 -4
- {pandas_plots-0.14.0.dist-info → pandas_plots-0.15.0.dist-info}/METADATA +3 -1
- pandas_plots-0.15.0.dist-info/RECORD +9 -0
- pandas_plots-0.14.0.dist-info/RECORD +0 -9
- {pandas_plots-0.14.0.dist-info → pandas_plots-0.15.0.dist-info}/WHEEL +0 -0
- {pandas_plots-0.14.0.dist-info → pandas_plots-0.15.0.dist-info}/licenses/LICENSE +0 -0
pandas_plots/pls.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
import warnings
|
3
|
+
|
3
4
|
warnings.filterwarnings("ignore")
|
4
5
|
|
5
6
|
import os
|
@@ -62,12 +63,14 @@ def aggregate_data(
|
|
62
63
|
top_indexes = (
|
63
64
|
aggregated_df.groupby("index")["value"]
|
64
65
|
.sum()
|
65
|
-
.sort_values(ascending=False)[:top_n_index or None]
|
66
|
+
.sort_values(ascending=False)[: top_n_index or None]
|
66
67
|
.index
|
67
68
|
)
|
68
|
-
|
69
|
+
|
69
70
|
else:
|
70
|
-
top_indexes =
|
71
|
+
top_indexes = (
|
72
|
+
aggregated_df["index"].sort_values().unique()[: top_n_index or None]
|
73
|
+
)
|
71
74
|
|
72
75
|
aggregated_df = aggregated_df[aggregated_df["index"].isin(top_indexes)]
|
73
76
|
|
@@ -75,18 +78,16 @@ def aggregate_data(
|
|
75
78
|
top_colors = (
|
76
79
|
aggregated_df.groupby("col")["value"]
|
77
80
|
.sum()
|
78
|
-
.sort_values(ascending=False)[:top_n_color or None]
|
81
|
+
.sort_values(ascending=False)[: top_n_color or None]
|
79
82
|
.index
|
80
83
|
)
|
81
84
|
else:
|
82
|
-
top_colors = aggregated_df["col"].sort_values().unique()[:top_n_color or None]
|
85
|
+
top_colors = aggregated_df["col"].sort_values().unique()[: top_n_color or None]
|
83
86
|
|
84
87
|
others_df = df[~df["col"].isin(top_colors)]
|
85
88
|
aggregated_df = aggregated_df[aggregated_df["col"].isin(top_colors)]
|
86
89
|
if show_other and top_n_color > 0 and not others_df.empty:
|
87
|
-
other_agg = others_df.groupby(["index", "facet"], as_index=False)[
|
88
|
-
"value"
|
89
|
-
].sum()
|
90
|
+
other_agg = others_df.groupby(["index", "facet"], as_index=False)["value"].sum()
|
90
91
|
other_agg["col"] = "<other>"
|
91
92
|
other_agg = other_agg[["index", "col", "facet", "value"]]
|
92
93
|
aggregated_df = pd.concat([aggregated_df, other_agg], ignore_index=True)
|
@@ -96,11 +97,13 @@ def aggregate_data(
|
|
96
97
|
top_facets = (
|
97
98
|
aggregated_df.groupby("facet")["value"]
|
98
99
|
.sum()
|
99
|
-
.sort_values(ascending=False)[:top_n_facet or None]
|
100
|
+
.sort_values(ascending=False)[: top_n_facet or None]
|
100
101
|
.index
|
101
102
|
)
|
102
103
|
else:
|
103
|
-
top_facets =
|
104
|
+
top_facets = (
|
105
|
+
aggregated_df["facet"].sort_values().unique()[: top_n_facet or None]
|
106
|
+
)
|
104
107
|
|
105
108
|
aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
|
106
109
|
|
@@ -358,7 +361,7 @@ def plot_stacked_bars(
|
|
358
361
|
show_other=show_other,
|
359
362
|
sort_values_index=sort_values_index,
|
360
363
|
sort_values_color=sort_values_color,
|
361
|
-
sort_values_facet=False,
|
364
|
+
sort_values_facet=False, # just a placeholder
|
362
365
|
)
|
363
366
|
|
364
367
|
df = aggregated_df.copy()
|
@@ -377,8 +380,8 @@ def plot_stacked_bars(
|
|
377
380
|
)
|
378
381
|
|
379
382
|
if sort_values_color:
|
380
|
-
colors_unique = (
|
381
|
-
.groupby("col", observed=True)["value"]
|
383
|
+
colors_unique = (
|
384
|
+
df.groupby("col", observed=True)["value"]
|
382
385
|
.sum()
|
383
386
|
.sort_values(ascending=False)
|
384
387
|
.index.tolist()
|
@@ -387,8 +390,8 @@ def plot_stacked_bars(
|
|
387
390
|
colors_unique = sorted(df["col"].unique().tolist())
|
388
391
|
|
389
392
|
if sort_values_index:
|
390
|
-
index_unique = (
|
391
|
-
.groupby("index", observed=True)["value"]
|
393
|
+
index_unique = (
|
394
|
+
df.groupby("index", observed=True)["value"]
|
392
395
|
.sum()
|
393
396
|
.sort_values(ascending=False)
|
394
397
|
.index.tolist()
|
@@ -397,7 +400,6 @@ def plot_stacked_bars(
|
|
397
400
|
index_unique = sorted(df["index"].unique().tolist())
|
398
401
|
|
399
402
|
color_map = assign_column_colors(colors_unique, color_palette, null_label)
|
400
|
-
|
401
403
|
|
402
404
|
cat_orders = {
|
403
405
|
"index": index_unique,
|
@@ -405,8 +407,9 @@ def plot_stacked_bars(
|
|
405
407
|
}
|
406
408
|
|
407
409
|
# Ensure bl is categorical with the correct order
|
408
|
-
df["index"] = pd.Categorical(
|
409
|
-
|
410
|
+
df["index"] = pd.Categorical(
|
411
|
+
df["index"], categories=cat_orders["index"], ordered=True
|
412
|
+
)
|
410
413
|
|
411
414
|
# * plot
|
412
415
|
fig = px.bar(
|
@@ -420,13 +423,10 @@ def plot_stacked_bars(
|
|
420
423
|
title=title
|
421
424
|
or f"{caption}{_title_str_top_index}[{col_index}] by {_title_str_top_color}[{col_color}]{_title_str_null}{_title_str_n}",
|
422
425
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
423
|
-
width=width,
|
424
|
-
height=height,
|
425
426
|
color_discrete_map=color_map, # Use assigned colors
|
426
|
-
category_orders=
|
427
|
+
category_orders=cat_orders,
|
427
428
|
)
|
428
429
|
|
429
|
-
|
430
430
|
# print(cat_orders)
|
431
431
|
# print(color_map)
|
432
432
|
# display(df)
|
@@ -457,10 +457,9 @@ def plot_stacked_bars(
|
|
457
457
|
},
|
458
458
|
},
|
459
459
|
)
|
460
|
-
fig.update_layout(legend_traceorder="normal")
|
460
|
+
fig.update_layout(legend_traceorder="normal")
|
461
461
|
fig.update_layout(legend_title_text=col_color)
|
462
462
|
|
463
|
-
|
464
463
|
# * set dtick
|
465
464
|
if orientation == "h":
|
466
465
|
if relative:
|
@@ -482,7 +481,11 @@ def plot_stacked_bars(
|
|
482
481
|
if png_path is not None:
|
483
482
|
fig.write_image(Path(png_path).as_posix())
|
484
483
|
|
485
|
-
fig.show(
|
484
|
+
fig.show(
|
485
|
+
renderer=renderer,
|
486
|
+
width=width,
|
487
|
+
height=height,
|
488
|
+
)
|
486
489
|
|
487
490
|
return fig
|
488
491
|
|
@@ -563,8 +566,9 @@ def plot_bars(
|
|
563
566
|
|
564
567
|
# * ensure df is grouped to prevent false aggregations, reset index to return df
|
565
568
|
if use_ci:
|
566
|
-
# * grouping is smoother on df than on series
|
567
|
-
df = (
|
569
|
+
# * grouping is smoother on df than on series
|
570
|
+
df = (
|
571
|
+
df_in
|
568
572
|
# ? dont dropna() here, this biases the input data
|
569
573
|
.groupby(
|
570
574
|
col_index,
|
@@ -573,7 +577,12 @@ def plot_bars(
|
|
573
577
|
.agg(
|
574
578
|
mean=(col_name, ci_agg),
|
575
579
|
# * retrieve margin from custom func
|
576
|
-
margin=(
|
580
|
+
margin=(
|
581
|
+
col_name,
|
582
|
+
lambda x: mean_confidence_interval(
|
583
|
+
x, use_median=(ci_agg == "median")
|
584
|
+
)[1],
|
585
|
+
),
|
577
586
|
)
|
578
587
|
.reset_index()
|
579
588
|
)
|
@@ -593,7 +602,6 @@ def plot_bars(
|
|
593
602
|
else:
|
594
603
|
df = df.fillna("<NA>")
|
595
604
|
|
596
|
-
|
597
605
|
# * get n, col1 now is always numeric
|
598
606
|
n = df[df.columns[1]].sum()
|
599
607
|
n_len = len(df_in)
|
@@ -657,7 +665,9 @@ def plot_bars(
|
|
657
665
|
|
658
666
|
# * title str n
|
659
667
|
_title_str_n = (
|
660
|
-
f", n={n_len:_} ({n:_})"
|
668
|
+
f", n={n_len:_} ({n:_})"
|
669
|
+
if not use_ci
|
670
|
+
else f", n={n_len:_})<br><sub>ci(95) on {ci_agg}s<sub>"
|
661
671
|
)
|
662
672
|
|
663
673
|
# * title str na
|
@@ -680,8 +690,6 @@ def plot_bars(
|
|
680
690
|
or f"{caption}{_title_str_minval}{_title_str_top}[{col_name}] by [{col_index}]{_title_str_null}{_title_str_n}",
|
681
691
|
# * retrieve theme from env (intro.set_theme) or default
|
682
692
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
683
|
-
width=width,
|
684
|
-
height=height,
|
685
693
|
error_y=None if not use_ci else df["margin"],
|
686
694
|
color_discrete_sequence=px.colors.qualitative.D3,
|
687
695
|
color=col_index,
|
@@ -734,14 +742,12 @@ def plot_bars(
|
|
734
742
|
_fig.update_layout(yaxis={"categoryorder": "category descending"})
|
735
743
|
|
736
744
|
# * looks better on single bars
|
737
|
-
_fig.update_traces(
|
738
|
-
error_y=dict(thickness=5)
|
739
|
-
)
|
745
|
+
_fig.update_traces(error_y=dict(thickness=5))
|
740
746
|
if use_ci:
|
741
747
|
_fig.update_traces(
|
742
748
|
textposition="inside", # Put labels inside bars
|
743
749
|
insidetextanchor="start", # Align labels at the bottom
|
744
|
-
textfont=dict(size=14, color="white") # Adjust text color for visibility
|
750
|
+
textfont=dict(size=14, color="white"), # Adjust text color for visibility
|
745
751
|
)
|
746
752
|
else:
|
747
753
|
_fig.update_traces(
|
@@ -750,8 +756,11 @@ def plot_bars(
|
|
750
756
|
)
|
751
757
|
|
752
758
|
# * set axis title
|
753
|
-
|
754
|
-
|
759
|
+
_fig.show(
|
760
|
+
renderer,
|
761
|
+
width=width,
|
762
|
+
height=height,
|
763
|
+
)
|
755
764
|
|
756
765
|
# * save to png if path is provided
|
757
766
|
if png_path is not None:
|
@@ -828,8 +837,6 @@ def plot_histogram(
|
|
828
837
|
marginal="box",
|
829
838
|
barmode=barmode,
|
830
839
|
text_auto=text_auto,
|
831
|
-
height=height,
|
832
|
-
width=width,
|
833
840
|
orientation=orientation,
|
834
841
|
title=title or f"{_caption}[{', '.join(df.columns)}], n={df.shape[0]:_}",
|
835
842
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
@@ -848,7 +855,11 @@ def plot_histogram(
|
|
848
855
|
showlegend=False if df.shape[1] == 1 else True,
|
849
856
|
)
|
850
857
|
|
851
|
-
fig.show(
|
858
|
+
fig.show(
|
859
|
+
renderer,
|
860
|
+
width=width,
|
861
|
+
height=height,
|
862
|
+
)
|
852
863
|
|
853
864
|
# * save to png if path is provided
|
854
865
|
if png_path is not None:
|
@@ -971,6 +982,7 @@ def plot_box(
|
|
971
982
|
x_max: float = None,
|
972
983
|
use_log: bool = False,
|
973
984
|
png_path: Path | str = None,
|
985
|
+
renderer: Literal["png", "svg", None] = "png",
|
974
986
|
) -> object:
|
975
987
|
"""
|
976
988
|
Plots a horizontal box plot for the given pandas Series.
|
@@ -990,6 +1002,7 @@ def plot_box(
|
|
990
1002
|
x_max: The maximum value for the x-axis scale (max and min must be set).
|
991
1003
|
use_log: Use logarithmic scale for the axis.
|
992
1004
|
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
1005
|
+
renderer (Literal["png", "svg", None], optional): The renderer to use for saving the image. Defaults to "png".
|
993
1006
|
|
994
1007
|
Returns:
|
995
1008
|
plot object
|
@@ -1024,11 +1037,9 @@ def plot_box(
|
|
1024
1037
|
"data_frame": ser,
|
1025
1038
|
"orientation": "h",
|
1026
1039
|
"template": "plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
1027
|
-
"height": height,
|
1028
|
-
"width": width,
|
1029
1040
|
"points": points,
|
1030
1041
|
# 'box':True,
|
1031
|
-
"log_x": use_log,
|
1042
|
+
"log_x": use_log, # * logarithmic scale, axis is always x
|
1032
1043
|
# "notched": True,
|
1033
1044
|
"title": f"{caption}[{ser.name}]{log_str}, n = {n_:_}" if not title else title,
|
1034
1045
|
}
|
@@ -1106,7 +1117,11 @@ def plot_box(
|
|
1106
1117
|
y=-0,
|
1107
1118
|
)
|
1108
1119
|
|
1109
|
-
fig.show(
|
1120
|
+
fig.show(
|
1121
|
+
renderer=renderer,
|
1122
|
+
width=width,
|
1123
|
+
height=height,
|
1124
|
+
)
|
1110
1125
|
|
1111
1126
|
if summary:
|
1112
1127
|
# * if only series is provided, col name is None
|
@@ -1119,8 +1134,6 @@ def plot_box(
|
|
1119
1134
|
return fig
|
1120
1135
|
|
1121
1136
|
|
1122
|
-
|
1123
|
-
|
1124
1137
|
def plot_boxes(
|
1125
1138
|
df: pd.DataFrame,
|
1126
1139
|
caption: str = None,
|
@@ -1134,6 +1147,7 @@ def plot_boxes(
|
|
1134
1147
|
use_log: bool = False,
|
1135
1148
|
box_width: float = 0.5,
|
1136
1149
|
png_path: Path | str = None,
|
1150
|
+
renderer: Literal["png", "svg", None] = "png",
|
1137
1151
|
) -> object:
|
1138
1152
|
"""
|
1139
1153
|
[Experimental] Plot vertical boxes for each unique item in the DataFrame and add annotations for statistics.
|
@@ -1149,6 +1163,7 @@ def plot_boxes(
|
|
1149
1163
|
summary (bool): Whether to add a summary to the plot.
|
1150
1164
|
use_log (bool): Whether to use logarithmic scale for the plot (cannot show negative values).
|
1151
1165
|
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
1166
|
+
renderer (Literal["png", "svg", None], optional): The renderer to use for saving the image. Defaults to "png".
|
1152
1167
|
|
1153
1168
|
Returns:
|
1154
1169
|
plot object
|
@@ -1184,8 +1199,6 @@ def plot_boxes(
|
|
1184
1199
|
color=df.iloc[:, 0],
|
1185
1200
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
1186
1201
|
orientation="v",
|
1187
|
-
height=height,
|
1188
|
-
width=width,
|
1189
1202
|
points=points,
|
1190
1203
|
log_y=use_log,
|
1191
1204
|
# color_discrete_sequence=px.colors.qualitative.Plotly,
|
@@ -1264,9 +1277,11 @@ def plot_boxes(
|
|
1264
1277
|
fig.update_yaxes(title_text=df.columns[1])
|
1265
1278
|
fig.update_layout(boxmode="group") # Ensures boxes are not too compressed
|
1266
1279
|
fig.update_layout(showlegend=False)
|
1267
|
-
fig.update_traces(
|
1280
|
+
fig.update_traces(
|
1281
|
+
marker=dict(size=5), width=box_width
|
1282
|
+
) # Adjust width (default ~0.5)
|
1268
1283
|
|
1269
|
-
fig.show(
|
1284
|
+
fig.show(renderer=renderer, width=width, height=height)
|
1270
1285
|
if summary:
|
1271
1286
|
# * sort df by first column
|
1272
1287
|
print_summary(df=df.sort_values(df.columns[0]), precision=precision)
|
@@ -1301,18 +1316,50 @@ def plot_facet_stacked_bars(
|
|
1301
1316
|
show_pct: bool = False,
|
1302
1317
|
) -> go.Figure:
|
1303
1318
|
|
1304
|
-
|
1319
|
+
"""
|
1320
|
+
A function to plot multiple (subplots_per_row) stacked bar charts, facetted by the third column, with the first column as the index and the second column as the colors.
|
1321
|
+
|
1322
|
+
Parameters:
|
1323
|
+
- df (pd.DataFrame): Input DataFrame with 3 or 4 columns.
|
1324
|
+
- subplots_per_row (int): The number of subplots to display per row.
|
1325
|
+
- top_n_index (int): The number of top indexes to include in the chart. Default is 0, which includes all indexes.
|
1326
|
+
- top_n_color (int): The number of top colors to include in the chart. Default is 0, which includes all colors.
|
1327
|
+
- top_n_facet (int): The number of top facets to include in the chart. Default is 0, which includes all facets.
|
1328
|
+
- null_label (str): The label to use for null values. Default is "<NA>".
|
1329
|
+
- subplot_size (int): The size of each subplot in pixels. Default is 300.
|
1330
|
+
- color_palette (str): The name of the color palette to use. Default is "Plotly".
|
1331
|
+
- caption (str): An optional string indicating the caption for the chart.
|
1332
|
+
- renderer (str): The output format. Default is "png".
|
1333
|
+
- annotations (bool): Whether to include annotations on the chart. Default is False.
|
1334
|
+
- precision (int): The number of decimal places to round the values to. Default is 0.
|
1335
|
+
- png_path (str): The path to save the chart to, if provided.
|
1336
|
+
- show_other (bool): Whether to include "<other>" for columns not in top_n_color. Default is False.
|
1337
|
+
- sort_values (bool): Whether to sort the values in the chart. Default is True.
|
1338
|
+
- sort_values_index (bool): Whether to sort the index column. Default is False.
|
1339
|
+
- sort_values_color (bool): Whether to sort the color column. Default is False.
|
1340
|
+
- sort_values_facet (bool): Whether to sort the facet column. Default is False.
|
1341
|
+
- relative (bool): Whether to show the bars as relative values (0-1 range). Default is False.
|
1342
|
+
- show_pct (bool): Whether to show the annotations as percentages. Default is False.
|
1343
|
+
|
1344
|
+
Returns:
|
1345
|
+
- go.Figure: The chart object.
|
1346
|
+
"""
|
1347
|
+
# ENFORCE show_pct RULES ---
|
1305
1348
|
if not relative:
|
1306
1349
|
# If bars are absolute, annotations MUST be absolute
|
1307
1350
|
if show_pct:
|
1308
|
-
print(
|
1351
|
+
print(
|
1352
|
+
"Warning: 'show_pct' cannot be True when 'relative' is False. Setting 'show_pct' to False."
|
1353
|
+
)
|
1309
1354
|
show_pct = False
|
1310
|
-
#
|
1355
|
+
#
|
1311
1356
|
|
1312
1357
|
try:
|
1313
1358
|
precision = int(precision)
|
1314
1359
|
except (ValueError, TypeError):
|
1315
|
-
print(
|
1360
|
+
print(
|
1361
|
+
f"Warning: 'precision' received as {precision} (type: {type(precision)}). Defaulting to 0."
|
1362
|
+
)
|
1316
1363
|
precision = 0
|
1317
1364
|
|
1318
1365
|
df_copy = df.copy()
|
@@ -1331,7 +1378,7 @@ def plot_facet_stacked_bars(
|
|
1331
1378
|
n = df_copy["value"].sum()
|
1332
1379
|
original_rows = len(df_copy)
|
1333
1380
|
|
1334
|
-
aggregated_df = aggregate_data(
|
1381
|
+
aggregated_df = aggregate_data( # Assumes aggregate_data is accessible
|
1335
1382
|
df_copy,
|
1336
1383
|
top_n_index,
|
1337
1384
|
top_n_color,
|
@@ -1343,46 +1390,60 @@ def plot_facet_stacked_bars(
|
|
1343
1390
|
sort_values_facet=sort_values_facet,
|
1344
1391
|
)
|
1345
1392
|
|
1346
|
-
aggregated_df[
|
1347
|
-
aggregated_df[
|
1348
|
-
aggregated_df[
|
1393
|
+
aggregated_df["index"] = aggregated_df["index"].astype(str)
|
1394
|
+
aggregated_df["col"] = aggregated_df["col"].astype(str)
|
1395
|
+
aggregated_df["facet"] = aggregated_df["facet"].astype(str)
|
1349
1396
|
|
1350
1397
|
# --- Store original 'value' for annotations before potential scaling ---
|
1351
|
-
aggregated_df[
|
1398
|
+
aggregated_df["annotation_value"] = aggregated_df["value"].copy()
|
1352
1399
|
# ----------------------------------------------------------------------
|
1353
1400
|
|
1354
1401
|
if relative:
|
1355
1402
|
# This transforms the bar heights (value column) to percentages (0-1 range)
|
1356
|
-
aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])[
|
1403
|
+
aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])[
|
1404
|
+
"value"
|
1405
|
+
].transform(lambda x: x / x.sum())
|
1357
1406
|
|
1358
1407
|
category_orders = {}
|
1359
1408
|
|
1360
1409
|
if sort_values_index:
|
1361
|
-
sum_by_index =
|
1410
|
+
sum_by_index = (
|
1411
|
+
aggregated_df.groupby("index")["value"].sum().sort_values(ascending=False)
|
1412
|
+
)
|
1362
1413
|
category_orders["index"] = sum_by_index.index.tolist()
|
1363
1414
|
|
1364
1415
|
if sort_values_color:
|
1365
|
-
sum_by_col =
|
1416
|
+
sum_by_col = (
|
1417
|
+
aggregated_df.groupby("col")["value"].sum().sort_values(ascending=False)
|
1418
|
+
)
|
1366
1419
|
category_orders["col"] = sum_by_col.index.tolist()
|
1367
1420
|
|
1368
1421
|
if sort_values_facet:
|
1369
|
-
sum_by_facet =
|
1422
|
+
sum_by_facet = (
|
1423
|
+
aggregated_df.groupby("facet")["value"].sum().sort_values(ascending=False)
|
1424
|
+
)
|
1370
1425
|
category_orders["facet"] = sum_by_facet.index.tolist()
|
1371
1426
|
|
1372
1427
|
columns_for_color = sorted(aggregated_df["col"].unique().tolist())
|
1373
|
-
column_colors_map = assign_column_colors(
|
1428
|
+
column_colors_map = assign_column_colors(
|
1429
|
+
columns_for_color, color_palette, null_label
|
1430
|
+
) # Assumes assign_column_colors is accessible
|
1374
1431
|
|
1375
|
-
#
|
1432
|
+
# Prepare the text series for annotations with 'show_pct' control
|
1376
1433
|
if annotations:
|
1377
1434
|
if show_pct:
|
1378
1435
|
# When show_pct is True, use the scaled 'value' column (0-1) and format as percentage
|
1379
|
-
formatted_text_series = aggregated_df["value"].apply(
|
1436
|
+
formatted_text_series = aggregated_df["value"].apply(
|
1437
|
+
lambda x: f"{x:.{precision}%}".replace(".", ",")
|
1438
|
+
)
|
1380
1439
|
else:
|
1381
1440
|
# When show_pct is False, use the 'annotation_value' (original absolute) and format as absolute
|
1382
|
-
formatted_text_series = aggregated_df["annotation_value"].apply(
|
1441
|
+
formatted_text_series = aggregated_df["annotation_value"].apply(
|
1442
|
+
lambda x: f"{x:_.{precision}f}".replace(".", ",")
|
1443
|
+
)
|
1383
1444
|
else:
|
1384
1445
|
formatted_text_series = None
|
1385
|
-
#
|
1446
|
+
# - - - -
|
1386
1447
|
|
1387
1448
|
fig = px.bar(
|
1388
1449
|
aggregated_df,
|
@@ -1396,7 +1457,7 @@ def plot_facet_stacked_bars(
|
|
1396
1457
|
category_orders=category_orders,
|
1397
1458
|
text=formatted_text_series,
|
1398
1459
|
text_auto=False,
|
1399
|
-
height=subplot_size * (-(-len(aggregated_df["facet"].unique()) // subplots_per_row)),
|
1460
|
+
# height=subplot_size * (-(-len(aggregated_df["facet"].unique()) // subplots_per_row)),
|
1400
1461
|
title=f"{caption} {original_column_names[0]}, {original_column_names[1]}, {original_column_names[2]}",
|
1401
1462
|
)
|
1402
1463
|
|
@@ -1410,19 +1471,19 @@ def plot_facet_stacked_bars(
|
|
1410
1471
|
template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
1411
1472
|
|
1412
1473
|
layout_updates = {
|
1413
|
-
"title_text":
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1474
|
+
"title_text": f"{caption} "
|
1475
|
+
f"{'TOP ' + str(top_n_index) + ' ' if top_n_index > 0 else ''}[{original_column_names[0]}] "
|
1476
|
+
f"{'TOP ' + str(top_n_color) + ' ' if top_n_color > 0 else ''}[{original_column_names[1]}] "
|
1477
|
+
f"{'TOP ' + str(top_n_facet) + ' ' if top_n_facet > 0 else ''}[{original_column_names[2]}] "
|
1478
|
+
f", n = {original_rows:_} ({n:_})",
|
1418
1479
|
"showlegend": True,
|
1419
1480
|
"template": template,
|
1420
|
-
"width": subplot_size * subplots_per_row,
|
1481
|
+
# "width": subplot_size * subplots_per_row,
|
1421
1482
|
}
|
1422
1483
|
|
1423
1484
|
if relative:
|
1424
|
-
layout_updates[
|
1425
|
-
layout_updates[
|
1485
|
+
layout_updates["yaxis_range"] = [0, 1.1]
|
1486
|
+
layout_updates["yaxis_tickformat"] = ".0%"
|
1426
1487
|
|
1427
1488
|
fig.update_layout(**layout_updates)
|
1428
1489
|
|
@@ -1433,12 +1494,27 @@ def plot_facet_stacked_bars(
|
|
1433
1494
|
png_path = Path(png_path)
|
1434
1495
|
fig.write_image(str(png_path))
|
1435
1496
|
|
1436
|
-
fig.show(
|
1497
|
+
fig.show(
|
1498
|
+
renderer=renderer,
|
1499
|
+
width=subplot_size * subplots_per_row,
|
1500
|
+
height=subplot_size
|
1501
|
+
* (-(-len(aggregated_df["facet"].unique()) // subplots_per_row)),
|
1502
|
+
)
|
1437
1503
|
|
1438
1504
|
return fig
|
1439
1505
|
|
1440
1506
|
|
1441
|
-
def plot_sankey(
|
1507
|
+
def plot_sankey(
|
1508
|
+
df=None,
|
1509
|
+
max_events_per_id=None,
|
1510
|
+
height=None,
|
1511
|
+
width=None,
|
1512
|
+
exclude_overlap_id=False,
|
1513
|
+
exclude_overlap_event=False,
|
1514
|
+
renderer=None,
|
1515
|
+
show_start_node=True,
|
1516
|
+
font_size=10,
|
1517
|
+
):
|
1442
1518
|
"""
|
1443
1519
|
Generates a Sankey diagram from a Pandas DataFrame, assuming the column order is:
|
1444
1520
|
1. ID (string or integer)
|
@@ -1450,71 +1526,117 @@ def plot_sankey(df=None, max_events_per_id=None, height=None, width=None, exclud
|
|
1450
1526
|
|
1451
1527
|
Args:
|
1452
1528
|
df (pd.DataFrame, optional): A Pandas DataFrame containing the event data.
|
1453
|
-
|
1529
|
+
Expected column order: ID, Date, Event.
|
1454
1530
|
max_events_per_id (int, optional): The maximum number of events to display for each ID.
|
1455
|
-
|
1531
|
+
If None, all events for each ID will be used.
|
1456
1532
|
height (int, optional): The height of the plot in pixels.
|
1457
1533
|
width (int, optional): The width of the plot in pixels.
|
1458
1534
|
exclude_overlap_id (bool): If True, excludes any IDs that have multiple events on the same date.
|
1459
|
-
|
1535
|
+
This takes precedence over `exclude_overlap_event`.
|
1460
1536
|
exclude_overlap_event (bool): If True, only excludes the specific events that fall on the same date,
|
1461
|
-
|
1537
|
+
retaining other non-overlapping events for that ID.
|
1462
1538
|
renderer (str, optional): The renderer to use for displaying the plot. Options include
|
1463
|
-
|
1464
|
-
|
1539
|
+
'browser', 'notebook', 'json', 'png', 'svg', 'jpeg', 'webp', or 'pdf'.
|
1540
|
+
If None, plotly's default renderer is used.
|
1465
1541
|
show_start_node (bool): If True, adds a visual 'start' node and links all
|
1466
1542
|
first events to it. This is useful for visualizing
|
1467
1543
|
IDs with only one event.
|
1544
|
+
font_size (int): The font size of the labels in the plot.
|
1468
1545
|
"""
|
1469
1546
|
# --- Example Usage with Enlarged Pandas DataFrame if no DataFrame is provided ---
|
1470
1547
|
if df is None:
|
1471
|
-
data_demo = {
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1476
|
-
|
1477
|
-
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1483
|
-
|
1484
|
-
|
1548
|
+
data_demo = { # Renamed to data_demo for clarity
|
1549
|
+
"tumor-id": [
|
1550
|
+
"1",
|
1551
|
+
"1",
|
1552
|
+
"1",
|
1553
|
+
"1",
|
1554
|
+
"1",
|
1555
|
+
"2",
|
1556
|
+
"2",
|
1557
|
+
"2",
|
1558
|
+
"2",
|
1559
|
+
"3",
|
1560
|
+
"3",
|
1561
|
+
"3",
|
1562
|
+
"3",
|
1563
|
+
"4",
|
1564
|
+
"4",
|
1565
|
+
"4",
|
1566
|
+
"5",
|
1567
|
+
"5",
|
1568
|
+
"6",
|
1569
|
+
"6",
|
1570
|
+
"7",
|
1571
|
+
"7",
|
1572
|
+
"8",
|
1573
|
+
"9",
|
1574
|
+
"10",
|
1575
|
+
"11",
|
1576
|
+
"12",
|
1485
1577
|
],
|
1486
|
-
|
1487
|
-
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1578
|
+
"diagnosis date": [
|
1579
|
+
"2020-01-01",
|
1580
|
+
"2021-02-01",
|
1581
|
+
"2022-03-01",
|
1582
|
+
"2023-04-01",
|
1583
|
+
"2024-05-01", # Tumor 1
|
1584
|
+
"2010-01-01",
|
1585
|
+
"2011-02-01",
|
1586
|
+
"2012-03-01",
|
1587
|
+
"2013-04-01", # Tumor 2
|
1588
|
+
"2015-01-01",
|
1589
|
+
"2016-02-01",
|
1590
|
+
"2017-03-01",
|
1591
|
+
"2018-04-01", # Tumor 3
|
1592
|
+
"2005-01-01",
|
1593
|
+
"2006-02-01",
|
1594
|
+
"2007-03-01", # Tumor 4
|
1595
|
+
"2019-01-01",
|
1596
|
+
"2020-02-01", # Tumor 5
|
1597
|
+
"2021-01-01",
|
1598
|
+
"2022-02-01", # Tumor 6
|
1599
|
+
"2014-01-01",
|
1600
|
+
"2015-02-01", # Tumor 7
|
1601
|
+
"2025-01-01", # Tumor 8 (single event)
|
1602
|
+
"2025-02-01", # Tumor 9 (single event)
|
1603
|
+
"2025-03-01", # Tumor 10 (single event)
|
1604
|
+
"2025-04-01", # Tumor 11 (single event)
|
1605
|
+
"2025-05-01", # Tumor 12 (single event)
|
1606
|
+
],
|
1607
|
+
"treatment": [
|
1608
|
+
"op",
|
1609
|
+
"syst",
|
1610
|
+
"op",
|
1611
|
+
"rad",
|
1612
|
+
"op", # Tumor 1
|
1613
|
+
"syst",
|
1614
|
+
"st",
|
1615
|
+
"op",
|
1616
|
+
"rad", # Tumor 2
|
1617
|
+
"op",
|
1618
|
+
"rad",
|
1619
|
+
"syst",
|
1620
|
+
"op", # Tumor 3
|
1621
|
+
"st",
|
1622
|
+
"syst",
|
1623
|
+
"op", # Tumor 4
|
1624
|
+
"op",
|
1625
|
+
"rad", # Tumor 5
|
1626
|
+
"syst",
|
1627
|
+
"op", # Tumor 6
|
1628
|
+
"st",
|
1629
|
+
"rad", # Tumor 7
|
1630
|
+
"op", # Tumor 8
|
1631
|
+
"op", # Tumor 9
|
1632
|
+
"syst", # Tumor 10
|
1633
|
+
"rad", # Tumor 11
|
1634
|
+
"op", # Tumor 12
|
1499
1635
|
],
|
1500
|
-
'treatment': [
|
1501
|
-
'op', 'syst', 'op', 'rad', 'op', # Tumor 1
|
1502
|
-
'syst', 'st', 'op', 'rad', # Tumor 2
|
1503
|
-
'op', 'rad', 'syst', 'op', # Tumor 3
|
1504
|
-
'st', 'syst', 'op', # Tumor 4
|
1505
|
-
'op', 'rad', # Tumor 5
|
1506
|
-
'syst', 'op', # Tumor 6
|
1507
|
-
'st', 'rad', # Tumor 7
|
1508
|
-
'op', # Tumor 8
|
1509
|
-
'op', # Tumor 9
|
1510
|
-
'syst', # Tumor 10
|
1511
|
-
'rad', # Tumor 11
|
1512
|
-
'op' # Tumor 12
|
1513
|
-
]
|
1514
1636
|
}
|
1515
1637
|
df = pd.DataFrame(data_demo)
|
1516
1638
|
print("--- Using demo data (data_demo) ---")
|
1517
|
-
print(df.head().to_string())
|
1639
|
+
print(df.head().to_string()) # Print first 5 rows of the DataFrame prettily
|
1518
1640
|
print("-----------------------------------")
|
1519
1641
|
|
1520
1642
|
# --- Simplified Column Recognition based on index ---
|
@@ -1525,139 +1647,193 @@ def plot_sankey(df=None, max_events_per_id=None, height=None, width=None, exclud
|
|
1525
1647
|
df_processed = df.copy()
|
1526
1648
|
|
1527
1649
|
# --- Aggregate the data to remove duplicate rows before processing ---
|
1528
|
-
df_processed = df_processed.drop_duplicates(
|
1650
|
+
df_processed = df_processed.drop_duplicates(
|
1651
|
+
subset=[id_col_name, date_col_name, event_col_name]
|
1652
|
+
)
|
1529
1653
|
|
1530
1654
|
try:
|
1531
1655
|
df_processed[date_col_name] = pd.to_datetime(df_processed[date_col_name])
|
1532
1656
|
except (ValueError, TypeError):
|
1533
|
-
print(
|
1657
|
+
print(
|
1658
|
+
f"Error: Could not convert column '{date_col_name}' to a valid date format."
|
1659
|
+
)
|
1534
1660
|
return None
|
1535
1661
|
|
1536
1662
|
# --- Handle overlap exclusion based on user selection ---
|
1537
1663
|
overlap_title_part = ""
|
1538
1664
|
if exclude_overlap_id:
|
1539
|
-
overlapping_ids =
|
1540
|
-
|
1665
|
+
overlapping_ids = (
|
1666
|
+
df_processed.groupby([id_col_name, date_col_name])
|
1667
|
+
.size()
|
1668
|
+
.loc[lambda x: x > 1]
|
1669
|
+
.index.get_level_values(id_col_name)
|
1670
|
+
.unique()
|
1671
|
+
)
|
1672
|
+
df_processed = df_processed[
|
1673
|
+
~df_processed[id_col_name].isin(overlapping_ids)
|
1674
|
+
].copy()
|
1541
1675
|
overlap_title_part = ", overlap ids excluded"
|
1542
1676
|
elif exclude_overlap_event:
|
1543
|
-
overlapping_event_set = set(
|
1544
|
-
|
1677
|
+
overlapping_event_set = set(
|
1678
|
+
df_processed.groupby([id_col_name, date_col_name])
|
1679
|
+
.size()
|
1680
|
+
.loc[lambda x: x > 1]
|
1681
|
+
.index
|
1682
|
+
)
|
1683
|
+
df_processed = df_processed[
|
1684
|
+
~df_processed.set_index([id_col_name, date_col_name]).index.isin(
|
1685
|
+
overlapping_event_set
|
1686
|
+
)
|
1687
|
+
].copy()
|
1545
1688
|
overlap_title_part = ", overlap events excluded"
|
1546
1689
|
|
1547
1690
|
df_sorted = df_processed.sort_values(by=[id_col_name, date_col_name])
|
1548
|
-
|
1691
|
+
|
1549
1692
|
# --- Performance Optimization: Use vectorized operations instead of loops ---
|
1550
|
-
df_sorted[
|
1551
|
-
|
1693
|
+
df_sorted["event_order"] = df_sorted.groupby(id_col_name).cumcount() + 1
|
1694
|
+
|
1552
1695
|
if max_events_per_id is not None:
|
1553
|
-
df_sorted = df_sorted[df_sorted[
|
1554
|
-
|
1555
|
-
df_sorted[
|
1556
|
-
|
1696
|
+
df_sorted = df_sorted[df_sorted["event_order"] <= max_events_per_id]
|
1697
|
+
|
1698
|
+
df_sorted["ordered_event_label"] = (
|
1699
|
+
"[" + df_sorted["event_order"].astype(str) + "] " + df_sorted[event_col_name]
|
1700
|
+
)
|
1701
|
+
|
1557
1702
|
if df_sorted.empty:
|
1558
1703
|
print("No valid data to plot after filtering.")
|
1559
1704
|
return None
|
1560
1705
|
|
1561
1706
|
# Use a vectorized shift operation to create source and target columns
|
1562
|
-
df_sorted[
|
1563
|
-
|
1707
|
+
df_sorted["source_label"] = df_sorted.groupby(id_col_name)[
|
1708
|
+
"ordered_event_label"
|
1709
|
+
].shift(1)
|
1710
|
+
df_with_links = df_sorted.dropna(subset=["source_label"]).copy()
|
1564
1711
|
|
1565
1712
|
# Create the start node and links if enabled
|
1566
1713
|
if show_start_node:
|
1567
1714
|
first_events = df_sorted.groupby(id_col_name).first().reset_index()
|
1568
|
-
first_events[
|
1569
|
-
df_with_links = pd.concat(
|
1570
|
-
|
1571
|
-
|
1715
|
+
first_events["source_label"] = "[0] start"
|
1716
|
+
df_with_links = pd.concat(
|
1717
|
+
[
|
1718
|
+
first_events[["source_label", "ordered_event_label"]],
|
1719
|
+
df_with_links[["source_label", "ordered_event_label"]],
|
1720
|
+
],
|
1721
|
+
ignore_index=True,
|
1722
|
+
)
|
1723
|
+
|
1724
|
+
link_counts = (
|
1725
|
+
df_with_links.groupby(["source_label", "ordered_event_label"])
|
1726
|
+
.size()
|
1727
|
+
.reset_index(name="value")
|
1728
|
+
)
|
1572
1729
|
|
1573
1730
|
# Get all unique nodes for the labels and sorting
|
1574
|
-
all_labels = pd.concat(
|
1575
|
-
|
1576
|
-
|
1577
|
-
unique_labels_df
|
1578
|
-
|
1579
|
-
|
1731
|
+
all_labels = pd.concat(
|
1732
|
+
[link_counts["source_label"], link_counts["ordered_event_label"]]
|
1733
|
+
).unique()
|
1734
|
+
unique_labels_df = pd.DataFrame(all_labels, columns=["label"])
|
1735
|
+
unique_labels_df["event_order_num"] = (
|
1736
|
+
unique_labels_df["label"].str.extract(r"\[(\d+)\]").astype(float).fillna(0)
|
1737
|
+
)
|
1738
|
+
unique_labels_df["event_name"] = (
|
1739
|
+
unique_labels_df["label"].str.extract(r"\] (.*)").fillna("start")
|
1740
|
+
)
|
1741
|
+
unique_labels_df_sorted = unique_labels_df.sort_values(
|
1742
|
+
by=["event_order_num", "event_name"]
|
1743
|
+
)
|
1744
|
+
unique_unformatted_labels_sorted = unique_labels_df_sorted["label"].tolist()
|
1580
1745
|
|
1581
|
-
label_to_index = {
|
1746
|
+
label_to_index = {
|
1747
|
+
label: i for i, label in enumerate(unique_unformatted_labels_sorted)
|
1748
|
+
}
|
1582
1749
|
|
1583
1750
|
# Calculate total unique IDs for percentage calculation
|
1584
1751
|
total_unique_ids = df_processed[id_col_name].nunique()
|
1585
1752
|
|
1586
1753
|
display_labels = []
|
1587
|
-
node_counts = df_sorted[
|
1754
|
+
node_counts = df_sorted["ordered_event_label"].value_counts()
|
1588
1755
|
for label in unique_unformatted_labels_sorted:
|
1589
1756
|
if label == "[0] start":
|
1590
1757
|
count = total_unique_ids
|
1591
1758
|
else:
|
1592
1759
|
count = node_counts.get(label, 0)
|
1593
|
-
|
1760
|
+
|
1594
1761
|
percentage = (count / total_unique_ids) * 100
|
1595
|
-
formatted_count = f"{count:,}".replace(
|
1762
|
+
formatted_count = f"{count:,}".replace(",", "_")
|
1596
1763
|
formatted_percentage = f"({int(round(percentage, 0))}%)"
|
1597
1764
|
|
1598
1765
|
display_labels.append(f"{label} {formatted_count} {formatted_percentage}")
|
1599
1766
|
|
1600
1767
|
# Map sources and targets to indices
|
1601
|
-
sources = link_counts[
|
1602
|
-
targets = link_counts[
|
1603
|
-
values = link_counts[
|
1768
|
+
sources = link_counts["source_label"].map(label_to_index).tolist()
|
1769
|
+
targets = link_counts["ordered_event_label"].map(label_to_index).tolist()
|
1770
|
+
values = link_counts["value"].tolist()
|
1604
1771
|
|
1605
1772
|
# Define a color palette for links
|
1606
1773
|
color_palette = [
|
1607
|
-
"rgba(255, 99, 71, 0.6)",
|
1608
|
-
"rgba(
|
1609
|
-
"rgba(
|
1610
|
-
"rgba(
|
1774
|
+
"rgba(255, 99, 71, 0.6)",
|
1775
|
+
"rgba(60, 179, 113, 0.6)",
|
1776
|
+
"rgba(65, 105, 225, 0.6)",
|
1777
|
+
"rgba(255, 215, 0, 0.6)",
|
1778
|
+
"rgba(147, 112, 219, 0.6)",
|
1779
|
+
"rgba(0, 206, 209, 0.6)",
|
1780
|
+
"rgba(255, 160, 122, 0.6)",
|
1781
|
+
"rgba(124, 252, 0, 0.6)",
|
1782
|
+
"rgba(30, 144, 255, 0.6)",
|
1783
|
+
"rgba(218, 165, 32, 0.6)",
|
1611
1784
|
]
|
1612
1785
|
start_link_color = "rgba(128, 128, 128, 0.6)"
|
1613
|
-
|
1786
|
+
|
1614
1787
|
link_colors = []
|
1615
1788
|
link_type_to_color = {}
|
1616
1789
|
color_index = 0
|
1617
1790
|
for i, row in link_counts.iterrows():
|
1618
|
-
source_l = row[
|
1619
|
-
target_l = row[
|
1791
|
+
source_l = row["source_label"]
|
1792
|
+
target_l = row["ordered_event_label"]
|
1620
1793
|
if source_l == "[0] start":
|
1621
1794
|
link_colors.append(start_link_color)
|
1622
1795
|
else:
|
1623
|
-
source_event_name = re.search(r
|
1624
|
-
target_event_name = re.search(r
|
1796
|
+
source_event_name = re.search(r"\] (.*)", source_l).group(1)
|
1797
|
+
target_event_name = re.search(r"\] (.*)", target_l).group(1)
|
1625
1798
|
link_type = (source_event_name, target_event_name)
|
1626
1799
|
|
1627
1800
|
if link_type not in link_type_to_color:
|
1628
|
-
link_type_to_color[link_type] = color_palette[
|
1801
|
+
link_type_to_color[link_type] = color_palette[
|
1802
|
+
color_index % len(color_palette)
|
1803
|
+
]
|
1629
1804
|
color_index += 1
|
1630
1805
|
link_colors.append(link_type_to_color[link_type])
|
1631
1806
|
|
1632
|
-
formatted_total_ids = f"{total_unique_ids:,}".replace(
|
1807
|
+
formatted_total_ids = f"{total_unique_ids:,}".replace(",", "_")
|
1633
1808
|
total_rows = len(df_processed)
|
1634
|
-
formatted_total_rows = f"{total_rows:,}".replace(
|
1635
|
-
|
1809
|
+
formatted_total_rows = f"{total_rows:,}".replace(",", "_")
|
1810
|
+
|
1636
1811
|
chart_title = f"[{id_col_name}] over [{event_col_name}]"
|
1637
1812
|
if max_events_per_id is not None:
|
1638
1813
|
chart_title += f", top {max_events_per_id} events"
|
1639
1814
|
chart_title += overlap_title_part
|
1640
1815
|
chart_title += f", n = {formatted_total_ids} ({formatted_total_rows})"
|
1641
1816
|
|
1642
|
-
fig = go.Figure(
|
1643
|
-
|
1644
|
-
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1648
|
-
|
1649
|
-
|
1650
|
-
|
1651
|
-
|
1652
|
-
|
1653
|
-
|
1654
|
-
|
1655
|
-
|
1656
|
-
|
1657
|
-
|
1817
|
+
fig = go.Figure(
|
1818
|
+
data=[
|
1819
|
+
go.Sankey(
|
1820
|
+
node=dict(
|
1821
|
+
pad=15,
|
1822
|
+
thickness=20,
|
1823
|
+
line=dict(color="black", width=0.5),
|
1824
|
+
label=display_labels,
|
1825
|
+
color="blue",
|
1826
|
+
align="left",
|
1827
|
+
),
|
1828
|
+
link=dict(
|
1829
|
+
source=sources, target=targets, value=values, color=link_colors
|
1830
|
+
),
|
1831
|
+
)
|
1832
|
+
]
|
1833
|
+
)
|
1658
1834
|
|
1659
|
-
fig.update_layout(title_text=chart_title, font_size=
|
1660
|
-
fig.show(renderer=renderer)
|
1835
|
+
fig.update_layout(title_text=chart_title, font_size=font_size)
|
1836
|
+
fig.show(renderer=renderer, width=width, height=height)
|
1661
1837
|
|
1662
1838
|
|
1663
1839
|
# * extend objects to enable chaining
|
@@ -1669,4 +1845,4 @@ pd.DataFrame.plot_stacked_boxes = plot_boxes
|
|
1669
1845
|
pd.DataFrame.plot_quadrants = plot_quadrants
|
1670
1846
|
pd.DataFrame.plot_histogram = plot_histogram
|
1671
1847
|
pd.DataFrame.plot_joint = plot_joint
|
1672
|
-
pd.DataFrame.plot_sankey = plot_sankey
|
1848
|
+
pd.DataFrame.plot_sankey = plot_sankey
|
pandas_plots/tbl.py
CHANGED
@@ -70,8 +70,9 @@ def describe_df(
|
|
70
70
|
fig_cols: int = 3,
|
71
71
|
fig_offset: int = None,
|
72
72
|
fig_rowheight: int = 300,
|
73
|
+
fig_width: int = 400,
|
73
74
|
sort_mode: Literal["value", "index"] = "value",
|
74
|
-
top_n_uniques: int =
|
75
|
+
top_n_uniques: int = 5,
|
75
76
|
top_n_chars_in_index: int = 0,
|
76
77
|
top_n_chars_in_columns: int = 0,
|
77
78
|
):
|
@@ -88,6 +89,7 @@ def describe_df(
|
|
88
89
|
fig_cols (int): number of columns in plot
|
89
90
|
fig_offset (int): offset for plots as iloc Argument. None = no offset, -1 = omit last plot
|
90
91
|
fig_rowheight (int): row height for plot (default 300)
|
92
|
+
fig_width (int): width for plot (default 400)
|
91
93
|
sort_mode (Literal["value", "index"]): sort by value or index
|
92
94
|
top_n_uniques (int): number of uniques to display
|
93
95
|
top_n_chars_in_index (int): number of characters to display on plot axis
|
@@ -203,8 +205,8 @@ def describe_df(
|
|
203
205
|
subplot_titles=cols,
|
204
206
|
)
|
205
207
|
# * layout settings
|
206
|
-
fig.layout.height = fig_rowheight * fig_rows
|
207
|
-
fig.layout.width = 400 * fig_cols
|
208
|
+
# fig.layout.height = fig_rowheight * fig_rows
|
209
|
+
# fig.layout.width = 400 * fig_cols
|
208
210
|
|
209
211
|
# * construct subplots
|
210
212
|
for i, col in enumerate(cols):
|
@@ -246,7 +248,7 @@ def describe_df(
|
|
246
248
|
fig.update_layout(
|
247
249
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
248
250
|
)
|
249
|
-
fig.show(renderer)
|
251
|
+
fig.show(renderer, width=fig_width * fig_cols, height=fig_rowheight * fig_rows)
|
250
252
|
|
251
253
|
if use_missing:
|
252
254
|
import missingno as msno
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.15.0
|
4
4
|
Summary: A collection of helper for table handling and visualization
|
5
5
|
Project-URL: Homepage, https://github.com/smeisegeier/pandas-plots
|
6
6
|
Project-URL: Repository, https://github.com/smeisegeier/pandas-plots
|
@@ -16,9 +16,11 @@ Classifier: Programming Language :: Python :: 3
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
17
17
|
Classifier: Topic :: Scientific/Engineering
|
18
18
|
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: connection-helper>=0.11.2
|
19
20
|
Requires-Dist: dataframe-image>=0.2.6
|
20
21
|
Requires-Dist: duckdb>=1.3.0
|
21
22
|
Requires-Dist: jinja2>=3.1.4
|
23
|
+
Requires-Dist: kaleido>=1
|
22
24
|
Requires-Dist: matplotlib-venn==0.11.10
|
23
25
|
Requires-Dist: matplotlib>=3.8.2
|
24
26
|
Requires-Dist: missingno>=0.5.2
|
@@ -0,0 +1,9 @@
|
|
1
|
+
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
pandas_plots/hlp.py,sha256=z8rrVNbH9qMohdXPT-FksP-VkTOjI0bGFj47Sw5p3aY,21141
|
3
|
+
pandas_plots/pls.py,sha256=dPs9TosCmEvScKdZADRwCJwh-u40BmG4AgCOX8Cpul8,63623
|
4
|
+
pandas_plots/tbl.py,sha256=mzrUif2TUZ8JJmkgzNpVYApBZS8L0MS1Yjpx9KZN7Vs,32920
|
5
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
+
pandas_plots-0.15.0.dist-info/METADATA,sha256=CexYKyjaJy2O00V2GLjG2yZcg2Y9kDQHK3SBzMaW0BY,7461
|
7
|
+
pandas_plots-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
8
|
+
pandas_plots-0.15.0.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
9
|
+
pandas_plots-0.15.0.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
pandas_plots/hlp.py,sha256=z8rrVNbH9qMohdXPT-FksP-VkTOjI0bGFj47Sw5p3aY,21141
|
3
|
-
pandas_plots/pls.py,sha256=80uXr3bT66LGjDcuT4a0ewCBwATcOUZ3QQ228Hn9glY,60052
|
4
|
-
pandas_plots/tbl.py,sha256=R2E6FLhxNpUtS88Zf88Eh9i8dSKgmJtmFimFvOt0foQ,32780
|
5
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
-
pandas_plots-0.14.0.dist-info/METADATA,sha256=tw4QxZ9io1c9MgSESxsrGHdKXqoTr9-xNfOpV5hxfUo,7394
|
7
|
-
pandas_plots-0.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
8
|
-
pandas_plots-0.14.0.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
9
|
-
pandas_plots-0.14.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|