pandas-plots 0.11.15__tar.gz → 0.11.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.11.15/src/pandas_plots.egg-info → pandas_plots-0.11.17}/PKG-INFO +2 -1
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/README.md +1 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/setup.cfg +1 -1
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots/pls.py +9 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots/tbl.py +61 -3
- {pandas_plots-0.11.15 → pandas_plots-0.11.17/src/pandas_plots.egg-info}/PKG-INFO +2 -1
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/LICENSE +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/pyproject.toml +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots/hlp.py +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots/pii.py +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots.egg-info/SOURCES.txt +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.11.15
|
3
|
+
Version: 0.11.17
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -83,6 +83,7 @@ tbl.show_num_df(
|
|
83
83
|
- `describe_df()` an alternative version of pandas `describe()` function
|
84
84
|
- `descr_db()` a very short descr for a `duckdb` relation
|
85
85
|
- `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
|
86
|
+
- `print_summary()` shows statistics for a pandas DataFrame or Series
|
86
87
|
|
87
88
|
- `pls` for plotly visualizations
|
88
89
|
- `plot_box()` auto annotated boxplot w/ violin option
|
@@ -49,6 +49,7 @@ tbl.show_num_df(
|
|
49
49
|
- `describe_df()` an alternative version of pandas `describe()` function
|
50
50
|
- `descr_db()` a very short descr for a `duckdb` relation
|
51
51
|
- `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
|
52
|
+
- `print_summary()` shows statistics for a pandas DataFrame or Series
|
52
53
|
|
53
54
|
- `pls` for plotly visualizations
|
54
55
|
- `plot_box()` auto annotated boxplot w/ violin option
|
@@ -11,6 +11,7 @@ from matplotlib import pyplot as plt
|
|
11
11
|
from plotly import express as px
|
12
12
|
|
13
13
|
from .hlp import *
|
14
|
+
from .tbl import print_summary
|
14
15
|
|
15
16
|
|
16
17
|
def _set_caption(caption: str) -> str:
|
@@ -757,6 +758,7 @@ def plot_box(
|
|
757
758
|
height: int = 200,
|
758
759
|
width: int = 1200,
|
759
760
|
annotations: bool = True,
|
761
|
+
summary: bool = True,
|
760
762
|
caption: str = None,
|
761
763
|
title: str = None,
|
762
764
|
violin: bool = False,
|
@@ -776,6 +778,7 @@ def plot_box(
|
|
776
778
|
violin: Use violin plot or not
|
777
779
|
x_min: The minimum value for the x-axis scale (max and min must be set)
|
778
780
|
x_max: The maximum value for the x-axis scale (max and min must be set)
|
781
|
+
summary: Whether to add a summary table to the plot
|
779
782
|
|
780
783
|
Returns:
|
781
784
|
None
|
@@ -885,6 +888,8 @@ def plot_box(
|
|
885
888
|
)
|
886
889
|
|
887
890
|
fig.show("png")
|
891
|
+
if summary:
|
892
|
+
print_summary(ser)
|
888
893
|
return
|
889
894
|
|
890
895
|
|
@@ -896,6 +901,7 @@ def plot_boxes(
|
|
896
901
|
height: int = 600,
|
897
902
|
width: int = 800,
|
898
903
|
annotations: bool = True,
|
904
|
+
summary: bool = True,
|
899
905
|
title: str = None,
|
900
906
|
) -> None:
|
901
907
|
"""
|
@@ -909,6 +915,7 @@ def plot_boxes(
|
|
909
915
|
height (int): The height of the plot.
|
910
916
|
width (int): The width of the plot.
|
911
917
|
annotations (bool): Whether to add annotations to the plot.
|
918
|
+
summary (bool): Whether to add a summary to the plot.
|
912
919
|
|
913
920
|
Returns:
|
914
921
|
None
|
@@ -1022,6 +1029,8 @@ def plot_boxes(
|
|
1022
1029
|
fig.update_yaxes(title_text=df.columns[1])
|
1023
1030
|
|
1024
1031
|
fig.show("png")
|
1032
|
+
if summary:
|
1033
|
+
print_summary(df)
|
1025
1034
|
return
|
1026
1035
|
|
1027
1036
|
|
@@ -132,9 +132,10 @@ def describe_df(
|
|
132
132
|
_u, _h = get_uniques_header(col)
|
133
133
|
|
134
134
|
# * extra care for scipy metrics, these are very vulnarable to nan
|
135
|
-
print(
|
136
|
-
|
137
|
-
)
|
135
|
+
# print(
|
136
|
+
# f"{_h} min: {round(df[col].min(),3):_} | max: {round(df[col].max(),3):_} | median: {round(df[col].median(),3):_} | mean: {round(df[col].mean(),3):_} | std: {round(df[col].std(),3):_} | cv: {round(df[col].std() / df[col].mean(),3):_} | sum: {round(df[col].sum(),3):_} | skew: {round(stats.skew(df[col].dropna().tolist()),3)} | kurto: {round(stats.kurtosis(df[col].dropna().tolist()),3)}"
|
137
|
+
# )
|
138
|
+
print_summary(df[col], _h)
|
138
139
|
|
139
140
|
# * show first 3 rows
|
140
141
|
display(df[:3])
|
@@ -620,3 +621,60 @@ def show_num_df(
|
|
620
621
|
)
|
621
622
|
|
622
623
|
return out
|
624
|
+
|
625
|
+
def print_summary(df: pd.DataFrame | pd.Series, name: str="🟠 "):
|
626
|
+
"""
|
627
|
+
Print statistical summary for a pandas DataFrame or Series.
|
628
|
+
|
629
|
+
The function computes and prints various statistics for each numeric column in a DataFrame
|
630
|
+
or for a Series. Statistics include minimum, lower bound, 25th percentile (Q1), median, mean,
|
631
|
+
75th percentile (Q3), upper bound, maximum, standard deviation, coefficient of variation,
|
632
|
+
sum, skewness, and kurtosis. The interquartile range (IQR) is used to compute the lower
|
633
|
+
and upper bounds, which are adjusted not to exceed the min and max of the data.
|
634
|
+
|
635
|
+
Args:
|
636
|
+
df (Union[pd.DataFrame, pd.Series]): Input DataFrame or Series. Only numeric columns
|
637
|
+
in DataFrame are considered.
|
638
|
+
"""
|
639
|
+
if df.empty:
|
640
|
+
return
|
641
|
+
|
642
|
+
def print_summary_ser(ser: pd.Series, name: str=""):
|
643
|
+
# Calculate IQR and pass `rng=(25, 75)` to get the interquartile range
|
644
|
+
iqr_value = stats.iqr(ser)
|
645
|
+
|
646
|
+
# * drop NA to keep scipy sane
|
647
|
+
ser.dropna(inplace=True)
|
648
|
+
|
649
|
+
# Using the iqr function, we still calculate the bounds manually
|
650
|
+
q1 = stats.scoreatpercentile(ser, 25)
|
651
|
+
q3 = stats.scoreatpercentile(ser, 75)
|
652
|
+
|
653
|
+
# Calculate upper bound directly
|
654
|
+
min = round(ser.min(),3)
|
655
|
+
med = round(ser.median(),3)
|
656
|
+
upper = round(q3 + 1.5 * iqr_value,3)
|
657
|
+
lower = round(q1 - 1.5 * iqr_value,3)
|
658
|
+
mean = round(ser.mean(),3)
|
659
|
+
std = round(ser.std(),3)
|
660
|
+
cv = round(ser.std() / ser.mean(),3)
|
661
|
+
max = round(ser.max(),3)
|
662
|
+
sum = round(ser.sum(),3)
|
663
|
+
skew = round(stats.skew(ser.dropna().tolist()),3)
|
664
|
+
kurto = round(stats.kurtosis(ser.dropna().tolist()),3)
|
665
|
+
|
666
|
+
lower = min if lower < min else lower
|
667
|
+
upper = max if upper > max else upper
|
668
|
+
|
669
|
+
# * extra care for scipy metrics, these are very vulnarable to nan
|
670
|
+
print(
|
671
|
+
f"""{name} min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto}""")
|
672
|
+
|
673
|
+
if isinstance(df, pd.Series):
|
674
|
+
print_summary_ser(df, name)
|
675
|
+
return
|
676
|
+
if isinstance(df, pd.DataFrame):
|
677
|
+
# * only show numerics
|
678
|
+
for col in df.select_dtypes("number").columns:
|
679
|
+
print_summary_ser(ser=df[col], name=col)
|
680
|
+
return
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.11.15
|
3
|
+
Version: 0.11.17
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -83,6 +83,7 @@ tbl.show_num_df(
|
|
83
83
|
- `describe_df()` an alternative version of pandas `describe()` function
|
84
84
|
- `descr_db()` a very short descr for a `duckdb` relation
|
85
85
|
- `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
|
86
|
+
- `print_summary()` shows statistics for a pandas DataFrame or Series
|
86
87
|
|
87
88
|
- `pls` for plotly visualizations
|
88
89
|
- `plot_box()` auto annotated boxplot w/ violin option
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{pandas_plots-0.11.15 → pandas_plots-0.11.17}/src/pandas_plots.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|