pandas-plots 0.12.20__py3-none-any.whl → 0.12.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/hlp.py +20 -12
- pandas_plots/pls.py +25 -10
- pandas_plots/tbl.py +11 -11
- {pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/METADATA +1 -1
- pandas_plots-0.12.22.dist-info/RECORD +11 -0
- pandas_plots-0.12.20.dist-info/RECORD +0 -11
- {pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/WHEEL +0 -0
- {pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/licenses/LICENSE +0 -0
- {pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/pii.py +0 -0
- {pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/top_level.txt +0 -0
pandas_plots/hlp.py
CHANGED
@@ -22,29 +22,34 @@ from PIL import Image
|
|
22
22
|
URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
|
23
23
|
|
24
24
|
|
25
|
-
def mean_confidence_interval(df, confidence=0.95):
|
25
|
+
def mean_confidence_interval(df, confidence=0.95, use_median=False):
|
26
26
|
"""
|
27
|
-
Calculate the mean and confidence interval of the input dataframe.
|
28
|
-
|
27
|
+
Calculate the mean or median and confidence interval of the input dataframe.
|
28
|
+
Source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
|
29
29
|
|
30
30
|
Parameters:
|
31
31
|
df (array-like): The input dataframe.
|
32
32
|
confidence (float, optional): The confidence level for the interval. Defaults to 0.95.
|
33
|
+
use_median (bool, optional): If True, calculates median and confidence interval instead of mean. Defaults to False.
|
33
34
|
|
34
35
|
Returns:
|
35
|
-
tuple: A tuple containing the mean, interval, lower bound, and upper bound.
|
36
|
+
tuple: A tuple containing the central value (mean or median), interval, lower bound, and upper bound.
|
36
37
|
"""
|
37
38
|
df = to_series(df)
|
38
39
|
if df is None:
|
39
40
|
return None
|
40
41
|
a = 1.0 * np.array(df)
|
41
42
|
n = len(a)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
43
|
+
|
44
|
+
if use_median:
|
45
|
+
median = np.median(a)
|
46
|
+
se = 1.253 * scipy.stats.sem(a) # Approximate standard error for median
|
47
|
+
margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
|
48
|
+
return median, margin, median - margin, median + margin
|
49
|
+
else:
|
50
|
+
mean, se = np.mean(a), scipy.stats.sem(a)
|
51
|
+
margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
|
52
|
+
return mean, margin, mean - margin, mean + margin
|
48
53
|
|
49
54
|
# # * Alternative
|
50
55
|
# # from statistics import NormalDist
|
@@ -190,7 +195,8 @@ def wrap_text(
|
|
190
195
|
line = line + word_s + " "
|
191
196
|
# * reset if counter exceeds limit, or if word ends with newline
|
192
197
|
if i >= max_items_in_line or str(word).endswith("\n"):
|
193
|
-
out = out + line + "\n"
|
198
|
+
# out = out + line + "\n"
|
199
|
+
out = out + line.rstrip() + " \n"
|
194
200
|
line = ""
|
195
201
|
i = 0
|
196
202
|
# else:
|
@@ -542,7 +548,9 @@ def add_measures_to_pyg_config(json_path: str, nodes: list[tuple[str, str]] = [(
|
|
542
548
|
|
543
549
|
Example
|
544
550
|
-------
|
545
|
-
`add_measures_to_pyg_config('config.json', [('cnt_tum', 'count(distinct z_tum_id)')], strict=True)`
|
551
|
+
default: `add_measures_to_pyg_config('config.json', [('cnt_tum', 'count(distinct z_tum_id)')], strict=True)`
|
552
|
+
|
553
|
+
usage: start pygwalker with an empty config file at a defined config path, make changes to the chart, and save the config file. Then run this function again - the measures will be added.
|
546
554
|
"""
|
547
555
|
if not os.path.exists(json_path):
|
548
556
|
if strict:
|
pandas_plots/pls.py
CHANGED
@@ -500,6 +500,7 @@ def plot_bars(
|
|
500
500
|
width: int = 1600,
|
501
501
|
title: str = None,
|
502
502
|
use_ci: bool = False,
|
503
|
+
ci_agg: Literal["mean", "median"] = "mean",
|
503
504
|
precision: int = 0,
|
504
505
|
renderer: Literal["png", "svg", None] = "png",
|
505
506
|
png_path: Path | str = None,
|
@@ -569,9 +570,9 @@ def plot_bars(
|
|
569
570
|
dropna=False,
|
570
571
|
)
|
571
572
|
.agg(
|
572
|
-
mean=(col_name, "mean"),
|
573
|
+
mean=(col_name, ci_agg),
|
573
574
|
# * retrieve margin from custom func
|
574
|
-
margin=(col_name, lambda x: mean_confidence_interval(x)[1]),
|
575
|
+
margin=(col_name, lambda x: mean_confidence_interval(x, use_median = (ci_agg == "median"))[1]),
|
575
576
|
)
|
576
577
|
.reset_index()
|
577
578
|
)
|
@@ -653,7 +654,7 @@ def plot_bars(
|
|
653
654
|
|
654
655
|
# * title str n
|
655
656
|
_title_str_n = (
|
656
|
-
f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on means<sub>"
|
657
|
+
f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on {ci_agg}s<sub>"
|
657
658
|
)
|
658
659
|
|
659
660
|
# * title str na
|
@@ -965,6 +966,7 @@ def plot_box(
|
|
965
966
|
violin: bool = False,
|
966
967
|
x_min: float = None,
|
967
968
|
x_max: float = None,
|
969
|
+
use_log: bool = False,
|
968
970
|
png_path: Path | str = None,
|
969
971
|
) -> object:
|
970
972
|
"""
|
@@ -977,10 +979,13 @@ def plot_box(
|
|
977
979
|
height: The height of the plot.
|
978
980
|
width: The width of the plot.
|
979
981
|
annotations: Whether to add annotations to the plot.
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
982
|
+
summary: Whether to add a summary table to the plot.
|
983
|
+
caption: The caption for the plot.
|
984
|
+
title: The title of the plot.
|
985
|
+
violin: Use violin plot or not.
|
986
|
+
x_min: The minimum value for the x-axis scale (max and min must be set).
|
987
|
+
x_max: The maximum value for the x-axis scale (max and min must be set).
|
988
|
+
use_log: Use logarithmic scale for the axis.
|
984
989
|
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
985
990
|
|
986
991
|
Returns:
|
@@ -993,7 +998,7 @@ def plot_box(
|
|
993
998
|
# * drop na to keep scipy sane
|
994
999
|
n_ = len(ser)
|
995
1000
|
ser.dropna(inplace=True)
|
996
|
-
n = len(ser)
|
1001
|
+
# n = len(ser)
|
997
1002
|
|
998
1003
|
# hack
|
999
1004
|
median = ser.median()
|
@@ -1011,7 +1016,6 @@ def plot_box(
|
|
1011
1016
|
lvl3 = height * 0.25
|
1012
1017
|
|
1013
1018
|
caption = _set_caption(caption)
|
1014
|
-
|
1015
1019
|
dict = {
|
1016
1020
|
"data_frame": ser,
|
1017
1021
|
"orientation": "h",
|
@@ -1020,7 +1024,9 @@ def plot_box(
|
|
1020
1024
|
"width": width,
|
1021
1025
|
"points": points,
|
1022
1026
|
# 'box':True,
|
1023
|
-
"
|
1027
|
+
"log_x": use_log, # * logarithmic scale, axis is always x
|
1028
|
+
# "notched": True,
|
1029
|
+
"title": f"{caption}[{ser.name}], n = {n_:_}" if not title else title,
|
1024
1030
|
}
|
1025
1031
|
|
1026
1032
|
fig = px.violin(**{**dict, "box": True}) if violin else px.box(**dict)
|
@@ -1119,6 +1125,8 @@ def plot_boxes(
|
|
1119
1125
|
annotations: bool = True,
|
1120
1126
|
summary: bool = True,
|
1121
1127
|
title: str = None,
|
1128
|
+
use_log: bool = False,
|
1129
|
+
box_width: float = 0.5,
|
1122
1130
|
png_path: Path | str = None,
|
1123
1131
|
) -> object:
|
1124
1132
|
"""
|
@@ -1133,6 +1141,7 @@ def plot_boxes(
|
|
1133
1141
|
width (int): The width of the plot.
|
1134
1142
|
annotations (bool): Whether to add annotations to the plot.
|
1135
1143
|
summary (bool): Whether to add a summary to the plot.
|
1144
|
+
use_log (bool): Whether to use logarithmic scale for the plot (cannot show negative values).
|
1136
1145
|
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
1137
1146
|
|
1138
1147
|
Returns:
|
@@ -1170,11 +1179,14 @@ def plot_boxes(
|
|
1170
1179
|
df,
|
1171
1180
|
x=df.iloc[:, 0],
|
1172
1181
|
y=df.iloc[:, 1],
|
1182
|
+
color=df.iloc[:, 0],
|
1173
1183
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
1174
1184
|
orientation="v",
|
1175
1185
|
height=height,
|
1176
1186
|
width=width,
|
1177
1187
|
points=points,
|
1188
|
+
log_y=use_log,
|
1189
|
+
# color_discrete_sequence=px.colors.qualitative.Plotly,
|
1178
1190
|
title=(
|
1179
1191
|
f"{caption}[{df.columns[0]}] on [{df.columns[1]}], n = {len(df):_.0f}"
|
1180
1192
|
if not title
|
@@ -1245,6 +1257,9 @@ def plot_boxes(
|
|
1245
1257
|
|
1246
1258
|
fig.update_xaxes(title_text=df.columns[0])
|
1247
1259
|
fig.update_yaxes(title_text=df.columns[1])
|
1260
|
+
fig.update_layout(boxmode="group") # Ensures boxes are not too compressed
|
1261
|
+
fig.update_layout(showlegend=False)
|
1262
|
+
fig.update_traces(marker=dict(size=5), width=box_width) # Adjust width (default ~0.5)
|
1248
1263
|
|
1249
1264
|
fig.show("png")
|
1250
1265
|
if summary:
|
pandas_plots/tbl.py
CHANGED
@@ -121,14 +121,14 @@ def describe_df(
|
|
121
121
|
if df[col].notna().sum() == 0 and df[col].dtype == "float":
|
122
122
|
df[col] = df[col].astype(str)
|
123
123
|
|
124
|
-
print(f"🔵 {'*'*3} df: {caption} {'*'*3}")
|
125
|
-
print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {np.array(df.columns)} ")
|
124
|
+
print(f"🔵 {'*'*3} df: {caption} {'*'*3} ")
|
125
|
+
print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {np.array(df.columns)} ")
|
126
126
|
# print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {df.columns.tolist()} ")
|
127
|
-
print(f"🟣 duplicates: {df.duplicated().sum():_}")
|
128
|
-
print(f"🟣 uniques: {wrap_text(str({col: f'{df[col].nunique():_}' for col in df})) }")
|
127
|
+
print(f"🟣 duplicates: {df.duplicated().sum():_} ")
|
128
|
+
print(f"🟣 uniques: {wrap_text(str({col: f'{df[col].nunique():_}' for col in df})) } ")
|
129
129
|
# print(f"🟣 uniques: { {col: f'{df[col].nunique():_}' for col in df} }")
|
130
130
|
# print(f"🟣 uniques: {{ {', '.join(f'{col}: {df[col].nunique():_}' for col in df)} }}")
|
131
|
-
print(f"🟣 missings: {wrap_text(str({col: f'{df[col].isna().sum():_}' for col in df})) }")
|
131
|
+
print(f"🟣 missings: {wrap_text(str({col: f'{df[col].isna().sum():_}' for col in df})) } ")
|
132
132
|
# print(f"🟣 missings: { {col: f'{df[col].isna().sum():_}' for col in df} }")
|
133
133
|
# print(f"🟣 missings: {dict(df.isna().sum())}")
|
134
134
|
|
@@ -141,13 +141,13 @@ def describe_df(
|
|
141
141
|
# unis = df[col].sort_values().unique()
|
142
142
|
unis = list(df[col].value_counts().sort_index().index)
|
143
143
|
# * get header
|
144
|
-
header = f"🟠 {col}({len(unis):_}|{df[col].dtype})"
|
144
|
+
header = f"🟠 {col}({len(unis):_}|{df[col].dtype}) "
|
145
145
|
return unis, header
|
146
146
|
|
147
147
|
# hack this block somehow interferes with the plotly renderer, so it's run even when use_columns=False
|
148
148
|
if use_columns:
|
149
|
-
print("--- column uniques (all)")
|
150
|
-
print(f"🟠 index {wrap_text(df.index.tolist()[:top_n_uniques])}")
|
149
|
+
print("--- column uniques (all) ")
|
150
|
+
print(f"🟠 index {wrap_text(df.index.tolist()[:top_n_uniques])} ")
|
151
151
|
for col in df.columns[:]:
|
152
152
|
_u, _h = get_uniques_header(col)
|
153
153
|
# * check col type
|
@@ -155,10 +155,10 @@ def describe_df(
|
|
155
155
|
# * wrap output
|
156
156
|
if use_columns:
|
157
157
|
print(
|
158
|
-
f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
|
158
|
+
f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)} "
|
159
159
|
)
|
160
160
|
|
161
|
-
print("--- column stats (numeric)")
|
161
|
+
print("--- column stats (numeric) ")
|
162
162
|
# * only show numerics
|
163
163
|
for col in df.select_dtypes("number").columns:
|
164
164
|
_u, _h = get_uniques_header(col)
|
@@ -793,7 +793,7 @@ def print_summary(df: pd.DataFrame | pd.Series, show: bool = True, name: str=" "
|
|
793
793
|
# * extra care for scipy metrics, these are very vulnerable to nan
|
794
794
|
if show:
|
795
795
|
print(
|
796
|
-
f"""{name} -> min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto}""")
|
796
|
+
f"""{name} -> min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto} """)
|
797
797
|
|
798
798
|
summary = {
|
799
799
|
"min": min,
|
@@ -0,0 +1,11 @@
|
|
1
|
+
pandas_plots/hlp.py,sha256=i11Ep9P-u9O0bvexGTELRDUtmLzvNgNHxnkQTGf3DwQ,20838
|
2
|
+
pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
3
|
+
pandas_plots/pls.py,sha256=APvF_cEYN28TtlpNNIJ2NPTA3chTP9ZHtwnVEuZ-skI,49059
|
4
|
+
pandas_plots/tbl.py,sha256=RJWBHeKGTAhGpVCY57TsS_dYR-FpInP-TOsKW_tU4V4,32556
|
5
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
+
pandas_plots-0.12.22.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
7
|
+
pandas_plots-0.12.22.dist-info/METADATA,sha256=0bdvEP5M1SgmSJI3QKLd8MX1RjSrwzxlXWrygQNjHaM,7564
|
8
|
+
pandas_plots-0.12.22.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
9
|
+
pandas_plots-0.12.22.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
10
|
+
pandas_plots-0.12.22.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
11
|
+
pandas_plots-0.12.22.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
pandas_plots/hlp.py,sha256=TxduvDztLtMuVg7rRS5mEnhktc-eHf5vI9SG1ppAbxk,20299
|
2
|
-
pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
3
|
-
pandas_plots/pls.py,sha256=S9hKQTPp8XIL9RmzvfeSQE4t2jm18bG4bejwcd-dJF4,48236
|
4
|
-
pandas_plots/tbl.py,sha256=LxMKJh4qkGuQZ1DdCZIq1tMS26F6elsqbe_uabvQx4E,32535
|
5
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
-
pandas_plots-0.12.20.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
7
|
-
pandas_plots-0.12.20.dist-info/METADATA,sha256=E64_SFyau96op38X5-Ld-8mVJkRr9xBKY9cJr2lu5-o,7564
|
8
|
-
pandas_plots-0.12.20.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
9
|
-
pandas_plots-0.12.20.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
10
|
-
pandas_plots-0.12.20.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
11
|
-
pandas_plots-0.12.20.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|