pandas-plots 0.15.2__py3-none-any.whl → 0.15.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/pls.py +28 -11
- pandas_plots/tbl.py +3 -1
- {pandas_plots-0.15.2.dist-info → pandas_plots-0.15.4.dist-info}/METADATA +2 -2
- pandas_plots-0.15.4.dist-info/RECORD +9 -0
- pandas_plots-0.15.2.dist-info/RECORD +0 -9
- {pandas_plots-0.15.2.dist-info → pandas_plots-0.15.4.dist-info}/WHEEL +0 -0
- {pandas_plots-0.15.2.dist-info → pandas_plots-0.15.4.dist-info}/licenses/LICENSE +0 -0
pandas_plots/pls.py
CHANGED
@@ -375,19 +375,24 @@ def plot_stacked_bars(
|
|
375
375
|
caption = _set_caption(caption)
|
376
376
|
|
377
377
|
# * after grouping add cols for pct and formatting
|
378
|
-
df["cnt_pct_all_only"] = df["value"].apply(lambda x: f"{(x
|
379
|
-
df["cnt_pct_bar_only"] = (df["value"] / bar_totals * 100).apply(lambda x: f"{x:.{precision}f}%")
|
378
|
+
df["cnt_pct_all_only"] = (df["value"] / n * 100).apply(lambda x: f"{(x):.{precision}f}%")
|
379
|
+
df["cnt_pct_bar_only"] = (df["value"] / bar_totals * 100).apply(lambda x: f"{(x):.{precision}f}%")
|
380
380
|
|
381
381
|
# * format output
|
382
382
|
df["cnt_str"] = df["value"].apply(lambda x: f"{x:_.{precision}f}")
|
383
383
|
|
384
384
|
divider2 = "<br>" if orientation == "v" else " "
|
385
385
|
|
386
|
+
# Modify this section
|
386
387
|
df["cnt_pct_all_str"] = df.apply(
|
387
|
-
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_all_only']})"
|
388
|
+
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_all_only']})"
|
389
|
+
if (row["value"] / n * 100) >= 5 else row["cnt_str"],
|
390
|
+
axis=1
|
388
391
|
)
|
389
392
|
df["cnt_pct_bar_str"] = df.apply(
|
390
|
-
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_bar_only']})"
|
393
|
+
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_bar_only']})"
|
394
|
+
if (row["value"] / bar_totals.loc[row.name] * 100) >= 5 else row["cnt_str"],
|
395
|
+
axis=1
|
391
396
|
)
|
392
397
|
|
393
398
|
text_to_show = "cnt_str"
|
@@ -564,13 +569,21 @@ def plot_bars(
|
|
564
569
|
|
565
570
|
# * if df, check if valid
|
566
571
|
if isinstance(df_in, pd.DataFrame):
|
567
|
-
if len(df_in.columns)
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
572
|
+
if len(df_in.columns) == 1:
|
573
|
+
if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]):
|
574
|
+
print("❌ df must have 1 column of object or bool type.")
|
575
|
+
return
|
576
|
+
else:
|
577
|
+
df_in = df_in.value_counts(dropna=dropna).to_frame().reset_index()
|
578
|
+
use_ci = False
|
579
|
+
elif len(df_in.columns) == 2:
|
580
|
+
if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]) or not (
|
581
|
+
df_in.iloc[:, 1].dtype.kind in ["i", "f"]
|
582
|
+
):
|
583
|
+
print("❌ df must have string and numeric columns (in that order).")
|
584
|
+
return
|
585
|
+
else:
|
586
|
+
print("❌ df must have exactly 1 or 2 columns")
|
574
587
|
return
|
575
588
|
else:
|
576
589
|
print("❌ input must be series or dataframe.")
|
@@ -1194,6 +1207,10 @@ def plot_boxes(
|
|
1194
1207
|
xlvl1 = -50
|
1195
1208
|
xlvl2 = 0
|
1196
1209
|
xlvl3 = 50
|
1210
|
+
|
1211
|
+
# * type of col0 must be str, not object. otherwise px.box will fail since sorting will fail
|
1212
|
+
if pd.api.types.is_object_dtype(df.iloc[:, 0]):
|
1213
|
+
df.iloc[:, 0] = df.iloc[:, 0].astype(str)
|
1197
1214
|
|
1198
1215
|
# * unique items
|
1199
1216
|
# Sort the unique items alphabetically
|
pandas_plots/tbl.py
CHANGED
@@ -75,6 +75,7 @@ def describe_df(
|
|
75
75
|
top_n_uniques: int = 5,
|
76
76
|
top_n_chars_in_index: int = 0,
|
77
77
|
top_n_chars_in_columns: int = 0,
|
78
|
+
missing_figsize: tuple[int, int] = (26, 6),
|
78
79
|
):
|
79
80
|
"""
|
80
81
|
This function takes a pandas DataFrame and a caption as input parameters and prints out the caption as a styled header, followed by the shape of the DataFrame and the list of column names. For each column, it prints out the column name, the number of unique values, and the column data type. If the column is a numeric column with more than 100 unique values, it also prints out the minimum, mean, maximum, and sum values. Otherwise, it prints out the first 100 unique values of the column.
|
@@ -94,6 +95,7 @@ def describe_df(
|
|
94
95
|
top_n_uniques (int): number of uniques to display
|
95
96
|
top_n_chars_in_index (int): number of characters to display on plot axis
|
96
97
|
top_n_chars_in_columns (int): number of characters to display on plot axis. If set, minimum is 10.
|
98
|
+
missing_figsize (tuple[int, int]): figsize for missing plot (default (26, 6))
|
97
99
|
|
98
100
|
usage:
|
99
101
|
describe_df(
|
@@ -252,7 +254,7 @@ def describe_df(
|
|
252
254
|
|
253
255
|
if use_missing:
|
254
256
|
import missingno as msno
|
255
|
-
msno.matrix(df_, figsize=
|
257
|
+
msno.matrix(df_, figsize=missing_figsize)
|
256
258
|
|
257
259
|
|
258
260
|
def pivot_df(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.15.2
|
3
|
+
Version: 0.15.4
|
4
4
|
Summary: A collection of helper for table handling and visualization
|
5
5
|
Project-URL: Homepage, https://github.com/smeisegeier/pandas-plots
|
6
6
|
Project-URL: Repository, https://github.com/smeisegeier/pandas-plots
|
@@ -16,7 +16,7 @@ Classifier: Programming Language :: Python :: 3
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
17
17
|
Classifier: Topic :: Scientific/Engineering
|
18
18
|
Requires-Python: >=3.10
|
19
|
-
Requires-Dist: connection-helper>=0.
|
19
|
+
Requires-Dist: connection-helper>=0.12
|
20
20
|
Requires-Dist: dataframe-image>=0.2.6
|
21
21
|
Requires-Dist: duckdb>=1.3.0
|
22
22
|
Requires-Dist: jinja2>=3.1.4
|
@@ -0,0 +1,9 @@
|
|
1
|
+
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
pandas_plots/hlp.py,sha256=z8rrVNbH9qMohdXPT-FksP-VkTOjI0bGFj47Sw5p3aY,21141
|
3
|
+
pandas_plots/pls.py,sha256=6ZvgO1lkaff-Wpj9zGu4J3vJybj1D94qjS2-coB83c8,64882
|
4
|
+
pandas_plots/tbl.py,sha256=y_4ZFVogCRp4v8g6IKzxEnGVHdRUZbF0ACLugJW2_G8,33057
|
5
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
+
pandas_plots-0.15.4.dist-info/METADATA,sha256=PfwI0peS8Qmw3Alio9x3zvyQaUKiHKKM7wwlQ2ncyis,7853
|
7
|
+
pandas_plots-0.15.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
8
|
+
pandas_plots-0.15.4.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
9
|
+
pandas_plots-0.15.4.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
pandas_plots/hlp.py,sha256=z8rrVNbH9qMohdXPT-FksP-VkTOjI0bGFj47Sw5p3aY,21141
|
3
|
-
pandas_plots/pls.py,sha256=k3btK4TWHUJCyHEzu3yLh40G9SuFlW84dYP2RLS5lWY,64118
|
4
|
-
pandas_plots/tbl.py,sha256=mzrUif2TUZ8JJmkgzNpVYApBZS8L0MS1Yjpx9KZN7Vs,32920
|
5
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
-
pandas_plots-0.15.2.dist-info/METADATA,sha256=w7JyqJ-dw7OqaZikJcKnxzZ8UWXBQxtoz7JIJPwN9EA,7855
|
7
|
-
pandas_plots-0.15.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
8
|
-
pandas_plots-0.15.2.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
9
|
-
pandas_plots-0.15.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|