pandas-plots 0.15.1__tar.gz → 0.15.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots-0.15.3/.python-version +1 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/PKG-INFO +16 -8
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/README.md +13 -5
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/pyproject.toml +3 -3
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/pandas_plots/pls.py +19 -7
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/pandas_plots/tbl.py +3 -1
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/test.ipynb +1370 -11164
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/uv.lock +1889 -1699
- pandas_plots-0.15.1/.python-version +0 -1
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/.gitignore +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/LICENSE +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/img/2024-02-13-00-40-27.png +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/img/2024-02-14-20-49-00.png +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/img/2024-02-19-20-49-52.png +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/img/2024-03-02-17-33-43.png +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/img/2024-03-24-09-59-32.png +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/assets/Rplots.pdf +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/assets/dsich.csv +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/assets/facets.csv +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/pandas_plots/__init__.py +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/pandas_plots/hlp.py +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.15.1 → pandas_plots-0.15.3}/src/test.r +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
3.12
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.15.1
|
3
|
+
Version: 0.15.3
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Project-URL: Homepage, https://github.com/smeisegeier/pandas-plots
|
6
6
|
Project-URL: Repository, https://github.com/smeisegeier/pandas-plots
|
@@ -13,10 +13,10 @@ Classifier: Intended Audience :: Science/Research
|
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
14
14
|
Classifier: Operating System :: OS Independent
|
15
15
|
Classifier: Programming Language :: Python :: 3
|
16
|
-
Classifier: Programming Language :: Python :: 3.
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
17
17
|
Classifier: Topic :: Scientific/Engineering
|
18
18
|
Requires-Python: >=3.10
|
19
|
-
Requires-Dist: connection-helper>=0.
|
19
|
+
Requires-Dist: connection-helper>=0.12
|
20
20
|
Requires-Dist: dataframe-image>=0.2.6
|
21
21
|
Requires-Dist: duckdb>=1.3.0
|
22
22
|
Requires-Dist: jinja2>=3.1.4
|
@@ -41,7 +41,9 @@ Description-Content-Type: text/markdown
|
|
41
41
|
install / update package
|
42
42
|
|
43
43
|
```bash
|
44
|
-
|
44
|
+
uv add -U pandas-plots
|
45
|
+
# if no uv is available:
|
46
|
+
# pip install pandas-plots -U
|
45
47
|
```
|
46
48
|
|
47
49
|
include in python
|
@@ -79,7 +81,7 @@ tbl.show_num_df(
|
|
79
81
|
`pandas-plots` is a package to help you examine and visualize data that are organized in a pandas DataFrame. It provides a high level api to pandas / plotly with some selected functions and predefined options:
|
80
82
|
|
81
83
|
- `tbl` utilities for table descriptions
|
82
|
-
-
|
84
|
+
- `show_num_df()` displays a table as styled version with additional information
|
83
85
|
- `describe_df()` an alternative version of pandas `describe()` function
|
84
86
|
- `descr_db()` a very short description for a `duckdb` relation
|
85
87
|
- `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
|
@@ -89,7 +91,7 @@ tbl.show_num_df(
|
|
89
91
|
- `pls` for plotly visualizations
|
90
92
|
- `plot_box()` auto annotated boxplot w/ violin option
|
91
93
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
92
|
-
- `plot_stacked_bars()` shortcut to stacked bars
|
94
|
+
- `plot_stacked_bars()` shortcut to stacked bars
|
93
95
|
- `plot_bars()` a standardized bar plot for a **categorical** column
|
94
96
|
- features confidence intervals via `use_ci` option
|
95
97
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
@@ -105,7 +107,7 @@ tbl.show_num_df(
|
|
105
107
|
<br>
|
106
108
|
|
107
109
|
- `hlp` contains a variety of helper functions
|
108
|
-
- `to_series()` converts a dataframe to a series
|
110
|
+
- `to_series()` converts a dataframe to a series
|
109
111
|
- `mean_confidence_interval()` calculates mean and confidence interval for a series
|
110
112
|
- `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
|
111
113
|
- `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
|
@@ -118,7 +120,13 @@ tbl.show_num_df(
|
|
118
120
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
119
121
|
<br>
|
120
122
|
|
121
|
-
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
123
|
+
> note: theme setting ☀️ 🌔 can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
124
|
+
|
125
|
+
## prerequisites
|
126
|
+
|
127
|
+
- ⚠️ for static image generation, this package uses Plotly's kaleido engine, which requires a system-wide installation of the Chrome or Chromium browser
|
128
|
+
- if image generation fails, it may be because a compatible browser is missing
|
129
|
+
- in such cases, please run `kaleido_get_chrome` from your terminal to install the necessary dependency.
|
122
130
|
|
123
131
|
## more examples
|
124
132
|
|
@@ -7,7 +7,9 @@
|
|
7
7
|
install / update package
|
8
8
|
|
9
9
|
```bash
|
10
|
-
|
10
|
+
uv add -U pandas-plots
|
11
|
+
# if no uv is available:
|
12
|
+
# pip install pandas-plots -U
|
11
13
|
```
|
12
14
|
|
13
15
|
include in python
|
@@ -45,7 +47,7 @@ tbl.show_num_df(
|
|
45
47
|
`pandas-plots` is a package to help you examine and visualize data that are organized in a pandas DataFrame. It provides a high level api to pandas / plotly with some selected functions and predefined options:
|
46
48
|
|
47
49
|
- `tbl` utilities for table descriptions
|
48
|
-
-
|
50
|
+
- `show_num_df()` displays a table as styled version with additional information
|
49
51
|
- `describe_df()` an alternative version of pandas `describe()` function
|
50
52
|
- `descr_db()` a very short description for a `duckdb` relation
|
51
53
|
- `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
|
@@ -55,7 +57,7 @@ tbl.show_num_df(
|
|
55
57
|
- `pls` for plotly visualizations
|
56
58
|
- `plot_box()` auto annotated boxplot w/ violin option
|
57
59
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
58
|
-
- `plot_stacked_bars()` shortcut to stacked bars
|
60
|
+
- `plot_stacked_bars()` shortcut to stacked bars
|
59
61
|
- `plot_bars()` a standardized bar plot for a **categorical** column
|
60
62
|
- features confidence intervals via `use_ci` option
|
61
63
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
@@ -71,7 +73,7 @@ tbl.show_num_df(
|
|
71
73
|
<br>
|
72
74
|
|
73
75
|
- `hlp` contains a variety of helper functions
|
74
|
-
- `to_series()` converts a dataframe to a series
|
76
|
+
- `to_series()` converts a dataframe to a series
|
75
77
|
- `mean_confidence_interval()` calculates mean and confidence interval for a series
|
76
78
|
- `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
|
77
79
|
- `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
|
@@ -84,7 +86,13 @@ tbl.show_num_df(
|
|
84
86
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
85
87
|
<br>
|
86
88
|
|
87
|
-
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
89
|
+
> note: theme setting ☀️ 🌔 can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
90
|
+
|
91
|
+
## prerequisites
|
92
|
+
|
93
|
+
- ⚠️ for static image generation, this package uses Plotly's kaleido engine, which requires a system-wide installation of the Chrome or Chromium browser
|
94
|
+
- if image generation fails, it may be because a compatible browser is missing
|
95
|
+
- in such cases, please run `kaleido_get_chrome` from your terminal to install the necessary dependency.
|
88
96
|
|
89
97
|
## more examples
|
90
98
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "pandas-plots"
|
3
|
-
version = "0.15.1"
|
3
|
+
version = "0.15.3"
|
4
4
|
description = "A collection of helpers for table handling and visualization"
|
5
5
|
long_description = "file: README.md"
|
6
6
|
long_description_content_type = "text/markdown"
|
@@ -14,7 +14,7 @@ authors = [
|
|
14
14
|
classifiers = [
|
15
15
|
"License :: OSI Approved :: MIT License",
|
16
16
|
"Programming Language :: Python :: 3",
|
17
|
-
"Programming Language :: Python :: 3.
|
17
|
+
"Programming Language :: Python :: 3.12",
|
18
18
|
'Development Status :: 4 - Beta',
|
19
19
|
'Intended Audience :: Science/Research',
|
20
20
|
'Operating System :: OS Independent',
|
@@ -35,7 +35,7 @@ dependencies = [
|
|
35
35
|
"duckdb>=1.3.0",
|
36
36
|
"nbformat>=4.2.0",
|
37
37
|
"dataframe_image>=0.2.6",
|
38
|
-
"connection-helper>=0.
|
38
|
+
"connection-helper>=0.12",
|
39
39
|
]
|
40
40
|
|
41
41
|
[project.urls]
|
@@ -564,13 +564,21 @@ def plot_bars(
|
|
564
564
|
|
565
565
|
# * if df, check if valid
|
566
566
|
if isinstance(df_in, pd.DataFrame):
|
567
|
-
if len(df_in.columns)
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
567
|
+
if len(df_in.columns) == 1:
|
568
|
+
if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]):
|
569
|
+
print("❌ df must have 1 column of object or bool type.")
|
570
|
+
return
|
571
|
+
else:
|
572
|
+
df_in = df_in.value_counts(dropna=dropna).to_frame().reset_index()
|
573
|
+
use_ci = False
|
574
|
+
elif len(df_in.columns) == 2:
|
575
|
+
if not (df_in.iloc[:, 0].dtype.kind in ["O", "b"]) or not (
|
576
|
+
df_in.iloc[:, 1].dtype.kind in ["i", "f"]
|
577
|
+
):
|
578
|
+
print("❌ df must have string and numeric columns (in that order).")
|
579
|
+
return
|
580
|
+
else:
|
581
|
+
print("❌ df must have exactly 1 or 2 columns")
|
574
582
|
return
|
575
583
|
else:
|
576
584
|
print("❌ input must be series or dataframe.")
|
@@ -1194,6 +1202,10 @@ def plot_boxes(
|
|
1194
1202
|
xlvl1 = -50
|
1195
1203
|
xlvl2 = 0
|
1196
1204
|
xlvl3 = 50
|
1205
|
+
|
1206
|
+
# * type of col0 must be str, not object. otherwise px.box will fail since sorting will fail
|
1207
|
+
if pd.api.types.is_object_dtype(df.iloc[:, 0]):
|
1208
|
+
df.iloc[:, 0] = df.iloc[:, 0].astype(str)
|
1197
1209
|
|
1198
1210
|
# * unique items
|
1199
1211
|
# Sort the unique items alphabetically
|
@@ -75,6 +75,7 @@ def describe_df(
|
|
75
75
|
top_n_uniques: int = 5,
|
76
76
|
top_n_chars_in_index: int = 0,
|
77
77
|
top_n_chars_in_columns: int = 0,
|
78
|
+
missing_figsize: tuple[int, int] = (26, 6),
|
78
79
|
):
|
79
80
|
"""
|
80
81
|
This function takes a pandas DataFrame and a caption as input parameters and prints out the caption as a styled header, followed by the shape of the DataFrame and the list of column names. For each column, it prints out the column name, the number of unique values, and the column data type. If the column is a numeric column with more than 100 unique values, it also prints out the minimum, mean, maximum, and sum values. Otherwise, it prints out the first 100 unique values of the column.
|
@@ -94,6 +95,7 @@ def describe_df(
|
|
94
95
|
top_n_uniques (int): number of uniques to display
|
95
96
|
top_n_chars_in_index (int): number of characters to display on plot axis
|
96
97
|
top_n_chars_in_columns (int): number of characters to display on plot axis. If set, minimum is 10.
|
98
|
+
missing_figsize (tuple[int, int]): figsize for the missing-values plot (default: (26, 6))
|
97
99
|
|
98
100
|
usage:
|
99
101
|
describe_df(
|
@@ -252,7 +254,7 @@ def describe_df(
|
|
252
254
|
|
253
255
|
if use_missing:
|
254
256
|
import missingno as msno
|
255
|
-
msno.matrix(df_, figsize=
|
257
|
+
msno.matrix(df_, figsize=missing_figsize)
|
256
258
|
|
257
259
|
|
258
260
|
def pivot_df(
|