pandas-plots 0.8.2__tar.gz → 0.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas-plots-0.8.2/src/pandas_plots.egg-info → pandas-plots-0.8.3}/PKG-INFO +23 -28
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/README.md +15 -15
- pandas-plots-0.8.3/pyproject.toml +42 -0
- pandas-plots-0.8.3/setup.cfg +39 -0
- pandas-plots-0.8.2/src/pandas_plots/plt.py → pandas-plots-0.8.3/src/pandas_plots/pls.py +14 -12
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots/tbl.py +71 -139
- {pandas-plots-0.8.2 → pandas-plots-0.8.3/src/pandas_plots.egg-info}/PKG-INFO +23 -28
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots.egg-info/SOURCES.txt +2 -1
- pandas-plots-0.8.2/pyproject.toml +0 -41
- pandas-plots-0.8.2/setup.cfg +0 -4
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/LICENSE +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots/__init__.py +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots/sql.py +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots/txt.py +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots/ven.py +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas-plots-0.8.2 → pandas-plots-0.8.3}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,19 +1,14 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.8.
|
3
|
+
Version: 0.8.3
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
13
|
-
|
14
|
-
Project-URL: homepage, https://github.com/smeisegeier/pandas-plots
|
15
|
-
Project-URL: repository, https://github.com/smeisegeier/pandas-plots
|
16
|
-
Keywords: tables,pivot,plotly,venn,plot,vizualization
|
5
|
+
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
|
+
Author: smeisegeier
|
7
|
+
Author-email: dexterDSDo@googlemail.com
|
8
|
+
License: MIT License
|
9
|
+
Project-URL: Documentation, https://github.com/smeisegeier/pandas-plots
|
10
|
+
Project-URL: Source Code, https://github.com/smeisegeier/pandas-plots
|
11
|
+
Project-URL: Bug Tracker, https://github.com/smeisegeier/pandas-plots/issues
|
17
12
|
Classifier: License :: OSI Approved :: MIT License
|
18
13
|
Classifier: Programming Language :: Python :: 3
|
19
14
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -47,7 +42,7 @@ pip install pandas-plots -U
|
|
47
42
|
include in python
|
48
43
|
|
49
44
|
```python
|
50
|
-
from pandas_plots import tbl,
|
45
|
+
from pandas_plots import tbl, pls, ven
|
51
46
|
```
|
52
47
|
|
53
48
|
## example
|
@@ -57,7 +52,7 @@ from pandas_plots import tbl, plt, ven
|
|
57
52
|
import seaborn as sb
|
58
53
|
df = sb.load_dataset('taxis')
|
59
54
|
|
60
|
-
|
55
|
+
pls.plot_box(df['fare'], height=400, violin=True)
|
61
56
|
```
|
62
57
|
|
63
58
|

|
@@ -73,7 +68,7 @@ It is subdivided into:
|
|
73
68
|
- `pivot_df()` gets a pivot table of a 3 column dataframe
|
74
69
|
- 🆕 `show_num_df()` displays a table as styled version with additional information
|
75
70
|
|
76
|
-
- `
|
71
|
+
- `pls` for plotly visualizations
|
77
72
|
- `plot_box()` auto annotated boxplot w/ violin option
|
78
73
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
79
74
|
- `plot_bars()` a standardized bar plot
|
@@ -90,6 +85,8 @@ It is subdivided into:
|
|
90
85
|
- `txt` includes some text based utilities
|
91
86
|
- `wrap` formats strings or lists to a given width to fit nicely on the screen
|
92
87
|
|
88
|
+
> note: theming can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
89
|
+
|
93
90
|
## more examples
|
94
91
|
|
95
92
|
```python
|
@@ -110,24 +107,22 @@ tbl.pivot_df(df[['color', 'payment', 'fare']])
|
|
110
107
|
# show venn diagram for 3 sets
|
111
108
|
from pandas_plots import ven
|
112
109
|
|
113
|
-
set_a =
|
114
|
-
set_b =
|
115
|
-
set_c =
|
110
|
+
set_a = {'ford','ferrari','mercedes', 'bmw'}
|
111
|
+
set_b = {'opel','bmw','bentley','audi'}
|
112
|
+
set_c = {'ferrari','bmw','chrysler','renault','peugeot','fiat'}
|
116
113
|
_df, _details = ven.show_venn3(
|
117
|
-
"taxis",
|
118
|
-
set_a,
|
119
|
-
"
|
120
|
-
set_b,
|
121
|
-
"
|
114
|
+
title="taxis",
|
115
|
+
a_set=set_a,
|
116
|
+
a_label="cars1",
|
117
|
+
b_set=set_b,
|
118
|
+
b_label="cars2",
|
122
119
|
c_set=set_c,
|
123
|
-
c_label="
|
120
|
+
c_label="cars3",
|
124
121
|
verbose=0,
|
125
122
|
size=8,
|
126
123
|
)
|
127
124
|
```
|
128
125
|
|
129
|
-

|
130
127
|
|
131
128
|
## dependencies
|
132
|
-
|
133
|
-
<!-- todo add themeing hint -->
|
@@ -13,7 +13,7 @@ pip install pandas-plots -U
|
|
13
13
|
include in python
|
14
14
|
|
15
15
|
```python
|
16
|
-
from pandas_plots import tbl,
|
16
|
+
from pandas_plots import tbl, pls, ven
|
17
17
|
```
|
18
18
|
|
19
19
|
## example
|
@@ -23,7 +23,7 @@ from pandas_plots import tbl, plt, ven
|
|
23
23
|
import seaborn as sb
|
24
24
|
df = sb.load_dataset('taxis')
|
25
25
|
|
26
|
-
|
26
|
+
pls.plot_box(df['fare'], height=400, violin=True)
|
27
27
|
```
|
28
28
|
|
29
29
|

|
@@ -39,7 +39,7 @@ It is subdivided into:
|
|
39
39
|
- `pivot_df()` gets a pivot table of a 3 column dataframe
|
40
40
|
- 🆕 `show_num_df()` displays a table as styled version with additional information
|
41
41
|
|
42
|
-
- `
|
42
|
+
- `pls` for plotly visualizations
|
43
43
|
- `plot_box()` auto annotated boxplot w/ violin option
|
44
44
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
45
45
|
- `plot_bars()` a standardized bar plot
|
@@ -56,6 +56,8 @@ It is subdivided into:
|
|
56
56
|
- `txt` includes some text based utilities
|
57
57
|
- `wrap` formats strings or lists to a given width to fit nicely on the screen
|
58
58
|
|
59
|
+
> note: theming can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
60
|
+
|
59
61
|
## more examples
|
60
62
|
|
61
63
|
```python
|
@@ -76,24 +78,22 @@ tbl.pivot_df(df[['color', 'payment', 'fare']])
|
|
76
78
|
# show venn diagram for 3 sets
|
77
79
|
from pandas_plots import ven
|
78
80
|
|
79
|
-
set_a =
|
80
|
-
set_b =
|
81
|
-
set_c =
|
81
|
+
set_a = {'ford','ferrari','mercedes', 'bmw'}
|
82
|
+
set_b = {'opel','bmw','bentley','audi'}
|
83
|
+
set_c = {'ferrari','bmw','chrysler','renault','peugeot','fiat'}
|
82
84
|
_df, _details = ven.show_venn3(
|
83
|
-
"taxis",
|
84
|
-
set_a,
|
85
|
-
"
|
86
|
-
set_b,
|
87
|
-
"
|
85
|
+
title="taxis",
|
86
|
+
a_set=set_a,
|
87
|
+
a_label="cars1",
|
88
|
+
b_set=set_b,
|
89
|
+
b_label="cars2",
|
88
90
|
c_set=set_c,
|
89
|
-
c_label="
|
91
|
+
c_label="cars3",
|
90
92
|
verbose=0,
|
91
93
|
size=8,
|
92
94
|
)
|
93
95
|
```
|
94
96
|
|
95
|
-

|
96
98
|
|
97
99
|
## dependencies
|
98
|
-
|
99
|
-
<!-- todo add themeing hint -->
|
@@ -0,0 +1,42 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
# [project]
|
6
|
+
# name = "pandas-plots"
|
7
|
+
# version = "0.8.2"
|
8
|
+
# requires-python = ">=3.10"
|
9
|
+
# description = "A collection of helper for table handling and vizualization"
|
10
|
+
# readme = "README.md"
|
11
|
+
# authors = [{ name = "smeisegeier", email = "dexterDSDo@googlemail.com" }]
|
12
|
+
# license = { text = "MIT licence" }
|
13
|
+
# # license = { file = "LICENSE" }
|
14
|
+
# classifiers = [
|
15
|
+
# "License :: OSI Approved :: MIT License",
|
16
|
+
# "Programming Language :: Python :: 3",
|
17
|
+
# "Programming Language :: Python :: 3.10",
|
18
|
+
# 'Development Status :: 4 - Beta',
|
19
|
+
# 'Intended Audience :: Science/Research',
|
20
|
+
# 'Operating System :: OS Independent',
|
21
|
+
# 'Topic :: Scientific/Engineering'
|
22
|
+
# ]
|
23
|
+
# keywords = ["tables", "pivot", "plotly", "venn", "plot", "vizualization"]
|
24
|
+
# dependencies = [
|
25
|
+
# "pandas >= 2.0.0",
|
26
|
+
# "plotly >= 5.18.0",
|
27
|
+
# "matplotlib >= 3.8.2",
|
28
|
+
# "matplotlib-venn >= 0.11.10",
|
29
|
+
# "seaborn >= 0.13.2",
|
30
|
+
# "sqlalchemy < 2.0.0",
|
31
|
+
# "Jinja2 >= 3.1.3",
|
32
|
+
# ]
|
33
|
+
|
34
|
+
# # [project.optional-dependencies]
|
35
|
+
# # dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]
|
36
|
+
|
37
|
+
# [project.urls]
|
38
|
+
# homepage = "https://github.com/smeisegeier/pandas-plots"
|
39
|
+
# repository = "https://github.com/smeisegeier/pandas-plots"
|
40
|
+
|
41
|
+
# # [project.scripts]
|
42
|
+
# # realpython = "reader.__main__:main"
|
@@ -0,0 +1,39 @@
|
|
1
|
+
[metadata]
|
2
|
+
name = pandas-plots
|
3
|
+
version = 0.8.3
|
4
|
+
author = smeisegeier
|
5
|
+
author_email = dexterDSDo@googlemail.com
|
6
|
+
description = A collection of helper for table handling and vizualization
|
7
|
+
long_description = file: README.md
|
8
|
+
long_description_content_type = text/markdown
|
9
|
+
license = MIT License
|
10
|
+
license_files = LICENSE
|
11
|
+
url = https://github.com/smeisegeier/pandas-plots
|
12
|
+
project_urls =
|
13
|
+
Documentation = https://github.com/smeisegeier/pandas-plots
|
14
|
+
Source Code = https://github.com/smeisegeier/pandas-plots
|
15
|
+
Bug Tracker = https://github.com/smeisegeier/pandas-plots/issues
|
16
|
+
classifiers =
|
17
|
+
License :: OSI Approved :: MIT License
|
18
|
+
Programming Language :: Python :: 3
|
19
|
+
Programming Language :: Python :: 3.10
|
20
|
+
Development Status :: 4 - Beta
|
21
|
+
Intended Audience :: Science/Research
|
22
|
+
Operating System :: OS Independent
|
23
|
+
Topic :: Scientific/Engineering
|
24
|
+
|
25
|
+
[options]
|
26
|
+
python_requires = >=3.10
|
27
|
+
install_requires =
|
28
|
+
pandas >= 2.0.0
|
29
|
+
plotly >= 5.18.0
|
30
|
+
matplotlib >= 3.8.2
|
31
|
+
matplotlib-venn >= 0.11.10
|
32
|
+
seaborn >= 0.13.2
|
33
|
+
sqlalchemy < 2.0.0
|
34
|
+
Jinja2 >= 3.1.3
|
35
|
+
|
36
|
+
[egg_info]
|
37
|
+
tag_build =
|
38
|
+
tag_date = 0
|
39
|
+
|
@@ -27,7 +27,7 @@ def plot_quadrants(
|
|
27
27
|
columns axis
|
28
28
|
values (can be derived as cnt=1)
|
29
29
|
df columns must contain 2 values
|
30
|
-
title (str, optional): The title for the heatmap
|
30
|
+
title (str, optional): The title for the heatmap to override the default.
|
31
31
|
caption (str, optional): The caption for the heatmap. Defaults to None.
|
32
32
|
|
33
33
|
Returns:
|
@@ -61,7 +61,7 @@ def plot_quadrants(
|
|
61
61
|
n = heat_wide.sum().sum()
|
62
62
|
heat_label = heat_wide.map(lambda x: f'{x:_}\n({x/n*100:.1f}%)')
|
63
63
|
|
64
|
-
|
64
|
+
# * seaborn. use less fancy stuff :)
|
65
65
|
caption= f'#{caption.lower()}, ' if caption else 'heatmap, '
|
66
66
|
|
67
67
|
# * plot
|
@@ -96,7 +96,8 @@ def plot_stacked_bars(
|
|
96
96
|
top_n_color: int = 0,
|
97
97
|
dropna: bool = False,
|
98
98
|
swap: bool = False,
|
99
|
-
normalize:
|
99
|
+
normalize: bool = False,
|
100
|
+
relative: bool = False,
|
100
101
|
orientation: Literal["h", "v"] = "v",
|
101
102
|
height: int=500,
|
102
103
|
width: int=2000,
|
@@ -119,7 +120,8 @@ def plot_stacked_bars(
|
|
119
120
|
- top_n_index: int = 0 - The number of top colors to include in the plot. WARNING: this forces distribution to 100% on a subset
|
120
121
|
- dropna: bool = False - Whether to include NULL values in the plot.
|
121
122
|
- swap: bool = False - Whether to swap the x-axis and y-axis.
|
122
|
-
- normalize:
|
123
|
+
- normalize: bool = False - Whether to normalize the values.
|
124
|
+
- relative: bool = False - Whether to show relative values as bars instead of absolute.
|
123
125
|
- orientation: Literal["h", "v"] = "v" - The orientation of the plot.
|
124
126
|
- height: int = 500 - The height of the plot.
|
125
127
|
- width: An optional integer indicating the width of the chart. Default is 2000.
|
@@ -246,7 +248,7 @@ def plot_stacked_bars(
|
|
246
248
|
)
|
247
249
|
|
248
250
|
# * ignore if bar mode is on
|
249
|
-
if
|
251
|
+
if not relative:
|
250
252
|
if orientation == "v":
|
251
253
|
_fig.update_yaxes(range=[0, bar_max])
|
252
254
|
else:
|
@@ -269,12 +271,12 @@ def plot_stacked_bars(
|
|
269
271
|
_fig.update_xaxes(
|
270
272
|
showgrid=True,
|
271
273
|
gridwidth=1,
|
272
|
-
dtick=.05 if orientation == "h" and normalize
|
274
|
+
dtick=.05 if orientation == "h" and normalize else 5 if orientation == "h" and relative else None,
|
273
275
|
)
|
274
276
|
_fig.update_yaxes(
|
275
277
|
showgrid=True,
|
276
278
|
gridwidth=1,
|
277
|
-
dtick=.05 if orientation == "v" and normalize
|
279
|
+
dtick=.05 if orientation == "v" and normalize else 5 if orientation == "v" and relative else None,
|
278
280
|
)
|
279
281
|
|
280
282
|
# * sorting is in a weird spot, do a 1:1 matrix
|
@@ -317,7 +319,7 @@ def plot_bars(
|
|
317
319
|
- dropna: A boolean indicating whether to drop NaN values from the chart. Default is False.
|
318
320
|
- orientation: A string indicating the orientation of the chart. It can be either "h" for horizontal or "v" for vertical. Default is "v".
|
319
321
|
- sort_values: A boolean indicating whether to sort the values in the chart. Default is False.
|
320
|
-
- nomalize: A boolean indicating whether to
|
322
|
+
- normalize: A boolean indicating whether to show percentage values in the chart. Default is False.
|
321
323
|
- height: An optional integer indicating the height of the chart. Default is 500.
|
322
324
|
- width: An optional integer indicating the width of the chart. Default is 2000.
|
323
325
|
- title: An optional string indicating the title of the chart. If not provided, the title will be the name of the index column.
|
@@ -467,7 +469,7 @@ def plot_bars(
|
|
467
469
|
|
468
470
|
def plot_box(
|
469
471
|
ser: pd.Series,
|
470
|
-
points: Literal['all', 'outliers', 'suspectedoutlieres',
|
472
|
+
points: Literal['all', 'outliers', 'suspectedoutlieres', None] = None,
|
471
473
|
precision: int=2,
|
472
474
|
height: int=200,
|
473
475
|
width: int=1200,
|
@@ -481,7 +483,7 @@ def plot_box(
|
|
481
483
|
|
482
484
|
Args:
|
483
485
|
ser: The pandas Series to plot.
|
484
|
-
points: The type of points to plot on the box plot ('all', 'outliers', 'suspectedoutliers',
|
486
|
+
points: The type of points to plot on the box plot ('all', 'outliers', 'suspectedoutliers', None).
|
485
487
|
precision: The precision of the annotations.
|
486
488
|
height: The height of the plot.
|
487
489
|
width: The width of the plot.
|
@@ -550,7 +552,7 @@ def plot_box(
|
|
550
552
|
def plot_boxes(
|
551
553
|
df: pd.DataFrame,
|
552
554
|
caption: str=None,
|
553
|
-
points: Literal["all", "outliers", "suspectedoutliers",
|
555
|
+
points: Literal["all", "outliers", "suspectedoutliers", None] = None,
|
554
556
|
precision: int = 2,
|
555
557
|
height: int = 600,
|
556
558
|
width: int = 800,
|
@@ -563,7 +565,7 @@ def plot_boxes(
|
|
563
565
|
Args:
|
564
566
|
df (pd.DataFrame): The input DataFrame with two columns, where the first column is string type and the second column is numeric.
|
565
567
|
caption (str): The caption for the plot.
|
566
|
-
points (Literal["all", "outliers", "suspectedoutliers",
|
568
|
+
points (Literal["all", "outliers", "suspectedoutliers", None]): The points to be plotted.
|
567
569
|
precision (int): The precision for rounding the statistics.
|
568
570
|
height (int): The height of the plot.
|
569
571
|
width (int): The width of the plot.
|
@@ -66,12 +66,13 @@ def describe_df(
|
|
66
66
|
if len(df) == 0:
|
67
67
|
print(f"DataFrame is empty!")
|
68
68
|
return
|
69
|
-
|
69
|
+
|
70
70
|
print(f"🔵 {'*'*3} df: {caption} {'*'*3}")
|
71
71
|
print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {df.columns.tolist()} ")
|
72
72
|
print(f"🟣 duplicates: {df.duplicated().sum():_}")
|
73
73
|
print(f"🟣 missings: {dict(df.isna().sum())}")
|
74
74
|
print("--- column uniques (all)")
|
75
|
+
print(f"🟠 index {txt.wrap(df.index.tolist()[:top_n_uniques])}")
|
75
76
|
def get_uniques_header(col: str):
|
76
77
|
# * sorting has issues when col is of mixed type (object)
|
77
78
|
if df[col].dtype=='object':
|
@@ -166,64 +167,45 @@ def describe_df(
|
|
166
167
|
fig.update_layout(template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly")
|
167
168
|
fig.show(renderer)
|
168
169
|
|
169
|
-
# todo rebuild into using show_df, this also affects api
|
170
170
|
def pivot_df(
|
171
171
|
df: pd.DataFrame,
|
172
172
|
dropna: bool = False,
|
173
|
-
normalize: bool = False,
|
174
|
-
normalize_mixed: bool = False,
|
175
173
|
swap: bool = False,
|
176
174
|
top_n_index: int = 0,
|
177
175
|
top_n_columns: int = 0,
|
178
|
-
data_bar_axis: Literal["x", "y", "
|
176
|
+
data_bar_axis: Literal["x", "y", "xy", None] = "xy",
|
177
|
+
pct_axis: Literal["x", "xy", None] = "xy",
|
179
178
|
precision: int = 0,
|
180
|
-
|
181
|
-
|
182
|
-
) -> None:
|
179
|
+
show_totals: bool = True,
|
180
|
+
) -> pd.DataFrame:
|
183
181
|
"""
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
df (pd.DataFrame): The DataFrame to be pivoted.
|
188
|
-
dropna (bool, optional): Whether to drop
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
182
|
+
A function to pivot a DataFrame based on specified parameters and return the result as a new DataFrame.
|
183
|
+
|
184
|
+
Args:
|
185
|
+
df (pd.DataFrame): The input DataFrame to be pivoted.
|
186
|
+
dropna (bool, optional): Whether to drop NaN values. Defaults to False.
|
187
|
+
swap (bool, optional): Whether to swap index and column. Defaults to False.
|
188
|
+
top_n_index (int, optional): The number of top index values to consider. Defaults to 0.
|
189
|
+
top_n_columns (int, optional): The number of top column values to consider. Defaults to 0.
|
190
|
+
data_bar_axis (Literal["x", "y", "xy", None], optional): The axis for displaying data bars. Defaults to "xy".
|
191
|
+
pct_axis (Literal["x", "xy", None], optional): The axis for displaying percentages. Defaults to "xy".
|
192
|
+
precision (int, optional): The precision for displaying values. Defaults to 0.
|
193
|
+
show_totals (bool, optional): Whether to show totals in the result. Defaults to True.
|
194
|
+
|
198
195
|
Returns:
|
199
|
-
|
200
|
-
Usage:
|
201
|
-
pivot_df(
|
202
|
-
df,
|
203
|
-
dropna=True,
|
204
|
-
normalize=True,
|
205
|
-
normalize_mixed=True
|
206
|
-
swap=True,
|
207
|
-
top_n_index=5,
|
208
|
-
top_n_columns=2,
|
209
|
-
data_bar_axis=None,
|
210
|
-
precision=2,
|
211
|
-
)
|
196
|
+
pd.DataFrame: The pivoted DataFrame.
|
212
197
|
"""
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
color_zeros = 'grey' if theme == 'light' else 'grey'
|
218
|
-
color_pct = 'grey' if theme == 'light' else 'yellow'
|
219
|
-
color_values = 'black' if theme == 'light' else 'white'
|
220
|
-
color_minus = 'red' if theme == 'light' else 'red'
|
198
|
+
# * ensure arguments match parameter definition
|
199
|
+
if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and data_bar_axis not in ["x","y","xy"]):
|
200
|
+
print(f"❌ axis not supported")
|
201
|
+
return
|
221
202
|
|
222
203
|
if len(df.columns) != 3:
|
223
|
-
print("df must have exactly 3 columns")
|
204
|
+
print("❌ df must have exactly 3 columns")
|
224
205
|
return
|
206
|
+
|
225
207
|
if not pd.api.types.is_numeric_dtype(df.iloc[:, 2]):
|
226
|
-
print("3rd column must be numeric")
|
208
|
+
print("❌ 3rd column must be numeric")
|
227
209
|
return
|
228
210
|
|
229
211
|
col_index = df.columns[0] if not swap else df.columns[1]
|
@@ -237,23 +219,6 @@ def pivot_df(
|
|
237
219
|
df.dropna(inplace=True, subset=[col_index])
|
238
220
|
df.dropna(inplace=True, subset=[col_column])
|
239
221
|
|
240
|
-
# * now calculate n, after dropna, before top n
|
241
|
-
n = df[col_value].sum()
|
242
|
-
|
243
|
-
if normalize:
|
244
|
-
df[col_value] = df[col_value] / n
|
245
|
-
_formatter = f"{{:_.{precision}%}}"
|
246
|
-
else:
|
247
|
-
# _type= 'int'
|
248
|
-
_formatter = f"{{:_.{precision}f}}"
|
249
|
-
|
250
|
-
if normalize_mixed:
|
251
|
-
_formatter = (
|
252
|
-
lambda x: f"{{:_.0f}} <span style='color: {color_pct}'>({{:.1%}})</span>".format(x, x / n)
|
253
|
-
if x > 0
|
254
|
-
else x
|
255
|
-
)
|
256
|
-
|
257
222
|
# * top n indexes
|
258
223
|
if top_n_index > 0:
|
259
224
|
# * get top n -> series
|
@@ -266,7 +231,7 @@ def pivot_df(
|
|
266
231
|
# * only process top n indexes. this does not change pct values
|
267
232
|
df = df[df[col_index].isin(ser_top_n.index)]
|
268
233
|
|
269
|
-
# top n columns
|
234
|
+
# * top n columns
|
270
235
|
if top_n_columns > 0:
|
271
236
|
# * get top n -> series
|
272
237
|
# * on pivot tables (all cells are values) you can also use sum for each column[df.sum(axis=1) > n]
|
@@ -287,83 +252,36 @@ def pivot_df(
|
|
287
252
|
)
|
288
253
|
df = df.fillna(0) # .astype(_type)
|
289
254
|
|
290
|
-
|
291
|
-
df.loc["Total"] = df.sum(axis=0)
|
292
|
-
if totals in(['y','all']):
|
293
|
-
df.loc[:, "Total"] = df.sum(axis=1)
|
294
|
-
|
295
|
-
out = df.style.map(
|
296
|
-
lambda x: f"color: {color_zeros}"
|
297
|
-
if x == 0
|
298
|
-
else f"color: {color_minus}"
|
299
|
-
if x < 0
|
300
|
-
else f"color: {color_values}"
|
301
|
-
)
|
302
|
-
|
303
|
-
# * apply data bar coloring
|
304
|
-
if data_bar_axis:
|
305
|
-
out.bar(
|
306
|
-
color=f"{color_highlight}",
|
307
|
-
axis=1 if data_bar_axis == "y" else 0 if data_bar_axis == "x" else None,
|
308
|
-
# props="width: 5%;",
|
309
|
-
)
|
310
|
-
|
311
|
-
# * apply formatter selected above
|
312
|
-
out.format(_formatter)
|
313
|
-
|
314
|
-
# * apply fonts for cells
|
315
|
-
out.set_properties(**{'font-family': 'Courier'})
|
316
|
-
|
317
|
-
# * apply fonts for th (inkl. index)
|
318
|
-
_props=[
|
319
|
-
# ("font-size", "10pt"),
|
320
|
-
# ("font-weight", "bold"),
|
321
|
-
# ("font-family", "Courier"),
|
322
|
-
("text-align", "right")
|
323
|
-
]
|
324
|
-
out.set_table_styles(
|
325
|
-
[
|
326
|
-
dict(selector="th", props=_props),
|
327
|
-
# dict(selector="th:nth-child(1)", props=_props),
|
328
|
-
]
|
329
|
-
)
|
330
|
-
# todo return out
|
331
|
-
display(out)
|
332
|
-
return
|
255
|
+
return show_num_df(df, show_totals=show_totals, data_bar_axis=data_bar_axis, pct_axis=pct_axis, swap=swap, precision=precision)
|
333
256
|
|
334
|
-
# todo check if date col can be included
|
335
257
|
def show_num_df(
|
336
258
|
df,
|
337
|
-
axis: Literal["x", "all", None] = None,
|
338
259
|
show_totals: bool = False,
|
339
|
-
|
340
|
-
|
260
|
+
data_bar_axis: Literal["x","y","xy", None] = None,
|
261
|
+
pct_axis: Literal["x", "xy", None] = None,
|
341
262
|
swap: bool = False,
|
342
|
-
precision=0,
|
263
|
+
precision: int=0,
|
343
264
|
):
|
344
265
|
"""
|
345
|
-
|
346
|
-
Table must contain numeric data only (int / float).
|
266
|
+
A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
|
347
267
|
|
348
268
|
Parameters:
|
349
|
-
- df: DataFrame to display
|
350
|
-
-
|
351
|
-
-
|
352
|
-
-
|
353
|
-
-
|
354
|
-
-
|
355
|
-
|
356
|
-
|
357
|
-
Returns:
|
358
|
-
- out: Styled display of the DataFrame
|
269
|
+
- df: the DataFrame to display
|
270
|
+
- show_totals: a boolean indicating whether to show totals
|
271
|
+
- data_bar_axis: a Literal indicating the axis for applying data bar coloring ["x","y","xy", None]
|
272
|
+
- pct_axis: a Literal indicating the directions for displaying percentages ["x","xy", None]. "x" means sum up pct per column
|
273
|
+
- swap: a boolean indicating whether to swap the axes
|
274
|
+
- precision: an integer indicating the display precision
|
275
|
+
|
276
|
+
The function returns a styled representation of the DataFrame.
|
359
277
|
"""
|
360
278
|
# * ensure arguments match parameter definition
|
361
279
|
if any([df[col].dtype.kind not in ['i','u','f'] for col in df.columns]) == True:
|
362
280
|
print(f"❌ table must contain numeric data only")
|
363
281
|
return
|
364
282
|
|
365
|
-
if
|
366
|
-
print(f"❌ axis
|
283
|
+
if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and data_bar_axis not in ["x","y","xy"]):
|
284
|
+
print(f"❌ axis not supported")
|
367
285
|
return
|
368
286
|
|
369
287
|
theme = os.getenv("THEME") or "light"
|
@@ -372,9 +290,8 @@ def show_num_df(
|
|
372
290
|
df_ = df.copy() if not swap else df.T.copy()
|
373
291
|
|
374
292
|
# * alter _df, add totals
|
375
|
-
if show_totals
|
293
|
+
if show_totals:
|
376
294
|
df_.loc["Total"] = df_.sum(axis=0)
|
377
|
-
if show_totals and axis == "all":
|
378
295
|
df_.loc[:, "Total"] = df_.sum(axis=1)
|
379
296
|
|
380
297
|
# * derive style
|
@@ -387,14 +304,14 @@ def show_num_df(
|
|
387
304
|
color_minus = "red" if theme == "light" else "red"
|
388
305
|
|
389
306
|
# * apply data bar coloring
|
390
|
-
if
|
307
|
+
if data_bar_axis:
|
391
308
|
out.bar(
|
392
309
|
color=f"{color_highlight}",
|
393
|
-
axis= 0 if
|
310
|
+
axis= 0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
|
394
311
|
)
|
395
312
|
|
396
313
|
# * all cell formatting in one place
|
397
|
-
#
|
314
|
+
# call hierarchy is not very well organized. all options land here, even if no cellwise formatting is applied
|
398
315
|
def format_cell(cell, sum, show_pct):
|
399
316
|
if cell == 0:
|
400
317
|
return f'<span style="color: {color_zeros}">{cell:.0f}</span>'
|
@@ -406,15 +323,14 @@ def show_num_df(
|
|
406
323
|
return f'{cell:_.{precision}f}'
|
407
324
|
|
408
325
|
# * build pct formatting
|
409
|
-
|
410
|
-
if axis =='x':
|
326
|
+
if pct_axis =='x':
|
411
327
|
# * totals on either axis influence the sum
|
412
|
-
divider = 2 if show_totals
|
328
|
+
divider = 2 if show_totals else 1
|
413
329
|
# * cell formatting to each column instead of altering values w/ df.apply
|
414
330
|
# * uses dictionary comprehension, and a lambda function with two input variables
|
415
331
|
col_sums = df_.sum() / divider
|
416
332
|
formatter = {
|
417
|
-
col: lambda x, col=col: format_cell(x, col_sums[col],
|
333
|
+
col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis) for col in df_.columns
|
418
334
|
}
|
419
335
|
|
420
336
|
# ? y is not implemented, needs row wise formatting
|
@@ -424,11 +340,11 @@ def show_num_df(
|
|
424
340
|
# row: lambda x, row=row: format_cell(x, row_sums[row]) for row in _df.index
|
425
341
|
# }
|
426
342
|
|
427
|
-
elif
|
428
|
-
divider =
|
343
|
+
elif pct_axis=='xy':
|
344
|
+
divider = 4 if show_totals else 1
|
429
345
|
n = df_.sum().sum() / divider
|
430
346
|
formatter = {
|
431
|
-
col: lambda x, col=col: format_cell(x, n,
|
347
|
+
col: lambda x, col=col: format_cell(x, n, pct_axis) for col in df_.columns
|
432
348
|
}
|
433
349
|
else:
|
434
350
|
# *
|
@@ -437,6 +353,22 @@ def show_num_df(
|
|
437
353
|
}
|
438
354
|
|
439
355
|
out.format(formatter=formatter)
|
440
|
-
return out
|
441
356
|
|
442
|
-
#
|
357
|
+
# * apply fonts for cells
|
358
|
+
out.set_properties(**{'font-family': 'Courier'})
|
359
|
+
|
360
|
+
# * apply fonts for th (inkl. index)
|
361
|
+
_props=[
|
362
|
+
# ("font-size", "10pt"),
|
363
|
+
# ("font-weight", "bold"),
|
364
|
+
# ("font-family", "Courier"),
|
365
|
+
("text-align", "right")
|
366
|
+
]
|
367
|
+
out.set_table_styles(
|
368
|
+
[
|
369
|
+
dict(selector="th", props=_props),
|
370
|
+
# dict(selector="th:nth-child(1)", props=_props),
|
371
|
+
]
|
372
|
+
)
|
373
|
+
|
374
|
+
return out
|
@@ -1,19 +1,14 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.8.
|
3
|
+
Version: 0.8.3
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
13
|
-
|
14
|
-
Project-URL: homepage, https://github.com/smeisegeier/pandas-plots
|
15
|
-
Project-URL: repository, https://github.com/smeisegeier/pandas-plots
|
16
|
-
Keywords: tables,pivot,plotly,venn,plot,vizualization
|
5
|
+
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
|
+
Author: smeisegeier
|
7
|
+
Author-email: dexterDSDo@googlemail.com
|
8
|
+
License: MIT License
|
9
|
+
Project-URL: Documentation, https://github.com/smeisegeier/pandas-plots
|
10
|
+
Project-URL: Source Code, https://github.com/smeisegeier/pandas-plots
|
11
|
+
Project-URL: Bug Tracker, https://github.com/smeisegeier/pandas-plots/issues
|
17
12
|
Classifier: License :: OSI Approved :: MIT License
|
18
13
|
Classifier: Programming Language :: Python :: 3
|
19
14
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -47,7 +42,7 @@ pip install pandas-plots -U
|
|
47
42
|
include in python
|
48
43
|
|
49
44
|
```python
|
50
|
-
from pandas_plots import tbl,
|
45
|
+
from pandas_plots import tbl, pls, ven
|
51
46
|
```
|
52
47
|
|
53
48
|
## example
|
@@ -57,7 +52,7 @@ from pandas_plots import tbl, plt, ven
|
|
57
52
|
import seaborn as sb
|
58
53
|
df = sb.load_dataset('taxis')
|
59
54
|
|
60
|
-
|
55
|
+
pls.plot_box(df['fare'], height=400, violin=True)
|
61
56
|
```
|
62
57
|
|
63
58
|

|
@@ -73,7 +68,7 @@ It is subdivided into:
|
|
73
68
|
- `pivot_df()` gets a pivot table of a 3 column dataframe
|
74
69
|
- 🆕 `show_num_df()` displays a table as styled version with additional information
|
75
70
|
|
76
|
-
- `
|
71
|
+
- `pls` for plotly visualizations
|
77
72
|
- `plot_box()` auto annotated boxplot w/ violin option
|
78
73
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
79
74
|
- `plot_bars()` a standardized bar plot
|
@@ -90,6 +85,8 @@ It is subdivided into:
|
|
90
85
|
- `txt` includes some text based utilities
|
91
86
|
- `wrap` formats strings or lists to a given width to fit nicely on the screen
|
92
87
|
|
88
|
+
> note: theming can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
89
|
+
|
93
90
|
## more examples
|
94
91
|
|
95
92
|
```python
|
@@ -110,24 +107,22 @@ tbl.pivot_df(df[['color', 'payment', 'fare']])
|
|
110
107
|
# show venn diagram for 3 sets
|
111
108
|
from pandas_plots import ven
|
112
109
|
|
113
|
-
set_a =
|
114
|
-
set_b =
|
115
|
-
set_c =
|
110
|
+
set_a = {'ford','ferrari','mercedes', 'bmw'}
|
111
|
+
set_b = {'opel','bmw','bentley','audi'}
|
112
|
+
set_c = {'ferrari','bmw','chrysler','renault','peugeot','fiat'}
|
116
113
|
_df, _details = ven.show_venn3(
|
117
|
-
"taxis",
|
118
|
-
set_a,
|
119
|
-
"
|
120
|
-
set_b,
|
121
|
-
"
|
114
|
+
title="taxis",
|
115
|
+
a_set=set_a,
|
116
|
+
a_label="cars1",
|
117
|
+
b_set=set_b,
|
118
|
+
b_label="cars2",
|
122
119
|
c_set=set_c,
|
123
|
-
c_label="
|
120
|
+
c_label="cars3",
|
124
121
|
verbose=0,
|
125
122
|
size=8,
|
126
123
|
)
|
127
124
|
```
|
128
125
|
|
129
|
-

|
130
127
|
|
131
128
|
## dependencies
|
132
|
-
|
133
|
-
<!-- todo add themeing hint -->
|
@@ -1,41 +0,0 @@
|
|
1
|
-
[build-system]
|
2
|
-
requires = ["setuptools"]
|
3
|
-
build-backend = "setuptools.build_meta"
|
4
|
-
|
5
|
-
[project]
|
6
|
-
name = "pandas-plots"
|
7
|
-
version = "0.8.2"
|
8
|
-
requires-python = ">=3.10"
|
9
|
-
description = "A collection of helper for table handling and vizualization"
|
10
|
-
readme = "README.md"
|
11
|
-
authors = [{ name = "smeisegeier", email = "dsexterDSDo@googlemail.com" }]
|
12
|
-
license = { file = "LICENSE" }
|
13
|
-
classifiers = [
|
14
|
-
"License :: OSI Approved :: MIT License",
|
15
|
-
"Programming Language :: Python :: 3",
|
16
|
-
"Programming Language :: Python :: 3.10",
|
17
|
-
'Development Status :: 4 - Beta',
|
18
|
-
'Intended Audience :: Science/Research',
|
19
|
-
'Operating System :: OS Independent',
|
20
|
-
'Topic :: Scientific/Engineering'
|
21
|
-
]
|
22
|
-
keywords = ["tables", "pivot", "plotly", "venn", "plot", "vizualization"]
|
23
|
-
dependencies = [
|
24
|
-
"pandas >= 2.0.0",
|
25
|
-
"plotly >= 5.18.0",
|
26
|
-
"matplotlib >= 3.8.2",
|
27
|
-
"matplotlib-venn >= 0.11.10",
|
28
|
-
"seaborn >= 0.13.2",
|
29
|
-
"sqlalchemy < 2.0.0",
|
30
|
-
"Jinja2 >= 3.1.3",
|
31
|
-
]
|
32
|
-
|
33
|
-
# [project.optional-dependencies]
|
34
|
-
# dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]
|
35
|
-
|
36
|
-
[project.urls]
|
37
|
-
homepage = "https://github.com/smeisegeier/pandas-plots"
|
38
|
-
repository = "https://github.com/smeisegeier/pandas-plots"
|
39
|
-
|
40
|
-
# [project.scripts]
|
41
|
-
# realpython = "reader.__main__:main"
|
pandas-plots-0.8.2/setup.cfg
DELETED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|