pandas-plots 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots-0.0.3.dist-info/LICENSE +7 -0
- pandas_plots-0.0.3.dist-info/METADATA +75 -0
- pandas_plots-0.0.3.dist-info/RECORD +8 -0
- pandas_plots/legacy.py +0 -276
- pandas_plots-0.0.1.dist-info/LICENSE +0 -674
- pandas_plots-0.0.1.dist-info/METADATA +0 -688
- pandas_plots-0.0.1.dist-info/RECORD +0 -9
- /pandas_plots/{pandas.py → tbl.py} +0 -0
- /pandas_plots/{plots.py → viz.py} +0 -0
- {pandas_plots-0.0.1.dist-info → pandas_plots-0.0.3.dist-info}/WHEEL +0 -0
- {pandas_plots-0.0.1.dist-info → pandas_plots-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,7 @@
|
|
1
|
+
Copyright 2024 smeisegeier
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,75 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: pandas-plots
|
3
|
+
Version: 0.0.3
|
4
|
+
Summary: A collection of helper for pandas n plots
|
5
|
+
Author-email: smeisegeier <dsexterDSDo@googlemail.com>
|
6
|
+
License: Copyright 2024 smeisegeier
|
7
|
+
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
11
|
+
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
13
|
+
|
14
|
+
Project-URL: Homepage, https://github.com/smeisegeier/pandas-plots
|
15
|
+
Keywords: tables,pivot,plotly
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Description-Content-Type: text/markdown
|
20
|
+
License-File: LICENSE
|
21
|
+
Requires-Dist: pandas >=2.0.0
|
22
|
+
Requires-Dist: plotly
|
23
|
+
Requires-Dist: matplotlib
|
24
|
+
Requires-Dist: seaborn
|
25
|
+
|
26
|
+
# pandas-plots
|
27
|
+
|
28
|
+
<!-- [](https://GitHub.com/Naereen/StrapDown.js/releases/) -->
|
29
|
+
<!-- [](https://GitHub.com/Naereen/StrapDown.js/commit/) -->
|
30
|
+
|
31
|
+

|
32
|
+
|
33
|
+
## quickstart
|
34
|
+
|
35
|
+
install / update package
|
36
|
+
|
37
|
+
```bash
|
38
|
+
pip install pandas-plots -U
|
39
|
+
```
|
40
|
+
|
41
|
+
include in python
|
42
|
+
|
43
|
+
```python
|
44
|
+
from pandas_plots import tbl, viz
|
45
|
+
```
|
46
|
+
|
47
|
+
example
|
48
|
+
|
49
|
+
```python
|
50
|
+
# load sample dataset from seaborn
|
51
|
+
import seaborn as sb
|
52
|
+
df = sb.load_dataset('taxis')
|
53
|
+
|
54
|
+
viz.plot_box(df['fare'], height=400, violin=True)
|
55
|
+
```
|
56
|
+

|
57
|
+
|
58
|
+
## why use pandas-plots
|
59
|
+
|
60
|
+
`pandas-plots` is a package to help you examine and visualize data that are organized in a pandas DataFrame. It provides a high-level API to pandas / plotly with some selected functions.
|
61
|
+
|
62
|
+
It is subdivided into:
|
63
|
+
|
64
|
+
- `tbl` utilities for table descriptions
|
65
|
+
- `describe_df()` an alternative version of pandas `describe()` function
|
66
|
+
- `pivot_df()` gets a pivot table of a 3 column dataframe
|
67
|
+
|
68
|
+
- `viz` utilities for plotly visualizations
|
69
|
+
- `plot_box()` auto annotated boxplot w/ violin option
|
70
|
+
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
71
|
+
- `plot_bars()` a standardized bar plot
|
72
|
+
- `plot_stacked_bars()` shortcut to stacked bars 😄
|
73
|
+
- `plot_quadrants()` quickly show a 2x2 heatmap
|
74
|
+
|
75
|
+
## dependencies
|
@@ -0,0 +1,8 @@
|
|
1
|
+
pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
pandas_plots/tbl.py,sha256=7_o-Mu2nrniZd25zAtb_7IUdi7ZUnjcn9OFpBwFcVno,11500
|
3
|
+
pandas_plots/viz.py,sha256=eCDth3aFSU0_8Cj5Tax-FWM9TmPrmmNEiFuoVOB63ss,23207
|
4
|
+
pandas_plots-0.0.3.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
5
|
+
pandas_plots-0.0.3.dist-info/METADATA,sha256=CxfXjnDBAnALI1yDqMIi9SRh6hfOfJPWGmw7e2p4bt0,4379
|
6
|
+
pandas_plots-0.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
7
|
+
pandas_plots-0.0.3.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
8
|
+
pandas_plots-0.0.3.dist-info/RECORD,,
|
pandas_plots/legacy.py
DELETED
@@ -1,276 +0,0 @@
|
|
1
|
-
import warnings
|
2
|
-
warnings.filterwarnings('ignore')
|
3
|
-
|
4
|
-
from scipy import stats
|
5
|
-
import numpy as np
|
6
|
-
import pandas as pd
|
7
|
-
import matplotlib.pyplot as plt
|
8
|
-
import pandas as pd
|
9
|
-
import seaborn as sb
|
10
|
-
|
11
|
-
def my_show_info(df, hasPlot=False):
    """Build a one-row-per-column summary table for a dataframe.

    The table combines the per-column facts reported by ``DataFrame.info()``
    with unique/null counts, the most frequent value, skewness, kurtosis and
    the ``DataFrame.describe()`` statistics.

    Args:
        df (DataFrame): dataframe to summarise; it is copied, never modified
        hasPlot (bool): if True, additionally draw a seaborn facet grid with
            one bar chart per numeric summary column

    Returns:
        DataFrame: summary with the columns ``#``, ``Column``, ``Non-Null``,
        ``Count``, ``Dtype`` (all strings, named after the tokens of the
        ``info()`` header), ``Uni_Count``, ``Nulls``, ``Most_Frequent``,
        ``skew``, ``kurto`` (floats, NaN for non-numeric columns) followed by
        the ``describe()`` statistics without ``count``.
    """
    frame = df.copy()
    # Replace blanks in column names; str() also copes with non-string labels.
    frame.columns = [str(name).replace(' ', '_') for name in frame.columns]
    names = frame.columns.tolist()
    numeric = frame.select_dtypes(np.number)  # basis for skew / kurtosis

    # Most frequent value per column. Columns without any countable value
    # get an explicit None so the entries stay aligned with their column.
    most_frequent = []
    for name in names:
        counts = frame[name].value_counts()
        if counts.empty:
            most_frequent.append(None)
        else:
            most_frequent.append(f"{counts.index[0]!s} | {counts.iloc[0]!s}")

    # Assemble the base table directly from the frame instead of parsing the
    # text printed by info(): the text layout breaks on dtype names that
    # contain blanks and changes for very wide or very long frames.
    summary = pd.DataFrame({
        '#': [str(position) for position in range(len(names))],
        'Column': names,
        'Non-Null': [str(count) for count in frame.notna().sum()],
        'Count': ['non-null'] * len(names),
        'Dtype': [str(dtype) for dtype in frame.dtypes],
        'Uni_Count': frame.nunique().to_numpy(),
        'Nulls': frame.isnull().sum().to_numpy(),
        'Most_Frequent': most_frequent,
    })

    # Skewness and kurtosis of the numeric columns, kept as floats.
    values = numeric.to_numpy(dtype=float, na_value=np.nan)
    if values.size:
        skew = np.round(stats.skew(values, axis=0, bias=True), 3)
        kurto = np.round(stats.kurtosis(values, axis=0, bias=True), 3)
    else:
        # no numeric columns or no rows: nothing to compute
        skew = kurto = np.full(values.shape[1], np.nan)
    shape_stats = pd.DataFrame({
        'var': numeric.columns.tolist(),
        'skew': skew,
        'kurto': kurto,
    })
    summary = summary.merge(
        shape_stats, how='outer', left_on='Column', right_on='var'
    ).drop('var', axis=1)

    # describe() statistics, matched on the column name held in its index
    desc = frame.describe().T.drop(columns='count')
    summary = summary.merge(desc, how='outer', left_on='Column', right_index=True)

    if hasPlot:
        # numeric summary columns plus 'Column', which labels the bars
        plot_cols = summary.select_dtypes(np.number).columns.to_list()
        plot_cols.append('Column')

        # long format: melt auto-generates the 'variable' and 'value' columns
        long_form = pd.melt(summary[plot_cols], id_vars=['Column'])
        grid = sb.FacetGrid(data=long_form, col_wrap=5,
                            col='variable', sharex=False)
        # the column names are passed positionally to barplot as x and y
        _ = grid.map(sb.barplot, 'value', 'Column')

    return summary
|
93
|
-
|
94
|
-
|
95
|
-
def my_show_skew_kurto(df, hasPlot = False):
    """Compute skewness and kurtosis for every numeric column.

    Args:
        df (DataFrame): dataframe; only its numeric columns are evaluated
        hasPlot (bool): True to additionally draw two horizontal bar charts
            (skewness and kurtosis per column)

    Returns:
        DataFrame: one row per numeric column with the columns ``col``
        (column name), ``skewness`` and ``kurtosis`` (both float).
    """
    numeric = df.select_dtypes(np.number)
    values = numeric.to_numpy(dtype=float, na_value=np.nan)

    # Build the frame column by column so the statistics keep a float dtype;
    # stacking names and numbers into one array would degrade them to object.
    list_skews = pd.DataFrame({
        'col': numeric.columns.tolist(),
        'skewness': np.asarray(stats.skew(values, axis=0, bias=True), dtype=float),
        'kurtosis': np.asarray(stats.kurtosis(values, axis=0, bias=True), dtype=float),
    })

    if hasPlot:
        _fig, _axs = plt.subplots(1, 2, squeeze=False, figsize=(5, 3))
        _ = sb.barplot(y='col', x='skewness', data=list_skews,
                       orient='horizontal', ax=_axs[0, 0])
        _ = sb.barplot(y='col', x='kurtosis', data=list_skews,
                       orient='horizontal', ax=_axs[0, 1])
    return list_skews
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
# def describe_df_LEGACY(
|
131
|
-
# df: pd.DataFrame,
|
132
|
-
# caption: str,
|
133
|
-
# use_plot: bool = True,
|
134
|
-
# use_columns: bool = True,
|
135
|
-
# renderer: Literal["png", "svg", None] = "png",
|
136
|
-
# template: str = os.getenv("THEME_PLOTLY") or "plotly",
|
137
|
-
# fig_cols: int = 3,
|
138
|
-
# fig_offset: int = None,
|
139
|
-
# fig_rowheight: int = 300,
|
140
|
-
# sort_mode: Literal["value", "index"] = "value",
|
141
|
-
# ):
|
142
|
-
# """
|
143
|
-
# This function takes a pandas DataFrame and a caption as input parameters and prints out the caption as a styled header, followed by the shape of the DataFrame and the list of column names. For each column, it prints out the column name, the number of unique values, and the column data type. If the column is a numeric column with more than 100 unique values, it also prints out the minimum, mean, maximum, and sum values. Otherwise, it prints out the first 100 unique values of the column.
|
144
|
-
|
145
|
-
# Args:
|
146
|
-
# df (DataFrame): dataframe
|
147
|
-
# caption (str): caption to describe dataframe
|
148
|
-
# use_plot (bool): display plot?
|
149
|
-
# use_columns (bool): display columns values?
|
150
|
-
# renderer (Literal["png", "svg", None]): renderer for plot
|
151
|
-
# template (str): template for plotly (see https://plotly.com/python/templates/), default: os.getenv("THEME_PLOTLY") or "plotly"
|
152
|
-
# fig_cols (int): number of columns in plot
|
153
|
-
# fig_offset (int): offset for plots as iloc Argument. None = no offset, -1 = omit last plot
|
154
|
-
# fig_rowheight (int): row height for plot (default 300)
|
155
|
-
# sort_mode (Literal["value", "index"]): sort by value or index
|
156
|
-
|
157
|
-
# usage:
|
158
|
-
# describe_df(
|
159
|
-
# df=df,
|
160
|
-
# caption="dataframe",
|
161
|
-
# use_plot=True,
|
162
|
-
# renderer="png",
|
163
|
-
# template="plotly",
|
164
|
-
# fig_cols=3,
|
165
|
-
# fig_offset=None,
|
166
|
-
# sort_mode="value",
|
167
|
-
# )
|
168
|
-
|
169
|
-
# hint: skewness may not properly work if the columns is float and/or has only 1 value
|
170
|
-
# """
|
171
|
-
# # * check if df is empty
|
172
|
-
# if len(df) == 0:
|
173
|
-
# print(f"{Style.bold}{Fore.red}DataFrame is empty!{Style.reset}")
|
174
|
-
# return
|
175
|
-
|
176
|
-
# print(f"{Style.bold}{Fore.red}{'*'*3} {caption} {'*'*3}{Style.reset}")
|
177
|
-
# print(f"{Fore.blue}shape: {Style.reset}({df.shape[0]:_}, {df.shape[1]}) {Fore.blue}columns: {Style.reset}{df.columns.tolist()} ")
|
178
|
-
# print(f"{Fore.blue}duplicates: {Style.reset}{df.duplicated().sum():_}")
|
179
|
-
|
180
|
-
# # ! old version here
|
181
|
-
# # for col in df.columns[:]:
|
182
|
-
# # # * get unique values
|
183
|
-
# # unis = df[col].sort_values().unique()
|
184
|
-
# # header = f"{Fore.yellow}{col}({len(unis):_}|{df[col].dtype}){Style.reset}"
|
185
|
-
# # # * check if num col w/ too many values
|
186
|
-
# # if (df[col].dtype.kind in "biufc") and (len(unis) > 100):
|
187
|
-
# # print(
|
188
|
-
# # f"{header} {Fore.magenta}min:{Style.reset} {df[col].min():_} | {Fore.magenta}median:{Style.reset} {df[col].median().round(2):_} | {Fore.magenta}mean:{Style.reset} {df[col].mean().round(2):_} | {Fore.magenta}std:{Style.reset} {df[col].std().round(2):_} | {Fore.magenta}cv:{Style.reset} {(df[col].std() / df[col].mean()).round(2):_} | {Fore.magenta}max:{Style.reset} {df[col].max():_} | {Fore.magenta}sum:{Style.reset} {df[col].sum():_}"
|
189
|
-
# # )
|
190
|
-
# # else:
|
191
|
-
# # # * limit output to 100 items
|
192
|
-
# # print(f"{header} {unis[:100]}")
|
193
|
-
|
194
|
-
# def get_uniques_header(col: str):
|
195
|
-
# # * get unique values
|
196
|
-
# unis = df[col].sort_values().unique()
|
197
|
-
# # * get header
|
198
|
-
# header = f"{Fore.green}{col}({len(unis):_}|{df[col].dtype}){Style.reset}"
|
199
|
-
# return unis, header
|
200
|
-
|
201
|
-
# # * show all columns
|
202
|
-
# for col in df.columns[:]:
|
203
|
-
# _u, _h = get_uniques_header(col)
|
204
|
-
# if use_columns:
|
205
|
-
# # * limit output to 100 items
|
206
|
-
# print(f"{_h} {_u[:100]}")
|
207
|
-
# else:
|
208
|
-
# print(f"{_h}")
|
209
|
-
|
210
|
-
# print(f"{'*'*3}")
|
211
|
-
# # * only show numerics
|
212
|
-
# for col in df.select_dtypes('number').columns:
|
213
|
-
# _u, _h = get_uniques_header(col)
|
214
|
-
|
215
|
-
# print(
|
216
|
-
# f"{_h} {Fore.magenta}min:{Style.reset} {round(df[col].min(),3):_} | {Fore.magenta}max:{Style.reset} {round(df[col].max(),3):_} | {Fore.magenta}median:{Style.reset} {round(df[col].median(),3):_} | {Fore.magenta}mean:{Style.reset} {round(df[col].mean(),3):_} | {Fore.magenta}std:{Style.reset} {round(df[col].std(),3):_} | {Fore.magenta}cv:{Style.reset} {df[col].std() / round(df[col].mean(),3):_} | {Fore.magenta}sum:{Style.reset} {round(df[col].sum(),3):_} | {Fore.magenta}skew:{Style.reset} {round(stats.skew(df[col]),3)} | {Fore.magenta}kurto:{Style.reset} {round(stats.kurtosis(df[col]),3)}"
|
217
|
-
# )
|
218
|
-
|
219
|
-
# # * show missings
|
220
|
-
# print(f"{Fore.cyan}missings: {Style.reset}{dict(df.isna().sum())}")
|
221
|
-
|
222
|
-
# # * show first 3 rows
|
223
|
-
# display(df[:3])
|
224
|
-
|
225
|
-
# # ! *** PLOTS ***
|
226
|
-
# if not use_plot:
|
227
|
-
# return
|
228
|
-
|
229
|
-
# # * set template
|
230
|
-
# pio.templates.default = template
|
231
|
-
|
232
|
-
# # * respect fig_offset to exclude unwanted plots from maintenance columns
|
233
|
-
# cols = df.iloc[:, :fig_offset].columns
|
234
|
-
# cols_num = df.select_dtypes(np.number).columns.tolist()
|
235
|
-
# # cols_str = list(set(df.columns) - set(cols_num))
|
236
|
-
|
237
|
-
# # * set constant column count, calc rows
|
238
|
-
# fig_rows = math.ceil(len(cols) / fig_cols)
|
239
|
-
|
240
|
-
# fig = make_subplots(
|
241
|
-
# rows=fig_rows,
|
242
|
-
# cols=fig_cols,
|
243
|
-
# shared_xaxes=False,
|
244
|
-
# shared_yaxes=False,
|
245
|
-
# subplot_titles=cols,
|
246
|
-
# )
|
247
|
-
# # * layout settings
|
248
|
-
# fig.layout.height = fig_rowheight * fig_rows
|
249
|
-
# fig.layout.width = 400 * fig_cols
|
250
|
-
|
251
|
-
# # * construct subplots
|
252
|
-
# for i, col in enumerate(cols):
|
253
|
-
# # * get unique values as sorted list
|
254
|
-
# if sort_mode == "value":
|
255
|
-
# span = df[col].value_counts().sort_values(ascending=False)
|
256
|
-
# else:
|
257
|
-
# span = df[col].value_counts().sort_index()
|
258
|
-
|
259
|
-
# # * check if num col w/ too many values (disabled)
|
260
|
-
# if col in cols_num and len(span) > 100 and False:
|
261
|
-
# figsub = px.box(df, x=col, points="outliers")
|
262
|
-
# else:
|
263
|
-
# # * only respect 100 items
|
264
|
-
# figsub = px.bar(
|
265
|
-
# x=span.iloc[:100].index,
|
266
|
-
# y=span.iloc[:100].values,
|
267
|
-
# )
|
268
|
-
# # * grid position
|
269
|
-
# _row = math.floor((i) / fig_cols) + 1
|
270
|
-
# _col = i % fig_cols + 1
|
271
|
-
|
272
|
-
# # * add trace to fig, only data not layout, only 1 series
|
273
|
-
# fig.add_trace(figsub["data"][0], row=_row, col=_col)
|
274
|
-
|
275
|
-
# fig.show(renderer)
|
276
|
-
|