pandas-plots 0.8.6__tar.gz → 0.8.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.8.6
3
+ Version: 0.8.8
4
4
  Summary: A collection of helper for table handling and vizualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = pandas-plots
3
- version = 0.8.6
3
+ version = 0.8.8
4
4
  author = smeisegeier
5
5
  author_email = dexterDSDo@googlemail.com
6
6
  description = A collection of helper for table handling and vizualization
@@ -1,24 +1,25 @@
1
1
  import warnings
2
- warnings.filterwarnings('ignore')
3
2
 
4
- from scipy import stats
5
- from typing import Literal
3
+ warnings.filterwarnings("ignore")
4
+
5
+ import math
6
+ import os
7
+ from typing import Literal, get_args
8
+
6
9
  import numpy as np
7
10
  import pandas as pd
8
11
  import plotly.express as px
9
- import pandas as pd
10
- import math
11
- import os
12
12
  from plotly.subplots import make_subplots
13
+ from scipy import stats
13
14
  # pd.options.mode.chained_assignment = None
15
+
14
16
  from . import txt
15
17
 
16
- # ! check pandas version
17
- assert pd.__version__ > '2.0.0', 'pandas version must be >= 2.0.0'
18
+ AGG_FUNC=Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
18
19
 
19
20
  def describe_df(
20
21
  df: pd.DataFrame,
21
- caption: str,
22
+ caption: str,
22
23
  use_plot: bool = True,
23
24
  use_columns: bool = True,
24
25
  renderer: Literal["png", "svg", None] = "png",
@@ -44,7 +45,7 @@ def describe_df(
44
45
  sort_mode (Literal["value", "index"]): sort by value or index
45
46
  top_n_uniques (int): number of uniques to display
46
47
  top_n_chars_in_index (int): number of characters to display on plot axis
47
-
48
+
48
49
  usage:
49
50
  describe_df(
50
51
  df=df,
@@ -56,12 +57,12 @@ def describe_df(
56
57
  fig_offset=None,
57
58
  sort_mode="value",
58
59
  )
59
-
60
+
60
61
  hint: skewness may not properly work if the columns is float and/or has only 1 value
61
62
  """
62
63
  # * copy df, df col types are modified
63
64
  df = df.copy()
64
-
65
+
65
66
  # * check if df is empty
66
67
  if len(df) == 0:
67
68
  print(f"DataFrame is empty!")
@@ -73,10 +74,11 @@ def describe_df(
73
74
  print(f"🟣 missings: {dict(df.isna().sum())}")
74
75
  print("--- column uniques (all)")
75
76
  print(f"🟠 index {txt.wrap(df.index.tolist()[:top_n_uniques])}")
77
+
76
78
  def get_uniques_header(col: str):
77
79
  # * sorting has issues when col is of mixed type (object)
78
- if df[col].dtype=='object':
79
- df[col]=df[col].astype(str)
80
+ if df[col].dtype == "object":
81
+ df[col] = df[col].astype(str)
80
82
  # * get unique values
81
83
  # unis = df[col].sort_values().unique()
82
84
  unis = list(df[col].value_counts().sort_index().index)
@@ -89,16 +91,18 @@ def describe_df(
89
91
  _u, _h = get_uniques_header(col)
90
92
  if use_columns:
91
93
  # * check col type
92
- is_str=df.loc[:,col].dtype.kind == 'O'
94
+ is_str = df.loc[:, col].dtype.kind == "O"
93
95
  # * wrap output
94
- print(f"{_h} {txt.wrap(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}")
96
+ print(
97
+ f"{_h} {txt.wrap(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}"
98
+ )
95
99
  # print(f"{_h} {_u[:top_n_uniques]}")
96
100
  else:
97
101
  print(f"{_h}")
98
102
 
99
103
  print("--- column stats (numeric)")
100
104
  # * only show numerics
101
- for col in df.select_dtypes('number').columns:
105
+ for col in df.select_dtypes("number").columns:
102
106
  _u, _h = get_uniques_header(col)
103
107
 
104
108
  # * extra care for scipy metrics, these are very vulnarable to nan
@@ -117,7 +121,7 @@ def describe_df(
117
121
  cols = df.iloc[:, :fig_offset].columns
118
122
  cols_num = df.select_dtypes(np.number).columns.tolist()
119
123
  # cols_str = list(set(df.columns) - set(cols_num))
120
-
124
+
121
125
  # * set constant column count, calc rows
122
126
  fig_rows = math.ceil(len(cols) / fig_cols)
123
127
 
@@ -145,17 +149,21 @@ def describe_df(
145
149
  figsub = px.box(df, x=col, points="outliers")
146
150
  else:
147
151
  # * only respect 100 items (fixed value)
148
- x=span.iloc[:100].index
149
- y=span.iloc[:100].values
152
+ x = span.iloc[:100].index
153
+ y = span.iloc[:100].values
150
154
  # * cut long strings
151
- if x.dtype=='object' and top_n_chars_in_index > 0:
152
- x=x.astype(str).tolist()
153
- _cut = lambda s: s[:top_n_chars_in_index] + '..' if len(s) > top_n_chars_in_index else s[:top_n_chars_in_index]
154
- x=[_cut(item) for item in x]
155
+ if x.dtype == "object" and top_n_chars_in_index > 0:
156
+ x = x.astype(str).tolist()
157
+ _cut = lambda s: (
158
+ s[:top_n_chars_in_index] + ".."
159
+ if len(s) > top_n_chars_in_index
160
+ else s[:top_n_chars_in_index]
161
+ )
162
+ x = [_cut(item) for item in x]
155
163
  figsub = px.bar(
156
164
  x=x,
157
165
  y=y,
158
- )
166
+ )
159
167
  # * grid position
160
168
  _row = math.floor((i) / fig_cols) + 1
161
169
  _col = i % fig_cols + 1
@@ -164,7 +172,9 @@ def describe_df(
164
172
  fig.add_trace(figsub["data"][0], row=_row, col=_col)
165
173
 
166
174
  # * set template
167
- fig.update_layout(template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly")
175
+ fig.update_layout(
176
+ template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
177
+ )
168
178
  fig.show(renderer)
169
179
 
170
180
 
@@ -177,12 +187,13 @@ def pivot_df(
177
187
  data_bar_axis: Literal["x", "y", "xy", None] = "xy",
178
188
  pct_axis: Literal["x", "xy", None] = "xy",
179
189
  precision: int = 0,
180
- show_totals: bool = True,
181
- heatmap_axis: Literal["x","y","xy", None] = None,
190
+ heatmap_axis: Literal["x", "y", "xy", None] = None,
191
+ total_mode: AGG_FUNC = "sum",
192
+ total_axis: Literal["x", "y", "xy", None] = "xy",
182
193
  ) -> pd.DataFrame:
183
194
  """
184
195
  A function to pivot a DataFrame based on specified parameters and return the result as a new DataFrame.
185
-
196
+
186
197
  Args:
187
198
  df (pd.DataFrame): The input DataFrame to be pivoted.
188
199
  dropna (bool, optional): Whether to drop NaN values. Defaults to False.
@@ -192,14 +203,17 @@ def pivot_df(
192
203
  data_bar_axis (Literal["x", "y", "xy", None], optional): The axis for displaying data bars. Defaults to "xy".
193
204
  pct_axis (Literal["x", "xy", None], optional): The axis for displaying percentages. Defaults to None.
194
205
  precision (int, optional): The precision for displaying values. Defaults to 0.
195
- show_totals (bool, optional): Whether to show totals in the result. Defaults to False.
196
206
  heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
197
-
207
+ total_mode (Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"], optional): The aggregation mode for displaying totals. Defaults to "sum".
208
+ total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
209
+
198
210
  Returns:
199
211
  pd.DataFrame: The pivoted DataFrame.
200
212
  """
201
213
  # * ensure arguments match parameter definition
202
- if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and data_bar_axis not in ["x","y","xy"]):
214
+ if (pct_axis and pct_axis not in ["x", "xy"]) or (
215
+ data_bar_axis and data_bar_axis not in ["x", "y", "xy"]
216
+ ):
203
217
  print(f"❌ axis not supported")
204
218
  return
205
219
 
@@ -210,6 +224,10 @@ def pivot_df(
210
224
  if not pd.api.types.is_numeric_dtype(df.iloc[:, 2]):
211
225
  print("❌ 3rd column must be numeric")
212
226
  return
227
+
228
+ if total_mode and total_mode not in get_args(AGG_FUNC):
229
+ print(f"❌ total_mode '{total_mode}' not supported")
230
+ return
213
231
 
214
232
  df = df.copy()
215
233
 
@@ -257,25 +275,36 @@ def pivot_df(
257
275
  )
258
276
  df = df.fillna(0) # .astype(_type)
259
277
 
260
- return show_num_df(df, show_totals=show_totals, data_bar_axis=data_bar_axis, pct_axis=pct_axis, swap=swap, precision=precision, heatmap_axis=heatmap_axis)
278
+ return show_num_df(
279
+ df,
280
+ total_mode=total_mode,
281
+ total_axis=total_axis,
282
+ data_bar_axis=data_bar_axis,
283
+ pct_axis=pct_axis,
284
+ swap=swap,
285
+ precision=precision,
286
+ heatmap_axis=heatmap_axis,
287
+ )
288
+
261
289
 
262
290
  def show_num_df(
263
291
  df,
264
- show_total: bool = False,
265
- total_mode: Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"] = "sum",
266
- heatmap_axis: Literal["x","y","xy", None] = None,
267
- data_bar_axis: Literal["x","y","xy", None] = None,
292
+ total_mode: AGG_FUNC = "sum",
293
+ total_axis: Literal["x", "y", "xy", None] = "xy",
294
+ heatmap_axis: Literal["x", "y", "xy", None] = None,
295
+ data_bar_axis: Literal["x", "y", "xy", None] = None,
268
296
  pct_axis: Literal["x", "xy", None] = None,
269
297
  swap: bool = False,
270
- precision: int=0,
298
+ precision: int = 0,
271
299
  ):
272
300
  """
273
- A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
301
+ A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
274
302
 
275
303
  Parameters:
276
304
  - df: the DataFrame to display
277
- - show_total: a boolean indicating whether to show totals
278
305
  - total_mode: a Literal indicating the mode for aggregating totals ["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
306
+ - total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
307
+
279
308
  - heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
280
309
  - data_bar_axis: a Literal indicating the axis for applying data bar coloring ["x","y","xy", None]
281
310
  - pct_axis: a Literal indicating the directions for displaying percentages ["x","xy", None]. "x" means sum up pct per column
@@ -285,43 +314,48 @@ def show_num_df(
285
314
  The function returns a styled representation of the DataFrame.
286
315
  """
287
316
  # * ensure arguments match parameter definition
288
- if any([df[col].dtype.kind not in ['i','u','f'] for col in df.columns]) == True:
317
+ if any([df[col].dtype.kind not in ["i", "u", "f"] for col in df.columns]) == True:
289
318
  print(f"❌ table must contain numeric data only")
290
319
  return
291
-
292
- if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and data_bar_axis not in ["x","y","xy"]) or (heatmap_axis and heatmap_axis not in ["x","y","xy"]):
320
+
321
+ if (
322
+ (pct_axis and pct_axis not in ["x", "xy"])
323
+ or (data_bar_axis and data_bar_axis not in ["x", "y", "xy"])
324
+ or (heatmap_axis and heatmap_axis not in ["x", "y", "xy"])
325
+ ):
293
326
  print(f"❌ axis not supported")
294
327
  return
295
328
 
296
- if (total_mode and total_mode not in ["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]) :
297
- print(f"❌ total mode '{total_mode}' not supported")
329
+ if total_mode and total_mode not in get_args(AGG_FUNC):
330
+ print(f"❌ total_mode '{total_mode}' not supported")
298
331
  return
299
332
 
300
333
  theme = os.getenv("THEME") or "light"
301
-
334
+
302
335
  # * copy df, do not reference original
303
336
  df_ = df.copy() if not swap else df.T.copy()
304
-
305
- # * alter _df, add totals
306
- if show_total:
307
- df_.loc["Total"] = df_.agg(total_mode,axis=0)
337
+
338
+ # * alter df_, add totals
339
+ if total_mode and total_axis in ['x','xy']:
340
+ df_.loc["Total"] = df_.agg(total_mode, axis=0)
341
+ if total_mode and total_axis in ['y','xy']:
308
342
  df_.loc[:, "Total"] = df_.agg(total_mode, axis=1)
309
343
 
310
344
  # * derive style
311
345
  out = df_.style
312
346
 
313
- color_highlight = "lightblue" if theme == "light" else "darkgrey"
347
+ color_highlight = "lightblue" if theme == "light" else "#666666"
314
348
  color_zeros = "grey" if theme == "light" else "grey"
315
349
  color_pct = "grey" if theme == "light" else "yellow"
316
350
  color_values = "black" if theme == "light" else "white"
317
351
  color_minus = "red" if theme == "light" else "red"
318
- cmap_heat="Blues" if theme == "light" else "copper"
352
+ cmap_heat = "Blues" if theme == "light" else "copper"
319
353
 
320
354
  # * apply data bar coloring
321
355
  if data_bar_axis:
322
356
  out.bar(
323
357
  color=f"{color_highlight}",
324
- axis= 0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
358
+ axis=0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
325
359
  )
326
360
 
327
361
  # * all cell formatting in one place
@@ -334,17 +368,18 @@ def show_num_df(
334
368
  # * here cell > 0
335
369
  if show_pct:
336
370
  return f'{cell:_.{precision}f} <span style="color: {color_pct}">({(cell /sum):.1%})</span>'
337
- return f'{cell:_.{precision}f}'
371
+ return f"{cell:_.{precision}f}"
338
372
 
339
373
  # * build pct formatting
340
- if pct_axis =='x':
374
+ if pct_axis == "x":
341
375
  # * totals on either axis influence the sum
342
- divider = 2 if show_total else 1
376
+ divider = 2 if total_axis in ['x','xy'] else 1
343
377
  # * cell formatting to each column instead of altering values w/ df.apply
344
378
  # * uses dictionary comprehension, and a lambda function with two input variables
345
379
  col_sums = df_.sum() / divider
346
380
  formatter = {
347
- col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis) for col in df_.columns
381
+ col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis)
382
+ for col in df_.columns
348
383
  }
349
384
 
350
385
  # ? y is not implemented, needs row wise formatting
@@ -354,14 +389,14 @@ def show_num_df(
354
389
  # row: lambda x, row=row: format_cell(x, row_sums[row]) for row in _df.index
355
390
  # }
356
391
 
357
- elif pct_axis=='xy':
358
- divider = 4 if show_total else 1
392
+ elif pct_axis == "xy":
393
+ divider = 4 if total_axis == 'xy' else 2 if total_axis in ['x','y'] else 1
359
394
  n = df_.sum().sum() / divider
360
395
  formatter = {
361
396
  col: lambda x, col=col: format_cell(x, n, pct_axis) for col in df_.columns
362
397
  }
363
398
  else:
364
- # *
399
+ # *
365
400
  formatter = {
366
401
  col: lambda x, col=col: format_cell(x, x, False) for col in df_.columns
367
402
  }
@@ -369,15 +404,15 @@ def show_num_df(
369
404
  out.format(formatter=formatter)
370
405
 
371
406
  # * apply fonts for cells
372
- out.set_properties(**{'font-family': 'Courier'})
407
+ out.set_properties(**{"font-family": "Courier"})
373
408
 
374
409
  # * apply fonts for th (inkl. index)
375
- _props=[
376
- # ("font-size", "10pt"),
377
- # ("font-weight", "bold"),
378
- # ("font-family", "Courier"),
379
- ("text-align", "right")
380
- ]
410
+ _props = [
411
+ # ("font-size", "10pt"),
412
+ # ("font-weight", "bold"),
413
+ # ("font-family", "Courier"),
414
+ ("text-align", "right")
415
+ ]
381
416
  out.set_table_styles(
382
417
  [
383
418
  dict(selector="th", props=_props),
@@ -386,6 +421,9 @@ def show_num_df(
386
421
  )
387
422
 
388
423
  if heatmap_axis:
389
- out.background_gradient(cmap=cmap_heat, axis=None if heatmap_axis=="xy" else 0 if heatmap_axis=="y" else 1)
424
+ out.background_gradient(
425
+ cmap=cmap_heat,
426
+ axis=None if heatmap_axis == "xy" else 0 if heatmap_axis == "y" else 1,
427
+ )
390
428
 
391
- return out
429
+ return out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.8.6
3
+ Version: 0.8.8
4
4
  Summary: A collection of helper for table handling and vizualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
File without changes
File without changes