pandas-plots 0.10.1__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.10.1
3
+ Version: 0.11.0
4
4
  Summary: A collection of helper for table handling and vizualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -154,3 +154,7 @@ _df, _details = ven.show_venn3(
154
154
  ```
155
155
 
156
156
  ![venn](https://github.com/smeisegeier/pandas-plots/blob/main/img/2024-02-19-20-49-52.png?raw=true)
157
+
158
+ ## tags
159
+
160
+ #pandas, #plotly, #visualizations, #statistics
@@ -125,3 +125,7 @@ _df, _details = ven.show_venn3(
125
125
  ```
126
126
 
127
127
  ![venn](https://github.com/smeisegeier/pandas-plots/blob/main/img/2024-02-19-20-49-52.png?raw=true)
128
+
129
+ ## tags
130
+
131
+ #pandas, #plotly, #visualizations, #statistics
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = pandas-plots
3
- version = 0.10.1
3
+ version = 0.11.0
4
4
  author = smeisegeier
5
5
  author_email = dexterDSDo@googlemail.com
6
6
  description = A collection of helper for table handling and vizualization
@@ -8,7 +8,7 @@ from PIL import Image
8
8
  import requests
9
9
  import re
10
10
 
11
- from tenacity import retry
11
+ # from devtools import debug
12
12
 
13
13
  URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
14
14
 
@@ -125,53 +125,67 @@ def replace_delimiter_outside_quotes(
125
125
 
126
126
 
127
127
  def wrap_text(
128
- text: str | list, max_items_in_line: int = 70, sep: bool = True, apo: bool = False
128
+ text: str | list,
129
+ max_items_in_line: int = 70,
130
+ use_sep: bool = True,
131
+ use_apo: bool = False,
129
132
  ):
130
133
  """
131
134
  A function that wraps text into lines with a maximum number of items per line.
135
+ Important: enclose this function in a print() statement to print the text
132
136
 
133
137
  Args:
134
138
  text (str | list): The input text or list of words to be wrapped.
135
139
  max_items_in_line (int): The maximum number of items allowed in each line.
136
- sep (bool, optional): Whether to include a comma separator between items. Defaults to True.
137
- apo (bool, optional): Whether to enclose each word in single quotes. Defaults to False.
140
+ use_sep (bool, optional): When list: Whether to include a comma separator between items. Defaults to True.
141
+ use_apo (bool, optional): When list: Whether to enclose each word in single quotes. Defaults to False.
142
+ Returns: the wrapped text
138
143
  """
139
144
 
140
- # * check if text is string, then strip and build word list
145
+ # * check if text is string
141
146
  is_text = isinstance(text, str)
142
147
  if is_text:
148
+ # ! when splitting the text later by blanks, newlines are not correctly handled
149
+ # * to detect them, they must be followed by a blank:
150
+ pattern = r'(\n)(?=\S)' # *forward lookup for newline w/ no blank
151
+ # * add blank after these newlines
152
+ new_text = re.sub(pattern, r"\1 ", text)
153
+ text=new_text
154
+
155
+ # * then strip and build word list
143
156
  text = (
144
157
  text.replace(",", "")
145
158
  .replace("'", "")
146
159
  .replace("[", "")
147
160
  .replace("]", "")
161
+ # * use explicit blanks to prevent newline split
148
162
  .split(" ")
149
163
  )
150
164
 
151
- # * start
165
+ # * loop setup
152
166
  i = 0
153
167
  line = ""
154
-
155
168
  # * loop through words
156
169
  out = ""
157
170
  for word in text:
158
- apo_s = "'" if apo else ""
159
- sep_s = "," if sep and not is_text else ""
171
+ apo_s = "'" if use_apo and not is_text else ""
172
+ sep_s = "," if use_sep and not is_text else ""
160
173
  word_s = f"{apo_s}{str(word)}{apo_s}{sep_s}"
161
174
  # * inc counter
162
175
  i = i + len(word_s)
163
176
  # * construct print line
164
177
  line = line + word_s + " "
165
- # * reset if counter exceeds limit
166
- if i >= max_items_in_line:
178
+ # * reset if counter exceeds limit, or if word ends with newline
179
+ if i >= max_items_in_line or str(word).endswith("\n"):
167
180
  out = out + line + "\n"
168
181
  line = ""
169
182
  i = 0
170
183
  # else:
171
- # * on short lists no reset happens, trigger manually
172
- out = line if not out else out
173
- # * cut last newline
174
- return f"[{out[:-1]}]"
184
+ # * on short lists no line reset happens, so just print the line
185
+ # * else add last line
186
+ out = line if not out else out + line
187
+ # * cut off last newline
188
+ return f"[{out[:-1].strip()}]"
175
189
 
176
190
 
177
191
  def create_barcode_from_url(
@@ -211,21 +225,24 @@ def create_barcode_from_url(
211
225
  # plt.axis('off') # Turn off axis numbers
212
226
  plt.show()
213
227
 
228
+
214
229
  def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFrame:
215
- df_= df.copy()
230
+ df_ = df.copy()
216
231
  if not date_column:
217
- date_column = [col for col in df_.columns if pd.api.types.is_datetime64_any_dtype(df_[col])][0]
232
+ date_column = [
233
+ col for col in df_.columns if pd.api.types.is_datetime64_any_dtype(df_[col])
234
+ ][0]
218
235
  else:
219
236
  df_[date_column] = pd.to_datetime(df_[date_column])
220
237
 
221
238
  if not date_column or not pd.api.types.is_datetime64_any_dtype(df_[date_column]):
222
239
  print("❌ No datetime column found")
223
240
  return
224
-
241
+
225
242
  if [col for col in df_.columns if "YYYY-WW" in col]:
226
243
  print("❌ Added datetime columns already exist")
227
244
  return
228
-
245
+
229
246
  print(f"⏳ Adding datetime columns basing off of: {date_column}")
230
247
 
231
248
  df_["YYYY"] = df_[date_column].dt.year
@@ -235,9 +252,12 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
235
252
  df_["YYYY-MM"] = df_[date_column].dt.to_period("M").astype(str)
236
253
  df_["YYYYQ"] = df_[date_column].dt.to_period("Q").astype(str)
237
254
  df_["YYYY-WW"] = (
238
- df_[date_column].dt.isocalendar().year.astype(str) + "-W" +
239
- df_[date_column].dt.isocalendar().week.astype(str).str.zfill(2)
255
+ df_[date_column].dt.isocalendar().year.astype(str)
256
+ + "-W"
257
+ + df_[date_column].dt.isocalendar().week.astype(str).str.zfill(2)
258
+ )
259
+ df_["DDD"] = df_[date_column].dt.weekday.map(
260
+ {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
240
261
  )
241
- df_["DDD"] = df_[date_column].dt.weekday.map({0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
242
-
262
+
243
263
  return df_
@@ -1,4 +1,5 @@
1
1
  import warnings
2
+
2
3
  warnings.filterwarnings("ignore")
3
4
 
4
5
  import math
@@ -14,6 +15,7 @@ from plotly.subplots import make_subplots
14
15
  from scipy import stats
15
16
 
16
17
  from .hlp import wrap_text
18
+
17
19
  # from devtools import debug
18
20
  pd.options.display.colheader_justify = "right"
19
21
  # pd.options.mode.chained_assignment = None
@@ -21,7 +23,9 @@ pd.options.display.colheader_justify = "right"
21
23
  TOTAL_LITERAL = Literal[
22
24
  "sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"
23
25
  ]
24
- KPI_LITERAL = Literal["rag_abs","rag_rel", "min_max_xy", "max_min_xy", "min_max_x", "max_min_x"]
26
+ KPI_LITERAL = Literal[
27
+ "rag_abs", "rag_rel", "min_max_xy", "max_min_xy", "min_max_x", "max_min_x"
28
+ ]
25
29
 
26
30
 
27
31
  def describe_df(
@@ -108,7 +112,7 @@ def describe_df(
108
112
  is_str = df.loc[:, col].dtype.kind == "O"
109
113
  # * wrap output
110
114
  print(
111
- f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}"
115
+ f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
112
116
  )
113
117
  # print(f"{_h} {_u[:top_n_uniques]}")
114
118
  else:
@@ -130,14 +134,16 @@ def describe_df(
130
134
  # ! *** PLOTS ***
131
135
  if not use_plot:
132
136
  return
133
-
137
+
134
138
  # * reduce column names len if selected
135
139
  if top_n_chars_in_columns > 0:
136
140
  # * minumum 10 chars, or display is cluttered
137
- top_n_chars_in_columns = 10 if top_n_chars_in_columns < 10 else top_n_chars_in_columns
141
+ top_n_chars_in_columns = (
142
+ 10 if top_n_chars_in_columns < 10 else top_n_chars_in_columns
143
+ )
138
144
  col_list = []
139
145
  for i, col in enumerate(df.columns):
140
- col_list.append(col[:top_n_chars_in_columns]+"_"+str(i).zfill(3))
146
+ col_list.append(col[:top_n_chars_in_columns] + "_" + str(i).zfill(3))
141
147
  df.columns = col_list
142
148
 
143
149
  # * respect fig_offset to exclude unwanted plots from maintanance columns
@@ -183,7 +189,7 @@ def describe_df(
183
189
  else s[:top_n_chars_in_index]
184
190
  )
185
191
  x = [_cut(item) for item in x]
186
-
192
+
187
193
  figsub = px.bar(
188
194
  x=x,
189
195
  y=y,
@@ -318,7 +324,7 @@ def pivot_df(
318
324
  heatmap_axis=heatmap_axis,
319
325
  kpi_mode=kpi_mode,
320
326
  kpi_rag_list=kpi_rag_list,
321
- kpi_shape=kpi_shape
327
+ kpi_shape=kpi_shape,
322
328
  )
323
329
 
324
330
 
@@ -364,7 +370,9 @@ def show_num_df(
364
370
  """
365
371
  # * ensure arguments match parameter definition
366
372
  if any([df[col].dtype.kind not in ["i", "u", "f"] for col in df.columns]) == True:
367
- print(f"❌ table must contain numeric data only. Maybe you forgot to convert this table with pivot or pivot_table first?")
373
+ print(
374
+ f"❌ table must contain numeric data only. Maybe you forgot to convert this table with pivot or pivot_table first?"
375
+ )
368
376
  return
369
377
 
370
378
  if (
@@ -383,16 +391,16 @@ def show_num_df(
383
391
  print(f"❌ kpi_mode '{kpi_mode}' not supported")
384
392
  return
385
393
 
386
- if (kpi_mode and kpi_mode.startswith("rag")) and (not isinstance(kpi_rag_list, abc.Iterable)
387
- or len(kpi_rag_list) != 2
388
- ):
394
+ if (kpi_mode and kpi_mode.startswith("rag")) and (
395
+ not isinstance(kpi_rag_list, abc.Iterable) or len(kpi_rag_list) != 2
396
+ ):
389
397
  print(f"❌ kpi_rag_list must be a list of 2 if kpi_mode is set")
390
398
  return
391
-
399
+
392
400
  if kpi_mode == "rag_rel":
393
401
  # * transform values into percentiles
394
402
  if all(i <= 1 and i >= 0 for i in kpi_rag_list):
395
- kpi_rag_list = [int(i*100) for i in kpi_rag_list]
403
+ kpi_rag_list = [int(i * 100) for i in kpi_rag_list]
396
404
  else:
397
405
  print(f"❌ kpi_list for relative mode must be between 0 and 1")
398
406
  return
@@ -415,17 +423,21 @@ def show_num_df(
415
423
  df_.loc["Total"] = df_.agg(total_mode, axis=0)
416
424
  if total_mode and total_axis in ["y", "xy"]:
417
425
  df_.loc[:, "Total"] = df_.agg(total_mode, axis=1)
418
-
426
+
419
427
  # hack
420
428
  # * column sum values are distorted by totals, these must be rendered out
421
- col_divider = 2 if (total_axis in ["x", "xy"] and pct_axis == "x" and total_mode=="sum") else 1
429
+ col_divider = (
430
+ 2
431
+ if (total_axis in ["x", "xy"] and pct_axis == "x" and total_mode == "sum")
432
+ else 1
433
+ )
422
434
  col_sum = df_.sum() / col_divider
423
-
435
+
424
436
  # * min values are unaffected
425
437
  col_min = df_.min()
426
438
 
427
439
  # * max values are affected by totals, ignore total row if present
428
- last_row = -1 if (total_axis in ["x", "xy"] and total_mode=="sum") else None
440
+ last_row = -1 if (total_axis in ["x", "xy"] and total_mode == "sum") else None
429
441
  col_max = df_[:last_row].max()
430
442
 
431
443
  # * derive style
@@ -449,15 +461,14 @@ def show_num_df(
449
461
  # align="zero",
450
462
  )
451
463
 
452
-
453
464
  def get_kpi(val: float, col: str) -> str:
454
465
  """
455
466
  Function to calculate and return the appropriate icon based on the given value and key performance indicator (KPI) mode.
456
-
467
+
457
468
  Parameters:
458
469
  val (float): The value to be evaluated.
459
470
  col (str): The column associated with the value.
460
-
471
+
461
472
  Returns:
462
473
  str: The appropriate icon based on the value and KPI mode.
463
474
  """
@@ -466,24 +477,24 @@ def show_num_df(
466
477
 
467
478
  dict_icons = {
468
479
  "squad": {
469
- "light":["🟩", "🟨", "🟥", "⬜"],
470
- "dark":["🟩", "🟨", "🟥", "⬛"]
471
- },
480
+ "light": ["🟩", "🟨", "🟥", "⬜"],
481
+ "dark": ["🟩", "🟨", "🟥", "⬛"],
482
+ },
472
483
  "circle": {
473
- "light":["🟢", "🟡", "🔴", "⚪"],
474
- "dark":["🟢", "🟡", "🔴", "⚫"]
475
- },
484
+ "light": ["🟢", "🟡", "🔴", "⚪"],
485
+ "dark": ["🟢", "🟡", "🔴", "⚫"],
486
+ },
476
487
  }
477
488
  icons = dict_icons[kpi_shape][theme]
478
-
489
+
479
490
  # * transform values into percentiles if relative mode
480
- kpi_rag_list_= kpi_rag_list
481
- if kpi_mode=="rag_rel":
491
+ kpi_rag_list_ = kpi_rag_list
492
+ if kpi_mode == "rag_rel":
482
493
  # * get both percentile thresholds
483
494
  pcntl_1 = np.percentile(df_orig, kpi_rag_list[0])
484
495
  pcntl_2 = np.percentile(df_orig, kpi_rag_list[1])
485
496
  kpi_rag_list_ = [pcntl_1, pcntl_2]
486
-
497
+
487
498
  # * for rag mopde both rel and abs
488
499
  if kpi_mode.startswith("rag"):
489
500
  # * get fitting icon
@@ -500,39 +511,31 @@ def show_num_df(
500
511
  else icons[1] if val > kpi_rag_list_[1] else icons[2]
501
512
  )
502
513
  return icon
503
-
514
+
504
515
  # * for min/max mode, get min and max either from table or column
505
516
  # ! care for max values
506
517
  min_ = tbl_min if kpi_mode.endswith("_xy") else col_min[col]
507
518
  max_ = tbl_max if kpi_mode.endswith("_xy") else col_max[col]
508
519
 
509
520
  # * omit Total column for min/max
510
- if col=="Total":
521
+ if col == "Total":
511
522
  return ""
512
523
 
513
524
  # * calculate order of icons
514
- if kpi_mode.startswith( "min_max"):
515
- result= (
516
- icons[0]
517
- if val == min_
518
- else icons[2] if val == max_ else icons[3]
519
- )
525
+ if kpi_mode.startswith("min_max"):
526
+ result = icons[0] if val == min_ else icons[2] if val == max_ else icons[3]
520
527
  elif kpi_mode.startswith("max_min"):
521
- result= (
522
- icons[0]
523
- if val == max_
524
- else icons[2] if val == min_ else icons[3]
525
- )
528
+ result = icons[0] if val == max_ else icons[2] if val == min_ else icons[3]
526
529
  else:
527
530
  # * no matching mode founf
528
- result=""
531
+ result = ""
529
532
 
530
533
  return result
531
534
 
532
535
  # * all cell formatting in one place
533
536
  def format_cell(val, col):
534
537
  """
535
- A function to format a cell value based on the sum and percentage axis.
538
+ A function to format a cell value based on the sum and percentage axis.
536
539
  Parameters:
537
540
  - val: The value of the cell.
538
541
  - col: The column index of the cell.
@@ -540,8 +543,8 @@ def show_num_df(
540
543
  Returns a formatted string for the cell value.
541
544
  """
542
545
  # * calc sum depending on pct_axis
543
- sum_=tbl_sum if pct_axis=="xy" else col_sum[col] if pct_axis=="x" else val
544
- val_rel= 0 if sum_== 0 else val / sum_
546
+ sum_ = tbl_sum if pct_axis == "xy" else col_sum[col] if pct_axis == "x" else val
547
+ val_rel = 0 if sum_ == 0 else val / sum_
545
548
 
546
549
  # * get kpi icon
547
550
  kpi = get_kpi(val, col=col)
@@ -556,14 +559,11 @@ def show_num_df(
556
559
  if pct_axis:
557
560
  return f'{val:_.{precision}f} <span style="color: {color_pct}">({val_rel:.1%}) {kpi}</span>'
558
561
  if show_as_pct:
559
- return f'{val:.{precision}%} {kpi}'
562
+ return f"{val:.{precision}%} {kpi}"
560
563
  return f"{val:_.{precision}f} {kpi}"
561
564
 
562
565
  # * formatter is now unified, col wise
563
- formatter = {
564
- col: lambda x, col=col: format_cell(x, col=col)
565
- for col in df_.columns
566
- }
566
+ formatter = {col: lambda x, col=col: format_cell(x, col=col) for col in df_.columns}
567
567
 
568
568
  # ? pct_axis y is not implemented, needs row wise formatting
569
569
  # row_sums = _df.sum(axis=1) / divider
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.10.1
3
+ Version: 0.11.0
4
4
  Summary: A collection of helper for table handling and vizualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -154,3 +154,7 @@ _df, _details = ven.show_venn3(
154
154
  ```
155
155
 
156
156
  ![venn](https://github.com/smeisegeier/pandas-plots/blob/main/img/2024-02-19-20-49-52.png?raw=true)
157
+
158
+ ## tags
159
+
160
+ #pandas, #plotly, #visualizations, #statistics
File without changes