pandas-plots 0.10.1__tar.gz → 0.11.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas-plots-0.10.1/src/pandas_plots.egg-info → pandas-plots-0.11.0}/PKG-INFO +5 -1
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/README.md +4 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/setup.cfg +1 -1
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots/hlp.py +43 -23
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots/tbl.py +52 -52
- {pandas-plots-0.10.1 → pandas-plots-0.11.0/src/pandas_plots.egg-info}/PKG-INFO +5 -1
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/LICENSE +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/pyproject.toml +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots/pls.py +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots/ven.py +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots.egg-info/SOURCES.txt +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas-plots-0.10.1 → pandas-plots-0.11.0}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.10.1
|
3
|
+
Version: 0.11.0
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -154,3 +154,7 @@ _df, _details = ven.show_venn3(
|
|
154
154
|
```
|
155
155
|
|
156
156
|

|
157
|
+
|
158
|
+
## tags
|
159
|
+
|
160
|
+
#pandas, #plotly, #visualizations, #statistics
|
@@ -8,7 +8,7 @@ from PIL import Image
|
|
8
8
|
import requests
|
9
9
|
import re
|
10
10
|
|
11
|
-
from
|
11
|
+
# from devtools import debug
|
12
12
|
|
13
13
|
URL_REGEX = r"^(?:http|ftp)s?://" # https://stackoverflow.com/a/1617386
|
14
14
|
|
@@ -125,53 +125,67 @@ def replace_delimiter_outside_quotes(
|
|
125
125
|
|
126
126
|
|
127
127
|
def wrap_text(
|
128
|
-
text: str | list,
|
128
|
+
text: str | list,
|
129
|
+
max_items_in_line: int = 70,
|
130
|
+
use_sep: bool = True,
|
131
|
+
use_apo: bool = False,
|
129
132
|
):
|
130
133
|
"""
|
131
134
|
A function that wraps text into lines with a maximum number of items per line.
|
135
|
+
Important: enclose this function in a print() statement to print the text
|
132
136
|
|
133
137
|
Args:
|
134
138
|
text (str | list): The input text or list of words to be wrapped.
|
135
139
|
max_items_in_line (int): The maximum number of items allowed in each line.
|
136
|
-
|
137
|
-
|
140
|
+
use_sep (bool, optional): When list: Whether to include a comma separator between items. Defaults to True.
|
141
|
+
use_apo (bool, optional): When list: Whether to enclose each word in single quotes. Defaults to False.
|
142
|
+
Returns: the wrapped text
|
138
143
|
"""
|
139
144
|
|
140
|
-
# * check if text is string
|
145
|
+
# * check if text is string
|
141
146
|
is_text = isinstance(text, str)
|
142
147
|
if is_text:
|
148
|
+
# ! when splitting the text later by blanks, newlines are not correctly handled
|
149
|
+
# * to detect them, they must be followed by a blank:
|
150
|
+
pattern = r'(\n)(?=\S)' # *forward lookup for newline w/ no blank
|
151
|
+
# * add blank after these newlines
|
152
|
+
new_text = re.sub(pattern, r"\1 ", text)
|
153
|
+
text=new_text
|
154
|
+
|
155
|
+
# * then strip and build word list
|
143
156
|
text = (
|
144
157
|
text.replace(",", "")
|
145
158
|
.replace("'", "")
|
146
159
|
.replace("[", "")
|
147
160
|
.replace("]", "")
|
161
|
+
# * use explicit blanks to prevent newline split
|
148
162
|
.split(" ")
|
149
163
|
)
|
150
164
|
|
151
|
-
# *
|
165
|
+
# * loop setup
|
152
166
|
i = 0
|
153
167
|
line = ""
|
154
|
-
|
155
168
|
# * loop through words
|
156
169
|
out = ""
|
157
170
|
for word in text:
|
158
|
-
apo_s = "'" if
|
159
|
-
sep_s = "," if
|
171
|
+
apo_s = "'" if use_apo and not is_text else ""
|
172
|
+
sep_s = "," if use_sep and not is_text else ""
|
160
173
|
word_s = f"{apo_s}{str(word)}{apo_s}{sep_s}"
|
161
174
|
# * inc counter
|
162
175
|
i = i + len(word_s)
|
163
176
|
# * construct print line
|
164
177
|
line = line + word_s + " "
|
165
|
-
# * reset if counter exceeds limit
|
166
|
-
if i >= max_items_in_line:
|
178
|
+
# * reset if counter exceeds limit, or if word ends with newline
|
179
|
+
if i >= max_items_in_line or str(word).endswith("\n"):
|
167
180
|
out = out + line + "\n"
|
168
181
|
line = ""
|
169
182
|
i = 0
|
170
183
|
# else:
|
171
|
-
# * on short lists no reset happens,
|
172
|
-
|
173
|
-
|
174
|
-
|
184
|
+
# * on short lists no line reset happens, so just print the line
|
185
|
+
# * else add last line
|
186
|
+
out = line if not out else out + line
|
187
|
+
# * cut off last newline
|
188
|
+
return f"[{out[:-1].strip()}]"
|
175
189
|
|
176
190
|
|
177
191
|
def create_barcode_from_url(
|
@@ -211,21 +225,24 @@ def create_barcode_from_url(
|
|
211
225
|
# plt.axis('off') # Turn off axis numbers
|
212
226
|
plt.show()
|
213
227
|
|
228
|
+
|
214
229
|
def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFrame:
|
215
|
-
df_= df.copy()
|
230
|
+
df_ = df.copy()
|
216
231
|
if not date_column:
|
217
|
-
date_column = [
|
232
|
+
date_column = [
|
233
|
+
col for col in df_.columns if pd.api.types.is_datetime64_any_dtype(df_[col])
|
234
|
+
][0]
|
218
235
|
else:
|
219
236
|
df_[date_column] = pd.to_datetime(df_[date_column])
|
220
237
|
|
221
238
|
if not date_column or not pd.api.types.is_datetime64_any_dtype(df_[date_column]):
|
222
239
|
print("❌ No datetime column found")
|
223
240
|
return
|
224
|
-
|
241
|
+
|
225
242
|
if [col for col in df_.columns if "YYYY-WW" in col]:
|
226
243
|
print("❌ Added datetime columns already exist")
|
227
244
|
return
|
228
|
-
|
245
|
+
|
229
246
|
print(f"⏳ Adding datetime columns basing off of: {date_column}")
|
230
247
|
|
231
248
|
df_["YYYY"] = df_[date_column].dt.year
|
@@ -235,9 +252,12 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
|
|
235
252
|
df_["YYYY-MM"] = df_[date_column].dt.to_period("M").astype(str)
|
236
253
|
df_["YYYYQ"] = df_[date_column].dt.to_period("Q").astype(str)
|
237
254
|
df_["YYYY-WW"] = (
|
238
|
-
df_[date_column].dt.isocalendar().year.astype(str)
|
239
|
-
|
255
|
+
df_[date_column].dt.isocalendar().year.astype(str)
|
256
|
+
+ "-W"
|
257
|
+
+ df_[date_column].dt.isocalendar().week.astype(str).str.zfill(2)
|
258
|
+
)
|
259
|
+
df_["DDD"] = df_[date_column].dt.weekday.map(
|
260
|
+
{0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
|
240
261
|
)
|
241
|
-
|
242
|
-
|
262
|
+
|
243
263
|
return df_
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import warnings
|
2
|
+
|
2
3
|
warnings.filterwarnings("ignore")
|
3
4
|
|
4
5
|
import math
|
@@ -14,6 +15,7 @@ from plotly.subplots import make_subplots
|
|
14
15
|
from scipy import stats
|
15
16
|
|
16
17
|
from .hlp import wrap_text
|
18
|
+
|
17
19
|
# from devtools import debug
|
18
20
|
pd.options.display.colheader_justify = "right"
|
19
21
|
# pd.options.mode.chained_assignment = None
|
@@ -21,7 +23,9 @@ pd.options.display.colheader_justify = "right"
|
|
21
23
|
TOTAL_LITERAL = Literal[
|
22
24
|
"sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"
|
23
25
|
]
|
24
|
-
KPI_LITERAL = Literal[
|
26
|
+
KPI_LITERAL = Literal[
|
27
|
+
"rag_abs", "rag_rel", "min_max_xy", "max_min_xy", "min_max_x", "max_min_x"
|
28
|
+
]
|
25
29
|
|
26
30
|
|
27
31
|
def describe_df(
|
@@ -108,7 +112,7 @@ def describe_df(
|
|
108
112
|
is_str = df.loc[:, col].dtype.kind == "O"
|
109
113
|
# * wrap output
|
110
114
|
print(
|
111
|
-
f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70,
|
115
|
+
f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
|
112
116
|
)
|
113
117
|
# print(f"{_h} {_u[:top_n_uniques]}")
|
114
118
|
else:
|
@@ -130,14 +134,16 @@ def describe_df(
|
|
130
134
|
# ! *** PLOTS ***
|
131
135
|
if not use_plot:
|
132
136
|
return
|
133
|
-
|
137
|
+
|
134
138
|
# * reduce column names len if selected
|
135
139
|
if top_n_chars_in_columns > 0:
|
136
140
|
# * minumum 10 chars, or display is cluttered
|
137
|
-
top_n_chars_in_columns =
|
141
|
+
top_n_chars_in_columns = (
|
142
|
+
10 if top_n_chars_in_columns < 10 else top_n_chars_in_columns
|
143
|
+
)
|
138
144
|
col_list = []
|
139
145
|
for i, col in enumerate(df.columns):
|
140
|
-
col_list.append(col[:top_n_chars_in_columns]+"_"+str(i).zfill(3))
|
146
|
+
col_list.append(col[:top_n_chars_in_columns] + "_" + str(i).zfill(3))
|
141
147
|
df.columns = col_list
|
142
148
|
|
143
149
|
# * respect fig_offset to exclude unwanted plots from maintanance columns
|
@@ -183,7 +189,7 @@ def describe_df(
|
|
183
189
|
else s[:top_n_chars_in_index]
|
184
190
|
)
|
185
191
|
x = [_cut(item) for item in x]
|
186
|
-
|
192
|
+
|
187
193
|
figsub = px.bar(
|
188
194
|
x=x,
|
189
195
|
y=y,
|
@@ -318,7 +324,7 @@ def pivot_df(
|
|
318
324
|
heatmap_axis=heatmap_axis,
|
319
325
|
kpi_mode=kpi_mode,
|
320
326
|
kpi_rag_list=kpi_rag_list,
|
321
|
-
kpi_shape=kpi_shape
|
327
|
+
kpi_shape=kpi_shape,
|
322
328
|
)
|
323
329
|
|
324
330
|
|
@@ -364,7 +370,9 @@ def show_num_df(
|
|
364
370
|
"""
|
365
371
|
# * ensure arguments match parameter definition
|
366
372
|
if any([df[col].dtype.kind not in ["i", "u", "f"] for col in df.columns]) == True:
|
367
|
-
print(
|
373
|
+
print(
|
374
|
+
f"❌ table must contain numeric data only. Maybe you forgot to convert this table with pivot or pivot_table first?"
|
375
|
+
)
|
368
376
|
return
|
369
377
|
|
370
378
|
if (
|
@@ -383,16 +391,16 @@ def show_num_df(
|
|
383
391
|
print(f"❌ kpi_mode '{kpi_mode}' not supported")
|
384
392
|
return
|
385
393
|
|
386
|
-
if (kpi_mode and kpi_mode.startswith("rag")) and (
|
387
|
-
or len(kpi_rag_list) != 2
|
388
|
-
|
394
|
+
if (kpi_mode and kpi_mode.startswith("rag")) and (
|
395
|
+
not isinstance(kpi_rag_list, abc.Iterable) or len(kpi_rag_list) != 2
|
396
|
+
):
|
389
397
|
print(f"❌ kpi_rag_list must be a list of 2 if kpi_mode is set")
|
390
398
|
return
|
391
|
-
|
399
|
+
|
392
400
|
if kpi_mode == "rag_rel":
|
393
401
|
# * transform values into percentiles
|
394
402
|
if all(i <= 1 and i >= 0 for i in kpi_rag_list):
|
395
|
-
kpi_rag_list = [int(i*100) for i in kpi_rag_list]
|
403
|
+
kpi_rag_list = [int(i * 100) for i in kpi_rag_list]
|
396
404
|
else:
|
397
405
|
print(f"❌ kpi_list for relative mode must be between 0 and 1")
|
398
406
|
return
|
@@ -415,17 +423,21 @@ def show_num_df(
|
|
415
423
|
df_.loc["Total"] = df_.agg(total_mode, axis=0)
|
416
424
|
if total_mode and total_axis in ["y", "xy"]:
|
417
425
|
df_.loc[:, "Total"] = df_.agg(total_mode, axis=1)
|
418
|
-
|
426
|
+
|
419
427
|
# hack
|
420
428
|
# * column sum values are distorted by totals, these must be rendered out
|
421
|
-
col_divider =
|
429
|
+
col_divider = (
|
430
|
+
2
|
431
|
+
if (total_axis in ["x", "xy"] and pct_axis == "x" and total_mode == "sum")
|
432
|
+
else 1
|
433
|
+
)
|
422
434
|
col_sum = df_.sum() / col_divider
|
423
|
-
|
435
|
+
|
424
436
|
# * min values are unaffected
|
425
437
|
col_min = df_.min()
|
426
438
|
|
427
439
|
# * max values are affected by totals, ignore total row if present
|
428
|
-
last_row = -1 if (total_axis in ["x", "xy"] and total_mode=="sum") else None
|
440
|
+
last_row = -1 if (total_axis in ["x", "xy"] and total_mode == "sum") else None
|
429
441
|
col_max = df_[:last_row].max()
|
430
442
|
|
431
443
|
# * derive style
|
@@ -449,15 +461,14 @@ def show_num_df(
|
|
449
461
|
# align="zero",
|
450
462
|
)
|
451
463
|
|
452
|
-
|
453
464
|
def get_kpi(val: float, col: str) -> str:
|
454
465
|
"""
|
455
466
|
Function to calculate and return the appropriate icon based on the given value and key performance indicator (KPI) mode.
|
456
|
-
|
467
|
+
|
457
468
|
Parameters:
|
458
469
|
val (float): The value to be evaluated.
|
459
470
|
col (str): The column associated with the value.
|
460
|
-
|
471
|
+
|
461
472
|
Returns:
|
462
473
|
str: The appropriate icon based on the value and KPI mode.
|
463
474
|
"""
|
@@ -466,24 +477,24 @@ def show_num_df(
|
|
466
477
|
|
467
478
|
dict_icons = {
|
468
479
|
"squad": {
|
469
|
-
"light":["🟩", "🟨", "🟥", "⬜"],
|
470
|
-
"dark":["🟩", "🟨", "🟥", "⬛"]
|
471
|
-
|
480
|
+
"light": ["🟩", "🟨", "🟥", "⬜"],
|
481
|
+
"dark": ["🟩", "🟨", "🟥", "⬛"],
|
482
|
+
},
|
472
483
|
"circle": {
|
473
|
-
"light":["🟢", "🟡", "🔴", "⚪"],
|
474
|
-
"dark":["🟢", "🟡", "🔴", "⚫"]
|
475
|
-
|
484
|
+
"light": ["🟢", "🟡", "🔴", "⚪"],
|
485
|
+
"dark": ["🟢", "🟡", "🔴", "⚫"],
|
486
|
+
},
|
476
487
|
}
|
477
488
|
icons = dict_icons[kpi_shape][theme]
|
478
|
-
|
489
|
+
|
479
490
|
# * transform values into percentiles if relative mode
|
480
|
-
kpi_rag_list_= kpi_rag_list
|
481
|
-
if kpi_mode=="rag_rel":
|
491
|
+
kpi_rag_list_ = kpi_rag_list
|
492
|
+
if kpi_mode == "rag_rel":
|
482
493
|
# * get both percentile thresholds
|
483
494
|
pcntl_1 = np.percentile(df_orig, kpi_rag_list[0])
|
484
495
|
pcntl_2 = np.percentile(df_orig, kpi_rag_list[1])
|
485
496
|
kpi_rag_list_ = [pcntl_1, pcntl_2]
|
486
|
-
|
497
|
+
|
487
498
|
# * for rag mopde both rel and abs
|
488
499
|
if kpi_mode.startswith("rag"):
|
489
500
|
# * get fitting icon
|
@@ -500,39 +511,31 @@ def show_num_df(
|
|
500
511
|
else icons[1] if val > kpi_rag_list_[1] else icons[2]
|
501
512
|
)
|
502
513
|
return icon
|
503
|
-
|
514
|
+
|
504
515
|
# * for min/max mode, get min and max either from table or column
|
505
516
|
# ! care for max values
|
506
517
|
min_ = tbl_min if kpi_mode.endswith("_xy") else col_min[col]
|
507
518
|
max_ = tbl_max if kpi_mode.endswith("_xy") else col_max[col]
|
508
519
|
|
509
520
|
# * omit Total column for min/max
|
510
|
-
if col=="Total":
|
521
|
+
if col == "Total":
|
511
522
|
return ""
|
512
523
|
|
513
524
|
# * calculate order of icons
|
514
|
-
if kpi_mode.startswith(
|
515
|
-
result=
|
516
|
-
icons[0]
|
517
|
-
if val == min_
|
518
|
-
else icons[2] if val == max_ else icons[3]
|
519
|
-
)
|
525
|
+
if kpi_mode.startswith("min_max"):
|
526
|
+
result = icons[0] if val == min_ else icons[2] if val == max_ else icons[3]
|
520
527
|
elif kpi_mode.startswith("max_min"):
|
521
|
-
result=
|
522
|
-
icons[0]
|
523
|
-
if val == max_
|
524
|
-
else icons[2] if val == min_ else icons[3]
|
525
|
-
)
|
528
|
+
result = icons[0] if val == max_ else icons[2] if val == min_ else icons[3]
|
526
529
|
else:
|
527
530
|
# * no matching mode founf
|
528
|
-
result=""
|
531
|
+
result = ""
|
529
532
|
|
530
533
|
return result
|
531
534
|
|
532
535
|
# * all cell formatting in one place
|
533
536
|
def format_cell(val, col):
|
534
537
|
"""
|
535
|
-
A function to format a cell value based on the sum and percentage axis.
|
538
|
+
A function to format a cell value based on the sum and percentage axis.
|
536
539
|
Parameters:
|
537
540
|
- val: The value of the cell.
|
538
541
|
- col: The column index of the cell.
|
@@ -540,8 +543,8 @@ def show_num_df(
|
|
540
543
|
Returns a formatted string for the cell value.
|
541
544
|
"""
|
542
545
|
# * calc sum depending on pct_axis
|
543
|
-
sum_=tbl_sum if pct_axis=="xy" else col_sum[col] if pct_axis=="x" else val
|
544
|
-
val_rel= 0 if sum_== 0 else val / sum_
|
546
|
+
sum_ = tbl_sum if pct_axis == "xy" else col_sum[col] if pct_axis == "x" else val
|
547
|
+
val_rel = 0 if sum_ == 0 else val / sum_
|
545
548
|
|
546
549
|
# * get kpi icon
|
547
550
|
kpi = get_kpi(val, col=col)
|
@@ -556,14 +559,11 @@ def show_num_df(
|
|
556
559
|
if pct_axis:
|
557
560
|
return f'{val:_.{precision}f} <span style="color: {color_pct}">({val_rel:.1%}) {kpi}</span>'
|
558
561
|
if show_as_pct:
|
559
|
-
return f
|
562
|
+
return f"{val:.{precision}%} {kpi}"
|
560
563
|
return f"{val:_.{precision}f} {kpi}"
|
561
564
|
|
562
565
|
# * formatter is now unified, col wise
|
563
|
-
formatter = {
|
564
|
-
col: lambda x, col=col: format_cell(x, col=col)
|
565
|
-
for col in df_.columns
|
566
|
-
}
|
566
|
+
formatter = {col: lambda x, col=col: format_cell(x, col=col) for col in df_.columns}
|
567
567
|
|
568
568
|
# ? pct_axis y is not implemented, needs row wise formatting
|
569
569
|
# row_sums = _df.sum(axis=1) / divider
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.10.1
|
3
|
+
Version: 0.11.0
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -154,3 +154,7 @@ _df, _details = ven.show_venn3(
|
|
154
154
|
```
|
155
155
|
|
156
156
|

|
157
|
+
|
158
|
+
## tags
|
159
|
+
|
160
|
+
#pandas, #plotly, #visualizations, #statistics
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|