pandas-plots 0.11.22__py3-none-any.whl → 0.11.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/hlp.py +130 -15
- pandas_plots/pls.py +86 -34
- {pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/METADATA +4 -3
- pandas_plots-0.11.24.dist-info/RECORD +10 -0
- {pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/WHEEL +1 -1
- pandas_plots-0.11.22.dist-info/RECORD +0 -10
- {pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/LICENSE +0 -0
- {pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/top_level.txt +0 -0
pandas_plots/hlp.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
import numpy as np
|
3
|
-
import scipy.stats
|
4
1
|
import importlib.metadata as md
|
5
|
-
from platform import python_version
|
6
|
-
from typing import Literal, List
|
7
|
-
|
8
|
-
from enum import Enum, auto
|
9
|
-
import platform
|
10
2
|
import os
|
11
|
-
|
3
|
+
import platform
|
4
|
+
import re
|
5
|
+
from enum import Enum, auto
|
12
6
|
from io import BytesIO
|
7
|
+
from platform import python_version
|
8
|
+
from typing import List, Literal
|
9
|
+
|
10
|
+
import duckdb as ddb
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
import requests
|
14
|
+
import scipy.stats
|
13
15
|
from matplotlib import pyplot as plt
|
14
16
|
from PIL import Image
|
15
|
-
import requests
|
16
|
-
import re
|
17
17
|
|
18
18
|
# from devtools import debug
|
19
19
|
|
@@ -32,7 +32,7 @@ def mean_confidence_interval(df, confidence=0.95):
|
|
32
32
|
Returns:
|
33
33
|
tuple: A tuple containing the mean, interval, lower bound, and upper bound.
|
34
34
|
"""
|
35
|
-
df = df_to_series(df)
|
35
|
+
df = to_series(df)
|
36
36
|
if df is None:
|
37
37
|
return None
|
38
38
|
a = 1.0 * np.array(df)
|
@@ -53,7 +53,7 @@ def mean_confidence_interval(df, confidence=0.95):
|
|
53
53
|
# return dist.mean - h, dist.mean + h
|
54
54
|
|
55
55
|
|
56
|
-
def df_to_series(df) -> pd.Series | None:
|
56
|
+
def to_series(df) -> pd.Series | None:
|
57
57
|
"""
|
58
58
|
Converts a pandas DataFrame to a pandas Series.
|
59
59
|
|
@@ -103,6 +103,10 @@ def df_to_series(df) -> pd.Series | None:
|
|
103
103
|
s.name = _data_col.name
|
104
104
|
return s
|
105
105
|
|
106
|
+
# * extend objects to enable chaining
|
107
|
+
pd.DataFrame.to_series = to_series
|
108
|
+
pd.Series.to_series = to_series
|
109
|
+
|
106
110
|
|
107
111
|
def replace_delimiter_outside_quotes(
|
108
112
|
input: str, delimiter_old: str = ",", delimiter_new: str = ";", quotechar: str = '"'
|
@@ -234,6 +238,26 @@ def create_barcode_from_url(
|
|
234
238
|
|
235
239
|
|
236
240
|
def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFrame:
|
241
|
+
"""
|
242
|
+
Add datetime columns to a given DataFrame.
|
243
|
+
|
244
|
+
Adds the following columns to the given DataFrame:
|
245
|
+
- YYYY: Year of date_column
|
246
|
+
- MM: Month of date_column
|
247
|
+
- Q: Quarter of date_column
|
248
|
+
- YYYY-MM: Year-month of date_column
|
249
|
+
- YYYYQ: Year-quarter of date_column
|
250
|
+
- YYYY-WW: Year-week of date_column
|
251
|
+
- DDD: Day of the week of date_column
|
252
|
+
|
253
|
+
Args:
|
254
|
+
df (pd.DataFrame): The DataFrame to add datetime columns to.
|
255
|
+
date_column (str, optional): The column to base the added datetime columns off of. Defaults to None.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
pd.DataFrame: The DataFrame with the added datetime columns.
|
259
|
+
This command can be chained.
|
260
|
+
"""
|
237
261
|
df_ = df.copy()
|
238
262
|
if not date_column:
|
239
263
|
date_column = [
|
@@ -269,6 +293,9 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
|
|
269
293
|
|
270
294
|
return df_
|
271
295
|
|
296
|
+
# * extend objects to enable chaining
|
297
|
+
pd.DataFrame.add_datetime_columns = add_datetime_columns
|
298
|
+
|
272
299
|
|
273
300
|
def show_package_version(
|
274
301
|
packages: list[str] = None,
|
@@ -289,7 +316,7 @@ def show_package_version(
|
|
289
316
|
# ! avoid empty list in signature, it will NOT be empty in runtime
|
290
317
|
if packages is None:
|
291
318
|
packages = []
|
292
|
-
|
319
|
+
|
293
320
|
if not isinstance(packages, List):
|
294
321
|
print(f"❌ A list of str must be provided")
|
295
322
|
return
|
@@ -315,6 +342,7 @@ def show_package_version(
|
|
315
342
|
print(out)
|
316
343
|
return
|
317
344
|
|
345
|
+
|
318
346
|
class OperatingSystem(Enum):
|
319
347
|
WINDOWS = auto()
|
320
348
|
LINUX = auto()
|
@@ -333,7 +361,7 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
|
|
333
361
|
- OperatingSystem.MAC
|
334
362
|
|
335
363
|
Returns:
|
336
|
-
bool: True if the desired operating system matches the current operating system, False otherwise.
|
364
|
+
bool: True if the desired operating system matches the current operating system, False otherwise.
|
337
365
|
str: Returns the current operating system (platform.system()) if is_os is None.
|
338
366
|
"""
|
339
367
|
if verbose:
|
@@ -352,3 +380,90 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
|
|
352
380
|
return True
|
353
381
|
else:
|
354
382
|
return False
|
383
|
+
|
384
|
+
|
385
|
+
def add_bitmask_label(
|
386
|
+
data: pd.DataFrame | pd.Series | ddb.DuckDBPyRelation,
|
387
|
+
bitmask_col: str,
|
388
|
+
labels: list[str],
|
389
|
+
separator: str = "|",
|
390
|
+
zero_code: str = "-",
|
391
|
+
keep_col: bool = True,
|
392
|
+
con: ddb.DuckDBPyConnection = None,
|
393
|
+
) -> pd.DataFrame | ddb.DuckDBPyRelation:
|
394
|
+
"""
|
395
|
+
adds a column to the data (DataFrame, Series, or DuckDB Relation) that resolves a bitmask column into human-readable labels.
|
396
|
+
- bitmask_col must have been generated before. its value must be constructed as a bitmask, e.g:
|
397
|
+
- a red, green, blue combination is rendered into binary 110, which means it has green and blue
|
398
|
+
- its value is 6, which will resolved into "g|b" if the list ["r","g","b"] is given
|
399
|
+
|
400
|
+
if the bitmask value is 0, it will be replaced with the zero_code.
|
401
|
+
the method can be chained in pandas as well as in duckdb: df.add_bitmask_label(...)
|
402
|
+
|
403
|
+
Parameters:
|
404
|
+
- data (pd.DataFrame | pd.Series | duckdb.DuckDBPyRelation): Input data.
|
405
|
+
- bitmask_col (str): The name of the column containing bitmask values (ignored if input is Series).
|
406
|
+
- labels (list[str]): Labels corresponding to the bits, in the correct order.
|
407
|
+
- separator (str): Separator for combining labels. Default is "|".
|
408
|
+
- zero_code (str): Value to return for bitmask value 0. Default is "-".
|
409
|
+
- keep_col (bool): If True, retains the bitmask column. If False, removes it. Default is True.
|
410
|
+
- con (duckdb.Connection): DuckDB connection object. Required if data is a DuckDB Relation.
|
411
|
+
|
412
|
+
Returns:
|
413
|
+
- pd.DataFrame | duckdb.DuckDBPyRelation: The modified data with the new column added.
|
414
|
+
"""
|
415
|
+
# * check possible input formats
|
416
|
+
if isinstance(data, ddb.DuckDBPyRelation):
|
417
|
+
if con is None:
|
418
|
+
raise ValueError(
|
419
|
+
"A DuckDB connection must be provided when the input is a DuckDB Relation."
|
420
|
+
)
|
421
|
+
data = data.df() # * Convert DuckDB Relation to DataFrame
|
422
|
+
|
423
|
+
if isinstance(data, pd.Series):
|
424
|
+
bitmask_col = data.name if data.name else "bitmask"
|
425
|
+
data = data.to_frame(name=bitmask_col)
|
426
|
+
|
427
|
+
if not isinstance(data, pd.DataFrame):
|
428
|
+
raise ValueError(
|
429
|
+
"Input must be a pandas DataFrame, Series, or DuckDB Relation."
|
430
|
+
)
|
431
|
+
|
432
|
+
# * get max allowed value by bitshift, eg for 4 labels its 2^4 -1 = 15
|
433
|
+
max_allowable_value = (1 << len(labels)) - 1
|
434
|
+
# * compare against max in col
|
435
|
+
max_value_in_column = data[bitmask_col].max()
|
436
|
+
if max_value_in_column > max_allowable_value:
|
437
|
+
raise ValueError(
|
438
|
+
f"The maximum value in column '{bitmask_col}' ({max_value_in_column}) exceeds "
|
439
|
+
f"the maximum allowable value for {len(labels)} labels ({max_allowable_value}). "
|
440
|
+
f"Ensure the number of labels matches the possible bitmask range."
|
441
|
+
)
|
442
|
+
|
443
|
+
# ? Core logic
|
444
|
+
# * exit if 0
|
445
|
+
def decode_bitmask(value):
|
446
|
+
if value == 0:
|
447
|
+
return zero_code
|
448
|
+
# * iterate over each value as bitfield, on binary 1 fetch assigned label from [labels]
|
449
|
+
return separator.join(
|
450
|
+
[label for i, label in enumerate(labels) if value & (1 << i)]
|
451
|
+
)
|
452
|
+
|
453
|
+
label_col = f"{bitmask_col}_label"
|
454
|
+
data[label_col] = data[bitmask_col].apply(decode_bitmask)
|
455
|
+
|
456
|
+
# * drop value col if not to be kept
|
457
|
+
if not keep_col:
|
458
|
+
data = data.drop(columns=[bitmask_col])
|
459
|
+
|
460
|
+
# * Convert back to DuckDB Relation if original input was a Relation
|
461
|
+
if isinstance(data, pd.DataFrame) and con is not None:
|
462
|
+
return con.from_df(data)
|
463
|
+
|
464
|
+
return data
|
465
|
+
|
466
|
+
|
467
|
+
# * extend objects to enable chaining
|
468
|
+
pd.DataFrame.add_bitmask_label = add_bitmask_label
|
469
|
+
ddb.DuckDBPyRelation.add_bitmask_label = add_bitmask_label
|
pandas_plots/pls.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
from pathlib import Path
|
1
2
|
import warnings
|
2
3
|
|
3
4
|
warnings.filterwarnings("ignore")
|
@@ -22,7 +23,8 @@ def plot_quadrants(
|
|
22
23
|
df: pd.DataFrame,
|
23
24
|
title: str = None,
|
24
25
|
caption: str = None,
|
25
|
-
|
26
|
+
png_path: Path | str = None,
|
27
|
+
) -> object:
|
26
28
|
"""
|
27
29
|
Plot a heatmap for the given dataframe, with options for title and caption.
|
28
30
|
|
@@ -35,6 +37,7 @@ def plot_quadrants(
|
|
35
37
|
df columns must contain 2 values
|
36
38
|
title (str, optional): The title for the heatmap to override the default.
|
37
39
|
caption (str, optional): The caption for the heatmap. Defaults to None.
|
40
|
+
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
38
41
|
|
39
42
|
Returns:
|
40
43
|
q1, q2, q3, q4, n: The values for each quadrant and the total count.
|
@@ -93,6 +96,10 @@ def plot_quadrants(
|
|
93
96
|
q3 = heat_wide_out.iloc[0, 0]
|
94
97
|
q4 = heat_wide_out.iloc[0, 1]
|
95
98
|
|
99
|
+
# * save to png if path is provided
|
100
|
+
if png_path is not None:
|
101
|
+
plt.savefig(Path(png_path).as_posix(), format='png')
|
102
|
+
|
96
103
|
return q1, q2, q3, q4, n
|
97
104
|
# * plotly express is not used for the heatmap, although it does not need the derived wide format.
|
98
105
|
# * but theres no option to alter inner values in the heatmap
|
@@ -115,7 +122,8 @@ def plot_stacked_bars(
|
|
115
122
|
sort_values: bool = False,
|
116
123
|
show_total: bool = False,
|
117
124
|
precision: int = 0,
|
118
|
-
|
125
|
+
png_path: Path | str = None,
|
126
|
+
) -> object:
|
119
127
|
"""
|
120
128
|
Generates a stacked bar plot using the provided DataFrame.
|
121
129
|
df *must* comprise the columns (order matters):
|
@@ -140,9 +148,10 @@ def plot_stacked_bars(
|
|
140
148
|
- sort_values: bool = False - Sort axis by index (default) or values
|
141
149
|
- show_total: bool = False - Whether to show the total value
|
142
150
|
- precision: int = 0 - The number of decimal places to round to
|
151
|
+
- png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
143
152
|
|
144
153
|
Returns:
|
145
|
-
|
154
|
+
plot object
|
146
155
|
"""
|
147
156
|
BAR_LENGTH_MULTIPLIER = 1.05
|
148
157
|
|
@@ -322,7 +331,12 @@ def plot_stacked_bars(
|
|
322
331
|
_fig.update_layout(yaxis={"categoryorder": "category descending"})
|
323
332
|
|
324
333
|
_fig.show(renderer)
|
325
|
-
|
334
|
+
|
335
|
+
# * save to png if path is provided
|
336
|
+
if png_path is not None:
|
337
|
+
_fig.write_image(Path(png_path).as_posix())
|
338
|
+
|
339
|
+
return _fig
|
326
340
|
|
327
341
|
|
328
342
|
def plot_bars(
|
@@ -340,7 +354,8 @@ def plot_bars(
|
|
340
354
|
use_ci: bool = False,
|
341
355
|
precision: int = 0,
|
342
356
|
renderer: Literal["png", "svg", None] = "png",
|
343
|
-
|
357
|
+
png_path: Path | str = None,
|
358
|
+
) -> object:
|
344
359
|
"""
|
345
360
|
A function to plot a bar chart based on a *categorical* column (must be string or bool) and a numerical value.
|
346
361
|
Accepts:
|
@@ -366,9 +381,10 @@ def plot_bars(
|
|
366
381
|
- enforces dropna=True
|
367
382
|
- precision: An integer indicating the number of decimal places to round the values to. Default is 0.
|
368
383
|
- renderer: A string indicating the renderer to use for displaying the chart. It can be "png", "svg", or None. Default is "png".
|
384
|
+
- png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
369
385
|
|
370
386
|
Returns:
|
371
|
-
-
|
387
|
+
- plot object
|
372
388
|
"""
|
373
389
|
# * if series, apply value_counts, deselect use_ci
|
374
390
|
if isinstance(df_in, pd.Series):
|
@@ -568,7 +584,12 @@ def plot_bars(
|
|
568
584
|
textposition="outside" if not use_ci else "auto", error_y=dict(thickness=5)
|
569
585
|
)
|
570
586
|
_fig.show(renderer)
|
571
|
-
|
587
|
+
|
588
|
+
# * save to png if path is provided
|
589
|
+
if png_path is not None:
|
590
|
+
_fig.write_image(Path(png_path).as_posix())
|
591
|
+
|
592
|
+
return _fig
|
572
593
|
|
573
594
|
|
574
595
|
def plot_histogram(
|
@@ -586,7 +607,8 @@ def plot_histogram(
|
|
586
607
|
renderer: Literal["png", "svg", None] = "png",
|
587
608
|
caption: str = None,
|
588
609
|
title: str = None,
|
589
|
-
|
610
|
+
png_path: Path | str = None,
|
611
|
+
) -> object:
|
590
612
|
"""
|
591
613
|
A function to plot a histogram based on *numeric* columns in a DataFrame.
|
592
614
|
Accepts:
|
@@ -606,16 +628,18 @@ def plot_histogram(
|
|
606
628
|
renderer (Literal["png", "svg", None]): The renderer for displaying the plot. Default is "png".
|
607
629
|
caption (str): The caption for the plot. Default is None.
|
608
630
|
title (str): The title of the plot. Default is None.
|
631
|
+
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
632
|
+
|
609
633
|
|
610
634
|
Returns:
|
611
|
-
|
635
|
+
plot object
|
612
636
|
"""
|
613
|
-
|
637
|
+
|
614
638
|
# * convert to df if series
|
615
639
|
if isinstance(df_ser, pd.Series):
|
616
640
|
df = df_ser.to_frame()
|
617
641
|
else:
|
618
|
-
df=df_ser
|
642
|
+
df = df_ser
|
619
643
|
|
620
644
|
col_not_num = df.select_dtypes(exclude="number").columns
|
621
645
|
if any(col_not_num):
|
@@ -628,7 +652,7 @@ def plot_histogram(
|
|
628
652
|
df = df.applymap(lambda x: round(x, precision))
|
629
653
|
|
630
654
|
# ! plot
|
631
|
-
_caption=_set_caption(caption)
|
655
|
+
_caption = _set_caption(caption)
|
632
656
|
fig = px.histogram(
|
633
657
|
data_frame=df,
|
634
658
|
histnorm=histnorm,
|
@@ -653,11 +677,16 @@ def plot_histogram(
|
|
653
677
|
"size": 24,
|
654
678
|
},
|
655
679
|
},
|
656
|
-
showlegend=False if df.shape[1]==1 else True,
|
680
|
+
showlegend=False if df.shape[1] == 1 else True,
|
657
681
|
)
|
658
682
|
|
659
683
|
fig.show(renderer)
|
660
|
-
|
684
|
+
|
685
|
+
# * save to png if path is provided
|
686
|
+
if png_path is not None:
|
687
|
+
fig.write_image(Path(png_path).as_posix())
|
688
|
+
|
689
|
+
return fig
|
661
690
|
|
662
691
|
|
663
692
|
def plot_joint(
|
@@ -668,7 +697,8 @@ def plot_joint(
|
|
668
697
|
dropna: bool = False,
|
669
698
|
caption: str = "",
|
670
699
|
title: str = "",
|
671
|
-
|
700
|
+
png_path: Path | str = None,
|
701
|
+
) -> object:
|
672
702
|
"""
|
673
703
|
Generate a seaborn joint plot for *two numeric* columns of a given DataFrame.
|
674
704
|
|
@@ -680,9 +710,10 @@ def plot_joint(
|
|
680
710
|
- dropna: Whether to drop NA values before plotting (default is False).
|
681
711
|
- caption: A caption for the plot.
|
682
712
|
- title: The title of the plot.
|
713
|
+
- png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
683
714
|
|
684
715
|
Returns:
|
685
|
-
|
716
|
+
plot object
|
686
717
|
"""
|
687
718
|
|
688
719
|
if df.shape[1] != 2:
|
@@ -702,7 +733,7 @@ def plot_joint(
|
|
702
733
|
# * set theme and palette
|
703
734
|
sb.set_theme(style="darkgrid", palette="tab10")
|
704
735
|
if os.getenv("THEME") == "dark":
|
705
|
-
_style = "dark_background"
|
736
|
+
_style = "dark_background"
|
706
737
|
_cmap = "rocket"
|
707
738
|
else:
|
708
739
|
_style = "bmh"
|
@@ -720,19 +751,21 @@ def plot_joint(
|
|
720
751
|
"dropna": dropna,
|
721
752
|
# "title": f"{caption}[{ser.name}], n = {len(ser):_}" if not title else title,
|
722
753
|
}
|
723
|
-
dict_hex={"cmap": _cmap}
|
724
|
-
dict_kde={"fill": True, "cmap": _cmap}
|
725
|
-
|
726
|
-
if kind=="hex":
|
754
|
+
dict_hex = {"cmap": _cmap}
|
755
|
+
dict_kde = {"fill": True, "cmap": _cmap}
|
756
|
+
|
757
|
+
if kind == "hex":
|
727
758
|
fig = sb.jointplot(**dict_base, **dict_hex)
|
728
|
-
elif kind=="kde":
|
759
|
+
elif kind == "kde":
|
729
760
|
fig = sb.jointplot(**dict_base, **dict_kde)
|
730
761
|
else:
|
731
762
|
fig = sb.jointplot(**dict_base)
|
732
|
-
|
763
|
+
|
733
764
|
# * emojis dont work in good ol seaborn
|
734
|
-
_caption="" if not caption else f"#{caption}, "
|
735
|
-
fig.figure.suptitle(
|
765
|
+
_caption = "" if not caption else f"#{caption}, "
|
766
|
+
fig.figure.suptitle(
|
767
|
+
title or f"{_caption}[{df.columns[0]}] vs [{df.columns[1]}], n = {len(df):_}"
|
768
|
+
)
|
736
769
|
# * leave some room for the title
|
737
770
|
fig.figure.tight_layout()
|
738
771
|
fig.figure.subplots_adjust(top=0.90)
|
@@ -748,7 +781,11 @@ def plot_joint(
|
|
748
781
|
# dropna=dropna,
|
749
782
|
# cmap=_cmap,
|
750
783
|
# )
|
751
|
-
|
784
|
+
# * save to png if path is provided
|
785
|
+
if png_path is not None:
|
786
|
+
fig.savefig(Path(png_path).as_posix())
|
787
|
+
|
788
|
+
return fig
|
752
789
|
|
753
790
|
|
754
791
|
def plot_box(
|
@@ -764,7 +801,8 @@ def plot_box(
|
|
764
801
|
violin: bool = False,
|
765
802
|
x_min: float = None,
|
766
803
|
x_max: float = None,
|
767
|
-
|
804
|
+
png_path: Path | str = None,
|
805
|
+
) -> object:
|
768
806
|
"""
|
769
807
|
Plots a horizontal box plot for the given pandas Series.
|
770
808
|
|
@@ -779,14 +817,15 @@ def plot_box(
|
|
779
817
|
x_min: The minimum value for the x-axis scale (max and min must be set)
|
780
818
|
x_max: The maximum value for the x-axis scale (max and min must be set)
|
781
819
|
summary: Whether to add a summary table to the plot
|
820
|
+
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
782
821
|
|
783
822
|
Returns:
|
784
|
-
|
823
|
+
plot object
|
785
824
|
"""
|
786
|
-
ser = df_to_series(ser)
|
825
|
+
ser = to_series(ser)
|
787
826
|
if ser is None:
|
788
827
|
return
|
789
|
-
|
828
|
+
|
790
829
|
# * drop na to keep scipy sane
|
791
830
|
n_ = len(ser)
|
792
831
|
ser.dropna(inplace=True)
|
@@ -894,9 +933,15 @@ def plot_box(
|
|
894
933
|
)
|
895
934
|
|
896
935
|
fig.show("png")
|
936
|
+
|
897
937
|
if summary:
|
898
938
|
print_summary(ser)
|
899
|
-
|
939
|
+
|
940
|
+
# * save to png if path is provided
|
941
|
+
if png_path is not None:
|
942
|
+
fig.write_image(Path(png_path).as_posix())
|
943
|
+
|
944
|
+
return fig
|
900
945
|
|
901
946
|
|
902
947
|
def plot_boxes(
|
@@ -909,7 +954,8 @@ def plot_boxes(
|
|
909
954
|
annotations: bool = True,
|
910
955
|
summary: bool = True,
|
911
956
|
title: str = None,
|
912
|
-
|
957
|
+
png_path: Path | str = None,
|
958
|
+
) -> object:
|
913
959
|
"""
|
914
960
|
[Experimental] Plot vertical boxes for each unique item in the DataFrame and add annotations for statistics.
|
915
961
|
|
@@ -922,9 +968,10 @@ def plot_boxes(
|
|
922
968
|
width (int): The width of the plot.
|
923
969
|
annotations (bool): Whether to add annotations to the plot.
|
924
970
|
summary (bool): Whether to add a summary to the plot.
|
971
|
+
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
925
972
|
|
926
973
|
Returns:
|
927
|
-
|
974
|
+
plot object
|
928
975
|
"""
|
929
976
|
|
930
977
|
if (
|
@@ -1037,7 +1084,12 @@ def plot_boxes(
|
|
1037
1084
|
fig.show("png")
|
1038
1085
|
if summary:
|
1039
1086
|
print_summary(df)
|
1040
|
-
|
1087
|
+
|
1088
|
+
# * save to png if path is provided
|
1089
|
+
if png_path is not None:
|
1090
|
+
fig.write_image(Path(png_path).as_posix())
|
1091
|
+
|
1092
|
+
return fig
|
1041
1093
|
|
1042
1094
|
|
1043
1095
|
# def plot_ci_bars_DEPR(df: pd.DataFrame, dropna: bool = True, precision: int = 2) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.11.22
|
3
|
+
Version: 0.11.24
|
4
4
|
Summary: A collection of helper for table handling and vizualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -100,14 +100,15 @@ tbl.show_num_df(
|
|
100
100
|
- `show_venn3()` displays a venn diagram for 3 sets
|
101
101
|
|
102
102
|
- `hlp` contains some (variety) helper functions
|
103
|
-
- `df_to_series()` converts a dataframe to a series
|
103
|
+
- `to_series()` converts a dataframe to a series (`🚨 breaking change`)
|
104
104
|
- `mean_confidence_interval()` calculates mean and confidence interval for a series
|
105
105
|
- `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
|
106
106
|
- `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
|
107
107
|
- `create_barcode_from_url()` creates a barcode from a given URL
|
108
|
-
- `add_datetime_col()` adds a datetime columns to a dataframe
|
108
|
+
- `add_datetime_col()` adds a datetime columns to a dataframe (chainable)
|
109
109
|
- `show_package_version` prints version of a list of packages
|
110
110
|
- `get_os` helps to identify and ensure operating system at runtime
|
111
|
+
- `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
111
112
|
|
112
113
|
- `pii` has routines for handling of personally identifiable information
|
113
114
|
- `remove_pii()` logs and deletes pii from a series
|
@@ -0,0 +1,10 @@
|
|
1
|
+
pandas_plots/hlp.py,sha256=N6NrbFagVMMX-ZnV0rIBEz82SeSoOkksfMcCap55W7E,16588
|
2
|
+
pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
3
|
+
pandas_plots/pls.py,sha256=U-tjh0DnYQYg-n02hh_HyvObMerkGCBP8tirKFIEEn4,37376
|
4
|
+
pandas_plots/tbl.py,sha256=A1SqvssDA4ofI_WJ-sdWIb9Bo5X-sELD8pley22Y4X4,28380
|
5
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
+
pandas_plots-0.11.24.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
7
|
+
pandas_plots-0.11.24.dist-info/METADATA,sha256=rapR9ocNOI-6U2PyTtDOTJu2EpZaGxXlywAtOBVHdoA,7220
|
8
|
+
pandas_plots-0.11.24.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
9
|
+
pandas_plots-0.11.24.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
10
|
+
pandas_plots-0.11.24.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
pandas_plots/hlp.py,sha256=wrvy36rnSdg1I4uQjIzzwGmjcN0gvSfKylRf_7GKpXs,12001
|
2
|
-
pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
3
|
-
pandas_plots/pls.py,sha256=C-EUvt9u7aXd6va7BGamf6HSODOnvbERwxu2Gb8PgbQ,35449
|
4
|
-
pandas_plots/tbl.py,sha256=A1SqvssDA4ofI_WJ-sdWIb9Bo5X-sELD8pley22Y4X4,28380
|
5
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
-
pandas_plots-0.11.22.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
7
|
-
pandas_plots-0.11.22.dist-info/METADATA,sha256=YgJjD4QfPZkLutuYg4_5orNjoVwNH2jx9nTsSwYqIlk,7071
|
8
|
-
pandas_plots-0.11.22.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
9
|
-
pandas_plots-0.11.22.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
10
|
-
pandas_plots-0.11.22.dist-info/RECORD,,
|
File without changes
|
File without changes
|