PyPI - pandas-plots - Versions diffs - 0.12.20__py3-none-any.whl → 0.12.22__py3-none-any.whl - Mend

pandas-plots 0.12.20__py3-none-any.whl → 0.12.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

pandas_plots/hlp.py CHANGED Viewed

@@ -22,29 +22,34 @@ from PIL import Image
 URL_REGEX = r"^(?:http|ftp)s?://"  # https://stackoverflow.com/a/1617386
-def mean_confidence_interval(df, confidence=0.95):
+def mean_confidence_interval(df, confidence=0.95, use_median=False):
     """
-    Calculate the mean and confidence interval of the input dataframe.
-    source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
+    Calculate the mean or median and confidence interval of the input dataframe.
+    Source: https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
     Parameters:
     df (array-like): The input dataframe.
     confidence (float, optional): The confidence level for the interval. Defaults to 0.95.
+    use_median (bool, optional): If True, calculates median and confidence interval instead of mean. Defaults to False.
     Returns:
-    tuple: A tuple containing the mean, interval, lower bound, and upper bound.
+    tuple: A tuple containing the central value (mean or median), interval, lower bound, and upper bound.
     """
     df = to_series(df)
     if df is None:
         return None
     a = 1.0 * np.array(df)
     n = len(a)
-    mean, se = np.mean(a), scipy.stats.sem(a)
-    # * calculate the margin of error for the confidence interval using the t-distribution with the specified confidence level.
-    margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
-    lower = mean - margin
-    upper = mean + margin
-    return mean, margin, lower, upper
+    if use_median:
+        median = np.median(a)
+        se = 1.253 * scipy.stats.sem(a)  # Approximate standard error for median
+        margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
+        return median, margin, median - margin, median + margin
+    else:
+        mean, se = np.mean(a), scipy.stats.sem(a)
+        margin = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
+        return mean, margin, mean - margin, mean + margin
     # # * Alternative
     # # from statistics import NormalDist
@@ -190,7 +195,8 @@ def wrap_text(
         line = line + word_s + " "
         # * reset if counter exceeds limit, or if word ends with newline
         if i >= max_items_in_line or str(word).endswith("\n"):
-            out = out + line + "\n"
+            # out = out + line + "\n"
+            out = out + line.rstrip() + "  \n"
             line = ""
             i = 0
         # else:
@@ -542,7 +548,9 @@ def add_measures_to_pyg_config(json_path: str, nodes: list[tuple[str, str]] = [(
     Example
     -------
-    `add_measures_to_pyg_config('config.json', [('cnt_tum', 'count(distinct z_tum_id)')], strict=True)`
+    default: `add_measures_to_pyg_config('config.json', [('cnt_tum', 'count(distinct z_tum_id)')], strict=True)`
+    usage: start pygwalker with empty config file but defined config path. make changes on the chart, save the config file. then run this function again - measures will be added
     """
     if not os.path.exists(json_path):
         if strict:

pandas_plots/pls.py CHANGED Viewed

@@ -500,6 +500,7 @@ def plot_bars(
     width: int = 1600,
     title: str = None,
     use_ci: bool = False,
+    ci_agg: Literal["mean", "median"] = "mean",
     precision: int = 0,
     renderer: Literal["png", "svg", None] = "png",
     png_path: Path | str = None,
@@ -569,9 +570,9 @@ def plot_bars(
                 dropna=False,
             )
             .agg(
-                mean=(col_name, "mean"),
+                mean=(col_name, ci_agg),
                 # * retrieve margin from custom func
-                margin=(col_name, lambda x: mean_confidence_interval(x)[1]),
+                margin=(col_name, lambda x: mean_confidence_interval(x, use_median = (ci_agg == "median"))[1]),
             )
             .reset_index()
         )
@@ -653,7 +654,7 @@ def plot_bars(
     # * title str n
     _title_str_n = (
-        f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on means<sub>"
+        f", n={n_len:_} ({n:_})" if not use_ci else f", n={n_len:_})<br><sub>ci(95) on {ci_agg}s<sub>"
     )
     # * title str na
@@ -965,6 +966,7 @@ def plot_box(
     violin: bool = False,
     x_min: float = None,
     x_max: float = None,
+    use_log: bool = False,
     png_path: Path | str = None,
 ) -> object:
     """
@@ -977,10 +979,13 @@ def plot_box(
         height: The height of the plot.
         width: The width of the plot.
         annotations: Whether to add annotations to the plot.
-        violin: Use violin plot or not
-        x_min: The minimum value for the x-axis scale (max and min must be set)
-        x_max: The maximum value for the x-axis scale (max and min must be set)
-        summary: Whether to add a summary table to the plot
+        summary: Whether to add a summary table to the plot.
+        caption: The caption for the plot.
+        title: The title of the plot.
+        violin: Use violin plot or not.
+        x_min: The minimum value for the x-axis scale (max and min must be set).
+        x_max: The maximum value for the x-axis scale (max and min must be set).
+        use_log: Use logarithmic scale for the axis.
         png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
@@ -993,7 +998,7 @@ def plot_box(
     # * drop na to keep scipy sane
     n_ = len(ser)
     ser.dropna(inplace=True)
-    n = len(ser)
+    # n = len(ser)
     # hack
     median = ser.median()
@@ -1011,7 +1016,6 @@ def plot_box(
     lvl3 = height * 0.25
     caption = _set_caption(caption)
     dict = {
         "data_frame": ser,
         "orientation": "h",
@@ -1020,7 +1024,9 @@ def plot_box(
         "width": width,
         "points": points,
         # 'box':True,
-        "title": f"{caption}[{ser.name}], n = {n_:_}({n:_})" if not title else title,
+        "log_x": use_log,   # * logarithmic scale, axis is always x
+        # "notched": True,
+        "title": f"{caption}[{ser.name}], n = {n_:_}" if not title else title,
     }
     fig = px.violin(**{**dict, "box": True}) if violin else px.box(**dict)
@@ -1119,6 +1125,8 @@ def plot_boxes(
     annotations: bool = True,
     summary: bool = True,
     title: str = None,
+    use_log: bool = False,
+    box_width: float = 0.5,
     png_path: Path | str = None,
 ) -> object:
     """
@@ -1133,6 +1141,7 @@ def plot_boxes(
         width (int): The width of the plot.
         annotations (bool): Whether to add annotations to the plot.
         summary (bool): Whether to add a summary to the plot.
+        use_log (bool): Whether to use logarithmic scale for the plot (cannot show negative values).
         png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
@@ -1170,11 +1179,14 @@ def plot_boxes(
         df,
         x=df.iloc[:, 0],
         y=df.iloc[:, 1],
+        color=df.iloc[:, 0],
         template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
         orientation="v",
         height=height,
         width=width,
         points=points,
+        log_y=use_log,
+        # color_discrete_sequence=px.colors.qualitative.Plotly,
         title=(
             f"{caption}[{df.columns[0]}] on [{df.columns[1]}], n = {len(df):_.0f}"
             if not title
@@ -1245,6 +1257,9 @@ def plot_boxes(
     fig.update_xaxes(title_text=df.columns[0])
     fig.update_yaxes(title_text=df.columns[1])
+    fig.update_layout(boxmode="group")  # Ensures boxes are not too compressed
+    fig.update_layout(showlegend=False)
+    fig.update_traces(marker=dict(size=5), width=box_width)  # Adjust width (default ~0.5)
     fig.show("png")
     if summary:

pandas_plots/tbl.py CHANGED Viewed

@@ -121,14 +121,14 @@ def describe_df(
         if df[col].notna().sum() == 0 and df[col].dtype == "float":
             df[col] = df[col].astype(str)
-    print(f"🔵 {'*'*3} df: {caption} {'*'*3}")
-    print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {np.array(df.columns)} ")
+    print(f"🔵 {'*'*3} df: {caption} {'*'*3}  ")
+    print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {np.array(df.columns)}  ")
     # print(f"🟣 shape: ({df.shape[0]:_}, {df.shape[1]}) columns: {df.columns.tolist()} ")
-    print(f"🟣 duplicates: {df.duplicated().sum():_}")
-    print(f"🟣 uniques: {wrap_text(str({col: f'{df[col].nunique():_}' for col in df})) }")
+    print(f"🟣 duplicates: {df.duplicated().sum():_}  ")
+    print(f"🟣 uniques: {wrap_text(str({col: f'{df[col].nunique():_}' for col in df})) }  ")
     # print(f"🟣 uniques: { {col: f'{df[col].nunique():_}' for col in df} }")
     # print(f"🟣 uniques: {{ {', '.join(f'{col}: {df[col].nunique():_}' for col in df)} }}")
-    print(f"🟣 missings: {wrap_text(str({col: f'{df[col].isna().sum():_}' for col in df})) }")
+    print(f"🟣 missings: {wrap_text(str({col: f'{df[col].isna().sum():_}' for col in df})) }  ")
     # print(f"🟣 missings: { {col: f'{df[col].isna().sum():_}' for col in df} }")
     # print(f"🟣 missings: {dict(df.isna().sum())}")
@@ -141,13 +141,13 @@ def describe_df(
         # unis = df[col].sort_values().unique()
         unis = list(df[col].value_counts().sort_index().index)
         # * get header
-        header = f"🟠 {col}({len(unis):_}|{df[col].dtype})"
+        header = f"🟠 {col}({len(unis):_}|{df[col].dtype})  "
         return unis, header
     # hack this block somehow interferes with the plotly renderer. so its run even when use_columns=False
     if use_columns:
-        print("--- column uniques (all)")
-        print(f"🟠 index {wrap_text(df.index.tolist()[:top_n_uniques])}")
+        print("--- column uniques (all)  ")
+        print(f"🟠 index {wrap_text(df.index.tolist()[:top_n_uniques])}  ")
     for col in df.columns[:]:
         _u, _h = get_uniques_header(col)
         # * check col type
@@ -155,10 +155,10 @@ def describe_df(
         # * wrap output
         if use_columns:
                 print(
-                    f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
+                    f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}  "
                 )
-    print("--- column stats (numeric)")
+    print("--- column stats (numeric)  ")
     # * only show numerics
     for col in df.select_dtypes("number").columns:
         _u, _h = get_uniques_header(col)
@@ -793,7 +793,7 @@ def print_summary(df: pd.DataFrame | pd.Series, show: bool = True, name: str=" "
         # * extra care for scipy metrics, these are very vulnarable to nan
         if show:
             print(
-                f"""{name} -> min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto}""")
+                f"""{name} -> min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto}  """)
         summary = {
             "min": min,

{pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pandas-plots
-Version: 0.12.20
+Version: 0.12.22
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier

pandas_plots-0.12.22.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+pandas_plots/hlp.py,sha256=i11Ep9P-u9O0bvexGTELRDUtmLzvNgNHxnkQTGf3DwQ,20838
+pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
+pandas_plots/pls.py,sha256=APvF_cEYN28TtlpNNIJ2NPTA3chTP9ZHtwnVEuZ-skI,49059
+pandas_plots/tbl.py,sha256=RJWBHeKGTAhGpVCY57TsS_dYR-FpInP-TOsKW_tU4V4,32556
+pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
+pandas_plots-0.12.22.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
+pandas_plots-0.12.22.dist-info/METADATA,sha256=0bdvEP5M1SgmSJI3QKLd8MX1RjSrwzxlXWrygQNjHaM,7564
+pandas_plots-0.12.22.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+pandas_plots-0.12.22.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
+pandas_plots-0.12.22.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
+pandas_plots-0.12.22.dist-info/RECORD,,

pandas_plots-0.12.20.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-pandas_plots/hlp.py,sha256=TxduvDztLtMuVg7rRS5mEnhktc-eHf5vI9SG1ppAbxk,20299
-pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
-pandas_plots/pls.py,sha256=S9hKQTPp8XIL9RmzvfeSQE4t2jm18bG4bejwcd-dJF4,48236
-pandas_plots/tbl.py,sha256=LxMKJh4qkGuQZ1DdCZIq1tMS26F6elsqbe_uabvQx4E,32535
-pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
-pandas_plots-0.12.20.dist-info/licenses/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
-pandas_plots-0.12.20.dist-info/METADATA,sha256=E64_SFyau96op38X5-Ld-8mVJkRr9xBKY9cJr2lu5-o,7564
-pandas_plots-0.12.20.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-pandas_plots-0.12.20.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
-pandas_plots-0.12.20.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
-pandas_plots-0.12.20.dist-info/RECORD,,

{pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/WHEEL RENAMED Viewed

File without changes

{pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/pii.py RENAMED Viewed

File without changes

{pandas_plots-0.12.20.dist-info → pandas_plots-0.12.22.dist-info}/top_level.txt RENAMED Viewed

File without changes

pandas-plots 0.12.20__py3-none-any.whl → 0.12.22__py3-none-any.whl

pandas-plots 0.12.20__py3-none-any.whl → 0.12.22__py3-none-any.whl