PyPI - scimappro - Versions diffs - 0.1.0__py3-none-any.whl - Mend

scimappro 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

scimappro/__init__.py +1 -0
scimappro/pl/__init__.py +1 -0
scimappro/pl/archive/barplot 2.py +425 -0
scimappro/pl/archive/barplot.py +442 -0
scimappro/pl/barplot.py +559 -0
scimappro/pl/heatmap.py +505 -0
scimappro/pl/image_viewer.py +363 -0
scimappro/pl/stacked_barplot.py +407 -0
scimappro/pp/__init__.py +1 -0
scimappro/pp/archive/combat-prestream.py +249 -0
scimappro/pp/archive/rescale.py +407 -0
scimappro/pp/combat.py +331 -0
scimappro/pp/log1p.py +267 -0
scimappro/pp/mcmicro_to_scimap.py +343 -0
scimappro/pp/rescale.py +348 -0
scimappro/tl/__init__.py +1 -0
scimappro/tl/archive/cluster.py +285 -0
scimappro/tl/archive/neighCount.py +313 -0
scimappro/tl/archive/neighCount_beforestream.py +242 -0
scimappro/tl/archive/neighExp.py +275 -0
scimappro/tl/archive/neighLda.py +415 -0
scimappro/tl/archive/phenotype pre rest.py +407 -0
scimappro/tl/archive/phenotype pre stream.py +410 -0
scimappro/tl/archive/phenotype.py +431 -0
scimappro/tl/archive/spatialProximityScore.py +281 -0
scimappro/tl/archive/spatialSimilarityLookup.py +231 -0
scimappro/tl/archive/spatial_aggregate.py +201 -0
scimappro/tl/archive/spatial_cooccurrence.py +289 -0
scimappro/tl/archive/spatial_distance.py +200 -0
scimappro/tl/archive/spatial_distance_prestrream.py +138 -0
scimappro/tl/archive/umap.py +209 -0
scimappro/tl/cluster.py +484 -0
scimappro/tl/foldChange.py +253 -0
scimappro/tl/neighCount.py +498 -0
scimappro/tl/neighExp.py +323 -0
scimappro/tl/neighLDA.py +587 -0
scimappro/tl/neighNMF.py +431 -0
scimappro/tl/phenotype.py +540 -0
scimappro/tl/spatialProximityScore.py +321 -0
scimappro/tl/spatialSimilarityLookup.py +426 -0
scimappro/tl/spatial_aggregate.py +191 -0
scimappro/tl/spatial_cooccurrence.py +342 -0
scimappro/tl/spatial_distance.py +219 -0
scimappro/tl/umap.py +224 -0
scimappro-0.1.0.dist-info/METADATA +23 -0
scimappro-0.1.0.dist-info/RECORD +47 -0
scimappro-0.1.0.dist-info/WHEEL +4 -0

scimappro/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from . import pp as pp

scimappro/pl/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .barplot import barplot

scimappro/pl/archive/barplot 2.py ADDED Viewed

@@ -0,0 +1,425 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Refactored stacked bar plot function with cap_anndata streaming support.
+Optimized for memory usage and computational speed for large datasets.
+Author: Ajit Johnson Nirmal (refactored)
+"""
+import os
+import sys
+import multiprocessing
+import argparse
+from typing import Optional, Tuple, List, Union
+from pathlib import Path
+import pandas as pd  # For Pandas-based operations
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+import matplotlib.colors as mcolors
+import plotly.graph_objects as go
+import plotly.io as pio
+from tqdm import tqdm
+import polars as pl
+# Optional: Import anndata if available.
+try:
+    import anndata
+except ImportError:
+    anndata = None
+# Set Plotly renderer and font settings.
+pio.renderers.default = 'browser'
+rcParams['pdf.fonttype'] = 42
+def barplot(
+    adata,
+    xAxis: str = 'imageid',
+    yAxis: str = 'phenotype',
+    subsetXAxis: Optional[List[str]] = None,
+    subsetYAxis: Optional[List[str]] = None,
+    orderXAxis: Optional[List[str]] = None,
+    orderYAxis: Optional[List[str]] = None,
+    method: str = 'percent',
+    plottingMode: str = 'standard',  # 'standard' or 'interactive'
+    figSize: Optional[Tuple[float, float]] = None,
+    fontSize: Optional[int] = None,
+    color: Optional[Union[str, List[str], dict]] = None,
+    palette: Optional[str] = None,  # If None, auto palette is selected.
+    alpha: float = 1.0,
+    barWidth: Optional[float] = None,  # Default bar width.
+    streamData: bool = False,          # New parameter for streaming.
+    maxWorkers: Optional[int] = None,
+    verbose: bool = False,
+    outputDir: Optional[str] = None,
+    show: bool = True,
+    dpi: int = 300,
+    transparent: bool = False,
+    watermark: bool = True,
+    matplotlib_bbox_to_anchor=(1, 1.02),
+    matplotlib_legend_loc=2,
+    ax: Optional[plt.Axes] = None,
+    **kwargs,
+):
+    """
+    Generate a stacked bar plot visualizing the distribution of categories within a specified
+    categorical column across different groups in an AnnData object (or .h5ad file).
+    When streamData=True, the function uses cap_anndata to stream the AnnData object from disk.
+    Only the necessary columns from .obs are read into memory. After plotting, the streamed file
+    is flushed and closed, and the function returns None.
+    The outputDir parameter is interpreted as follows:
+      - If outputDir (a string) has a file suffix (e.g. ".pdf", ".html", ".png", etc.) and is not an
+        existing directory, it is treated as the full output file path.
+      - Otherwise, it is treated as a directory, and a default file name is appended based on the plotting mode.
+    """
+    # Determine maxWorkers if not provided.
+    if maxWorkers is None:
+        maxWorkers = max(1, multiprocessing.cpu_count() - 1)
+        if verbose:
+            print(f"Using {maxWorkers} parallel worker(s) for data processing.")
+    # Load the AnnData object.
+    if streamData:
+        try:
+            from cap_anndata import read_h5ad
+        except ImportError:
+            raise ImportError("The 'cap_anndata' package is required when streamData=True.")
+        if isinstance(adata, str):
+            if verbose:
+                print(f"Streaming AnnData from disk: {adata}")
+            cap_adata = read_h5ad(adata, edit=False)
+        else:
+            cap_adata = adata
+        cap_adata.read_obs(columns=[xAxis, yAxis])
+        obs_df = cap_adata.obs.copy().astype(str)
+    else:
+        # If adata is a string in non-streaming mode, load it using anndata.
+        if isinstance(adata, str):
+            if verbose:
+                print(f"Loading AnnData from file path: {adata}")
+            if anndata is None:
+                raise ImportError("The 'anndata' package is required to load h5ad files.")
+            adata = anndata.read_h5ad(adata)
+        obs_df = adata.obs[[xAxis, yAxis]].astype(str)
+    # Convert obs DataFrame to a Polars DataFrame.
+    if verbose:
+        print("Converting data to Polars DataFrame...")
+    plDF = pl.from_pandas(obs_df)
+    # Apply subsetting.
+    if subsetXAxis is not None:
+        if isinstance(subsetXAxis, str):
+            subsetXAxis = [subsetXAxis]
+        plDF = plDF.filter(pl.col(xAxis).is_in(subsetXAxis))
+    if subsetYAxis is not None:
+        if isinstance(subsetYAxis, str):
+            subsetYAxis = [subsetYAxis]
+        plDF = plDF.filter(pl.col(yAxis).is_in(subsetYAxis))
+    # Grouping and aggregation.
+    if verbose:
+        print("Performing groupby and aggregation...")
+    lazyDF = plDF.lazy()
+    if hasattr(lazyDF, "group_by"):
+        groupedLazy = lazyDF.group_by([xAxis, yAxis]).agg(pl.count().alias("count"))
+        groupedDF = groupedLazy.collect()
+    else:
+        if verbose:
+            print("Lazy group_by not available; using eager group_by instead.")
+        groupedDF = plDF.group_by([xAxis, yAxis]).agg(pl.count().alias("count"))
+    # Compute percentages or absolute counts.
+    if method == 'percent':
+        if verbose:
+            print("Calculating percentage proportions for each group...")
+        totals = groupedDF.group_by([xAxis]).agg(pl.col("count").sum().alias("total"))
+        groupedDF = groupedDF.join(totals, on=xAxis)
+        groupedDF = groupedDF.with_columns((pl.col("count") / pl.col("total")).alias("value"))
+    elif method == 'absolute':
+        groupedDF = groupedDF.with_columns(pl.col("count").alias("value"))
+    else:
+        raise ValueError("method should be either 'percent' or 'absolute'")
+    # Convert aggregated data to a Pandas DataFrame for pivoting.
+    if verbose:
+        print("Converting aggregated data to Pandas DataFrame for pivoting...")
+    df = groupedDF.to_pandas()
+    # Apply custom ordering if provided.
+    if orderXAxis is not None:
+        df[xAxis] = pd.Categorical(df[xAxis], categories=orderXAxis, ordered=True)
+    if orderYAxis is not None:
+        df[yAxis] = pd.Categorical(df[yAxis], categories=orderYAxis, ordered=True)
+    df.sort_values(by=[xAxis, yAxis], inplace=True)
+    # Create pivot table.
+    pivotDF = df.pivot(index=xAxis, columns=yAxis, values="value").fillna(0)
+    if verbose:
+        print("Pivot table created:")
+        print(pivotDF)
+    if pivotDF.empty:
+        if verbose:
+            print("Warning: The pivot table is empty. Check your data or subset parameters.")
+        if streamData:
+            if verbose:
+                print("Flushing and closing streaming AnnData object.")
+            try:
+                cap_adata.overwrite(["obs"])
+            except Exception:
+                pass
+            if hasattr(cap_adata, "file"):
+                cap_adata.file.close()
+        return None
+    # --- Plotting ---
+    if plottingMode not in ['standard', 'interactive']:
+        raise ValueError("plottingMode must be either 'standard' or 'interactive'")
+    if plottingMode == 'standard':
+        if verbose:
+            print("Creating standard matplotlib plot...")
+        nCats = len(pivotDF.columns)
+        # Auto-select palette.
+        if color is None:
+            if palette is None:
+                if nCats <= 9:
+                    cmap = plt.get_cmap("Set1")
+                elif nCats <= 20:
+                    cmap = plt.get_cmap("tab20")
+                else:
+                    cmap = plt.get_cmap("gist_ncar")
+            else:
+                cmap = plt.get_cmap(palette)
+        elif isinstance(color, dict):
+            colorMapping = color
+        else:
+            cmap = color
+        # Smart figure sizing: narrow width.
+        if ax is None:
+            if figSize is None:
+                nGroups = len(pivotDF.index)
+                figWidth = max(6, nGroups * 0.3)
+                figSize = (figWidth, 6)
+            fig, ax = plt.subplots(figsize=figSize)
+        else:
+            fig = ax.figure
+        if barWidth is None:
+            barWidth = 0.95
+        xPositions = range(len(pivotDF.index))
+        bottoms = [0] * len(pivotDF.index)
+        categories = list(pivotDF.columns)
+        if (color is None or (not isinstance(color, dict) and not isinstance(cmap, str) and hasattr(cmap, '__call__'))):
+            norm = plt.Normalize(vmin=0, vmax=nCats - 1)
+        # Remove extraneous kwargs.
+        keys_to_remove = [
+            "plottingMode",
+            "matplotlib_cmap",
+            "matplotlib_bbox_to_anchor",
+            "matplotlib_legend_loc",
+            "outputDir"
+        ]
+        for key in keys_to_remove:
+            kwargs.pop(key, None)
+        for idx, cat in enumerate(tqdm(categories, desc="Plotting categories", disable=not verbose)):
+            if isinstance(color, dict):
+                catColor = colorMapping.get(cat, plt.get_cmap("Set1")(norm(idx)) if 'norm' in locals() else None)
+            elif isinstance(cmap, list):
+                catColor = cmap[idx % len(cmap)]
+            elif isinstance(cmap, str):
+                catColor = cmap
+            elif hasattr(cmap, '__call__'):
+                catColor = mcolors.to_hex(cmap(norm(idx)))
+            else:
+                catColor = None
+            values = pivotDF[cat].values
+            ax.bar(
+                xPositions,
+                values,
+                bottom=bottoms,
+                label=str(cat),
+                color=catColor,
+                alpha=alpha,
+                width=barWidth,
+                **kwargs
+            )
+            bottoms = [bottoms[i] + values[i] for i in range(len(values))]
+        handles, labels = ax.get_legend_handles_labels()
+        if len(handles) > 1:
+            handles = list(reversed(handles))
+            labels = list(reversed(labels))
+        ax.legend(handles, labels, bbox_to_anchor=matplotlib_bbox_to_anchor, loc=matplotlib_legend_loc)
+        ax.set_xticks(xPositions)
+        ax.set_xticklabels(pivotDF.index, rotation=45, ha="right", fontsize=fontSize if fontSize else 10)
+        ax.set_xlabel(xAxis, fontsize=fontSize if fontSize else 12)
+        ax.set_ylabel("Percentage" if method == 'percent' else "Count", fontsize=fontSize if fontSize else 12)
+        ax.set_facecolor("white")
+        fig.patch.set_facecolor("white")
+        ax.grid(False)
+        plt.subplots_adjust(bottom=0.3)
+        if watermark:
+            fig.text(
+                0.99, 0.02, "made with scimap",
+                transform=fig.transFigure,
+                fontsize=8,
+                color="#AAAAAA",
+                ha='right',
+                va='bottom',
+                alpha=0.5
+            )
+        plt.tight_layout(rect=[0, 0.05, 1, 1])
+        # Handle output path using pathlib.
+        if outputDir:
+            out_path = Path(outputDir)
+            if out_path.suffix and not out_path.is_dir():
+                fullPath = str(out_path)
+            else:
+                out_path.mkdir(parents=True, exist_ok=True)
+                fullPath = str(out_path / "scimap_barplot.pdf")
+            fig.savefig(fullPath, dpi=dpi, transparent=transparent)
+            if verbose:
+                print(f"Saved plot to {fullPath}")
+        if show:
+            plt.show()
+        else:
+            plt.close(fig)
+    else:
+        if verbose:
+            print("Creating interactive Plotly plot...")
+        fig = go.Figure()
+        nCats = len(pivotDF.columns)
+        if color is None:
+            if palette is None:
+                if nCats <= 9:
+                    cmap = plt.get_cmap("Set1")
+                elif nCats <= 20:
+                    cmap = plt.get_cmap("tab20")
+                else:
+                    cmap = plt.get_cmap("gist_ncar")
+            else:
+                cmap = plt.get_cmap(palette)
+            norm = plt.Normalize(vmin=0, vmax=nCats - 1)
+        xCategories = pivotDF.index.tolist()
+        for idx, cat in enumerate(tqdm(pivotDF.columns, desc="Plotting categories", disable=not verbose)):
+            if isinstance(color, dict):
+                catColor = color.get(cat, None)
+            elif isinstance(color, list):
+                catColor = color[idx % len(color)]
+            elif isinstance(color, str):
+                catColor = color
+            elif cmap is not None:
+                catColor = mcolors.to_hex(cmap(norm(idx)))
+            else:
+                catColor = None
+            fig.add_trace(
+                go.Bar(
+                    x=xCategories,
+                    y=pivotDF[cat].values,
+                    name=str(cat),
+                    marker_color=catColor,
+                    opacity=alpha,
+                    **kwargs
+                )
+            )
+        fig.update_layout(
+            barmode='stack',
+            xaxis_title=xAxis,
+            yaxis_title="Percentage" if method == 'percent' else "Count",
+            plot_bgcolor='rgba(0, 0, 0, 0)',
+            paper_bgcolor='rgba(0, 0, 0, 0)',
+            xaxis=dict(showline=True, linecolor='black', linewidth=1, showgrid=False, zeroline=False),
+            yaxis=dict(showline=True, linecolor='black', linewidth=1, showgrid=False, zeroline=False),
+            margin=dict(b=150)
+        )
+        if watermark:
+            fig.add_annotation(
+                text="made with scimap",
+                xref="paper", yref="paper",
+                x=0.99, y=-0.2,
+                showarrow=False,
+                font=dict(size=8, color="#AAAAAA"),
+                opacity=0.5,
+                xanchor='right',
+                yanchor='top'
+            )
+        if outputDir:
+            out_path = Path(outputDir)
+            if out_path.suffix and not out_path.is_dir():
+                if out_path.suffix.lower() != ".html":
+                    print("Warning: Interactive mode requires a .html file. Changing extension to .html.")
+                    fullPath = str(out_path.with_suffix(".html"))
+                else:
+                    fullPath = str(out_path)
+            else:
+                out_path.mkdir(parents=True, exist_ok=True)
+                fullPath = str(out_path / "scimap_barplot.html")
+            fig.write_html(fullPath)
+            if verbose:
+                print(f"Saved interactive plot to {fullPath}")
+        if show:
+            fig.show()
+    if streamData:
+        if verbose:
+            print("Flushing and closing streaming AnnData object.")
+        try:
+            cap_adata.overwrite(["obs"])
+        except Exception:
+            pass
+        if hasattr(cap_adata, "file"):
+            cap_adata.file.close()
+        return None
+    return (fig, ax) if plottingMode == 'standard' else fig
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate a stacked bar plot from an AnnData object or h5ad file."
+    )
+    parser.add_argument("adata", type=str,
+                        help="Path to an h5ad file or identifier for an AnnData object.")
+    parser.add_argument("--xAxis", type=str, default="imageid",
+                        help="Column for x-axis categories.")
+    parser.add_argument("--yAxis", type=str, default="phenotype",
+                        help="Column for y-axis categories.")
+    parser.add_argument("--method", type=str, default="percent",
+                        choices=["percent", "absolute"], help="Plotting method.")
+    parser.add_argument("--plottingMode", type=str, default="standard",
+                        choices=["standard", "interactive"], help="Plotting mode.")
+    parser.add_argument("--outputDir", type=str, default=None,
+                        help="Output file path (if it has a suffix) or directory.")
+    parser.add_argument("--verbose", action="store_true",
+                        help="Enable verbose output.")
+    parser.add_argument("--streamData", action="store_true",
+                        help="Enable streaming of AnnData using cap_anndata.")
+    args = parser.parse_args()
+    barplot(
+        adata=args.adata,
+        xAxis=args.xAxis,
+        yAxis=args.yAxis,
+        method=args.method,
+        plottingMode=args.plottingMode,
+        streamData=args.streamData,
+        outputDir=args.outputDir,
+        verbose=args.verbose
+    )
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == '__main__':
+    main()