PyPI - pycyto - Versions diffs - 0.0.1__tar.gz - Mend

pycyto 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

pycyto-0.0.1/PKG-INFO +30 -0
pycyto-0.0.1/pycyto/Findmarkers.py +74 -0
pycyto-0.0.1/pycyto/QC.py +333 -0
pycyto-0.0.1/pycyto/__init__.py +8 -0
pycyto-0.0.1/pycyto/cluster.py +46 -0
pycyto-0.0.1/pycyto/format.py +207 -0
pycyto-0.0.1/pycyto/function.py +467 -0
pycyto-0.0.1/pycyto/labeltransfer.py +65 -0
pycyto-0.0.1/pycyto/plot.py +726 -0
pycyto-0.0.1/pycyto/spatial_metacell.py +97 -0
pycyto-0.0.1/pycyto.egg-info/PKG-INFO +30 -0
pycyto-0.0.1/pycyto.egg-info/SOURCES.txt +15 -0
pycyto-0.0.1/pycyto.egg-info/dependency_links.txt +1 -0
pycyto-0.0.1/pycyto.egg-info/requires.txt +22 -0
pycyto-0.0.1/pycyto.egg-info/top_level.txt +1 -0
pycyto-0.0.1/setup.cfg +4 -0
pycyto-0.0.1/setup.py +36 -0

pycyto-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,30 @@
+Metadata-Version: 2.1
+Name: pycyto
+Version: 0.0.1
+Summary: pycyto is designed to analyze Stereo-seq data.
+Home-page: https://github.com/mgy520/pycyto
+Author: Mao Guangyao
+License: MIT License
+Requires-Python: ==3.8.18
+Requires-Dist: rpy2==3.5.15
+Requires-Dist: anndata2ri==1.3.1
+Requires-Dist: numpy==1.24.4
+Requires-Dist: diffxpy==0.7.4
+Requires-Dist: scipy==1.10.1
+Requires-Dist: pandas==2.0.3
+Requires-Dist: anndata==0.9.2
+Requires-Dist: shapely==2.0.3
+Requires-Dist: dask==2023.5.0
+Requires-Dist: numba==0.58.1
+Requires-Dist: pca==2.0.5
+Requires-Dist: openTSNE==1.0.1
+Requires-Dist: igraph==0.11.4
+Requires-Dist: sinfonia==0.0.3
+Requires-Dist: tangram==0.7.0
+Requires-Dist: scanorama==1.7.4
+Requires-Dist: matplotlib==3.7.4
+Requires-Dist: seaborn==0.13.2
+Requires-Dist: adjustText==1.1.1
+Requires-Dist: scikit-learn==1.3.2
+Requires-Dist: scikit-misc==0.2.0
+Requires-Dist: scikit-image==0.21.0

pycyto-0.0.1/pycyto/Findmarkers.py ADDED Viewed

@@ -0,0 +1,74 @@
+import diffxpy.api as de
+import numpy as np
+from sklearn.utils.sparsefuncs import mean_variance_axis
+from scipy.sparse import issparse
+def findallmarkers(cdata, cluster_key, min_pct=0.1, only_pos=True):
+    """
+        Find marker genes for each cluster in cdata.
+        Parameters:
+            cdata (Anndata): An Anndata object.
+            cluster_key (str): The key representing the cell cluster annotations in the observation metadata of the Anndata object.
+            min_pct (float, optional): The minimum percentage threshold for filtering sparse genes. Defaults to 0.1.
+            only_pos (bool, optional): Whether to consider only genes with positive log2 fold change. Defaults to True.
+        Returns:
+            cdata (Anndata): An Anndata object with marker genes identified for each cell cluster. The marker genes and related statistical information are stored in the `uns` attribute of the Anndata object under the key 'markers_all'.
+        """
+    clusters = cdata.obs[cluster_key].unique()
+    cluster_markers = {}
+    for cluster in clusters:
+        cdata.obs['group'] = ['other' if x == cluster else 'current' for x in cdata.obs[cluster_key]]
+        cluster_cells = cdata[cdata.obs['group'] == 'current']
+        other_cells = cdata[cdata.obs['group'] == 'other']
+        pct1 = np.mean(cluster_cells.X > 0, axis=0).A1 if issparse(cluster_cells.X) else np.mean(cluster_cells.X > 0, axis=0)
+        pct2 = np.mean(other_cells.X > 0, axis=0).A1 if issparse(other_cells.X) else np.mean(other_cells.X > 0, axis=0)
+        if issparse(cdata.X):
+            mean1, _ = mean_variance_axis(axis=0, X=cluster_cells.X)
+            mean2, _ = mean_variance_axis(axis=0, X=other_cells.X)
+        else:
+            mean1 = cluster_cells.X.mean(axis=0)
+            mean2 = other_cells.X.mean(axis=0)
+        selected_genes = (pct1 >= min_pct) | (pct2 >= min_pct)
+        np.float = float
+        test_result = de.test.t_test(
+            data=cdata[:, selected_genes],
+            grouping=cdata.obs["group"],
+            is_logged=True
+        )
+        result = test_result.summary()
+        result['pct1'] = pct1[selected_genes]
+        result['pct2'] = pct2[selected_genes]
+        if only_pos:
+            result = result[result['log2fc'] > 0]
+        cluster_markers[cluster] = result
+    cdata.uns['markers_all'] = cluster_markers
+    return cdata
+def find_markers_between_groups(cdata, cluster_key, group1, group2, min_pct=0.1, only_pos=True):
+    """
+        Find marker genes between two specified groups in cdata.
+        Parameters:
+            cdata (Anndata): An Anndata object containing the single-cell data.
+            cluster_key (str): The key representing the cell cluster annotations in the observation metadata of the Anndata object.
+            group1 (str): The name of the first group for comparison.
+            group2 (str): The name of the second group for comparison.
+            min_pct (float, optional): The minimum percentage threshold for filtering sparse genes. Defaults to 0.1.
+            only_pos (bool, optional): Whether to consider only genes with positive log2 fold change. Defaults to True.
+        Returns:
+            cdata(Anndata): An Anndata object with marker genes identified between the specified groups. The marker genes and related statistical information are stored in the `uns` attribute of the Anndata object under a key formatted as 'DGE_group1_group2'.
+        """
+    tmp = cdata[(cdata.obs[cluster_key] == group1) | (cdata.obs[cluster_key] == group2)].copy()
+    tmp = findallmarkers(cdata=tmp, cluster_key=cluster_key, min_pct=min_pct, only_pos=only_pos)
+    key_name = f"DGE_{group1}_{group2}"
+    cdata.uns[key_name] = tmp.uns['markers_all']
+    return cdata

pycyto-0.0.1/pycyto/QC.py ADDED Viewed

@@ -0,0 +1,333 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from pycyto import function
+def markers_detected_by_diff_ngenes_scatter(adata, markers, image_width=6, image_height=6, xlabel='n_genes',
+                                            ylabel='Markers detection rate',title=None,o=None, **kwargs):
+    """
+        Create a scatter plot to visualize the relationship between the number of genes (n_genes) and the detection rate of markers.
+        Parameters:
+            adata (AnnData):An AnnData object.
+            markers (list): A list of marker gene names.
+            image_width (int, optional): Width of the generated image. Default is 6.
+            image_height (int, optional): Height of the generated image. Default is 6.
+            xlabel (str, optional): Label for the X-axis. Default is 'n_genes'.
+            ylabel (str, optional): Label for the Y-axis. Default is 'Markers detection rate'.
+            title (str, optional): Title of the plot. If not provided, no title is displayed.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the plt.scatter function.
+        """
+    adata.obs['n_genes'] = (adata.X > 0).sum(axis=1)
+    p = []
+    for i in range(len(adata)):
+        genes = adata.var_names[adata.X[i].nonzero()[1]]
+        intersect = set(genes).intersection(set(markers))
+        p.append(len(intersect) / len(set(markers)))
+    adata.obs["Markers detection rate"] = p
+    plt.figure(figsize=(image_width, image_height))
+    plt.scatter(adata.obs['n_genes'], adata.obs['Markers detection rate'], **kwargs)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def markers_detected_by_diff_ngenes_bar(adata, markers, bin=50, image_width=12, image_height=6, xlabel='n_genes',
+                                        ylabel='Markers detection rate', title=None, o=None, **kwargs):
+    """
+        Create a bar plot to visualize the relationship between the number of genes (n_genes) and the detection rate of markers.
+        Parameters:
+            adata (AnnData): An AnnData object.
+            markers (list): A list of marker gene names.
+            bin (int, optional): The bin size for grouping n_genes values. Default is 50.
+            image_width (int, optional): Width of the generated image. Default is 12.
+            image_height (int, optional): Height of the generated image. Default is 6.
+            xlabel (str, optional): Label for the X-axis. Default is 'n_genes'.
+            ylabel (str, optional): Label for the Y-axis. Default is 'Markers detection rate'.
+            title (str, optional): Title of the plot. If not provided, no title is displayed.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the plt.bar function.
+        """
+    adata.obs['n_genes'] = (adata.X > 0).sum(axis=1)
+    bins = np.arange(0, np.ceil(adata.obs['n_genes'].max() / bin) * bin, bin).astype(int)
+    adata.obs['n_genes_subset'] = np.digitize(adata.obs['n_genes'], bins)
+    counts = []
+    for i in range(len(adata)):
+        genes = adata.var_names[adata.X[i].nonzero()[1]]
+        intersect = set(genes).intersection(set(markers))
+        counts.append(len(intersect))
+    adata.obs["Markers intersect counts"] = counts
+    subset_counts = []
+    for i in range(len(bins) - 1):
+        subset = adata[adata.obs['n_genes_subset'] == i + 1]
+        n = 0
+        for cell in subset.obs_names:
+            n += subset.obs.loc[cell, 'Markers intersect counts']
+        if len(subset.obs_names) > 0:
+            n = n / len(subset.obs_names)
+            p = n / len(set(markers))
+            subset_counts.append(p)
+        else:
+            subset_counts.append(0)
+    plt.figure(figsize=(image_width, image_height))
+    plt.bar([f"{bins[i]}-{bins[i+1]}" for i in range(len(bins) - 1)], subset_counts, color='#898989',**kwargs)
+    plt.xticks(rotation=90)
+    plt.ylabel(ylabel)
+    plt.xlabel(xlabel)
+    plt.title(title)
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def detect_markers_by_filter_cells(adata, markers, min_genes_list=list(range(0, 1400, 50)), min_cells_list=[3],
+                                   image_width=5, image_height=5, xlabel='min_genes', ylabel='Percentage',
+                                   title='Percentage of markers remaining',o=None, **kwargs):
+    """
+        Create a line plot that shows how many of the marker genes are still present in the adata after filtering,
+        as you change the min_genes and the min_cells used for filtering.(The proportion of marker genes in markers)
+        Parameters:
+            adata (AnnData): An AnnData object.
+            markers (list): A list of marker gene names.
+            min_genes_list (list, optional): A list of min_genes to consider. Default list(range(0, 1400, 50)).
+            min_cells_list (list, optional): A list of min_cells for gene filtering. Default [3].
+            image_width (int, optional): Width of the generated image. Default is 5.
+            image_height (int, optional): Height of the generated image. Default is 5.
+            xlabel (str, optional): Label for the X-axis. Default is 'min_genes'.
+            ylabel (str, optional): Label for the Y-axis. Default is 'Percentage'.
+            title (str, optional): Title of the plot. Default is 'Percentage of markers remaining'.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the plt.plot function.
+        """
+    marker_overlap = {frac: [] for frac in min_cells_list}
+    for frac in min_cells_list:
+        for min_genes in min_genes_list:
+            adata_filtered = adata.copy()
+            adata_filtered = function.filter_cells(adata_filtered, min_genes=min_genes)
+            adata_filtered = function.filter_genes(adata_filtered, min_cells=frac)
+            remaining_genes = adata_filtered.var_names.tolist()
+            overlap_genes = set(remaining_genes).intersection(set(markers))
+            marker_overlap_percent = len(overlap_genes) / len(set(markers))
+            marker_overlap[frac].append(marker_overlap_percent)
+    plt.figure(figsize=(image_width, image_height))
+    for frac in min_cells_list:
+        plt.plot(min_genes_list, marker_overlap[frac], label=f'min_cells={frac}', **kwargs)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left')
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def detect_markers_by_filter_cells2(adata, markers, min_genes_list=list(range(0, 1400, 50)), min_cells_list=[3],
+                                   image_width=5, image_height=5, xlabel='min_genes', ylabel='Percentage',
+                                   title='Percentage of markers detected',o=None, **kwargs):
+    """
+        Create a line plot that shows how many of the marker genes are still present in the adata after filtering,
+        as you change the min_genes and the min_cells used for filtering.(The proportion of marker genes in adata)
+        Parameters:
+            adata (AnnData): An AnnData object.
+            markers (list): A list of marker gene names.
+            min_genes_list (list, optional): List of minimum gene counts for filtering. Default list(range(0, 1400, 50)).
+            min_cells_list (list, optional): List of minimum cell counts for filtering. Default is [3].
+            image_width (int, optional): Width of the generated image. Default is 5.
+            image_height (int, optional): Height of the generated image. Default is 5.
+            xlabel (str, optional): Label for the X-axis. Default is 'min_genes'.
+            ylabel (str, optional): Label for the Y-axis. Default is 'Percentage'.
+            title (str, optional): Title of the plot. Default is 'Percentage of markers detected'.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the plt.plot function.
+        """
+    ...
+    gene_overlap = {frac: [] for frac in min_cells_list}
+    for frac in min_cells_list:
+        for min_genes in min_genes_list:
+            adata_filtered = adata.copy()
+            adata_filtered = function.filter_cells(adata_filtered, min_genes=min_genes)
+            adata_filtered = function.filter_genes(adata_filtered, min_cells=frac)
+            remaining_genes = adata_filtered.var_names.tolist()
+            overlap_genes = set(remaining_genes).intersection(set(markers))
+            overlap_percent = len(overlap_genes) / len(remaining_genes)
+            gene_overlap[frac].append(overlap_percent)
+    plt.figure(figsize=(image_width, image_height))
+    for frac in min_cells_list:
+        plt.plot(min_genes_list, gene_overlap[frac], label=f'min_cells={frac}', **kwargs)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left')
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def cell_numbers_by_min_genes(adata, min_genes_list=list(range(0, 1400, 50)),
+                                   image_width=5, image_height=5, xlabel='min_genes', ylabel='cell number',
+                                   title=None,o=None, **kwargs):
+    """
+        Create a line plot reflecting the number of cells remaining after filtering with different min_genes
+        Parameters:
+            adata (AnnData): An AnnData object.
+            min_genes_list (list, optional): List of minimum gene counts used for filtering cells. Default list(range(0, 1400, 50)).
+            image_width (int, optional): Width of the generated plot image. Default is 5.
+            image_height (int, optional): Height of the generated plot image. Default is 5.
+            xlabel (str, optional): Label for the X-axis of the plot. Default is 'min_genes'.
+            ylabel (str, optional): Label for the Y-axis of the plot. Default is 'cell number'.
+            title (str, optional): Title of the plot. If not provided, no title will be displayed.
+            o (str, optional): Path and filename for saving the plot image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the plt.plot function.
+        """
+    num_cells = []
+    for min_gene in min_genes_list:
+        adata_filtered = adata.copy()
+        adata_filtered = function.filter_cells(adata_filtered, min_genes=min_gene)
+        num_cells.append(adata_filtered.shape[0])
+    plt.figure(figsize=(image_width, image_height))
+    plt.plot(min_genes_list, num_cells, **kwargs)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def valid_cells_per_slice(adata, min_genes_list=list(range(0, 800, 100)), image_width=5, image_height=5, xlabel='min_genes',
+                          slice='id', library='current object', median_n_genes=[],ylabel='Percentage',title='Percentage of markers detected',
+                          o=None, **kwargs):
+    """
+        Create line plots to illustrate the relationship between the library median n_genes and the percentage of valid cells per slice.
+        Parameters:
+            adata (AnnData): An AnnData object.
+            min_genes_list (list, optional): A list of minimum gene counts for cell filtering. Default is a list from 0 to 800 with a step of 100.
+            image_width (int, optional): Width of the generated image. Default is 5.
+            image_height (int, optional): Height of the generated image. Default is 5.
+            xlabel (str, optional): Label for the X-axis. Default is 'min_genes'.
+            slice (str): Column in the observation data used for grouping slices and analyzing the percentage of valid cells.
+            library (str): Source of data for calculating the median number of genes per slice. Default is 'current object', using the observation data of the current object. Alternatively, provide a list 'median_n_genes' as reference.
+            median_n_genes (list, optional): If 'library' is not 'current object', provide a list of Library median gene counts as reference.
+            ylabel (str, optional): Label for the Y-axis. Default is 'Percentage'.
+            title (str, optional): Title of the plot. Default is 'Percentage of markers detected'.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+            **kwargs (typing.Any): Additional keyword arguments to be passed to the sns.regplot function.
+        """
+    adata.obs['n_genes'] = (adata.X > 0).sum(axis=1)
+    if library == 'current object':
+        library_median_n_genes = adata.obs.groupby(slice)['n_genes'].median().tolist()
+    elif median_n_genes is not None:
+        if not isinstance(median_n_genes, list):
+            raise ValueError("'median_n_genes' must be a list.")
+        library_median_n_genes = median_n_genes
+    else:
+        raise ValueError("If 'library' is not 'current object', 'median_n_genes' must be provided as a list.")
+    plt.figure(figsize=(image_width, image_height))
+    #colors = ['red', 'blue', 'green', 'orange', 'purple', 'gray', 'cyan', 'magenta']
+    s1 = adata.obs.groupby(slice).size()
+    s2_list = []
+    for i, min_genes in enumerate(min_genes_list):
+        adata_filtered = adata.copy()
+        adata_filtered = function.filter_cells(adata_filtered, min_genes=min_genes)
+        s2 = adata_filtered.obs.groupby(slice).size()
+        s2_list.append(s2)
+        p = s2 / s1
+        index_mapping = list(range(len(s1.index)))
+        sns.regplot(x=[library_median_n_genes[index_mapping[j]] for j in range(len(index_mapping))], y=p,
+                    scatter_kws={'s': 2},label=f'min_genes={min_genes}', **kwargs)
+        #sns.regplot(x=[library_median_n_genes[index_mapping[j]] for j in range(len(index_mapping))], y=p,scatter_kws={'s': 2}, label=f'min_genes={min_genes}', color=colors[i])
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.legend(fontsize=6)
+    plt.title(title)
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()
+def additional_seq_vs_seq(adata1, adata2, min_genes_list=list(range(0, 800, 100)), image_width=5, image_height=5, xlabel='min_genes',
+                          slice='id', min_cells=3, ylabel='valid cells P.',title='sequencing vs additional sequencing',slice_id=None,
+                          color='blue', s=15, o=None):
+    """
+        Create a scatter plot to compare the percentage of valid cells in sequencing data and additional sequencing data.
+        Parameters:
+            adata1 (AnnData): An AnnData object representing the sequencing data.
+            adata2 (AnnData): An AnnData object representing the additional sequencing data.
+            min_genes_list (list, optional): A list of minimum gene counts for cell filtering. Default is a list from 0 to 800 with a step of 100.
+            image_width (int, optional): Width of the generated image. Default is 5.
+            image_height (int, optional): Height of the generated image. Default is 5.
+            xlabel (str, optional): Label for the X-axis. Default is 'min_genes'.
+            slice (str): Column in the observation data used for grouping slices.
+            min_cells (int): min_cells to filter genes.
+            ylabel (str, optional): Label for the Y-axis. Default is 'valid cells P.'.
+            title (str, optional): Title of the plot. Default is 'sequencing vs additional sequencing'.
+            slice_id (str): The ID of the slice to analyze.
+            color (str): Color for the scatter plot markers. Default is 'blue'.
+            s (int): Size of the scatter plot markers. Default is 15.
+            o (str, optional): Path and filename for saving the image. If not provided, the image will not be saved.
+        """
+    adata1 = adata1[(adata1.obs[slice] == slice_id),]
+    adata2 = adata2[(adata2.obs[slice] == slice_id),]
+    s1_adata1 = adata1.obs.groupby(slice).size()
+    s1_adata2 = adata2.obs.groupby(slice).size()
+    p_adata1_list = []
+    p_adata2_list = []
+    for min_genes in min_genes_list:
+        adata1_filtered = adata1.copy()
+        adata2_filtered = adata2.copy()
+        adata1_filtered = function.filter_cells(adata1_filtered, min_genes=min_genes)
+        adata1_filtered = function.filter_genes(adata1_filtered, min_cells=min_cells)
+        adata2_filtered = function.filter_cells(adata2_filtered, min_genes=min_genes)
+        adata2_filtered = function.filter_genes(adata2_filtered, min_cells=min_cells)
+        s2_adata1 = adata1_filtered.obs.groupby(slice).size()
+        s2_adata2 = adata2_filtered.obs.groupby(slice).size()
+        p_adata1 = s2_adata1 / s1_adata1
+        p_adata2 = s2_adata2 / s1_adata2
+        p_adata1_list.append(p_adata1)
+        p_adata2_list.append(p_adata2)
+    plt.figure(figsize=(image_width, image_height))
+    if slice_id is None:
+        raise ValueError("Please provide a valid slice_id.")
+    p_adata1 = [p_adata1_list[i][slice_id] for i in range(len(min_genes_list))]
+    p_adata2 = [p_adata2_list[i][slice_id] for i in range(len(min_genes_list))]
+    plt.scatter(min_genes_list, p_adata1, label=f'{slice_id}', marker='o',color=color,s=s)
+    plt.scatter(min_genes_list, p_adata2, label=f'{slice_id}', marker='^',color=color,s=s)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    legend_elements = [
+        plt.Line2D([0], [0], marker='o', color='w', label=f'{slice_id}', markersize=8)]
+    legend_elements.append(
+        plt.Line2D([0], [0], marker='o', color='w', label='sequencing', markerfacecolor=color, markersize=8))
+    legend_elements.append(
+        plt.Line2D([0], [0], marker='^', color='w', label='additional sequencing', markerfacecolor=color, markersize=8))
+    plt.legend(handles=legend_elements,loc='center left', bbox_to_anchor=(1, 0.5))
+    if o is not None and isinstance(o, str):
+        plt.savefig(o)
+    plt.show()

pycyto-0.0.1/pycyto/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+from .plot import *
+from .function import *
+from .format import *
+from .QC import *
+from .labeltransfer import *
+from .cluster import *
+from .Findmarkers import *
+from .spatial_metacell import *

pycyto-0.0.1/pycyto/cluster.py ADDED Viewed

@@ -0,0 +1,46 @@
+import anndata2ri
+from rpy2.robjects import r
+import numpy as np
+anndata2ri.activate()
+def cluster(cdata, K_set, neighborhood_size, n_PCs):
+    """
+        Stereo-seq data spatial clustering.
+        Parameters:
+            cdata (Anndata): Anndata object containing the single-cell data.
+            K_set (list): List of integers specifying the range of cluster numbers to consider.
+            neighborhood_size (int): Size of the neighborhood for computing adjacency matrix.
+            n_PCs (int): Number of principal components to use.
+        Returns:
+            cdata (Anndata): An Anndata object with cluster labels assigned to observations.
+        """
+    r.assign("cdata", cdata)
+    r.assign("K_set", K_set)
+    r.assign("neighborhood_size", neighborhood_size)
+    r.assign("num_pca_components", n_PCs)
+    r('''
+    cdata <- as(cdata, "SingleCellExperiment")
+    library(SC.MEB)
+    library(SingleCellExperiment)
+    pos = as.matrix(colData(cdata)[,c("row","col")])
+    Adj_sp = getneighborhood_fast(pos, neighborhood_size)
+    y = reducedDim(cdata, "PCA")[,1:n_PCs]
+    set.seed(114)
+    beta_grid = seq(0,4,0.2)
+    parallel=TRUE
+    num_core = 3
+    PX = TRUE
+    maxIter_ICM = 10
+    maxIter = 50
+    fit = SC.MEB(y, Adj_sp, beta_grid = beta_grid, K_set= K_set, parallel=parallel, num_core = num_core, PX = PX, maxIter_ICM=maxIter_ICM, maxIter=maxIter)
+    selectKPlot(fit, K_set = K_set, criterion = "BIC")
+    out = selectK(fit, K_set = K_set, criterion = "BIC")
+    ''')
+    out = r['out']
+    best_K_label_flat = np.array(out.rx2('best_K_label')).flatten()
+    cdata.obs['cluster'] = best_K_label_flat.astype(str)
+    return cdata