PyPI - scez - Versions diffs - 0.0.1__py2.py3-none-any.whl - Mend

scez 0.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scez might be problematic. Click here for more details.

Files changed (11) hide show

scez/__init__.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""scez – Single Cell Analysis, Easy Mode!"""
+from . import diffexp as de
+from . import preprocess as pp
+from . import representation as rp
+from . import utils
+import scanpy as sc
+import matplotlib.pyplot as plt
+import tomli
+# Resolve the package version. A pyproject.toml in the current working
+# directory is consulted first (the development layout). When it is absent,
+# unreadable, not valid TOML or lacks a poetry version entry, fall back to the
+# installed distribution metadata instead of failing the import.
+try:
+    with open('pyproject.toml', 'rb') as _pyproject:
+        toml_dict = tomli.load(_pyproject)
+    __version__ = toml_dict['tool']['poetry']['version']
+except (OSError, KeyError, ValueError):
+    toml_dict = {}
+    try:
+        from importlib.metadata import PackageNotFoundError, version as _dist_version
+    except ImportError:
+        # importlib.metadata is unavailable on older interpreters
+        __version__ = 'unknown'
+    else:
+        try:
+            __version__ = _dist_version('scez')
+        except PackageNotFoundError:
+            __version__ = 'unknown'
+# Scanpy defaults applied on import
+sc.settings.verbosity = 1             # verbosity: errors (0), warnings (1), info (2), hints (3)
+sc.settings.set_figure_params(dpi=100, dpi_save=300, frameon=False, figsize=(5, 5), facecolor='white')
+sc.logging.print_header()
+# Silence the "too many open figures" warning and start from a clean slate
+# https://stackoverflow.com/questions/21884271/warning-about-too-many-open-figures
+plt.rcParams.update({'figure.max_open_warning': 0})
+plt.close('all')
+# Default matplotlib font sizes
+# https://stackoverflow.com/questions/3899980/how-to-change-the-font-size-on-a-matplotlib-plot
+SMALL_SIZE = 6
+MEDIUM_SIZE = 8
+BIGGER_SIZE = 10
+plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
+plt.rc('axes', titlesize=SMALL_SIZE)     # font size of the axes title
+plt.rc('axes', labelsize=MEDIUM_SIZE)    # font size of the x and y labels
+plt.rc('xtick', labelsize=SMALL_SIZE)    # font size of the tick labels
+plt.rc('ytick', labelsize=SMALL_SIZE)    # font size of the tick labels
+plt.rc('legend', fontsize=SMALL_SIZE)    # legend font size
+plt.rc('figure', titlesize=BIGGER_SIZE)  # font size of the figure title

scez/diffexp.py ADDED Viewed

@@ -0,0 +1,199 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import anndata as ad
+from pydeseq2.dds import DeseqDataSet
+from pydeseq2.default_inference import DefaultInference
+from pydeseq2.ds import DeseqStats
+from .utils import run_adjust_text
+from adpbulk import ADPBulk
+def pseudobulk_by_clusters(adt, condition, cluster_col='leiden', method="mean"):
+    """Pseudobulk ``adt`` with ADPBulk, grouping by ``cluster_col`` and ``condition``.
+    Args:
+        adt: data object handed to ``ADPBulk``.
+        condition: name of the grouping key used alongside ``cluster_col``.
+        cluster_col: name of the cluster grouping key.
+        method: aggregation method forwarded to ``ADPBulk``.
+    Returns:
+        An ``AnnData`` whose ``X`` is the ADPBulk output matrix and whose ``obs``
+        is the ADPBulk sample metadata indexed by its ``SampleName`` column.
+    """
+    grouping = [cluster_col, condition]
+    aggregator = ADPBulk(adt, grouping, method=method)
+    # The matrix is computed before the metadata is requested, as in the ADPBulk workflow
+    bulk_matrix = aggregator.fit_transform()
+    meta_by_sample = aggregator.get_meta().set_index('SampleName')
+    return ad.AnnData(X=bulk_matrix, obs=meta_by_sample)
+def run_deseq(adata, design, tested_level, ref_level, n_cpus=8):
+    """Run PyDESeq2 on ``adata`` and return the results table for one contrast.
+    Args:
+        adata: object providing ``to_df()`` (counts, cast to int) and ``obs`` (metadata).
+        design: design factor passed to ``DeseqDataSet`` and used as the contrast variable.
+        tested_level: level of ``design`` that is tested.
+        ref_level: level of ``design`` used as reference.
+        n_cpus: number of CPUs given to ``DefaultInference``.
+    Returns:
+        ``results_df`` of the fitted ``DeseqStats`` object.
+    """
+    inference = DefaultInference(n_cpus=n_cpus)
+    integer_counts = adata.to_df().astype(int)
+    dds = DeseqDataSet(
+        counts=integer_counts,
+        metadata=adata.obs,
+        design_factors=design,
+        refit_cooks=True,
+        inference=inference,
+    )
+    dds.deseq2()
+    contrast = [design, tested_level, ref_level]
+    stats = DeseqStats(dds, contrast=contrast, inference=inference)
+    # summary() is called before the results table is read
+    stats.summary()
+    return stats.results_df
+def plot_volcano(df, title=None, labels=None, n_genes=False, side='both',
+                 font_scale=1, dot_size = 5,
+                 color = '#1f77b4', color_highlight = '#FFA500',
+                 ax = None, **kwargs):
+    """Draw a volcano plot (log2 fold change against -log10 p-value).
+    ``df`` is modified in place: a ``name`` column (copied from the index) is
+    added when absent and the ``-log10(pvalue)`` column is (re)computed.
+    Args:
+        df: DataFrame with ``log2FoldChange`` and ``pvalue`` columns.
+        title: plot title; ``'Volcano Plot'`` is used when falsy.
+        labels: index labels of ``df`` to highlight and annotate.
+        n_genes: number of rows with the largest ``-log10(pvalue)`` to highlight
+            instead of ``labels``; they are annotated with their ``name`` value.
+        side: with ``n_genes``, ``'positive'`` or ``'negative'`` restricts the
+            candidates by the sign of ``log2FoldChange``; ``'both'`` uses all rows.
+            Any other value highlights nothing.
+        font_scale: multiplier applied to every font size used here.
+        dot_size: marker size of the background points.
+        color: colour of the background points.
+        color_highlight: colour of the highlighted points.
+        ax: Axes to draw on. When omitted a new 3x3 inch figure is created,
+            laid out tightly and shown.
+        **kwargs: forwarded to the background ``ax.scatter`` call.
+    Raises:
+        ValueError: if both ``labels`` and ``n_genes`` are provided.
+    """
+    dot_size_highlight = dot_size * 1.1
+    annotate_font_size = 5 * font_scale
+    scatter_font_size = 8 * font_scale
+    label_font_size = 9 * font_scale
+    title_font_size = 10 * font_scale
+    score_col = '-log10(pvalue)'
+    if 'name' not in df.columns:
+        df['name'] = df.index.to_list()
+    df[score_col] = - np.log10(df.pvalue)
+    # Remember whether this call created the figure: `ax` is rebound just below,
+    # so testing `ax` itself at the end could never be true.
+    owns_figure = ax is None
+    if owns_figure:
+        fig, ax = plt.subplots(figsize=(3, 3))
+    # Scatter plot of every row
+    ax.scatter(
+        df['log2FoldChange'],
+        df[score_col],
+        alpha=0.9, s=dot_size, c=color,
+        **kwargs
+    )
+    # Transparent background, no grid
+    ax.set_facecolor('none')
+    ax.grid(False)
+    ax.tick_params(axis='both', which='both', labelsize=scatter_font_size)
+    ax.set_xlabel('log2FoldChange', fontsize=label_font_size)
+    ax.set_ylabel('-log10(pvalue)', fontsize=label_font_size)
+    ax.set_title(title if title else 'Volcano Plot', fontsize=title_font_size)
+    # Normalise `labels` to a list so arrays / pandas Index objects can be truth-tested
+    labels = [] if labels is None or labels is False else list(labels)
+    if labels and n_genes:
+        raise ValueError('Provide either labels or n_genes, not both!')
+    selected, texts = None, []
+    if n_genes:
+        if side == 'positive':
+            candidates = df[df['log2FoldChange'] > 0]
+        elif side == 'negative':
+            candidates = df[df['log2FoldChange'] < 0]
+        elif side == 'both':
+            candidates = df
+        else:
+            candidates = None
+        if candidates is not None:
+            # Rows are taken directly from the ranking, so a pre-existing `name`
+            # column that differs from the index cannot cause a failed lookup.
+            selected = candidates.nlargest(n_genes, score_col)
+            texts = selected['name'].to_list()
+    elif labels:
+        selected = df.loc[labels]
+        texts = selected.index.to_list()
+    # Highlight and annotate the selected points
+    if selected is not None and len(selected) > 0:
+        # Plain arrays: the annotation helper indexes its inputs by position
+        highlight_x = selected['log2FoldChange'].to_numpy()
+        highlight_y = selected[score_col].to_numpy()
+        ax.scatter(highlight_x, highlight_y, s=dot_size_highlight, c=color_highlight)
+        run_adjust_text(
+            highlight_x,
+            highlight_y,
+            texts,
+            font_size=annotate_font_size, ax=ax, use_arrow=False
+        )
+    if owns_figure:
+        plt.tight_layout()
+        plt.show()
+def plot_top_DEG_violinplot(adata, df, title=None, labels=None, n_genes=False, side='both', font_scale=1, figsize=(10, 4), **kwargs):
+    """Plot per-sample expression of selected genes as split violins with overlaid points.
+    Genes are chosen either explicitly (``labels``) or as the ``n_genes`` rows of
+    ``df`` with the largest ``-log10(pvalue)``. A ``name`` column is added to
+    ``df`` in place when absent.
+    Args:
+        adata: AnnData-like object; its ``obs`` must contain a ``sample`` column.
+        df: DataFrame indexed like ``adata``'s variables, with ``log2FoldChange`` and
+            either a ``-log10(pvalue)`` or a ``pvalue`` column.
+        title: plot title; ``'Top Differentially Expressed Genes'`` when falsy.
+        labels: index labels of ``df`` to plot.
+        n_genes: number of top-ranked genes to plot instead of ``labels``.
+        side: with ``n_genes``, ``'positive'``, ``'negative'`` or ``'both'``.
+        font_scale: multiplier applied to the font sizes used here.
+        figsize: size of the created figure.
+        **kwargs: forwarded to both ``sns.violinplot`` and ``sns.stripplot``.
+    Raises:
+        ValueError: if both or neither of ``labels`` / ``n_genes`` are given, or
+            if ``side`` is not one of the three accepted values.
+    """
+    label_font_size = 9 * font_scale
+    title_font_size = 10 * font_scale
+    score_col = '-log10(pvalue)'
+    if 'name' not in df.columns:
+        df['name'] = df.index.to_list()
+    # Normalise `labels` to a list so arrays / pandas Index objects can be truth-tested
+    labels = [] if labels is None or labels is False else list(labels)
+    if labels and n_genes:
+        raise ValueError('Provide either labels or n_genes, not both!')
+    if not labels and not n_genes:
+        raise ValueError('Provide either labels or n_genes!')
+    if labels:
+        selected_genes = df.loc[labels]
+    else:
+        # Compute the ranking column locally when the caller has not added it yet,
+        # rather than failing with a KeyError.
+        ranked = df if score_col in df.columns else df.assign(**{score_col: - np.log10(df.pvalue)})
+        if side == 'positive':
+            ranked = ranked[ranked['log2FoldChange'] > 0]
+        elif side == 'negative':
+            ranked = ranked[ranked['log2FoldChange'] < 0]
+        elif side != 'both':
+            raise ValueError(f"side must be 'positive', 'negative' or 'both', got {side!r}")
+        selected_genes = ranked.nlargest(n_genes, score_col)
+    # Work on a real copy: the variable index is rewritten below, which must not
+    # happen on a view of the caller's object.
+    subset_adata = adata[:, selected_genes.index].copy()
+    # Keep only the part of each variable name before the first underscore
+    subset_adata.var.index = subset_adata.var.index.str.split('_').str[0]
+    subset_df = subset_adata.to_df()
+    # Attach the 'sample' column, then reshape to long form for seaborn
+    merged_df = pd.merge(subset_df, adata.obs[['sample']], left_index=True, right_index=True)
+    melted_df = pd.melt(merged_df, id_vars='sample', var_name='Gene', value_name='Counts')
+    plt.figure(figsize=figsize)
+    sns.violinplot(x='Gene', y='Counts', hue='sample', data=melted_df, split=True, inner='quartile', palette='Set2', **kwargs)
+    sns.stripplot(x='Gene', y='Counts', hue='sample', data=melted_df, dodge=True, jitter=True, color='black', size=1, alpha=0.3, **kwargs)
+    plt.xticks(rotation=45, ha='right', fontsize=label_font_size)
+    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=label_font_size)
+    plt.title(title if title else 'Top Differentially Expressed Genes', fontsize=title_font_size)
+    plt.show()
+def write_top_DEGs(df, sample_id, result_dir='.', n_hits=200):
+    """Write the ``n_hits`` rows of ``df`` with the largest ``-log10(pvalue)`` to a CSV file.
+    ``df`` is modified in place: its ``-log10(pvalue)`` column is (re)computed from
+    ``df.pvalue``. The output path is ``{result_dir}/{sample_id}_top_{n_hits}.csv``.
+    """
+    score_col = '-log10(pvalue)'
+    df[score_col] = - np.log10(df.pvalue)
+    out_path = f'{result_dir}/{sample_id}_top_{n_hits}.csv'
+    top_hits = df.nlargest(n_hits, score_col)
+    top_hits.to_csv(out_path)

scez/preprocess.py ADDED Viewed

@@ -0,0 +1,74 @@
+import pandas as pd
+import scanpy as sc
+import scar
+def normalization(adata, target_sum=1e4, max_value=10, final_layer='scaled', keep_initial_layer=True):
+    """Add normalised layers to ``adata`` in place and point ``adata.X`` at one of them.
+    Layers written: optionally a copy of the incoming ``X`` (see
+    ``keep_initial_layer``), ``'log1p_norm'`` and ``'scaled'``.
+    Args:
+        adata: AnnData object, modified in place.
+        target_sum: forwarded to ``sc.pp.normalize_total``.
+        max_value: forwarded to ``sc.pp.scale``.
+        final_layer: name of the layer assigned to ``adata.X`` at the end.
+        keep_initial_layer: a string stores a copy of the incoming ``X`` under that
+            layer name; any other truthy value stores it under ``'raw_counts'``;
+            a falsy value stores nothing.
+    Raises:
+        KeyError: if ``final_layer`` is not a layer of ``adata``.
+    """
+    # Strings are checked first, so a layer name is never mistaken for the boolean flag
+    if isinstance(keep_initial_layer, str):
+        adata.layers[keep_initial_layer] = adata.X.copy()
+    elif keep_initial_layer:
+        adata.layers['raw_counts'] = adata.X.copy()
+    # normalize counts to target_sum (default 1e4) without touching adata.X
+    counts = sc.pp.normalize_total(adata, target_sum=target_sum, inplace=False)
+    # log1p transform of the normalized matrix
+    adata.layers["log1p_norm"] = sc.pp.log1p(counts["X"], copy=True)
+    # NOTE(review): both steps above leave adata.X untouched, so this scales the
+    # incoming X rather than the log1p-normalized layer - confirm this is intended.
+    adata.layers['scaled'] = sc.pp.scale(adata, max_value=max_value, copy=True).X
+    # set the final layer
+    adata.X = adata.layers[final_layer]
+def remove_ambient_rna(adata_filtered_feature_bc, adata_raw_feature_bc):
+    """Train a scAR model on the filtered counts and return a copy carrying the denoised counts.
+    Args:
+        adata_filtered_feature_bc: AnnData of the filtered feature-barcode matrix.
+        adata_raw_feature_bc: AnnData of the raw feature-barcode matrix, passed to
+            ``scar.setup_anndata``.
+    Returns:
+        A copy of ``adata_filtered_feature_bc`` with two extra layers:
+        ``'raw_counts'`` (its ``X``) and ``'scar_denoised_counts'``.
+    """
+    filtered = adata_filtered_feature_bc
+    scar.setup_anndata(adata=filtered, raw_adata=adata_raw_feature_bc, prob=0.995, kneeplot=True)
+    # NOTE(review): the original comment says scar picks up the estimated
+    # ambient_profile from adata.uns when given an AnnData object, but a DataFrame
+    # is passed here - confirm the profile from setup_anndata is actually used.
+    # ambient_profile=adata_filtered_feature_bc.uns['ambient_profile_Gene Expression'],
+    # device=device # Both cpu and cuda are supported.
+    denoiser = scar.model(raw_count=filtered.to_df(), feature_type='mRNA', sparsity=1)
+    denoiser.train(epochs=200, batch_size=64, verbose=True)
+    # After training, infer the native signal; by default batch_size = None,
+    # a batch size is set here to limit memory use
+    denoiser.inference(batch_size=256)
+    denoised_count = pd.DataFrame(
+        denoiser.native_counts,
+        index=filtered.obs_names,
+        columns=filtered.var_names
+    )
+    result = filtered.copy()
+    result.layers['raw_counts'] = result.X
+    result.layers['scar_denoised_counts'] = denoised_count.to_numpy()
+    return result
+def clustering(adata):
+    """Placeholder for a clustering step; currently does nothing and returns ``None``."""
+    # Sketch of the intended signature extension and body, kept for reference:
+    # , n_pcs=50, n_neighbors=30, use_highly_variable='Yes',
+    #     use_rep=None, resolution=None
+    # if use_highly_variable == 'Yes':
+    #     sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
+    #     sc.tl.pca(adata, svd_solver='arpack', use_highly_variable=True)
+    # else:
+    #     sc.pp.pca(adata, n_comps=n_pcs)
+    # sc.pp.neighbors(adata, use_rep=use_rep, n_neighbors=n_neighbors)#, n_pcs=n_pcs)
+    # sc.tl.umap(adata)
+    # sc.tl.leiden(adata, resolution=resolution)
+    return None

scez/representation.py ADDED Viewed

@@ -0,0 +1,44 @@
+from itertools import product
+import matplotlib.pyplot as plt
+import scanpy as sc
+import numpy as np
+def optimising_umap_layout(adata, cluster_key='leiden',MIN_DISTS = (0.1, 1, 2), SPREADS = (0.5, 1, 5)):
+    """Plot a grid of UMAPs computed with every combination of ``min_dist`` and ``spread``.
+    The UMAP is recomputed on a copy, so the embedding stored in ``adata`` is left untouched.
+    Based on https://scanpy-tutorials.readthedocs.io/en/latest/plotting/advanced.html
+    Args:
+        adata: AnnData object ready for ``sc.tl.umap``.
+        cluster_key: key used to colour the cells in each panel.
+        MIN_DISTS: ``min_dist`` values, one grid row each.
+        SPREADS: ``spread`` values, one grid column each.
+    """
+    # Copy adata not to modify UMAP in the original adata object
+    adata_temp = adata.copy()
+    n_rows, n_cols = len(MIN_DISTS), len(SPREADS)
+    # Create grid of plots, with a little extra room for the legends.
+    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
+    fig, axes = plt.subplots(
+        n_rows, n_cols, figsize=(n_cols * 3 + 2, n_rows * 3), squeeze=False
+    )
+    # Loop through different umap parameters, recomputing and replotting UMAP for each of them
+    for (i, min_dist), (j, spread) in product(enumerate(MIN_DISTS), enumerate(SPREADS)):
+        param_str = f"min_dist = {min_dist} and spread = {spread}"
+        # Recompute UMAP with new parameters
+        sc.tl.umap(adata_temp, min_dist=min_dist, spread=spread)
+        # Create plot, placing it in grid
+        sc.pl.umap(
+            adata_temp,
+            color=[cluster_key],
+            title=param_str,
+            s=40,
+            ax=axes[i, j],
+            show=False,
+        )
+    plt.tight_layout()
+    plt.show()
+    plt.close(fig)
+def random_ordering(adata):
+    """Return a reproducible random permutation of the row positions of ``adata``.
+    Args:
+        adata: any object whose ``shape[0]`` gives the number of rows (cells).
+    Returns:
+        A NumPy array holding ``0 .. adata.shape[0] - 1`` in shuffled order; the
+        same order is returned on every call for a given number of rows.
+    """
+    # A private generator seeded with 0 produces the same permutation as seeding
+    # the global NumPy generator with 0, without resetting that global state
+    # for the rest of the program.
+    rng = np.random.RandomState(0)
+    random_indices = rng.permutation(list(range(adata.shape[0])))
+    return random_indices

scez/tests/__init__.py ADDED Viewed

File without changes

scez/tests/test_scez.py ADDED Viewed

@@ -0,0 +1,18 @@
+import unittest
+import matplotlib.pyplot as plt
+import scanpy as sc
+import scez
+import tomli
+# Expected version, read from the pyproject.toml in the current working directory.
+# The context manager closes the file handle once the TOML has been parsed.
+with open('pyproject.toml', 'rb') as _pyproject:
+    toml_dict = tomli.load(_pyproject)
+version = toml_dict['tool']['poetry']['version']
+class TestScezConfig(unittest.TestCase):
+    """Checks on the configuration applied when ``scez`` is imported."""
+    def test_version(self):
+        """``scez.__version__`` equals the version read from pyproject.toml."""
+        reported = scez.__version__
+        self.assertEqual(reported, version)
+    def test_scanpy_settings(self):
+        """Scanpy verbosity compares equal to 1 once scez has been imported."""
+        verbosity = sc.settings.verbosity
+        self.assertEqual(verbosity, 1)
+# Run the test suite when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()

scez/utils.py ADDED Viewed

@@ -0,0 +1,64 @@
+import pandas as pd
+from matplotlib import pyplot as plt
+from adjustText import adjust_text
+def rank_genes_to_df(adata, n=50):
+    """Tabulate the first ``n`` ranked genes per group from ``adata.uns['rank_genes_groups']``.
+    Args:
+        adata: object whose ``uns['rank_genes_groups']`` holds ``'names'`` and
+            ``'scores'`` record arrays with one field per group.
+        n: number of leading rows to keep.
+    Returns:
+        DataFrame with columns ``<group>_names`` and ``<group>_scores`` for every group.
+    """
+    ranking = adata.uns['rank_genes_groups']
+    columns = {}
+    # Groups are the field names of the 'names' record array
+    for group in ranking['names'].dtype.names:
+        for key in ('names', 'scores'):
+            columns[group + '_' + key] = ranking[key][group]
+    return pd.DataFrame(columns).head(n)
+def add_marker_feature(adata, marker, marker_name, clusters_name, thr = 0, figsize=(10, 4)):
+    """Label cells as marker-positive or -negative in ``adata.obs`` and plot the counts per cluster.
+    Cells whose value for ``marker`` (taken from ``adata.to_df()``) is above ``thr``
+    are labelled ``'<marker>+'`` and those at or below it ``'<marker>-'`` in the obs
+    column ``marker_name``, which is created or overwritten. The counts per cluster
+    are drawn as a stacked bar chart with a log-scaled y axis, clusters ordered by
+    decreasing percentage of positive cells.
+    Args:
+        adata: AnnData object; ``adata.obs`` is modified in place.
+        marker: column of ``adata.to_df()`` to threshold.
+        marker_name: name of the obs column receiving the labels.
+        clusters_name: obs column holding the cluster assignment.
+        thr: threshold separating negative (``<= thr``) from positive (``> thr``).
+        figsize: size of the bar chart.
+    Returns:
+        The matplotlib Axes of the bar chart.
+    """
+    positive, negative = f'{marker}+', f'{marker}-'
+    # Build the dense expression column once instead of once per comparison
+    expression = adata.to_df().loc[:, marker]
+    adata.obs[marker_name] = ''
+    adata.obs.loc[expression <= thr, marker_name] = negative
+    adata.obs.loc[expression > thr, marker_name] = positive
+    # One groupby serves both label classes
+    group_sizes = adata.obs.groupby([marker_name, clusters_name]).size()
+    # The two unnamed Series become columns 0 and 1, which are then renamed
+    df = pd.concat(
+        [group_sizes[positive], group_sizes[negative]], axis=1
+    ).rename(columns={0: positive, 1: negative})
+    # Percentage of positive cells per cluster, used as bar labels and sort order
+    labels = (df[positive] / df.sum(axis=1) * 100).round(decimals=1)
+    labels = labels.sort_values(ascending=False)
+    df = df.loc[labels.index]
+    ax = df.plot.bar(stacked=True, rot=0, figsize=figsize)
+    # zip stops after len(labels) patches, so only the first patches in
+    # ax.patches get a label (presumably the '<marker>+' series - TODO confirm)
+    for rect, label in zip(ax.patches, labels):
+        ax.text(
+            rect.get_x() + rect.get_width() / 2, rect.get_height() + 5, str(label) + "%",
+            ha="center", va="bottom", fontsize=8
+        )
+    ax.set_yscale('log')
+    ax.set_ylabel('# of cells')
+    return ax
+def run_adjust_text(x, y, labels, ax=None, use_arrow=True, font_weight='bold', font_size=8):
+    """Place text labels at the given points and let adjustText reposition them.
+    Args:
+        x: x coordinates, one per label (any iterable, including a pandas Series).
+        y: y coordinates, one per label.
+        labels: label texts.
+        ax: Axes to annotate; the current Axes is used when omitted.
+        use_arrow: draw a red ``->`` arrow for each label when true.
+        font_weight: font weight of the labels.
+        font_size: font size of the labels.
+    """
+    # Draw on the requested Axes rather than on whichever Axes is current
+    if ax is None:
+        ax = plt.gca()
+    font = {'weight': font_weight, 'size': font_size}
+    # Iterating the values avoids integer indexing on label-indexed Series
+    texts = [
+        ax.text(x_pos, y_pos, label, fontdict=font, ha='center', va='center')
+        for x_pos, y_pos, label in zip(x, y, labels)
+    ]
+    if use_arrow:
+        adjust_text(texts, arrowprops=dict(arrowstyle='->', color='red'), ax = ax)
+    else:
+        adjust_text(texts, ax = ax)

scez-0.0.1.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2023 Abolfazl (Abe)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

scez-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,48 @@
+Metadata-Version: 2.1
+Name: scez
+Version: 0.0.1
+Summary: Single Cell Analysis, Easy Mode!
+License: MIT
+Author: Abe Arab
+Author-email: abarbiology@gmail.com
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Project-URL: Source, https://github.com/abearab/scez
+Description-Content-Type: text/markdown
+## scez – single cell, easy mode
+[![package](https://github.com/abearab/scez/actions/workflows/main.yml/badge.svg)](https://github.com/abearab/scez/actions/workflows/main.yml)
+### Installation
+Make sure you have mamba installed in your base environment. If not, install it with:
+```bash
+conda install mamba -n base -c conda-forge
+```
+Then, create a new conda environment with the provided `environment.yml` file and activate it. This will install all necessary dependencies for scez.
+```bash
+conda env create -f environment.yml
+conda activate scez
+```
+Finally, install scez with:
+```bash
+pip install scez
+```
+Or, if you want to install the latest version from the repository:
+```bash
+pip install git+https://github.com/abearab/scez.git
+```

scez-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+scez/__init__.py,sha256=kmN55oxZUHJfsInsnDWvAtxSNn5KnESir65NGr_ssyo,1356
+scez/diffexp.py,sha256=V8qjqpTZVaoAb-iCa3jYmPDmDBtg5AK9OqNzRow6ic0,6770
+scez/preprocess.py,sha256=3iaxACkmKQjKZdaFBLRAXz1kwkNnhbdnd_gwF8rI4nw,2572
+scez/representation.py,sha256=IW0pwa_yoKf-2a3lbtxRwLTdCjB2PkS9oN4WGCLwED0,1596
+scez/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scez/tests/test_scez.py,sha256=hoBSYB5k1WtF1hY_kyWXw1W7NolBornmTdAng6tDpjw,452
+scez/utils.py,sha256=lPmt01JILiKoBUhhxZeyhl2CfAzailTp4dIefP4FqGQ,1955
+scez-0.0.1.dist-info/LICENSE,sha256=59TS1D5RmGh66RJikXvFoq_ZW9pGirB4zUOUXLfptJM,1071
+scez-0.0.1.dist-info/METADATA,sha256=H5YLkU2mzh80sL9WrXsNX740nA7tTt4NWQFIHi93Kio,1647
+scez-0.0.1.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
+scez-0.0.1.dist-info/RECORD,,

scez-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 1.9.0
+Root-Is-Purelib: true
+Tag: py2.py3-none-any