PyPI - sjanpy - Versions diffs - 0.0.1__tar.gz - Mend

sjanpy 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

sjanpy-0.0.1/PKG-INFO +179 -0
sjanpy-0.0.1/README.md +139 -0
sjanpy-0.0.1/pyproject.toml +58 -0
sjanpy-0.0.1/setup.cfg +4 -0
sjanpy-0.0.1/sjanpy/__init__.py +31 -0
sjanpy-0.0.1/sjanpy/ml/__init__.py +20 -0
sjanpy-0.0.1/sjanpy/ml/build_dataset.py +908 -0
sjanpy-0.0.1/sjanpy/pl/__init__.py +17 -0
sjanpy-0.0.1/sjanpy/pl/barplot.py +85 -0
sjanpy-0.0.1/sjanpy/pl/dotplot.py +370 -0
sjanpy-0.0.1/sjanpy/pl/embedding.py +95 -0
sjanpy-0.0.1/sjanpy/pl/nebulosa.py +173 -0
sjanpy-0.0.1/sjanpy/pl/volcano.py +160 -0
sjanpy-0.0.1/sjanpy/pp/__init__.py +6 -0
sjanpy-0.0.1/sjanpy/pp/genecraft.py +222 -0
sjanpy-0.0.1/sjanpy/tl/__init__.py +7 -0
sjanpy-0.0.1/sjanpy/tl/deg.py +364 -0
sjanpy-0.0.1/sjanpy/tl/pres.py +169 -0
sjanpy-0.0.1/sjanpy.egg-info/PKG-INFO +179 -0
sjanpy-0.0.1/sjanpy.egg-info/SOURCES.txt +21 -0
sjanpy-0.0.1/sjanpy.egg-info/dependency_links.txt +1 -0
sjanpy-0.0.1/sjanpy.egg-info/requires.txt +22 -0
sjanpy-0.0.1/sjanpy.egg-info/top_level.txt +1 -0

sjanpy-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,179 @@
+Metadata-Version: 2.4
+Name: sjanpy
+Version: 0.0.1
+Summary: Subjacent Analysis Toolkits for Single-Cell Omics in Python
+License-Expression: MIT
+Project-URL: Documentation, https://chansigit.github.io/sjanpy/
+Project-URL: Repository, https://github.com/chansigit/sjanpy
+Keywords: single-cell,scRNA-seq,visualization,bioinformatics,scanpy
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: Scientific/Engineering :: Visualization
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: scanpy
+Requires-Dist: seaborn
+Requires-Dist: matplotlib
+Requires-Dist: scipy
+Requires-Dist: scikit-learn
+Requires-Dist: statsmodels
+Requires-Dist: adjustText
+Requires-Dist: plotly
+Requires-Dist: anndata
+Provides-Extra: ml
+Requires-Dist: torch; extra == "ml"
+Requires-Dist: h5py; extra == "ml"
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: sphinx>=7.0; extra == "docs"
+Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
+# sjanpy
+[![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue)](https://www.python.org)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+**Subjacent Analysis Toolkits for Single-Cell Omics in Python**
+sjanpy extends the [Scanpy](https://scanpy.readthedocs.io/) / [AnnData](https://anndata.readthedocs.io/) ecosystem with publication-quality visualizations, fast differential expression analysis, and preprocessing utilities for single-cell RNA-seq.
+## Package Structure
+sjanpy follows the Scanpy subpackage convention:
+| Subpackage | Purpose | Key Functions |
+|---|---|---|
+| `sjanpy.pl` | **Plotting** | Embedding, dot plot, bar plot, volcano plot, Nebulosa density |
+| `sjanpy.tl` | **Tools** | Differential expression, Pearson residuals normalization |
+| `sjanpy.pp` | **Preprocessing** | Organism-specific gene filtering (human, mouse, rat) |
+| `sjanpy.ml` | **Machine Learning** | Chunked `.pt` dataset builder from h5ad files |
+## Installation
+```bash
+git clone https://github.com/chansigit/sjanpy.git
+cd sjanpy
+pip install .
+```
+## Quick Start
+### Embedding visualization
+```python
+import scanpy as sc
+from sjanpy.pl import fancy_embedding_pro
+adata = sc.datasets.pbmc3k_processed()
+fancy_embedding_pro(adata, basis='umap', color='louvain')
+```
+### Differential expression
+```python
+from sjanpy.tl import fast_two_group_deg
+from sjanpy.pl import plot_volcano
+deg = fast_two_group_deg(adata, label_col='louvain', lst1=['B cells'], lst2=['CD4 T cells'])
+plot_volcano(deg, logfc_col='log2FC', padj_col='padj')
+```
+### Nebulosa density
+Traditional scatter plots obscure gene expression patterns due to point overlap. Nebulosa uses weighted kernel density estimation to reveal true expression distributions:
+```python
+from sjanpy.pl import nebulosa_density
+nebulosa_density(adata, coord_key='X_umap', gene='CD3D', show=True)
+```
+| Standard scatter | Nebulosa density |
+|---|---|
+| <img width="328" alt="before" src="https://github.com/user-attachments/assets/4c481b00-583b-4e7e-b064-95db59160024" /> | <img width="328" alt="after" src="https://github.com/user-attachments/assets/d4e2cc47-7d73-40d1-9b81-8360083780d1" /> |
+### Gene filtering
+```python
+from sjanpy.pp import filter_human_sc_genes
+# Mask artifact genes from HVG selection (predicted, non-coding, IG variable, etc.)
+adata = filter_human_sc_genes(adata, mask_hvg_only=True)
+```
+### Complex dot plot
+```python
+from sjanpy.pl import complex_dotplot
+complex_dotplot(
+    adata,
+    genes=marker_genes,
+    groupby='cell_type',
+    z_score=True,
+    cluster_rows=True,
+    cmap='RdBu_r',
+)
+```
+## Module Reference
+### `sjanpy.pl` — Plotting
+| Function | Description |
+|---|---|
+| `fancy_embedding_pro` | UMAP/t-SNE with density overlays, auto-labels, equal-aspect axes |
+| `complex_dotplot` | Dot plot with hierarchical clustering and dendrograms |
+| `fan_dotplot` | Polar/radial dot plot layout |
+| `plot_stacked_bar_repel` | Stacked bar plot with smart label placement |
+| `plot_volcano` | Volcano plot for DEG visualization |
+| `plot_cluster_deg_jitter_highlight` | Per-cluster jitter plot with gene annotations |
+| `nebulosa_density` | Weighted KDE density on embeddings |
+| `wkde2d` / `wkde3d` | Low-level 2D/3D weighted kernel density estimation |
+### `sjanpy.tl` — Tools
+| Function / Class | Description |
+|---|---|
+| `fast_two_group_deg` | Vectorized Welch's t-test DEG between two groups |
+| `compute_nested_deg_df` | Within-cluster DEG between two conditions |
+| `clip_logfc_in_nested_deg_df` | Per-cluster quantile clipping of logFC |
+| `generate_highlight_dict` | Select genes to label (top-N, k-times, manual) |
+| `PearsonResidualsScaler` | NB-based Pearson residuals normalization |
+### `sjanpy.pp` — Preprocessing
+| Function | Description |
+|---|---|
+| `filter_human_sc_genes` | Remove/mask artifact genes (human) |
+| `filter_mouse_sc_genes` | Remove/mask artifact genes (mouse) |
+| `filter_rat_sc_genes` | Remove/mask artifact genes (rat) |
+| `get_background_gene_dict` | Catalog artifact gene categories in a dataset |
+### `sjanpy.ml` — Machine Learning
+| Function | Description |
+|---|---|
+| `build_dataset` | Stream h5ad → chunked `.pt` files with condition vectors |
+| `build_condition_schema` | Build encoding schema from condition DSL specs |
+| `process_file` | Process a single h5ad file into chunks |
+## Dependencies
+Core: `numpy`, `pandas`, `scipy`, `matplotlib`, `seaborn`, `scanpy`, `anndata`, `adjustText`, `statsmodels`, `scikit-learn`, `plotly` (3D visualization)
+Optional (installed with the `ml` extra, e.g. `pip install ".[ml]"`): `torch` / `h5py` (ML dataset building)
+## License
+MIT

sjanpy-0.0.1/README.md ADDED Viewed

@@ -0,0 +1,139 @@
+# sjanpy
+[![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue)](https://www.python.org)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+**Subjacent Analysis Toolkits for Single-Cell Omics in Python**
+sjanpy extends the [Scanpy](https://scanpy.readthedocs.io/) / [AnnData](https://anndata.readthedocs.io/) ecosystem with publication-quality visualizations, fast differential expression analysis, and preprocessing utilities for single-cell RNA-seq.
+## Package Structure
+sjanpy follows the Scanpy subpackage convention:
+| Subpackage | Purpose | Key Functions |
+|---|---|---|
+| `sjanpy.pl` | **Plotting** | Embedding, dot plot, bar plot, volcano plot, Nebulosa density |
+| `sjanpy.tl` | **Tools** | Differential expression, Pearson residuals normalization |
+| `sjanpy.pp` | **Preprocessing** | Organism-specific gene filtering (human, mouse, rat) |
+| `sjanpy.ml` | **Machine Learning** | Chunked `.pt` dataset builder from h5ad files |
+## Installation
+```bash
+git clone https://github.com/chansigit/sjanpy.git
+cd sjanpy
+pip install .
+```
+## Quick Start
+### Embedding visualization
+```python
+import scanpy as sc
+from sjanpy.pl import fancy_embedding_pro
+adata = sc.datasets.pbmc3k_processed()
+fancy_embedding_pro(adata, basis='umap', color='louvain')
+```
+### Differential expression
+```python
+from sjanpy.tl import fast_two_group_deg
+from sjanpy.pl import plot_volcano
+deg = fast_two_group_deg(adata, label_col='louvain', lst1=['B cells'], lst2=['CD4 T cells'])
+plot_volcano(deg, logfc_col='log2FC', padj_col='padj')
+```
+### Nebulosa density
+Traditional scatter plots obscure gene expression patterns due to point overlap. Nebulosa uses weighted kernel density estimation to reveal true expression distributions:
+```python
+from sjanpy.pl import nebulosa_density
+nebulosa_density(adata, coord_key='X_umap', gene='CD3D', show=True)
+```
+| Standard scatter | Nebulosa density |
+|---|---|
+| <img width="328" alt="before" src="https://github.com/user-attachments/assets/4c481b00-583b-4e7e-b064-95db59160024" /> | <img width="328" alt="after" src="https://github.com/user-attachments/assets/d4e2cc47-7d73-40d1-9b81-8360083780d1" /> |
+### Gene filtering
+```python
+from sjanpy.pp import filter_human_sc_genes
+# Mask artifact genes from HVG selection (predicted, non-coding, IG variable, etc.)
+adata = filter_human_sc_genes(adata, mask_hvg_only=True)
+```
+### Complex dot plot
+```python
+from sjanpy.pl import complex_dotplot
+complex_dotplot(
+    adata,
+    genes=marker_genes,
+    groupby='cell_type',
+    z_score=True,
+    cluster_rows=True,
+    cmap='RdBu_r',
+)
+```
+## Module Reference
+### `sjanpy.pl` — Plotting
+| Function | Description |
+|---|---|
+| `fancy_embedding_pro` | UMAP/t-SNE with density overlays, auto-labels, equal-aspect axes |
+| `complex_dotplot` | Dot plot with hierarchical clustering and dendrograms |
+| `fan_dotplot` | Polar/radial dot plot layout |
+| `plot_stacked_bar_repel` | Stacked bar plot with smart label placement |
+| `plot_volcano` | Volcano plot for DEG visualization |
+| `plot_cluster_deg_jitter_highlight` | Per-cluster jitter plot with gene annotations |
+| `nebulosa_density` | Weighted KDE density on embeddings |
+| `wkde2d` / `wkde3d` | Low-level 2D/3D weighted kernel density estimation |
+### `sjanpy.tl` — Tools
+| Function / Class | Description |
+|---|---|
+| `fast_two_group_deg` | Vectorized Welch's t-test DEG between two groups |
+| `compute_nested_deg_df` | Within-cluster DEG between two conditions |
+| `clip_logfc_in_nested_deg_df` | Per-cluster quantile clipping of logFC |
+| `generate_highlight_dict` | Select genes to label (top-N, k-times, manual) |
+| `PearsonResidualsScaler` | NB-based Pearson residuals normalization |
+### `sjanpy.pp` — Preprocessing
+| Function | Description |
+|---|---|
+| `filter_human_sc_genes` | Remove/mask artifact genes (human) |
+| `filter_mouse_sc_genes` | Remove/mask artifact genes (mouse) |
+| `filter_rat_sc_genes` | Remove/mask artifact genes (rat) |
+| `get_background_gene_dict` | Catalog artifact gene categories in a dataset |
+### `sjanpy.ml` — Machine Learning
+| Function | Description |
+|---|---|
+| `build_dataset` | Stream h5ad → chunked `.pt` files with condition vectors |
+| `build_condition_schema` | Build encoding schema from condition DSL specs |
+| `process_file` | Process a single h5ad file into chunks |
+## Dependencies
+Core: `numpy`, `pandas`, `scipy`, `matplotlib`, `seaborn`, `scanpy`, `anndata`, `adjustText`, `statsmodels`, `scikit-learn`, `plotly` (3D visualization)
+Optional (installed with the `ml` extra, e.g. `pip install ".[ml]"`): `torch` / `h5py` (ML dataset building)
+## License
+MIT

sjanpy-0.0.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,58 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "sjanpy"
+version = "0.0.1"
+description = "Subjacent Analysis Toolkits for Single-Cell Omics in Python"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.8"
+keywords = ["single-cell", "scRNA-seq", "visualization", "bioinformatics", "scanpy"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Visualization",
+]
+dependencies = [
+    "numpy",
+    "pandas",
+    "scanpy",
+    "seaborn",
+    "matplotlib",
+    "scipy",
+    "scikit-learn",
+    "statsmodels",
+    "adjustText",
+    "plotly",
+    "anndata",
+]
+[project.optional-dependencies]
+ml = [
+    "torch",
+    "h5py",
+]
+dev = [
+    "pytest",
+]
+docs = [
+    "sphinx>=7.0",
+    "sphinx-rtd-theme>=2.0",
+]
+[project.urls]
+Documentation = "https://chansigit.github.io/sjanpy/"
+Repository = "https://github.com/chansigit/sjanpy"
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["sjanpy*"]

sjanpy-0.0.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

sjanpy-0.0.1/sjanpy/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+sjanpy - A collection of Python utilities for single-cell analysis visualization
+"""
+__version__ = "0.0.1"
+from . import pl
+from . import tl
+from . import pp
+from . import ml
+# Backward-compatible lazy imports for old flat API, implemented as a module-level __getattr__ (PEP 562):
+# e.g. `from sjanpy import nebulosa` still works and resolves to the submodule `sjanpy.pl.nebulosa`
+def __getattr__(name):  # Python calls this only when normal attribute lookup on the package fails
+    _compat = {  # legacy flat name -> "<subpackage>.<module>" path, relative to this package
+        "nebulosa": "pl.nebulosa",
+        "pynebulosa_2d": "pl.nebulosa",  # NOTE(review): yields the pl.nebulosa module, not an object named pynebulosa_2d - confirm intent
+        "pynebulosa_3d": "pl.nebulosa",  # NOTE(review): same target module as above - confirm intent
+        "embedding": "pl.embedding",
+        "dotplot": "pl.dotplot",
+        "barplot": "pl.barplot",
+        "deg": "tl.deg",
+        "pres": "tl.pres",
+        "genecraft": "pp.genecraft",
+    }
+    if name in _compat:
+        import importlib  # imported here so the cost is paid only on the compatibility path
+        parts = _compat[name].split(".")  # -> [subpackage, module]
+        return importlib.import_module(f".{parts[0]}.{parts[1]}", __name__)  # relative import anchored at this package; returns the module object
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")  # any name not in the table is reported as a missing attribute

sjanpy-0.0.1/sjanpy/ml/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+from .build_dataset import (
+    # h5py readers
+    read_obs_h5py,
+    read_var_h5py,
+    # Gene filtering
+    load_gene_list,
+    resolve_gene_indices,
+    # Condition DSL
+    parse_numerical_spec,
+    parse_cat_spec,
+    apply_transforms,
+    build_condition_schema,
+    build_condition_tensor,
+    # Condition schema I/O
+    save_condition_schema,
+    load_condition_schema,
+    # Core processing
+    process_file,
+    build_dataset,
+)