sjanpy 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sjanpy-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: sjanpy
3
+ Version: 0.0.1
4
+ Summary: Subjacent Analysis Toolkits for Single-Cell Omics in Python
5
+ License-Expression: MIT
6
+ Project-URL: Documentation, https://chansigit.github.io/sjanpy/
7
+ Project-URL: Repository, https://github.com/chansigit/sjanpy
8
+ Keywords: single-cell,scRNA-seq,visualization,bioinformatics,scanpy
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Classifier: Topic :: Scientific/Engineering :: Visualization
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy
22
+ Requires-Dist: pandas
23
+ Requires-Dist: scanpy
24
+ Requires-Dist: seaborn
25
+ Requires-Dist: matplotlib
26
+ Requires-Dist: scipy
27
+ Requires-Dist: scikit-learn
28
+ Requires-Dist: statsmodels
29
+ Requires-Dist: adjustText
30
+ Requires-Dist: plotly
31
+ Requires-Dist: anndata
32
+ Provides-Extra: ml
33
+ Requires-Dist: torch; extra == "ml"
34
+ Requires-Dist: h5py; extra == "ml"
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest; extra == "dev"
37
+ Provides-Extra: docs
38
+ Requires-Dist: sphinx>=7.0; extra == "docs"
39
+ Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
40
+
41
+ # sjanpy
42
+
43
+ [![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue)](https://www.python.org)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
+
46
+ **Subjacent Analysis Toolkits for Single-Cell Omics in Python**
47
+
48
+ sjanpy extends the [Scanpy](https://scanpy.readthedocs.io/) / [AnnData](https://anndata.readthedocs.io/) ecosystem with publication-quality visualizations, fast differential expression analysis, and preprocessing utilities for single-cell RNA-seq.
49
+
50
+ ## Package Structure
51
+
52
+ sjanpy follows the Scanpy subpackage convention:
53
+
54
+ | Subpackage | Purpose | Key Functions |
55
+ |---|---|---|
56
+ | `sjanpy.pl` | **Plotting** | Embedding, dot plot, bar plot, volcano plot, Nebulosa density |
57
+ | `sjanpy.tl` | **Tools** | Differential expression, Pearson residuals normalization |
58
+ | `sjanpy.pp` | **Preprocessing** | Organism-specific gene filtering (human, mouse, rat) |
59
+ | `sjanpy.ml` | **Machine Learning** | Chunked `.pt` dataset builder from h5ad files |
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ git clone https://github.com/chansigit/sjanpy.git
65
+ cd sjanpy
66
+ pip install .
67
+ ```
68
+
69
+ ## Quick Start
70
+
71
+ ### Embedding visualization
72
+
73
+ ```python
74
+ import scanpy as sc
75
+ from sjanpy.pl import fancy_embedding_pro
76
+
77
+ adata = sc.datasets.pbmc3k_processed()
78
+ fancy_embedding_pro(adata, basis='umap', color='louvain')
79
+ ```
80
+
81
+ ### Differential expression
82
+
83
+ ```python
84
+ from sjanpy.tl import fast_two_group_deg
85
+ from sjanpy.pl import plot_volcano
86
+
87
+ deg = fast_two_group_deg(adata, label_col='louvain', lst1=['B cells'], lst2=['CD4 T cells'])
88
+ plot_volcano(deg, logfc_col='log2FC', padj_col='padj')
89
+ ```
90
+
91
+ ### Nebulosa density
92
+
93
+ Traditional scatter plots obscure gene expression patterns due to point overlap. Nebulosa uses weighted kernel density estimation to reveal true expression distributions:
94
+
95
+ ```python
96
+ from sjanpy.pl import nebulosa_density
97
+
98
+ nebulosa_density(adata, coord_key='X_umap', gene='CD3D', show=True)
99
+ ```
100
+
101
+ | Standard scatter | Nebulosa density |
102
+ |---|---|
103
+ | <img width="328" alt="before" src="https://github.com/user-attachments/assets/4c481b00-583b-4e7e-b064-95db59160024" /> | <img width="328" alt="after" src="https://github.com/user-attachments/assets/d4e2cc47-7d73-40d1-9b81-8360083780d1" /> |
104
+
105
+ ### Gene filtering
106
+
107
+ ```python
108
+ from sjanpy.pp import filter_human_sc_genes
109
+
110
+ # Mask artifact genes from HVG selection (predicted, non-coding, IG variable, etc.)
111
+ adata = filter_human_sc_genes(adata, mask_hvg_only=True)
112
+ ```
113
+
114
+ ### Complex dot plot
115
+
116
+ ```python
117
+ from sjanpy.pl import complex_dotplot
118
+
119
+ complex_dotplot(
120
+ adata,
121
+ genes=marker_genes,
122
+ groupby='cell_type',
123
+ z_score=True,
124
+ cluster_rows=True,
125
+ cmap='RdBu_r',
126
+ )
127
+ ```
128
+
129
+ ## Module Reference
130
+
131
+ ### `sjanpy.pl` — Plotting
132
+
133
+ | Function | Description |
134
+ |---|---|
135
+ | `fancy_embedding_pro` | UMAP/t-SNE with density overlays, auto-labels, equal-aspect axes |
136
+ | `complex_dotplot` | Dot plot with hierarchical clustering and dendrograms |
137
+ | `fan_dotplot` | Polar/radial dot plot layout |
138
+ | `plot_stacked_bar_repel` | Stacked bar plot with smart label placement |
139
+ | `plot_volcano` | Volcano plot for DEG visualization |
140
+ | `plot_cluster_deg_jitter_highlight` | Per-cluster jitter plot with gene annotations |
141
+ | `nebulosa_density` | Weighted KDE density on embeddings |
142
+ | `wkde2d` / `wkde3d` | Low-level 2D/3D weighted kernel density estimation |
143
+
144
+ ### `sjanpy.tl` — Tools
145
+
146
+ | Function / Class | Description |
147
+ |---|---|
148
+ | `fast_two_group_deg` | Vectorized Welch's t-test DEG between two groups |
149
+ | `compute_nested_deg_df` | Within-cluster DEG between two conditions |
150
+ | `clip_logfc_in_nested_deg_df` | Per-cluster quantile clipping of logFC |
151
+ | `generate_highlight_dict` | Select genes to label (top-N, k-times, manual) |
152
+ | `PearsonResidualsScaler` | NB-based Pearson residuals normalization |
153
+
154
+ ### `sjanpy.pp` — Preprocessing
155
+
156
+ | Function | Description |
157
+ |---|---|
158
+ | `filter_human_sc_genes` | Remove/mask artifact genes (human) |
159
+ | `filter_mouse_sc_genes` | Remove/mask artifact genes (mouse) |
160
+ | `filter_rat_sc_genes` | Remove/mask artifact genes (rat) |
161
+ | `get_background_gene_dict` | Catalog artifact gene categories in a dataset |
162
+
163
+ ### `sjanpy.ml` — Machine Learning
164
+
165
+ | Function | Description |
166
+ |---|---|
167
+ | `build_dataset` | Stream h5ad → chunked `.pt` files with condition vectors |
168
+ | `build_condition_schema` | Build encoding schema from condition DSL specs |
169
+ | `process_file` | Process a single h5ad file into chunks |
170
+
171
+ ## Dependencies
172
+
173
+ Core: `numpy`, `pandas`, `scipy`, `matplotlib`, `seaborn`, `scanpy`, `anndata`, `adjustText`, `statsmodels`, `scikit-learn`, `plotly` (3D visualization)
174
+
175
+ Optional (`ml` extra): `torch` / `h5py` (ML dataset building)
176
+
177
+ ## License
178
+
179
+ MIT
sjanpy-0.0.1/README.md ADDED
@@ -0,0 +1,139 @@
1
+ # sjanpy
2
+
3
+ [![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue)](https://www.python.org)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ **Subjacent Analysis Toolkits for Single-Cell Omics in Python**
7
+
8
+ sjanpy extends the [Scanpy](https://scanpy.readthedocs.io/) / [AnnData](https://anndata.readthedocs.io/) ecosystem with publication-quality visualizations, fast differential expression analysis, and preprocessing utilities for single-cell RNA-seq.
9
+
10
+ ## Package Structure
11
+
12
+ sjanpy follows the Scanpy subpackage convention:
13
+
14
+ | Subpackage | Purpose | Key Functions |
15
+ |---|---|---|
16
+ | `sjanpy.pl` | **Plotting** | Embedding, dot plot, bar plot, volcano plot, Nebulosa density |
17
+ | `sjanpy.tl` | **Tools** | Differential expression, Pearson residuals normalization |
18
+ | `sjanpy.pp` | **Preprocessing** | Organism-specific gene filtering (human, mouse, rat) |
19
+ | `sjanpy.ml` | **Machine Learning** | Chunked `.pt` dataset builder from h5ad files |
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ git clone https://github.com/chansigit/sjanpy.git
25
+ cd sjanpy
26
+ pip install .
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ### Embedding visualization
32
+
33
+ ```python
34
+ import scanpy as sc
35
+ from sjanpy.pl import fancy_embedding_pro
36
+
37
+ adata = sc.datasets.pbmc3k_processed()
38
+ fancy_embedding_pro(adata, basis='umap', color='louvain')
39
+ ```
40
+
41
+ ### Differential expression
42
+
43
+ ```python
44
+ from sjanpy.tl import fast_two_group_deg
45
+ from sjanpy.pl import plot_volcano
46
+
47
+ deg = fast_two_group_deg(adata, label_col='louvain', lst1=['B cells'], lst2=['CD4 T cells'])
48
+ plot_volcano(deg, logfc_col='log2FC', padj_col='padj')
49
+ ```
50
+
51
+ ### Nebulosa density
52
+
53
+ Traditional scatter plots obscure gene expression patterns due to point overlap. Nebulosa uses weighted kernel density estimation to reveal true expression distributions:
54
+
55
+ ```python
56
+ from sjanpy.pl import nebulosa_density
57
+
58
+ nebulosa_density(adata, coord_key='X_umap', gene='CD3D', show=True)
59
+ ```
60
+
61
+ | Standard scatter | Nebulosa density |
62
+ |---|---|
63
+ | <img width="328" alt="before" src="https://github.com/user-attachments/assets/4c481b00-583b-4e7e-b064-95db59160024" /> | <img width="328" alt="after" src="https://github.com/user-attachments/assets/d4e2cc47-7d73-40d1-9b81-8360083780d1" /> |
64
+
65
+ ### Gene filtering
66
+
67
+ ```python
68
+ from sjanpy.pp import filter_human_sc_genes
69
+
70
+ # Mask artifact genes from HVG selection (predicted, non-coding, IG variable, etc.)
71
+ adata = filter_human_sc_genes(adata, mask_hvg_only=True)
72
+ ```
73
+
74
+ ### Complex dot plot
75
+
76
+ ```python
77
+ from sjanpy.pl import complex_dotplot
78
+
79
+ complex_dotplot(
80
+ adata,
81
+ genes=marker_genes,
82
+ groupby='cell_type',
83
+ z_score=True,
84
+ cluster_rows=True,
85
+ cmap='RdBu_r',
86
+ )
87
+ ```
88
+
89
+ ## Module Reference
90
+
91
+ ### `sjanpy.pl` — Plotting
92
+
93
+ | Function | Description |
94
+ |---|---|
95
+ | `fancy_embedding_pro` | UMAP/t-SNE with density overlays, auto-labels, equal-aspect axes |
96
+ | `complex_dotplot` | Dot plot with hierarchical clustering and dendrograms |
97
+ | `fan_dotplot` | Polar/radial dot plot layout |
98
+ | `plot_stacked_bar_repel` | Stacked bar plot with smart label placement |
99
+ | `plot_volcano` | Volcano plot for DEG visualization |
100
+ | `plot_cluster_deg_jitter_highlight` | Per-cluster jitter plot with gene annotations |
101
+ | `nebulosa_density` | Weighted KDE density on embeddings |
102
+ | `wkde2d` / `wkde3d` | Low-level 2D/3D weighted kernel density estimation |
103
+
104
+ ### `sjanpy.tl` — Tools
105
+
106
+ | Function / Class | Description |
107
+ |---|---|
108
+ | `fast_two_group_deg` | Vectorized Welch's t-test DEG between two groups |
109
+ | `compute_nested_deg_df` | Within-cluster DEG between two conditions |
110
+ | `clip_logfc_in_nested_deg_df` | Per-cluster quantile clipping of logFC |
111
+ | `generate_highlight_dict` | Select genes to label (top-N, k-times, manual) |
112
+ | `PearsonResidualsScaler` | NB-based Pearson residuals normalization |
113
+
114
+ ### `sjanpy.pp` — Preprocessing
115
+
116
+ | Function | Description |
117
+ |---|---|
118
+ | `filter_human_sc_genes` | Remove/mask artifact genes (human) |
119
+ | `filter_mouse_sc_genes` | Remove/mask artifact genes (mouse) |
120
+ | `filter_rat_sc_genes` | Remove/mask artifact genes (rat) |
121
+ | `get_background_gene_dict` | Catalog artifact gene categories in a dataset |
122
+
123
+ ### `sjanpy.ml` — Machine Learning
124
+
125
+ | Function | Description |
126
+ |---|---|
127
+ | `build_dataset` | Stream h5ad → chunked `.pt` files with condition vectors |
128
+ | `build_condition_schema` | Build encoding schema from condition DSL specs |
129
+ | `process_file` | Process a single h5ad file into chunks |
130
+
131
+ ## Dependencies
132
+
133
+ Core: `numpy`, `pandas`, `scipy`, `matplotlib`, `seaborn`, `scanpy`, `anndata`, `adjustText`, `statsmodels`, `scikit-learn`, `plotly` (3D visualization)
134
+
135
+ Optional (`ml` extra): `torch` / `h5py` (ML dataset building)
136
+
137
+ ## License
138
+
139
+ MIT
@@ -0,0 +1,58 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sjanpy"
7
+ version = "0.0.1"
8
+ description = "Subjacent Analysis Toolkits for Single-Cell Omics in Python"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.8"
12
+ keywords = ["single-cell", "scRNA-seq", "visualization", "bioinformatics", "scanpy"]
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Science/Research",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.8",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
23
+ "Topic :: Scientific/Engineering :: Visualization",
24
+ ]
25
+ dependencies = [
26
+ "numpy",
27
+ "pandas",
28
+ "scanpy",
29
+ "seaborn",
30
+ "matplotlib",
31
+ "scipy",
32
+ "scikit-learn",
33
+ "statsmodels",
34
+ "adjustText",
35
+ "plotly",
36
+ "anndata",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ ml = [
41
+ "torch",
42
+ "h5py",
43
+ ]
44
+ dev = [
45
+ "pytest",
46
+ ]
47
+ docs = [
48
+ "sphinx>=7.0",
49
+ "sphinx-rtd-theme>=2.0",
50
+ ]
51
+
52
+ [project.urls]
53
+ Documentation = "https://chansigit.github.io/sjanpy/"
54
+ Repository = "https://github.com/chansigit/sjanpy"
55
+
56
+ [tool.setuptools.packages.find]
57
+ where = ["."]
58
+ include = ["sjanpy*"]
sjanpy-0.0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,31 @@
1
+ """
2
+ sjanpy - A collection of Python utilities for single-cell analysis and visualization
3
+ """
4
+
5
+ __version__ = "0.0.1"
6
+
7
+ from . import pl
8
+ from . import tl
9
+ from . import pp
10
+ from . import ml
11
+
12
+
13
+ # Backward-compatible lazy imports for old flat API
14
+ # e.g. `from sjanpy import nebulosa` still works
15
# Legacy flat-API module names mapped to their dotted location inside this
# package. Defined once at import time so the table is not rebuilt on every
# attribute miss.
_COMPAT_MODULES = {
    "nebulosa": "pl.nebulosa",
    "pynebulosa_2d": "pl.nebulosa",
    "pynebulosa_3d": "pl.nebulosa",
    "embedding": "pl.embedding",
    "dotplot": "pl.dotplot",
    "barplot": "pl.barplot",
    "deg": "tl.deg",
    "pres": "tl.pres",
    "genecraft": "pp.genecraft",
}


def __getattr__(name):
    """Lazily resolve legacy flat-API names to their relocated submodules.

    Python calls this module-level hook only when normal attribute lookup on
    the package fails, so it never shadows names that are really defined here.

    Parameters
    ----------
    name : str
        Attribute being looked up on the package.

    Returns
    -------
    module
        The submodule that the legacy name maps to in ``_COMPAT_MODULES``,
        imported relative to this package.

    Raises
    ------
    AttributeError
        If ``name`` is not one of the legacy names.
    """
    target = _COMPAT_MODULES.get(name)
    if target is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported locally so merely importing the package does not depend on it.
    import importlib

    module = importlib.import_module(f".{target}", __name__)
    # Cache in the module namespace so later lookups of the same legacy name
    # are served by normal attribute access and skip this hook.
    globals()[name] = module
    return module
@@ -0,0 +1,20 @@
1
+ from .build_dataset import (
2
+ # h5py readers
3
+ read_obs_h5py,
4
+ read_var_h5py,
5
+ # Gene filtering
6
+ load_gene_list,
7
+ resolve_gene_indices,
8
+ # Condition DSL
9
+ parse_numerical_spec,
10
+ parse_cat_spec,
11
+ apply_transforms,
12
+ build_condition_schema,
13
+ build_condition_tensor,
14
+ # Condition schema I/O
15
+ save_condition_schema,
16
+ load_condition_schema,
17
+ # Core processing
18
+ process_file,
19
+ build_dataset,
20
+ )