py-TranspaceR 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-TranspaceR
3
+ Version: 0.1.0
4
+ Summary: Statistical analysis of Spatial transcriptomic data (Python port of TranspaceR)
5
+ Author-email: Pierre Bost <pierre.bost@curie.fr>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/TranspaceR/TranspaceR
8
+ Keywords: spatial,transcriptomics,variogram,geary-c,bioinformatics
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy>=1.21
22
+ Requires-Dist: scipy>=1.7
23
+ Requires-Dist: scikit-learn>=1.0
24
+ Requires-Dist: pandas>=1.3
25
+ Requires-Dist: matplotlib>=3.4
26
+ Provides-Extra: umap
27
+ Requires-Dist: umap-learn>=0.5; extra == "umap"
28
+ Provides-Extra: leiden
29
+ Requires-Dist: python-igraph>=0.10; extra == "leiden"
30
+ Requires-Dist: leidenalg>=0.9; extra == "leiden"
31
+ Provides-Extra: stats
32
+ Requires-Dist: statsmodels>=0.13; extra == "stats"
33
+ Provides-Extra: all
34
+ Requires-Dist: py-TranspaceR[leiden,stats,umap]; extra == "all"
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=7.0; extra == "dev"
37
+ Requires-Dist: py-TranspaceR[all]; extra == "dev"
38
+
39
+ # py-TranspaceR
40
+
41
+ [![Python 3.8+](https://img.shields.io/badge/Python-3.8%2B-blue.svg)](https://www.python.org/downloads/)
42
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
43
+ [![Tests](https://img.shields.io/badge/Tests-29%20passed-brightgreen.svg)](#testing)
44
+ [![Speedup](https://img.shields.io/badge/Speedup-17.7x-orange.svg)](#speed-benchmark)
45
+
46
+ Python port of [TranspaceR](https://github.com/TranspaceR/TranspaceR) — Statistical analysis of Spatial transcriptomic data.
47
+
48
+ ## Correlation Benchmark (Python vs R)
49
+
50
+ | Function | Pearson r | Max Abs Error |
51
+ |---|---|---|
52
+ | `C_normalisation` | 1.000000 | 4.10e-05 |
53
+ | `Otsu_thresholding` | — | 3.54e-05 |
54
+ | `colvars_sparse` | 1.000000 | 4.40e-05 |
55
+ | `Get_variogram_map` | Deterministic match | 0 |
56
+ | `Get_isotropic_vario` | 1.000000 | 0 |
57
+
58
+ All outputs closely match the R reference: every reported Pearson correlation is 1.000000 and the maximum absolute error stays below 5e-05.
59
+
60
+ ## Speed Benchmark (39,047 cells x 539 genes)
61
+
62
+ | Function | R Time | Python Time | Speedup |
63
+ |---|---|---|---|
64
+ | `C_normalisation` | 1.47s | 0.157s | 9.4x |
65
+ | `Otsu_thresholding` | 0.31s | 0.031s | 10.0x |
66
+ | `colvars_sparse` | 1.72s | 0.011s | 156x |
67
+ | `Get_variogram_map` | 0.02s | 0.0004s | 50x |
68
+ | `Get_isotropic_vario` | 0.01s | 0.0004s | 25x |
69
+ | **Total** | **3.53s** | **0.20s** | **17.7x** |
70
+
71
+ ### Why faster
72
+
73
+ - NumPy/SciPy compiled C backend vs R interpreted execution
74
+ - Direct CSC sparse matrix memory layout access
75
+ - Broadcasting replaces R's row-wise `apply` loops
76
+
77
+ ## Installation
78
+
79
+ ```bash
80
+ pip install -e ".[all]"
81
+ ```
82
+
83
+ ## Quick Start
84
+
85
+ ```python
86
+ import transspacer as ts
87
+ import numpy as np
88
+ import pandas as pd
89
+
90
+ # Load data
91
+ expr = pd.read_csv("Expression_file.csv.gz", index_col=0)
92
+ meta = pd.read_csv("Meta_data.csv", index_col=0)
93
+
94
+ # Cell-size normalisation
95
+ normed = ts.c_normalisation(expr.values.astype(float), meta["Area"].values)
96
+
97
+ # Otsu thresholding
98
+ threshold = ts.otsu_thresholding(np.log10(expr.values.sum(axis=1) + 1))
99
+
100
+ # Variogram analysis
101
+ result = ts.compute_variogram(normed, meta["cell_centroid_x"].values,
102
+ meta["cell_centroid_y"].values)
103
+
104
+ # Geary's C spatial autocorrelation
105
+ gc = ts.geary_c_score(normed, coords, pvalue_threshold=0.01)
106
+
107
+ # Clustering
108
+ labels = ts.cell_clustering_function(pca_data, K=10, resolution=1.0)
109
+ ```
110
+
111
+ ## Modules
112
+
113
+ | Module | Description |
114
+ |---|---|
115
+ | `fft_utils` | `fftshift`, `ifftshift`, `pad_definitor` |
116
+ | `normalization` | `c_normalisation` cell-size normalisation (port of R `C_normalisation`) |
117
+ | `sparse_utils` | Sparse matrix column variance, group aggregation |
118
+ | `variogram` | FFT variogram map, variogram model fitting |
119
+ | `spatial_stats` | Geary's C, NB excess variance / excess zero score |
120
+ | `clustering` | KNN + Leiden/Louvain clustering, UMAP |
121
+ | `gene_selection` | `log2FC`, gene set union |
122
+ | `qc` | Otsu thresholding, QC gene filtering |
123
+ | `plotting` | Spatial visualization, heatmaps, UMAP plots |
124
+
125
+ ## Testing
126
+
127
+ ```bash
128
+ pytest tests/ -q
129
+ # 29 passed
130
+ ```
131
+
132
+ ## Dependencies
133
+
134
+ **Core:** `numpy`, `scipy`, `scikit-learn`, `pandas`, `matplotlib`
135
+
136
+ **Optional:** `umap-learn` (UMAP), `python-igraph` + `leidenalg` (Leiden clustering), `statsmodels` (FDR correction)
137
+
138
+ ## License
139
+
140
+ MIT
@@ -0,0 +1,15 @@
1
+ transspacer/__init__.py,sha256=OozYDtA1-4NAZ-lNNqNwKutVxAgu6dAK2Fh4D5uRwD8,1049
2
+ transspacer/clustering.py,sha256=BR6evtfPvE6NZ5bdORyYlHFZz5OgHWJU-cNXQVrZHrA,6361
3
+ transspacer/fft_utils.py,sha256=UBK_cd_tDkkKIQe38LOTuxqvE-4iheRKkXtd--Jj85k,2562
4
+ transspacer/gene_selection.py,sha256=BVdXvcwnTsgSe4iaoxQPtoVdlmvbZ-rd-zLkuV3L24w,1933
5
+ transspacer/normalization.py,sha256=bytE0lT_E3JuUW-jB-1iwLbZ02QmRhN5n_sV-BPHrdw,651
6
+ transspacer/plotting.py,sha256=Zi9QbQdhX9zNxAJCMyJyB8NupBfw4TXxP7gMdlbc0CA,6906
7
+ transspacer/qc.py,sha256=NcwPAb8AakIikegGwZwtobH6Ograw1MK55o3hCbPsBs,2066
8
+ transspacer/sparse_utils.py,sha256=Ny_bV7tKDbWGbjVIHZA8wegPuqW10Q4AM5Kk5GRyDq8,2041
9
+ transspacer/spatial_stats.py,sha256=mHYO6Vw2IeFswd6rYZTKsgVKqU4ZaZhSKOrSio9oLYg,10840
10
+ transspacer/utils.py,sha256=PzRbtu-NCgrlExnx9wuEZPPcaQEPqd4q8j_ALpRsl6c,5267
11
+ transspacer/variogram.py,sha256=-F0rvtPB8TAsA8gJ1IRiL1ElBhlx3r-9heWTH3115ZA,10964
12
+ py_transpacer-0.1.0.dist-info/METADATA,sha256=NMx3nNk4jGwsGYjnVXemREuQJnkpGwGTho2Gdrphw6Y,4813
13
+ py_transpacer-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ py_transpacer-0.1.0.dist-info/top_level.txt,sha256=b0JQl_A-pWokxEVEyIx8va_gw0drZXrce3b7BUJlsBQ,12
15
+ py_transpacer-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ transspacer
@@ -0,0 +1,19 @@
1
+ """
2
+ TranspaceR: Statistical analysis of Spatial transcriptomic data (Python port)
3
+ """
4
+
5
+ __version__ = "0.1.0"
6
+
7
+ from .fft_utils import fftshift, ifftshift, pad_definitor
8
+ from .normalization import c_normalisation
9
+ from .qc import otsu_thresholding, qc_gene_threshold
10
+ from .sparse_utils import colvars_sparse, aggregate_sparse
11
+ from .gene_selection import calculate_log2fc, select_genes
12
+ from .variogram import get_variogram_map, get_isotropic_vario, process_gene, compute_variogram
13
+ from .spatial_stats import geary_c_score, excess_variance_ratio_nb, excess_zero_score_nb
14
+ from .clustering import cell_clustering_function, clusters_maker, umap_maker
15
+ from .utils import (color_convertion, string_to_colors, compute_radius,
16
+ curate_data, fraction_multiple_samples, load_scimilarity_results)
17
+ from .plotting import (plot_fov, plot_fov_gene, plot_variogram, save_umap,
18
+ save_heatmap_markers, save_annotation_plot, save_boxplot,
19
+ save_dendogram, save_geary_variance_plot, save_tissue_visualization)
@@ -0,0 +1,194 @@
1
+ """Cell clustering, UMAP, and Clusters_maker pipeline."""
2
+
3
+ import numpy as np
4
+ from scipy.sparse import issparse, csc_matrix
5
+ from sklearn.neighbors import NearestNeighbors
6
+ from scipy.sparse.csgraph import connected_components
7
+
8
+ def _check_leiden():
9
+ try:
10
+ import igraph as ig
11
+ import leidenalg
12
+ return ig, leidenalg, True
13
+ except ImportError:
14
+ return None, None, False
15
+
16
+ def _check_umap():
17
+ try:
18
+ import umap as umap_lib
19
+ return umap_lib, True
20
+ except (ImportError, TypeError, Exception):
21
+ return None, False
22
+
23
+ from .sparse_utils import colvars_sparse, aggregate_sparse
24
+ from .gene_selection import calculate_log2fc
25
+
26
+
27
def cell_clustering_function(data_correction: np.ndarray, K: int = 30,
                             metric: str = "euclidean", n_threads: int = 1,
                             resolution: float = 1.0) -> np.ndarray:
    """Cluster cells on a symmetrised K-nearest-neighbour graph.

    The graph is partitioned with Leiden (``RBConfigurationVertexPartition``)
    when ``igraph`` and ``leidenalg`` are importable; otherwise spectral
    clustering on the same graph is used as a fallback.

    Parameters
    ----------
    data_correction : np.ndarray
        Reduced data matrix (cells x components).
    K : int
        Number of neighbours per cell. Must be smaller than the number of
        cells.
    metric : str
        Distance metric passed to ``NearestNeighbors``.
    n_threads : int
        Number of parallel jobs for the neighbour search.
    resolution : float
        Resolution parameter of the Leiden partition (ignored by the
        spectral fallback).

    Returns
    -------
    np.ndarray
        Cluster labels as strings, 1-indexed to match R.

    Notes
    -----
    No seed is passed to ``leidenalg.find_partition``, so Leiden labels may
    differ between runs. The fallback uses ``max(2, n_cells // 100)``
    clusters, which is only a rough estimate.
    """
    n = data_correction.shape[0]

    # Querying without an explicit matrix returns the K neighbours of every
    # fitted point and never reports the point itself. This avoids assuming
    # that "self" is always in column 0, which is false for duplicated points.
    nn = NearestNeighbors(n_neighbors=K, metric=metric, n_jobs=n_threads)
    nn.fit(data_correction)
    indices = nn.kneighbors(return_distance=False)

    # Undirected, de-duplicated edge list (low index first, sorted
    # lexicographically). This replaces the dense n x n adjacency matrix,
    # whose memory footprint grows quadratically with the number of cells.
    sources = np.repeat(np.arange(n), indices.shape[1])
    targets = indices.ravel()
    pairs = np.unique(
        np.column_stack((np.minimum(sources, targets),
                         np.maximum(sources, targets))),
        axis=0,
    )

    ig, la, has_leiden = _check_leiden()
    if has_leiden:
        edges = list(map(tuple, pairs.tolist()))
        g = ig.Graph(n=n, edges=edges, directed=False)
        g.es["weight"] = [1.0] * g.ecount()

        partition = la.find_partition(g, la.RBConfigurationVertexPartition,
                                      resolution_parameter=resolution,
                                      n_iterations=-1)
        return np.array([str(m + 1) for m in partition.membership])  # 1-indexed

    # Fallback: spectral clustering on the (sparse) symmetric KNN affinity.
    from sklearn.cluster import SpectralClustering
    upper = csc_matrix((np.ones(pairs.shape[0]), (pairs[:, 0], pairs[:, 1])),
                       shape=(n, n))
    affinity = upper + upper.T
    n_clusters = max(2, int(n / 100))  # rough estimate
    sc = SpectralClustering(n_clusters=n_clusters, affinity="precomputed",
                            assign_labels="discretize", random_state=42)
    labels = sc.fit_predict(affinity) + 1  # 1-indexed
    return labels.astype(str)
89
+
90
+
91
def umap_maker(pca_data: np.ndarray, n_components: int = 2,
               random_state: int = 42) -> np.ndarray:
    """Embed a PCA representation in a low-dimensional space.

    UMAP is used when the ``umap`` module can be imported; otherwise the
    embedding is computed with scikit-learn's t-SNE.

    Parameters
    ----------
    pca_data : np.ndarray
        PCA-reduced data (cells x components).
    n_components : int
        Number of embedding dimensions.
    random_state : int
        Random seed forwarded to the embedding method.

    Returns
    -------
    np.ndarray
        Embedding coordinates (cells x n_components).
    """
    umap_lib, has_umap = _check_umap()
    if not has_umap:
        from sklearn.manifold import TSNE
        embedder = TSNE(n_components=n_components, random_state=random_state)
        return embedder.fit_transform(pca_data)

    embedder = umap_lib.UMAP(n_components=n_components, random_state=random_state)
    return embedder.fit_transform(pca_data)
116
+
117
+
118
def clusters_maker(expression, shared_genes=None, K: int = 30,
                   metric_used: str = "euclidean", n_threads: int = 1,
                   resolution: float = 1.0, nv: int = 50) -> dict:
    """End-to-end: PCA -> clustering -> mean expression -> log2FC.

    Parameters
    ----------
    expression : np.ndarray or sparse matrix
        Expression matrix (cells x genes).
    shared_genes : list, optional
        Column selector applied as ``expression[:, shared_genes]``. If None,
        all genes are used.
    K : int
        Number of neighbors for clustering.
    metric_used : str
        Distance metric.
    n_threads : int
        Number of threads.
    resolution : float
        Clustering resolution.
    nv : int
        Maximum number of PCA/SVD components.

    Returns
    -------
    dict
        ``PCA_data`` (cell embedding), ``Data_correction`` (the matrix that
        was reduced), ``Clustering`` (labels), ``Mean_expression`` (per
        cluster means of ``Data_correction``) and ``Log2FC_table``
        (``{gene_index: {cluster: log2fc}}`` over every column of
        ``expression``).
    """
    from sklearn.decomposition import PCA, TruncatedSVD

    # Column subsetting uses the same syntax for dense and sparse input.
    data_correction = expression if shared_genes is None else expression[:, shared_genes]
    sparse_input = issparse(data_correction)
    n_cells, n_used_genes = data_correction.shape

    # Dimensionality reduction
    if sparse_input:
        svd = TruncatedSVD(n_components=min(nv, n_used_genes - 1),
                           random_state=42)
        pca_u = svd.fit_transform(data_correction)
    else:
        # PCA cannot return more components than min(n_samples, n_features).
        pca = PCA(n_components=min(nv, n_used_genes, n_cells), random_state=42)
        pca_u = pca.fit_transform(data_correction)

    # Clustering
    clustering = cell_clustering_function(pca_u, K, metric_used, n_threads, resolution)

    # Mean expression per cluster
    if sparse_input:
        mean_expression = aggregate_sparse(data_correction, clustering).T  # genes x groups -> groups x genes
    else:
        groups = np.unique(clustering)
        mean_expression = np.zeros((len(groups), n_used_genes))
        for row, group in enumerate(groups):
            members = np.where(clustering == group)[0]
            mean_expression[row, :] = data_correction[members, :].mean(axis=0)

    # Log2FC over every gene of the full matrix. Densify once up front instead
    # of converting the whole sparse matrix again for each gene.
    dense_expression = expression.toarray() if issparse(expression) else expression
    log2fc_list = {
        g_idx: calculate_log2fc(dense_expression, g_idx, clustering)
        for g_idx in range(expression.shape[1])
    }

    return {
        "PCA_data": pca_u,
        "Data_correction": data_correction,
        "Clustering": clustering,
        "Mean_expression": mean_expression,
        "Log2FC_table": log2fc_list
    }
@@ -0,0 +1,97 @@
1
+ """FFT utility functions: fftshift, ifftshift, pad_definitor."""
2
+
3
+ import numpy as np
4
+
5
+
6
def fftshift(input_matrix: np.ndarray, dim: int = -1) -> np.ndarray:
    """Move the zero-frequency component to the centre of a 2D spectrum.

    Parameters
    ----------
    input_matrix : np.ndarray
        2D array to shift.
    dim : int
        -1 shifts both dimensions, 1 shifts rows only, 2 shifts columns only.

    Returns
    -------
    np.ndarray
        Shifted array.

    Raises
    ------
    ValueError
        If ``dim`` is not -1, 1 or 2 (or the input is not 2D).
    """
    n_rows, n_cols = input_matrix.shape
    # The split point is ceil(n / 2): the trailing half moves to the front.
    row_cut = int(np.ceil(n_rows / 2))
    col_cut = int(np.ceil(n_cols / 2))

    def rotate_rows(block):
        return np.vstack([block[row_cut:, :], block[:row_cut, :]])

    def rotate_cols(block):
        return np.hstack([block[:, col_cut:], block[:, :col_cut]])

    if dim not in (-1, 1, 2):
        raise ValueError("Invalid dimension parameter")

    shifted = input_matrix
    if dim in (-1, 1):
        shifted = rotate_rows(shifted)
    if dim in (-1, 2):
        shifted = rotate_cols(shifted)
    return shifted
39
+
40
+
41
def ifftshift(input_matrix: np.ndarray, dim: int = -1) -> np.ndarray:
    """Undo ``fftshift`` on a 2D array.

    Parameters
    ----------
    input_matrix : np.ndarray
        2D array to shift.
    dim : int
        -1 shifts both dimensions, 1 shifts rows only, 2 shifts columns only.

    Returns
    -------
    np.ndarray
        Shifted array.

    Raises
    ------
    ValueError
        If ``dim`` is not -1, 1 or 2 (or the input is not 2D).
    """
    n_rows, n_cols = input_matrix.shape
    # The split point is floor(n / 2), the counterpart of the ceil used by
    # the forward shift.
    row_cut = int(np.floor(n_rows / 2))
    col_cut = int(np.floor(n_cols / 2))

    def rotate_rows(block):
        return np.vstack([block[row_cut:, :], block[:row_cut, :]])

    def rotate_cols(block):
        return np.hstack([block[:, col_cut:], block[:, :col_cut]])

    if dim not in (-1, 1, 2):
        raise ValueError("Invalid dimension parameter")

    shifted = input_matrix
    if dim in (-1, 2):
        shifted = rotate_cols(shifted)
    if dim in (-1, 1):
        shifted = rotate_rows(shifted)
    return shifted
74
+
75
+
76
def pad_definitor(meta_data_x: np.ndarray, meta_data_y: np.ndarray) -> int:
    """Derive a padding size from the spatial extent of the cells.

    The padding is the mean of the x and y coordinate ranges divided by 200,
    rounded with ``np.round``; a result of 0 is replaced by 1.

    Parameters
    ----------
    meta_data_x : np.ndarray
        X coordinates of cell centroids.
    meta_data_y : np.ndarray
        Y coordinates of cell centroids.

    Returns
    -------
    int
        Padding size (minimum 1).
    """
    extent_x = np.max(meta_data_x) - np.min(meta_data_x)
    extent_y = np.max(meta_data_y) - np.min(meta_data_y)
    mean_extent = np.mean([extent_x, extent_y])
    n_pad = int(np.round(mean_extent / 200))
    return n_pad if n_pad != 0 else 1
@@ -0,0 +1,67 @@
1
+ """Gene selection utilities: log2FC, shared gene selection."""
2
+
3
+ import numpy as np
4
+ from collections import Counter
5
+
6
+
7
def calculate_log2fc(expression: np.ndarray, gene_idx: int, clustering: np.ndarray) -> dict:
    """Log2 fold change of one gene: each cluster vs. all other clusters.

    For every cluster the reference value is the mean expression of the
    remaining clusters, weighted by their share of cells.

    Parameters
    ----------
    expression : np.ndarray
        Expression matrix (cells x genes).
    gene_idx : int
        Column index of the gene.
    clustering : np.ndarray
        Cluster labels per cell.

    Returns
    -------
    dict
        ``{cluster_label: log2fc_value}`` for each cluster. The value is NaN
        when the weighted mean of the other clusters is not positive (this
        includes the single-cluster case).
    """
    values = expression[:, gene_idx]
    labels = np.unique(clustering)
    n_cells = len(clustering)
    sizes = Counter(clustering)

    cluster_means = [values[clustering == label].mean() for label in labels]
    fractions = [sizes[label] / n_cells for label in labels]

    log2fc = {}
    for pos, label in enumerate(labels):
        # Accumulate the size-weighted mean over every cluster except this one.
        rest_mean = 0.0
        rest_weight = 0.0
        for other in range(len(labels)):
            if other == pos:
                continue
            rest_mean += fractions[other] * cluster_means[other]
            rest_weight += fractions[other]
        if rest_weight > 0:
            rest_mean /= rest_weight

        if rest_mean > 0:
            log2fc[label] = np.log2(cluster_means[pos] / rest_mean)
        else:
            log2fc[label] = np.nan
    return log2fc
47
+
48
+
49
def select_genes(selected_objects: list, selected_names: list = None) -> list:
    """Return the sorted union of several gene collections.

    Parameters
    ----------
    selected_objects : list of list
        Each element is a list/set of gene names.
    selected_names : list of str, optional
        Names for each gene set. Currently not used by the function.

    Returns
    -------
    list
        Sorted list of the distinct genes found in any of the sets.
    """
    return sorted(set().union(*selected_objects))
@@ -0,0 +1,24 @@
1
+ """Cell-size normalization."""
2
+
3
+ import numpy as np
4
+
5
+
6
def c_normalisation(y: np.ndarray, scaling_factor: np.ndarray) -> np.ndarray:
    """Normalize expression by cell size: y / (area + 1/tau).

    ``tau`` is computed per gene as the mean over cells of ``y / area``.

    Parameters
    ----------
    y : np.ndarray
        Expression matrix (cells x genes).
    scaling_factor : array-like
        Cell areas (length n_cells). Any 1D sequence convertible to a float
        array is accepted.

    Returns
    -------
    np.ndarray
        Normalized expression matrix.

    Notes
    -----
    A gene with no counts at all has ``tau == 0``; its scaling term becomes
    infinite and the normalized values are 0.
    """
    # np.asarray (rather than .astype) also accepts lists and pandas Series.
    cell_size = np.asarray(scaling_factor, dtype=float)
    area_column = cell_size[:, np.newaxis]
    tau_parameter = np.mean(y / area_column, axis=0)
    # 1 / tau is intentionally infinite for all-zero genes, so silence only
    # the divide-by-zero warning it would otherwise emit.
    with np.errstate(divide="ignore"):
        scaling = area_column + 1.0 / tau_parameter[np.newaxis, :]
    return y / scaling