mcDETECT 2.0.2__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcDETECT might be problematic.
- mcDETECT/__init__.py +1 -9
- mcDETECT/model.py +83 -61
- {mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/METADATA +1 -1
- mcdetect-2.0.4.dist-info/RECORD +8 -0
- mcdetect-2.0.2.dist-info/RECORD +0 -8
- {mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/WHEEL +0 -0
- {mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/licenses/LICENSE +0 -0
- {mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/top_level.txt +0 -0
mcDETECT/__init__.py
CHANGED

@@ -1,12 +1,4 @@
-__version__ = "2.0.2"
-
-# from .utils import find_threshold_index, closest, make_tree, make_rtree, scale, weighted_corr, weighted_spearmanr, assign_palette_to_adata, p_val_to_star, top_columns_above_threshold
-# from .model import mcDETECT
-# from .model import spot_neuron, spot_granule, neighbor_granule, neuron_embedding_one_hot, neuron_embedding_spatial_weight
-
-# __all__ = ["mcDETECT",
-# "spot_neuron", "spot_granule", "neighbor_granule", "neuron_embedding_one_hot", "neuron_embedding_spatial_weight",
-# "find_threshold_index", "closest", "make_tree", "make_rtree", "scale", "weighted_corr", "weighted_spearmanr", "assign_palette_to_adata", "p_val_to_star", "top_columns_above_threshold"]
+__version__ = "2.0.4"

from . import model
from . import utils
mcDETECT/model.py
CHANGED

@@ -1,10 +1,11 @@
import anndata
-import math
import miniball
import numpy as np
import pandas as pd
import scanpy as sc
+from collections import Counter
from rtree import index
+from scipy.sparse import csr_matrix
from scipy.spatial import cKDTree
from scipy.stats import poisson
from shapely.geometry import Point
@@ -18,12 +19,12 @@ from .utils import *
class mcDETECT:


-    def __init__(self, type, transcripts,
+    def __init__(self, type, transcripts, gnl_genes, nc_genes = None, eps = 1.5, minspl = None, grid_len = 1.0, cutoff_prob = 0.95, alpha = 5.0, low_bound = 3,
                 size_thr = 4.0, in_nucleus_thr = (0.5, 0.5), l = 1.0, rho = 0.2, s = 1.0, nc_top = 20, nc_thr = 0.1):

        self.type = type # string, iST platform, now support MERSCOPE, Xenium, and CosMx
        self.transcripts = transcripts # dataframe, transcripts file
-        self.
+        self.gnl_genes = gnl_genes # list, string, all granule markers
        self.nc_genes = nc_genes # list, string, all negative controls
        self.eps = eps # numeric, searching radius epsilon
        self.minspl = minspl # integer, manually select min_samples, i.e., no automatic parameter selection
@@ -57,10 +58,11 @@ class mcDETECT:

    # [INNER] calculate tissue area, input for poisson_select()
    def tissue_area(self):
-
-
-
-
+        if not hasattr(self, "_cached_area"):
+            x_bins, y_bins = self.construct_grid(grid_len = None)
+            hist, _, _ = np.histogram2d(self.transcripts["global_x"], self.transcripts["global_y"], bins = [x_bins, y_bins])
+            self._cached_area = np.count_nonzero(hist) * (self.grid_len ** 2)
+        return self._cached_area


    # [INNER] calculate optimal min_samples, input for dbscan()
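The rewritten tissue_area() bins all transcripts onto the analysis grid once, counts the non-empty cells, and caches the result on the instance so later calls (for example from poisson_select()) reuse it. A minimal standalone sketch of the same estimate, with made-up coordinates and a 1.0-unit grid standing in for the package's transcripts table and construct_grid() helper:

import numpy as np

# Hypothetical transcript coordinates (stand-ins for global_x / global_y).
rng = np.random.default_rng(0)
x = rng.uniform(0, 50, 5000)
y = rng.uniform(0, 30, 5000)

grid_len = 1.0
x_bins = np.arange(x.min(), x.max() + grid_len, grid_len)
y_bins = np.arange(y.min(), y.max() + grid_len, grid_len)

# Bin transcripts into grid cells; every non-empty cell contributes grid_len^2 of tissue area.
hist, _, _ = np.histogram2d(x, y, bins=[x_bins, y_bins])
area = np.count_nonzero(hist) * grid_len ** 2
print(f"estimated tissue area: {area:.1f} square units")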
@@ -72,24 +74,26 @@ class mcDETECT:
        return optimal_m


-    # [INTERMEDIATE] dictionary, low- and high-in-nucleus spheres for each
-    def dbscan(self, target_names = None, write_csv = False, write_path = "./"):
+    # [INTERMEDIATE] dictionary, low- and high-in-nucleus spheres for each granule marker
+    def dbscan(self, target_names = None, record_cell_id = False, write_csv = False, write_path = "./"):

        if self.type != "Xenium":
            z_grid = list(self.transcripts["global_z"].unique())
            z_grid.sort()

        if target_names is None:
-            target_names = self.
+            target_names = self.gnl_genes
+
        transcripts = self.transcripts[self.transcripts["target"].isin(target_names)]
+        grouped = {g: df for g, df in transcripts.groupby("target")}

        num_individual, data_low, data_high = [], {}, {}

        for j in target_names:

            # split transcripts
-            target =
-            others =
+            target = grouped[j]
+            others = pd.concat([grouped[g] for g in target_names if g != j], ignore_index = True)
            tree = make_tree(d1 = np.array(others["global_x"]), d2 = np.array(others["global_y"]), d3 = np.array(others["global_z"]))

            # 3D DBSCAN
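dbscan() now groups the transcripts table by gene once and, for each marker, takes its own group as target and concatenates the remaining groups as others, instead of re-filtering the full table on every iteration. A self-contained sketch of that split; the marker names and coordinates below are hypothetical:

import pandas as pd

# Toy transcripts table with the columns the diff references (values are made up).
transcripts = pd.DataFrame({
    "target":   ["Snap25", "Snap25", "Camk2a", "Gria1", "Camk2a"],
    "global_x": [1.0, 1.2, 3.5, 0.4, 3.6],
    "global_y": [2.0, 2.1, 1.0, 5.0, 1.1],
})
target_names = ["Snap25", "Camk2a", "Gria1"]

# Group once, reuse inside the per-marker loop.
grouped = {g: df for g, df in transcripts.groupby("target")}
for j in target_names:
    target = grouped[j]                                               # transcripts of marker j
    others = pd.concat([grouped[g] for g in target_names if g != j],  # all other markers
                       ignore_index=True)
    print(j, len(target), "vs", len(others))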
@@ -103,17 +107,25 @@ class mcDETECT:
            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

            # iterate over all aggregations
-            sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score = [], [], [], [], [], [], [], []
+            cell_id, sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score = [], [], [], [], [], [], [], [], []

            for k in range(n_clusters):

+                # record cell ids
+                if record_cell_id:
+                    temp = target[labels == k]
+                    temp_cell_id_mode = temp["cell_id"].mode()[0]
+                    cell_id.append(temp_cell_id_mode)
+
                # find minimum enclosing spheres
-
-
-
-
-
-
+                mask = (labels == k)
+                coords = X[mask]
+                if coords.shape[0] == 0:
+                    continue
+                temp_in_nucleus = np.sum(target["overlaps_nucleus"].values[mask])
+                temp_size = coords.shape[0]
+                coords_unique = np.unique(coords, axis=0)
+                center, r2 = miniball.get_bounding_ball(coords_unique, epsilon=1e-8)
                if self.type != "Xenium":
                    closest_z = closest(z_grid, center[2])
                else:
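Within each DBSCAN cluster, the new code skips empty clusters, tallies in-nucleus transcripts, and fits the minimum enclosing sphere on deduplicated coordinates, which keeps the solver from being fed degenerate duplicate points. A small sketch of that call, assuming the miniball package from PyPI (get_bounding_ball returns the center and the squared radius):

import numpy as np
import miniball

# Hypothetical 3D coordinates of one cluster, with a duplicate point on purpose.
coords = np.array([
    [0.0, 0.0, 0.0],
    [1.0, 0.0, 0.0],
    [0.0, 1.0, 0.0],
    [0.0, 1.0, 0.0],   # duplicate
    [0.5, 0.5, 1.0],
])

coords_unique = np.unique(coords, axis=0)                  # deduplicate before fitting
center, r2 = miniball.get_bounding_ball(coords_unique, epsilon=1e-8)
print("center:", center, "radius:", np.sqrt(r2))           # r2 is the squared radius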
@@ -139,11 +151,13 @@ class mcDETECT:
                sphere_comp.append(total_comp)
                sphere_score.append(local_score)

-            # basic features for all spheres from each
-            sphere = pd.DataFrame(list(zip(sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score)),
-
-            sphere
-
+            # basic features for all spheres from each granule marker
+            sphere = pd.DataFrame(list(zip(sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score, [j] * len(sphere_x))),
+                                  columns = ["sphere_x", "sphere_y", "sphere_z", "layer_z", "sphere_r", "size", "comp", "in_nucleus", "gene"])
+            sphere = sphere.astype({"sphere_x": float, "sphere_y": float, "sphere_z": float, "layer_z": float, "sphere_r": float, "size": float, "comp": float, "in_nucleus": float, "gene": str})
+            if record_cell_id:
+                sphere["cell_id"] = cell_id
+                sphere = sphere.astype({"cell_id": str})

            # split low- and high-in-nucleus spheres
            sphere_low = sphere[(sphere["sphere_r"] < self.size_thr) & (sphere["in_nucleus"] < self.in_nucleus_thr[0])]
@@ -156,14 +170,14 @@ class mcDETECT:
            num_individual.append(sphere_low.shape[0])
            data_low[target_names.index(j)] = sphere_low
            data_high[target_names.index(j)] = sphere_high
-            print("{
+            print(f"{target_names.index(j) + 1} / {len(target_names)} genes processed!")

        return np.sum(num_individual), data_low, data_high


    # [INNER] merge points from two overlapped spheres, input for remove_overlaps()
    def find_points(self, sphere_a, sphere_b):
-        transcripts = self.transcripts[self.transcripts["target"].isin(self.
+        transcripts = self.transcripts[self.transcripts["target"].isin(self.gnl_genes)]
        tree_temp = make_tree(d1 = np.array(transcripts["global_x"]), d2 = np.array(transcripts["global_y"]), d3 = np.array(transcripts["global_z"]))
        idx_a = tree_temp.query_ball_point([sphere_a["sphere_x"], sphere_a["sphere_y"], sphere_a["sphere_z"]], sphere_a["sphere_r"])
        points_a = transcripts.iloc[idx_a]
@@ -184,7 +198,7 @@ class mcDETECT:
        # find possible overlaps on 2D by r-tree
        idx_b = make_rtree(set_b)
        for i, sphere_a in set_a.iterrows():
-            center_a_3D = (sphere_a.sphere_x, sphere_a.sphere_y, sphere_a.sphere_z)
+            center_a_3D = np.array([sphere_a.sphere_x, sphere_a.sphere_y, sphere_a.sphere_z])
            bounds_a = (sphere_a.sphere_x - sphere_a.sphere_r,
                        sphere_a.sphere_y - sphere_a.sphere_r,
                        sphere_a.sphere_x + sphere_a.sphere_r,
@@ -195,8 +209,8 @@ class mcDETECT:
            for j in possible_overlaps:
                if j in set_b.index:
                    sphere_b = set_b.loc[j]
-                    center_b_3D = (sphere_b.sphere_x, sphere_b.sphere_y, sphere_b.sphere_z)
-                    dist =
+                    center_b_3D = np.array([sphere_b.sphere_x, sphere_b.sphere_y, sphere_b.sphere_z])
+                    dist = np.linalg.norm(center_a_3D - center_b_3D)
                    radius_sum = sphere_a.sphere_r + sphere_b.sphere_r
                    radius_diff = sphere_a.sphere_r - sphere_b.sphere_r

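With both centers held as NumPy arrays, the center distance becomes a single np.linalg.norm call, and comparing it with the sum and difference of the radii distinguishes disjoint, overlapping, and nested spheres. A standalone sketch of that geometric test; the thresholds below are the standard ones and only illustrate what remove_overlaps() has to decide, not its exact rules:

import numpy as np

center_a, r_a = np.array([0.0, 0.0, 0.0]), 2.0   # hypothetical sphere A
center_b, r_b = np.array([1.0, 1.0, 0.5]), 1.0   # hypothetical sphere B

dist = np.linalg.norm(center_a - center_b)
radius_sum = r_a + r_b
radius_diff = r_a - r_b

if dist >= radius_sum:
    relation = "disjoint"                          # farther apart than the combined radii
elif dist <= abs(radius_diff):
    relation = "one sphere inside the other"
else:
    relation = "partially overlapping"
print(f"dist = {dist:.3f} -> {relation}")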
@@ -227,10 +241,10 @@ class mcDETECT:
        return set_a, set_b


-    # [INNER] merge spheres from different
+    # [INNER] merge spheres from different granule markers, input for detect()
    def merge_sphere(self, sphere_dict):
        sphere = sphere_dict[0].copy()
-        for j in range(1, len(self.
+        for j in range(1, len(self.gnl_genes)):
            target_sphere = sphere_dict[j]
            sphere, target_sphere_new = self.remove_overlaps(sphere, target_sphere)
            sphere = pd.concat([sphere, target_sphere_new])
@@ -268,23 +282,19 @@ class mcDETECT:
        # negative control filtering
        nc_transcripts_final = self.transcripts[self.transcripts["target"].isin(nc_genes_final)]
        tree = make_tree(d1 = np.array(nc_transcripts_final["global_x"]), d2 = np.array(nc_transcripts_final["global_y"]), d3 = np.array(nc_transcripts_final["global_z"]))
-
-
-
-
-
-
-            elif len(nc_idx) / temp["size"] < self.nc_thr:
-                pass_idx[i] = 2
-        sphere = sphere_low[np.array(pass_idx) != 0]
-        sphere = sphere.reset_index(drop = True)
+        centers = sphere_low[["sphere_x", "sphere_y", "sphere_z"]].to_numpy()
+        radii = sphere_low["sphere_r"].to_numpy()
+        sizes = sphere_low["size"].to_numpy()
+        counts = np.array([len(tree.query_ball_point(c, r)) for c, r in zip(centers, radii)])
+        pass_idx = (counts == 0) | (counts / sizes < self.nc_thr)
+        sphere = sphere_low[pass_idx].reset_index(drop = True)
        return sphere


-    # [MAIN] dataframe,
-    def detect(self):
+    # [MAIN] dataframe, granule metadata
+    def detect(self, record_cell_id = False):

-        _, data_low, data_high = self.dbscan()
+        _, data_low, data_high = self.dbscan(record_cell_id = record_cell_id)

        print("Merging spheres...")
        sphere_low, sphere_high = self.merge_sphere(data_low), self.merge_sphere(data_high)
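nc_filter() now queries the negative-control KD-tree once per candidate sphere and keeps a sphere if it contains no negative-control transcripts, or if their count stays below nc_thr as a fraction of the sphere's size, replacing the old per-sphere bookkeeping loop. A self-contained sketch of that vectorized filter with toy data; scipy's cKDTree is used directly here, whereas the package goes through its make_tree() helper:

import numpy as np
from scipy.spatial import cKDTree

rng = np.random.default_rng(1)
nc_points = rng.uniform(0, 10, size=(200, 3))        # hypothetical negative-control coordinates
tree = cKDTree(nc_points)

centers = rng.uniform(0, 10, size=(5, 3))            # hypothetical sphere centers
radii = np.full(5, 0.8)                               # hypothetical sphere radii
sizes = np.array([12, 30, 8, 20, 15], dtype=float)    # transcripts per sphere
nc_thr = 0.1

# Count negative-control transcripts inside each sphere, then apply the pass rule.
counts = np.array([len(tree.query_ball_point(c, r)) for c, r in zip(centers, radii)])
pass_idx = (counts == 0) | (counts / sizes < nc_thr)
print("negative-control counts:", counts)
print("spheres kept:", np.flatnonzero(pass_idx))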
@@ -296,32 +306,44 @@ class mcDETECT:
        return self.nc_filter(sphere_low, sphere_high)


-    # [MAIN] anndata,
-    def profile(self,
+    # [MAIN] anndata, granule spatial transcriptome profile
+    def profile(self, granule, genes = None, print_itr = False):

        if genes is None:
            genes = list(self.transcripts["target"].unique())
            transcripts = self.transcripts
        else:
            transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
+
+        gene_to_idx = {g: i for i, g in enumerate(genes)}
+        gene_array = transcripts["target"].to_numpy()
        tree = make_tree(d1 = np.array(transcripts["global_x"]), d2 = np.array(transcripts["global_y"]), d3 = np.array(transcripts["global_z"]))

-
-
-
-
+        n_gnl = granule.shape[0]
+        n_gene = len(genes)
+        data, row_idx, col_idx = [], [], []
+
+        # iterate over all granules to count nearby transcripts
+        for i in range(n_gnl):
+            temp = granule.iloc[i]
            target_idx = tree.query_ball_point([temp["sphere_x"], temp["sphere_y"], temp["layer_z"]], temp["sphere_r"])
-
-
-            for
-
-
-
+            if not target_idx:
+                continue
+            local_genes = gene_array[target_idx] # extract genes for those nearby transcripts
+            counts = Counter(local_genes) # count how many times each gene occurs
+            for g, cnt in counts.items(): # append nonzero entries to sparse matrix lists
+                j = gene_to_idx[g] # get gene column index
+                data.append(cnt) # nonzero count
+                row_idx.append(i) # row index = granule index
+                col_idx.append(j) # column index = gene index
+            if print_itr and (i % 5000 == 0):
+                print(f"{i} out of {n_gnl} granules profiled!")

-        # construct spatial transcriptome profile
-
-        adata
-        adata.obs["
+        # construct sparse spatial transcriptome profile, (n_granules × n_genes)
+        X = csr_matrix((data, (row_idx, col_idx)), shape = (n_gnl, n_gene), dtype = np.float32)
+        adata = anndata.AnnData(X = X, obs = granule.copy())
+        adata.obs["granule_id"] = [f"gnl_{i}" for i in range(n_gnl)]
+        adata.obs = adata.obs.astype({"granule_id": str})
        adata.obs.rename(columns = {"sphere_x": "global_x", "sphere_y": "global_y", "sphere_z": "global_z"}, inplace = True)
        adata.var["genes"] = genes
        adata.var_names = genes
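profile() now collects the granule-by-gene counts as coordinate triplets (value, row, column) and assembles a scipy csr_matrix in one step, rather than filling a dense array. A self-contained sketch of that accumulation; the gene panel and per-granule neighborhoods are made up, standing in for what tree.query_ball_point() returns around each sphere:

import numpy as np
from collections import Counter
from scipy.sparse import csr_matrix

genes = ["Snap25", "Camk2a", "Gria1"]                 # hypothetical gene panel
gene_to_idx = {g: i for i, g in enumerate(genes)}

# Hypothetical gene labels of the transcripts found near each granule.
neighborhoods = [
    ["Snap25", "Snap25", "Gria1"],
    [],                                               # a granule with no nearby transcripts
    ["Camk2a"],
]

data, row_idx, col_idx = [], [], []
for i, local_genes in enumerate(neighborhoods):
    if not local_genes:
        continue
    for g, cnt in Counter(local_genes).items():       # one entry per nonzero (granule, gene) pair
        data.append(cnt)
        row_idx.append(i)
        col_idx.append(gene_to_idx[g])

X = csr_matrix((data, (row_idx, col_idx)), shape=(len(neighborhoods), len(genes)), dtype=np.float32)
print(X.toarray())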
@@ -359,7 +381,7 @@ class mcDETECT:
            count_gene, _, _ = np.histogram2d(target_gene["global_x"], target_gene["global_y"], bins = [x_bins, y_bins])
            X[k_idx, :] = count_gene.flatten()
            if k_idx % 100 == 0:
-                print("{} out of {} genes profiled!"
+                print(f"{k_idx} out of {len(genes)} genes profiled!")

        # spot id
        spot_id = []
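Taken together, the user-facing changes in 2.0.4 are the gnl_genes constructor argument, the record_cell_id flag on dbscan()/detect(), and a sparse AnnData returned by profile(). A hypothetical invocation sketch, assuming a transcripts DataFrame with the columns model.py reads (target, global_x/global_y/global_z, overlaps_nucleus, and cell_id when record_cell_id is used); the file path and marker names are placeholders and this is not a verified run of the package:

import pandas as pd
from mcDETECT.model import mcDETECT

transcripts = pd.read_csv("transcripts.csv")          # hypothetical transcripts export

detector = mcDETECT(
    type="MERSCOPE",                                  # platform: MERSCOPE, Xenium, or CosMx
    transcripts=transcripts,
    gnl_genes=["Snap25", "Camk2a", "Gria1"],          # hypothetical granule markers
)

granule = detector.detect(record_cell_id=True)        # dataframe of granule metadata
adata = detector.profile(granule)                     # sparse AnnData, granules x genes
print(adata)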
mcdetect-2.0.4.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+mcDETECT/__init__.py,sha256=2gqfrrw4FNzDVBMDCXpfBwjDU_esM9r6VoW1_ru4rBs,92
+mcDETECT/model.py,sha256=BJkarQR4wd6d0eb05wqhBTRT6ApJv9A8XwD5blv7c8k,29385
+mcDETECT/utils.py,sha256=0gvqZV7sGi8qvvdC5x9XScyiTXlSfqbUt1zks4t7Xd4,4545
+mcdetect-2.0.4.dist-info/licenses/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
+mcdetect-2.0.4.dist-info/METADATA,sha256=thmYqmCQQ4AYQ2VULhMZjsJvAlg26ZrxoNvYvUK9_-c,3016
+mcdetect-2.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mcdetect-2.0.4.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
+mcdetect-2.0.4.dist-info/RECORD,,
mcdetect-2.0.2.dist-info/RECORD
DELETED

@@ -1,8 +0,0 @@
-mcDETECT/__init__.py,sha256=_kFD4ZyEYvyCNaOBJ1Wj3fOsRFigUdpaNkttzTr0TjY,783
-mcDETECT/model.py,sha256=zEdHqgwTjDi7HxdLW0aPG2j8uLMPiobNu-BcJraAG8g,28047
-mcDETECT/utils.py,sha256=0gvqZV7sGi8qvvdC5x9XScyiTXlSfqbUt1zks4t7Xd4,4545
-mcdetect-2.0.2.dist-info/licenses/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
-mcdetect-2.0.2.dist-info/METADATA,sha256=V0nxFJduH1coDW8F-Yv7vExY-3u7U6o3m7hWV1bCj0k,3016
-mcdetect-2.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mcdetect-2.0.2.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
-mcdetect-2.0.2.dist-info/RECORD,,
{mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/WHEEL
File without changes

{mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/licenses/LICENSE
File without changes

{mcdetect-2.0.2.dist-info → mcdetect-2.0.4.dist-info}/top_level.txt
File without changes