mcDETECT 1.0.12__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcDETECT might be problematic. Click here for more details.
- mcDETECT/__init__.py +3 -2
- mcDETECT/model.py +225 -59
- mcDETECT/utils.py +145 -0
- mcdetect-2.0.1.dist-info/METADATA +40 -0
- mcdetect-2.0.1.dist-info/RECORD +8 -0
- {mcdetect-1.0.12.dist-info → mcdetect-2.0.1.dist-info}/WHEEL +1 -1
- mcdetect-1.0.12.dist-info/METADATA +0 -39
- mcdetect-1.0.12.dist-info/RECORD +0 -7
- {mcdetect-1.0.12.dist-info → mcdetect-2.0.1.dist-info/licenses}/LICENSE +0 -0
- {mcdetect-1.0.12.dist-info → mcdetect-2.0.1.dist-info}/top_level.txt +0 -0
mcDETECT/__init__.py
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
__version__ = "
|
|
2
|
-
from .
|
|
1
|
+
__version__ = "2.0.1"
|
|
2
|
+
from .utils import *
|
|
3
|
+
from .model import mcDETECT, spot_neuron, spot_granule, neighbor_granule, neuron_embedding_one_hot, neuron_embedding_spatial_weight
|
mcDETECT/model.py
CHANGED
|
@@ -9,34 +9,9 @@ from scipy.spatial import cKDTree
|
|
|
9
9
|
from scipy.stats import poisson
|
|
10
10
|
from shapely.geometry import Point
|
|
11
11
|
from sklearn.cluster import DBSCAN
|
|
12
|
+
from sklearn.preprocessing import OneHotEncoder
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
def closest(lst, K):
|
|
15
|
-
return lst[min(range(len(lst)), key = lambda i: abs(lst[i] - K))]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def make_tree(d1 = None, d2 = None, d3 = None):
|
|
19
|
-
active_dimensions = [dimension for dimension in [d1, d2, d3] if dimension is not None]
|
|
20
|
-
if len(active_dimensions) == 1:
|
|
21
|
-
points = np.c_[active_dimensions[0].ravel()]
|
|
22
|
-
elif len(active_dimensions) == 2:
|
|
23
|
-
points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel()]
|
|
24
|
-
elif len(active_dimensions) == 3:
|
|
25
|
-
points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel(), active_dimensions[2].ravel()]
|
|
26
|
-
return cKDTree(points)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def make_rtree(spheres):
|
|
30
|
-
p = index.Property()
|
|
31
|
-
idx = index.Index(properties = p)
|
|
32
|
-
for i, sphere in enumerate(spheres.itertuples()):
|
|
33
|
-
center = Point(sphere.sphere_x, sphere.sphere_y)
|
|
34
|
-
bounds = (center.x - sphere.sphere_r,
|
|
35
|
-
center.y - sphere.sphere_r,
|
|
36
|
-
center.x + sphere.sphere_r,
|
|
37
|
-
center.y + sphere.sphere_r)
|
|
38
|
-
idx.insert(i, bounds)
|
|
39
|
-
return idx
|
|
14
|
+
from .utils import *
|
|
40
15
|
|
|
41
16
|
|
|
42
17
|
class mcDETECT:
|
|
@@ -100,7 +75,7 @@ class mcDETECT:
|
|
|
100
75
|
def dbscan(self, target_names = None, write_csv = False, write_path = "./"):
|
|
101
76
|
|
|
102
77
|
if self.type != "Xenium":
|
|
103
|
-
z_grid = list(
|
|
78
|
+
z_grid = list(self.transcripts["global_z"].unique())
|
|
104
79
|
z_grid.sort()
|
|
105
80
|
|
|
106
81
|
if target_names is None:
|
|
@@ -148,7 +123,7 @@ class mcDETECT:
|
|
|
148
123
|
other_trans = others.iloc[other_idx]
|
|
149
124
|
other_in_nucleus = np.sum(other_trans["overlaps_nucleus"])
|
|
150
125
|
other_size = other_trans.shape[0]
|
|
151
|
-
other_comp = len(
|
|
126
|
+
other_comp = len(other_trans["target"].unique())
|
|
152
127
|
total_size = temp_size + other_size
|
|
153
128
|
total_comp = 1 + other_comp
|
|
154
129
|
local_score = (temp_in_nucleus + other_in_nucleus) / total_size
|
|
@@ -324,7 +299,7 @@ class mcDETECT:
|
|
|
324
299
|
def profile(self, synapse, genes = None, print_itr = False):
|
|
325
300
|
|
|
326
301
|
if genes is None:
|
|
327
|
-
genes = list(
|
|
302
|
+
genes = list(self.transcripts["target"].unique())
|
|
328
303
|
transcripts = self.transcripts
|
|
329
304
|
else:
|
|
330
305
|
transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
|
|
@@ -357,7 +332,7 @@ class mcDETECT:
|
|
|
357
332
|
def spot_expression(self, grid_len, genes = None):
|
|
358
333
|
|
|
359
334
|
if genes is None:
|
|
360
|
-
genes = list(
|
|
335
|
+
genes = list(self.transcripts["target"].unique())
|
|
361
336
|
transcripts = self.transcripts
|
|
362
337
|
else:
|
|
363
338
|
transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
|
|
@@ -401,35 +376,226 @@ class mcDETECT:
|
|
|
401
376
|
adata.var_keys = genes
|
|
402
377
|
return adata
|
|
403
378
|
|
|
379
|
+
|
|
380
|
+
# [MAIN] anndata, spot-level neuron metadata
|
|
381
|
+
def spot_neuron(adata_neuron, spot, grid_len = 50, neuron_loc_key = ["global_x", "global_y"], spot_loc_key = ["global_x", "global_y"]):
    """Count the neurons that fall inside each spot's square grid cell.

    Parameters
    ----------
    adata_neuron
        Object whose ``.obs`` table holds one row per neuron and the two
        coordinate columns named in ``neuron_loc_key`` (presumably an
        AnnData object; only ``.obs`` is read).
    spot
        Object providing ``.copy()`` and an ``.obs`` table with the two
        coordinate columns named in ``spot_loc_key``.
    grid_len
        Side length of the square cell centered on each spot.
    neuron_loc_key, spot_loc_key
        ``[x_column, y_column]`` names. The list defaults are never mutated.

    Returns
    -------
    A copy of ``spot`` whose ``.obs`` gains ``"indicator"`` (1 if at least
    one neuron lies in the cell, else 0) and ``"neuron_count"``.
    """

    spot = spot.copy()
    half_len = grid_len / 2

    # The neuron table is only read, so pull the coordinates out once
    # instead of copying the whole object and re-indexing it per spot.
    neuron_x = adata_neuron.obs[neuron_loc_key[0]].to_numpy()
    neuron_y = adata_neuron.obs[neuron_loc_key[1]].to_numpy()
    spot_x = spot.obs[spot_loc_key[0]].to_numpy()
    spot_y = spot.obs[spot_loc_key[1]].to_numpy()

    indicator, neuron_count = [], []

    for x, y in zip(spot_x, spot_y):
        # Half-open cell [center - half, center + half), matching spot_granule:
        # a neuron sitting exactly on a shared edge belongs to exactly one
        # cell instead of being dropped by two strict inequalities.
        inside = (
            (neuron_x >= x - half_len) & (neuron_x < x + half_len)
            & (neuron_y >= y - half_len) & (neuron_y < y + half_len)
        )
        n_inside = int(inside.sum())
        indicator.append(int(n_inside > 0))
        neuron_count.append(n_inside)

    spot.obs["indicator"] = indicator
    spot.obs["neuron_count"] = neuron_count
    return spot
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# [MAIN] anndata, spot-level granule metadata
|
|
405
|
+
def spot_granule(granule, spot, grid_len = 50, gnl_loc_key = ["sphere_x", "sphere_y"], spot_loc_key = ["global_x", "global_y"]):
    """Summarize the granules that fall inside each spot's square grid cell.

    Parameters
    ----------
    granule
        Table (indexed like a pandas DataFrame) with the two coordinate
        columns named in ``gnl_loc_key`` plus ``"sphere_r"``, ``"size"`` and
        ``"in_nucleus"``. Only read.
    spot
        Object providing ``.copy()`` and an ``.obs`` table with the two
        coordinate columns named in ``spot_loc_key``.
    grid_len
        Side length of the square cell centered on each spot.
    gnl_loc_key, spot_loc_key
        ``[x_column, y_column]`` names. The list defaults are never mutated.

    Returns
    -------
    A copy of ``spot`` whose ``.obs`` gains ``"indicator"``, ``"gnl_count"``,
    ``"gnl_radius"``, ``"gnl_size"`` and ``"gnl_score"``. The last three are
    NaN-ignoring means over the granules in the cell, or 0 for an empty cell.
    """

    spot = spot.copy()
    half_len = grid_len / 2

    # Extract every column once; the granule table is never modified, so no
    # defensive copy is needed and the loop works on plain arrays.
    gnl_x = granule[gnl_loc_key[0]].to_numpy()
    gnl_y = granule[gnl_loc_key[1]].to_numpy()
    gnl_r = granule["sphere_r"].to_numpy()
    gnl_size = granule["size"].to_numpy()
    gnl_score = granule["in_nucleus"].to_numpy()
    spot_x = spot.obs[spot_loc_key[0]].to_numpy()
    spot_y = spot.obs[spot_loc_key[1]].to_numpy()

    indicator, granule_count, granule_radius, granule_size, granule_score = [], [], [], [], []

    for x, y in zip(spot_x, spot_y):
        # Half-open cell [center - half, center + half) in both axes.
        inside = (
            (gnl_x >= x - half_len) & (gnl_x < x + half_len)
            & (gnl_y >= y - half_len) & (gnl_y < y + half_len)
        )
        n_inside = int(inside.sum())
        indicator.append(int(n_inside > 0))
        granule_count.append(n_inside)

        if n_inside == 0:
            granule_radius.append(0)
            granule_size.append(0)
            granule_score.append(0)
        else:
            granule_radius.append(np.nanmean(gnl_r[inside]))
            granule_size.append(np.nanmean(gnl_size[inside]))
            granule_score.append(np.nanmean(gnl_score[inside]))

    spot.obs["indicator"] = indicator
    spot.obs["gnl_count"] = granule_count
    spot.obs["gnl_radius"] = granule_radius
    spot.obs["gnl_size"] = granule_size
    spot.obs["gnl_score"] = granule_score
    return spot
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
# [MAIN] anndata, neuron-granule colocalization
|
|
440
|
+
def neighbor_granule(adata_neuron, granule_adata, radius = 10, sigma = None, loc_key = ["global_x", "global_y"]):
    """Attach neighboring-granule statistics to every neuron.

    Parameters
    ----------
    adata_neuron
        Neuron-level object (presumably AnnData) with coordinates in
        ``.obs[loc_key]``. A copy is annotated and returned.
    granule_adata
        Granule-level object with coordinates in ``.obs[loc_key]`` and an
        expression matrix ``.X`` with one row per granule. Only read.
    radius
        Search radius around each neuron.
    sigma
        Bandwidth of the Gaussian distance kernel used to weight granule
        expression. Defaults to ``radius / 2``.
    loc_key
        ``[x_column, y_column]`` names shared by both objects.

    Returns
    -------
    (adata_neuron, spatial_df)
        The annotated copy gains ``.obs["neighbor_gnl_count"]``,
        ``.uns["neighbor_gnl_indices"]`` and
        ``.obsm["weighted_gnl_expression"]``. ``spatial_df`` holds one row of
        spatial features per neuron, indexed by ``adata_neuron.obs_names``.
    """

    adata_neuron = adata_neuron.copy()

    if sigma is None:
        sigma = radius / 2

    # neuron and granule coordinates
    neuron_coords = adata_neuron.obs[loc_key].values
    gnl_coords = granule_adata.obs[loc_key].values

    # make tree and query neighboring granules for each neuron
    tree = make_tree(d1 = gnl_coords[:, 0], d2 = gnl_coords[:, 1])
    neighbor_indices = tree.query_ball_point(neuron_coords, r = radius)

    # record count and indices
    adata_neuron.obs["neighbor_gnl_count"] = np.array([len(indices) for indices in neighbor_indices])
    adata_neuron.uns["neighbor_gnl_indices"] = neighbor_indices

    # The weighted rows are averages of granule expression, so the width must
    # follow the granule matrix rather than the neuron matrix.
    weighted_expr = np.zeros((adata_neuron.n_obs, granule_adata.n_vars))
    features = []

    for i, gnl_idx in enumerate(neighbor_indices):
        feats = {"n_granules": len(gnl_idx)}

        if len(gnl_idx) == 0:
            feats.update({"mean_distance": np.nan, "std_distance": np.nan, "radius_max": np.nan, "radius_min": np.nan, "density": 0, "center_offset_norm": np.nan, "anisotropy_ratio": np.nan})
            features.append(feats)
            continue

        gnl_pos = gnl_coords[gnl_idx]
        neuron_pos = neuron_coords[i]
        dists = np.linalg.norm(gnl_pos - neuron_pos, axis = 1)

        # ---------- neighboring granule expression matrix ---------- #
        weights = np.exp(- (dists ** 2) / (2 * sigma ** 2))
        weights = weights / weights.sum()
        expr = granule_adata.X[gnl_idx]
        if hasattr(expr, "toarray"):
            # np.average cannot consume a sparse matrix; densify just this slice
            expr = expr.toarray()
        weighted_expr[i] = np.average(expr, axis = 0, weights = weights)

        # ---------- neighboring granule spatial feature ---------- #
        feats["mean_distance"] = dists.mean()
        feats["std_distance"] = dists.std()
        feats["radius_max"] = dists.max()
        feats["radius_min"] = dists.min()
        feats["density"] = len(gnl_idx) / (np.pi * radius ** 2)
        centroid = gnl_pos.mean(axis = 0)
        feats["center_offset_norm"] = np.linalg.norm(centroid - neuron_pos)

        # A covariance needs at least two points; with one granule np.cov
        # yields NaNs (and a warning), so report NaN directly.
        anisotropy = np.nan
        if len(gnl_idx) >= 2:
            eigvals = np.linalg.eigvalsh(np.cov((gnl_pos - neuron_pos).T))
            if np.min(eigvals) > 0:
                anisotropy = np.max(eigvals) / np.min(eigvals)
        feats["anisotropy_ratio"] = anisotropy

        features.append(feats)

    adata_neuron.obsm["weighted_gnl_expression"] = weighted_expr

    spatial_df = pd.DataFrame(features, index = adata_neuron.obs_names)
    return adata_neuron, spatial_df
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# [MAIN] numpy array, one-hot subtype embeddings of each neuron's k nearest granules
|
|
513
|
+
def neuron_embedding_one_hot(adata_neuron, granule_adata, k = 10, radius = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
    """One-hot encode the subtypes of each neuron's k nearest granules.

    Parameters
    ----------
    adata_neuron, granule_adata
        Objects (presumably AnnData) with coordinates in ``.obs[loc_key]``;
        ``granule_adata.obs[gnl_subtype_key]`` holds the granule subtype.
        Both are only read.
    k
        Number of nearest granules encoded per neuron.
    radius
        Granules farther than this are not used; their slot is filled with
        ``padding_value``.
    padding_value
        Category used for empty neighbor slots. It is appended to the
        category list if no granule already carries it.

    Returns
    -------
    (embeddings, categories)
        ``embeddings`` has shape ``(n_neurons, k, n_categories)``;
        ``categories`` is the encoder's category array in column order.
    """

    # neuron and granule coordinates, granule subtypes
    neuron_coords = adata_neuron.obs[loc_key].to_numpy()
    granule_coords = granule_adata.obs[loc_key].to_numpy()
    granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()

    # include padding category
    unique_subtypes = np.unique(granule_subtypes).tolist()
    if padding_value not in unique_subtypes:
        unique_subtypes.append(padding_value)

    # scikit-learn renamed `sparse` to `sparse_output` (1.2) and later removed
    # the old keyword, so try the new spelling first and fall back.
    try:
        encoder = OneHotEncoder(categories = [unique_subtypes], sparse_output = False, handle_unknown = "ignore")
    except TypeError:
        encoder = OneHotEncoder(categories = [unique_subtypes], sparse = False, handle_unknown = "ignore")
    category_column = np.array(unique_subtypes).reshape(-1, 1)
    encoder.fit(category_column)
    S = len(unique_subtypes)

    # Encode every category once instead of calling the encoder per slot.
    onehot_rows = encoder.transform(category_column)
    onehot_of = {subtype: onehot_rows[pos] for pos, subtype in enumerate(unique_subtypes)}

    # k-d tree
    tree = make_tree(d1 = granule_coords[:, 0], d2 = granule_coords[:, 1])
    distances, indices = tree.query(neuron_coords, k = k, distance_upper_bound = radius)

    # initialize output
    n_neurons = neuron_coords.shape[0]
    n_granules = granule_coords.shape[0]
    # query() drops the neighbor axis when k == 1; restore a uniform 2-D shape
    distances = np.reshape(distances, (n_neurons, k))
    indices = np.reshape(indices, (n_neurons, k))
    embeddings = np.zeros((n_neurons, k, S), dtype = float)

    for i in range(n_neurons):
        # The slot index must not reuse the name `k`: rebinding it would
        # shrink the loop range for every neuron after the first.
        for slot in range(k):
            idx = indices[i, slot]
            # a missing neighbor is reported as index == n_granules / infinite distance
            if idx == n_granules or np.isinf(distances[i, slot]):
                subtype = padding_value
            else:
                subtype = granule_subtypes[idx]
            embeddings[i, slot, :] = onehot_of[subtype]

    return embeddings, encoder.categories_[0]
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# [MAIN] numpy array, distance-weighted granule-subtype composition embedding per neuron
|
|
555
|
+
def neuron_embedding_spatial_weight(adata_neuron, granule_adata, radius = 10, sigma = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
    """Embed each neuron as a distance-weighted composition of nearby granule subtypes.

    Parameters
    ----------
    adata_neuron, granule_adata
        Objects (presumably AnnData) with coordinates in ``.obs[loc_key]``;
        ``granule_adata.obs[gnl_subtype_key]`` holds the granule subtype.
        Both are only read.
    radius
        Granules within this distance of a neuron contribute to its row.
    sigma
        Scale of the exponential kernel ``exp(-distance / sigma)``.
    padding_value
        Category assigned (as a pure one-hot row) to neurons with no granule
        in range. It is appended to the category list if absent.

    Returns
    -------
    (embeddings, categories)
        ``embeddings`` has shape ``(n_neurons, n_categories)`` with each row
        summing to 1; ``categories`` is the encoder's category array.
    """

    # neuron and granule coordinates, granule subtypes
    neuron_coords = adata_neuron.obs[loc_key].to_numpy()
    granule_coords = granule_adata.obs[loc_key].to_numpy()
    granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()

    # include padding category
    unique_subtypes = np.unique(granule_subtypes).tolist()
    if padding_value not in unique_subtypes:
        unique_subtypes.append(padding_value)

    # scikit-learn renamed `sparse` to `sparse_output` (1.2) and later removed
    # the old keyword, so try the new spelling first and fall back.
    try:
        encoder = OneHotEncoder(categories = [unique_subtypes], sparse_output = False, handle_unknown = "ignore")
    except TypeError:
        encoder = OneHotEncoder(categories = [unique_subtypes], sparse = False, handle_unknown = "ignore")
    encoder.fit(np.array(unique_subtypes).reshape(-1, 1))
    S = len(unique_subtypes)

    # loop-invariant: the row used for neurons without any neighbor
    padding_row = encoder.transform([[padding_value]])[0]

    # k-d tree
    tree = make_tree(d1 = granule_coords[:, 0], d2 = granule_coords[:, 1])
    all_neighbors = tree.query_ball_point(neuron_coords, r = radius)

    # initialize output
    n_neurons = neuron_coords.shape[0]
    embeddings = np.zeros((n_neurons, S), dtype = float)

    for i, neighbor_indices in enumerate(all_neighbors):
        if len(neighbor_indices) == 0:
            # no neighbors, assign to padding subtype
            embeddings[i] = padding_row
            continue

        # get neighbor distances and kernel weights
        neighbor_coords = granule_coords[neighbor_indices]
        dists = np.linalg.norm(neuron_coords[i] - neighbor_coords, axis = 1)
        weights = np.exp(- dists / sigma)

        # encode subtypes to one-hot and weight them
        subtypes = granule_subtypes[neighbor_indices]
        onehots = encoder.transform(subtypes.reshape(-1, 1))
        weighted_sum = (weights[:, np.newaxis] * onehots).sum(axis = 0)

        # normalize to make it a composition vector
        embeddings[i] = weighted_sum / weights.sum()

    return embeddings, encoder.categories_[0]
|
mcDETECT/utils.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import seaborn as sns
|
|
5
|
+
from matplotlib import colors as mcolors
|
|
6
|
+
from rtree import index
|
|
7
|
+
from scipy.spatial import cKDTree
|
|
8
|
+
from scipy.stats import rankdata
|
|
9
|
+
from shapely.geometry import Point
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def find_threshold_index(cumsum_list, threshold = 0.99):
    """Return the first index whose cumulative value reaches ``threshold`` of the total.

    Parameters
    ----------
    cumsum_list
        Sequence of cumulative sums; its last element is taken as the total.
    threshold
        Fraction of the total that must be reached.

    Returns
    -------
    The smallest index ``i`` with ``cumsum_list[i] >= threshold * total``,
    or ``None`` when the sequence is empty or no element qualifies.
    """
    # An empty sequence has no total; report "not found" instead of raising.
    if len(cumsum_list) == 0:
        return None

    cutoff = threshold * cumsum_list[-1]
    for i, value in enumerate(cumsum_list):
        if value >= cutoff:
            return i
    return None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def closest(lst, K):
    """Return the element of ``lst`` nearest to ``K``.

    Ties are resolved in favor of the element that appears first. Raises
    ``ValueError`` when ``lst`` is empty.
    """
    # min() with a key scans the values directly; no index bookkeeping needed.
    return min(lst, key = lambda value: abs(value - K))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_tree(d1 = None, d2 = None, d3 = None):
    """Build a ``cKDTree`` from up to three coordinate arrays.

    Parameters
    ----------
    d1, d2, d3
        Coordinate arrays, one per dimension. ``None`` entries are skipped,
        so the tree dimensionality equals the number of arrays supplied.
        Each array is flattened before use.

    Returns
    -------
    A ``cKDTree`` over the points formed by pairing the arrays element-wise.

    Raises
    ------
    ValueError
        If no coordinate array is supplied.
    """
    active_dimensions = [dimension for dimension in [d1, d2, d3] if dimension is not None]
    if not active_dimensions:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("make_tree requires at least one coordinate array")

    # np.asarray lets lists and pandas objects through without relying on a
    # .ravel() method on the input; one column per active dimension.
    points = np.column_stack([np.asarray(dimension).ravel() for dimension in active_dimensions])
    return cKDTree(points)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def make_rtree(spheres):
    """Build an R-tree index over the 2-D bounding boxes of ``spheres``.

    Parameters
    ----------
    spheres
        Table exposing ``itertuples()`` whose rows carry ``sphere_x``,
        ``sphere_y`` and ``sphere_r`` attributes.

    Returns
    -------
    An ``rtree`` index in which entry ``i`` (the row's position) is stored
    with bounds ``(x - r, y - r, x + r, y + r)``.
    """
    idx = index.Index(properties = index.Property())
    for i, sphere in enumerate(spheres.itertuples()):
        # Read the center directly; wrapping it in a geometry object only to
        # read x and y back adds an allocation per row.
        x, y, r = sphere.sphere_x, sphere.sphere_y, sphere.sphere_r
        idx.insert(i, (x - r, y - r, x + r, y + r))
    return idx
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def scale(array, max = 1):
    """Min-max rescale ``array`` to the range ``[0, max]``.

    Parameters
    ----------
    array
        Numeric array-like supporting element-wise arithmetic.
    max
        Upper bound of the output range (the name shadows the builtin but is
        kept because callers may pass it by keyword).

    Returns
    -------
    The rescaled values. A constant input has no spread to rescale and is
    mapped to all zeros instead of dividing by zero.
    """
    low = np.min(array)
    span = np.max(array) - low
    if span == 0:
        # array - low is already all zeros; the product forces a float result
        # like the regular branch produces.
        return (array - low) * 0.0
    return (array - low) / span * max
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def weighted_corr(estimated, actual, weights):
    """Weighted Pearson correlation between ``estimated`` and ``actual``.

    All three arguments are converted to NumPy arrays. Means, covariance and
    variances are each weighted by ``weights`` and normalized by the weight
    total.
    """
    est = np.array(estimated)
    act = np.array(actual)
    w = np.array(weights)
    w_total = np.sum(w)

    # deviations from the weighted means
    est_dev = est - np.average(est, weights = w)
    act_dev = act - np.average(act, weights = w)

    # weighted second moments
    covariance = np.sum(w * est_dev * act_dev) / w_total
    est_variance = np.sum(w * est_dev ** 2) / w_total
    act_variance = np.sum(w * act_dev ** 2) / w_total

    return covariance / np.sqrt(est_variance * act_variance)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def weighted_spearmanr(A, B, weights):
    """Weighted Spearman correlation between ``A`` and ``B``.

    Both inputs are replaced by their ranks (``scipy.stats.rankdata``) and a
    weighted Pearson correlation is then computed on those ranks.
    """
    w = np.array(weights)
    w_total = np.sum(w)

    # rank the data
    ranks_a = rankdata(np.array(A))
    ranks_b = rankdata(np.array(B))

    # deviations of the ranks from their weighted means
    dev_a = ranks_a - np.average(ranks_a, weights=w)
    dev_b = ranks_b - np.average(ranks_b, weights=w)

    # weighted second moments of the ranks
    covariance = np.sum(w * dev_a * dev_b) / w_total
    variance_a = np.sum(w * dev_a**2) / w_total
    variance_b = np.sum(w * dev_b**2) / w_total

    return covariance / np.sqrt(variance_a * variance_b)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def assign_palette_to_adata(adata, obs_key = "granule_expr_cluster_hierarchical", cmap_name = "tab10"):
    """Store one hex color per category of ``adata.obs[obs_key]`` in ``adata.uns``.

    Parameters
    ----------
    adata
        Object (presumably AnnData) providing ``.copy()``, ``.obs`` and ``.uns``.
    obs_key
        Column of ``.obs`` to color; converted to categorical if it is not already.
    cmap_name
        Name passed to ``plt.get_cmap``; also passed to ``sns.color_palette``
        when the colormap cannot supply enough discrete colors.

    Returns
    -------
    A copy of ``adata`` with ``.uns[f"{obs_key}_colors"]`` set to a list of
    hex strings, one per category in category order.
    """

    adata = adata.copy()

    # ensure the column is categorical (isinstance check replaces the
    # deprecated pd.api.types.is_categorical_dtype)
    if not isinstance(adata.obs[obs_key].dtype, pd.CategoricalDtype):
        adata.obs[obs_key] = adata.obs[obs_key].astype("category")

    # extract categories and number of levels
    categories = adata.obs[obs_key].cat.categories
    n_categories = len(categories)

    # choose or extend the colormap; only listed colormaps expose `.colors`,
    # so continuous ones fall through to the seaborn palette
    base_colors = getattr(plt.get_cmap(cmap_name), "colors", None)
    if base_colors is None or n_categories > len(base_colors):
        color_palette = sns.color_palette(cmap_name, n_categories)
    else:
        color_palette = base_colors[:n_categories]

    # convert to hex and assign
    adata.uns[f"{obs_key}_colors"] = [mcolors.to_hex(c) for c in color_palette]

    return adata
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def p_val_to_star(p):
    """Map a p-value to a significance label.

    Returns ``"ns"`` for ``p > 0.05`` or a NaN p-value, ``"*"`` for
    ``0.01 < p <= 0.05``, ``"**"`` for ``0.001 < p <= 0.01`` and ``"***"``
    otherwise.
    """
    # Every comparison with NaN is False, so without this guard an undefined
    # p-value would fall through to the most significant label.
    if np.isnan(p) or p > 0.05:
        return "ns"
    elif p > 0.01:
        return "*"
    elif p > 0.001:
        return "**"
    else:
        return "***"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def top_columns_above_threshold(row, threshold=0.5):
    """Return the labels of the largest entries whose running total first exceeds ``threshold``.

    Parameters
    ----------
    row
        pandas Series of values.
    threshold
        Cumulative total that the selected entries must exceed.

    Returns
    -------
    Labels in descending value order, up to and including the entry at which
    the cumulative sum first exceeds ``threshold``. If the total never
    exceeds it, every label is returned.
    """
    sorted_row = row.sort_values(ascending=False)
    exceeds = (sorted_row.cumsum() > threshold).to_numpy()

    # On an all-False mask "first True" is undefined (idxmax would silently
    # pick the first entry), so the case is handled explicitly.
    if not exceeds.any():
        return sorted_row.index.tolist()

    # Position of the first crossing; slice up to and including it.
    n_keep = int(exceeds.argmax()) + 1
    return sorted_row.index[:n_keep].tolist()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcDETECT
|
|
3
|
+
Version: 2.0.1
|
|
4
|
+
Summary: Uncovering the dark transcriptome in polarized neuronal compartments with mcDETECT
|
|
5
|
+
Home-page: https://github.com/chen-yang-yuan/mcDETECT
|
|
6
|
+
Author: Chenyang Yuan
|
|
7
|
+
Author-email: chenyang.yuan@emory.edu
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: anndata
|
|
15
|
+
Requires-Dist: miniball
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: pandas
|
|
18
|
+
Requires-Dist: rtree
|
|
19
|
+
Requires-Dist: scanpy
|
|
20
|
+
Requires-Dist: scikit-learn
|
|
21
|
+
Requires-Dist: scipy
|
|
22
|
+
Requires-Dist: shapely
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
Dynamic: requires-dist
|
|
31
|
+
Dynamic: requires-python
|
|
32
|
+
Dynamic: summary
|
|
33
|
+
|
|
34
|
+
# mcDETECT
|
|
35
|
+
|
|
36
|
+
## Uncovering the dark transcriptome in polarized neuronal compartments with mcDETECT
|
|
37
|
+
|
|
38
|
+
#### Chenyang Yuan, Krupa Patel, Hongshun Shi, Hsiao-Lin V. Wang, Feng Wang, Ronghua Li, Yangping Li, Victor G. Corces, Hailing Shi, Sulagna Das, Jindan Yu, Peng Jin, Bing Yao* and Jian Hu*
|
|
39
|
+
|
|
40
|
+
mcDETECT is a computational framework designed to study the dark transcriptome related to polarized compartments in the brain using *in situ* spatial transcriptomics (iST) data. It begins by examining the subcellular distribution of mRNAs in an iST sample. Each mRNA molecule is treated as a distinct point with its own 3D spatial coordinates considering the thickness of the sample. Unlike many cell-type marker genes, which are typically found within the nucleus or soma, compartmentalized mRNAs often form small aggregates outside the soma. mcDETECT uses a density-based clustering approach to identify these extrasomatic aggregates. This involves calculating the Euclidean distance between mRNA points and defining the neighborhood of each point within a specified search radius. Points are then categorized as core points, border points, or noise points based on their reachability from neighboring points. mcDETECT recognizes each connected bundle of core and border points as an mRNA aggregate. To minimize false positives, it excludes aggregates that substantially overlap with somata, which are estimated by dilating the nuclear masks derived from DAPI staining. mcDETECT then repeats this process for multiple granule markers, merging aggregates from different markers that exhibit high spatial overlap. After aggregating across all markers, an additional filtering step removes aggregates containing mRNAs from negative control genes, which are known to be enriched exclusively in nuclei and somata. The remaining aggregates are considered individual RNA granules. mcDETECT then computes the minimum enclosing sphere for each aggregate to connect neighboring mRNA molecules from all measured genes and summarizes their counts, thereby defining the spatial transcriptome profile of individual RNA granules.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
mcDETECT/__init__.py,sha256=o9fQTRgcHPisSCcv0Cy-AtdiTSWrj-ITBK_FQxfnmKE,174
|
|
2
|
+
mcDETECT/model.py,sha256=-r2_Ve0wxOALxiFk0REa58WjMea495yLZ6oXT-WWekw,28046
|
|
3
|
+
mcDETECT/utils.py,sha256=kKw7KnrS-0llqtT32S_PDkag1jk4CWYkSm-FZeIJFAw,4510
|
|
4
|
+
mcdetect-2.0.1.dist-info/licenses/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
|
|
5
|
+
mcdetect-2.0.1.dist-info/METADATA,sha256=qMO7hrWgabHHp1_UxlDvsLmQYaBC7Nf85RPNxyBvA8o,3016
|
|
6
|
+
mcdetect-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
mcdetect-2.0.1.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
|
|
8
|
+
mcdetect-2.0.1.dist-info/RECORD,,
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: mcDETECT
|
|
3
|
-
Version: 1.0.12
|
|
4
|
-
Summary: mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
|
|
5
|
-
Home-page: https://github.com/chen-yang-yuan/mcDETECT
|
|
6
|
-
Author: Chenyang Yuan
|
|
7
|
-
Author-email: chenyang.yuan@emory.edu
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.6
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
Requires-Dist: anndata
|
|
15
|
-
Requires-Dist: miniball
|
|
16
|
-
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: pandas
|
|
18
|
-
Requires-Dist: rtree
|
|
19
|
-
Requires-Dist: scanpy
|
|
20
|
-
Requires-Dist: scikit-learn
|
|
21
|
-
Requires-Dist: scipy
|
|
22
|
-
Requires-Dist: shapely
|
|
23
|
-
Dynamic: author
|
|
24
|
-
Dynamic: author-email
|
|
25
|
-
Dynamic: classifier
|
|
26
|
-
Dynamic: description
|
|
27
|
-
Dynamic: description-content-type
|
|
28
|
-
Dynamic: home-page
|
|
29
|
-
Dynamic: requires-dist
|
|
30
|
-
Dynamic: requires-python
|
|
31
|
-
Dynamic: summary
|
|
32
|
-
|
|
33
|
-
# mcDETECT
|
|
34
|
-
|
|
35
|
-
## mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
|
|
36
|
-
|
|
37
|
-
#### Chenyang Yuan, Krupa Patel, Hongshun Shi, Hsiao-Lin V. Wang, Feng Wang, Ronghua Li, Yangping Li, Victor G. Corces, Hailing Shi, Sulagna Das, Jindan Yu, Peng Jin, Bing Yao* and Jian Hu*
|
|
38
|
-
|
|
39
|
-
mcDETECT is a computational framework designed to identify and profile individual synapses using *in situ* spatial transcriptomics (iST) data. It starts by examining the subcellular distribution of synaptic mRNAs in an iST sample. Unlike cell-type specific marker genes, which are typically found within nuclei, mRNAs of synaptic markers often form small aggregations outside the nuclei. mcDETECT uses a density-based clustering approach to identify these extranuclear aggregations. This involves calculating the Euclidean distance between mRNA points and defining the neighborhood of each point within a specified search radius. Points are then categorized into core points, border points, and noise points based on their reachability from neighboring points. mcDETECT recognizes each bundle of core and border points as a synaptic aggregation. To minimize false positives, it excludes aggregations that significantly overlap with nuclei identified by DAPI staining. Subsequently, mcDETECT repeats this process for multiple synaptic markers, merging aggregations from different markers with high overlaps. After encompassing all markers, an additional filtering step is performed to remove aggregations that contain mRNAs from negative control genes, which are known to be enriched only in nuclei. The remaining aggregations are considered individual synaptic aggregations. mcDETECT then uses the minimum enclosing sphere of each aggregation to gather all mRNA molecules and summarizes their counts for all measured genes to define the spatial transcriptome profile of individual synapses.
|
mcdetect-1.0.12.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
mcDETECT/__init__.py,sha256=8DC3jJ35kT7b51bP9HtbDsCRc8_vT6nUaXCZBaSM5Tg,59
|
|
2
|
-
mcDETECT/model.py,sha256=pl6BOByor3Czj1UbxQX7_VzBUyNhz1tG_z7IGz2nR80,21462
|
|
3
|
-
mcdetect-1.0.12.dist-info/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
|
|
4
|
-
mcdetect-1.0.12.dist-info/METADATA,sha256=AJjMolAwV98Px9PioTv0U_iJl0ypTKMFRgSvCQbBkAg,2820
|
|
5
|
-
mcdetect-1.0.12.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
6
|
-
mcdetect-1.0.12.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
|
|
7
|
-
mcdetect-1.0.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|