mcDETECT 1.0.11__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcDETECT might be problematic. Click here for more details.
- mcDETECT/__init__.py +3 -2
- mcDETECT/model.py +227 -62
- mcDETECT/utils.py +145 -0
- {mcdetect-1.0.11.dist-info → mcdetect-2.0.0.dist-info}/METADATA +4 -3
- mcdetect-2.0.0.dist-info/RECORD +8 -0
- {mcdetect-1.0.11.dist-info → mcdetect-2.0.0.dist-info}/WHEEL +1 -1
- mcdetect-1.0.11.dist-info/RECORD +0 -7
- {mcdetect-1.0.11.dist-info → mcdetect-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {mcdetect-1.0.11.dist-info → mcdetect-2.0.0.dist-info}/top_level.txt +0 -0
mcDETECT/__init__.py
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
__version__ = "
|
|
2
|
-
from .
|
|
1
|
+
__version__ = "2.0.0"
|
|
2
|
+
from .utils import *
|
|
3
|
+
from .model import mcDETECT, spot_neuron, spot_granule, neighbor_granule, neuron_embedding_one_hot, neuron_embedding_spatial_weight
|
mcDETECT/model.py
CHANGED
|
@@ -9,34 +9,9 @@ from scipy.spatial import cKDTree
|
|
|
9
9
|
from scipy.stats import poisson
|
|
10
10
|
from shapely.geometry import Point
|
|
11
11
|
from sklearn.cluster import DBSCAN
|
|
12
|
+
from sklearn.preprocessing import OneHotEncoder
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
def closest(lst, K):
|
|
15
|
-
return lst[min(range(len(lst)), key = lambda i: abs(lst[i] - K))]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def make_tree(d1 = None, d2 = None, d3 = None):
|
|
19
|
-
active_dimensions = [dimension for dimension in [d1, d2, d3] if dimension is not None]
|
|
20
|
-
if len(active_dimensions) == 1:
|
|
21
|
-
points = np.c_[active_dimensions[0].ravel()]
|
|
22
|
-
elif len(active_dimensions) == 2:
|
|
23
|
-
points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel()]
|
|
24
|
-
elif len(active_dimensions) == 3:
|
|
25
|
-
points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel(), active_dimensions[2].ravel()]
|
|
26
|
-
return cKDTree(points)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def make_rtree(spheres):
|
|
30
|
-
p = index.Property()
|
|
31
|
-
idx = index.Index(properties = p)
|
|
32
|
-
for i, sphere in enumerate(spheres.itertuples()):
|
|
33
|
-
center = Point(sphere.sphere_x, sphere.sphere_y)
|
|
34
|
-
bounds = (center.x - sphere.sphere_r,
|
|
35
|
-
center.y - sphere.sphere_r,
|
|
36
|
-
center.x + sphere.sphere_r,
|
|
37
|
-
center.y + sphere.sphere_r)
|
|
38
|
-
idx.insert(i, bounds)
|
|
39
|
-
return idx
|
|
14
|
+
# Explicit relative import: this module lives inside the mcDETECT package, so a
# bare `from utils import *` only resolves if an unrelated top-level `utils`
# module happens to be importable.
from .utils import *
|
|
40
15
|
|
|
41
16
|
|
|
42
17
|
class mcDETECT:
|
|
@@ -100,7 +75,7 @@ class mcDETECT:
|
|
|
100
75
|
def dbscan(self, target_names = None, write_csv = False, write_path = "./"):
|
|
101
76
|
|
|
102
77
|
if self.type != "Xenium":
|
|
103
|
-
z_grid = list(
|
|
78
|
+
z_grid = list(self.transcripts["global_z"].unique())
|
|
104
79
|
z_grid.sort()
|
|
105
80
|
|
|
106
81
|
if target_names is None:
|
|
@@ -148,7 +123,7 @@ class mcDETECT:
|
|
|
148
123
|
other_trans = others.iloc[other_idx]
|
|
149
124
|
other_in_nucleus = np.sum(other_trans["overlaps_nucleus"])
|
|
150
125
|
other_size = other_trans.shape[0]
|
|
151
|
-
other_comp = len(
|
|
126
|
+
other_comp = len(other_trans["target"].unique())
|
|
152
127
|
total_size = temp_size + other_size
|
|
153
128
|
total_comp = 1 + other_comp
|
|
154
129
|
local_score = (temp_in_nucleus + other_in_nucleus) / total_size
|
|
@@ -167,9 +142,7 @@ class mcDETECT:
|
|
|
167
142
|
sphere = pd.DataFrame(list(zip(sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score)),
|
|
168
143
|
columns = ["sphere_x", "sphere_y", "sphere_z", "layer_z", "sphere_r", "size", "comp", "in_nucleus"])
|
|
169
144
|
sphere["gene"] = [j] * sphere.shape[0]
|
|
170
|
-
sphere
|
|
171
|
-
sphere["size"] = pd.to_numeric(sphere["size"])
|
|
172
|
-
sphere["comp"] = pd.to_numeric(sphere["comp"])
|
|
145
|
+
sphere = sphere.astype({"sphere_x": float, "sphere_y": float, "sphere_z": float, "layer_z": int, "sphere_r": float, "size": float, "comp": float, "in_nucleus": int, "gene": str})
|
|
173
146
|
|
|
174
147
|
# split low- and high-in-nucleus spheres
|
|
175
148
|
sphere_low = sphere[(sphere["sphere_r"] < self.size_thr) & (sphere["in_nucleus"] < self.in_nucleus_thr[0])]
|
|
@@ -326,7 +299,7 @@ class mcDETECT:
|
|
|
326
299
|
def profile(self, synapse, genes = None, print_itr = False):
|
|
327
300
|
|
|
328
301
|
if genes is None:
|
|
329
|
-
genes = list(
|
|
302
|
+
genes = list(self.transcripts["target"].unique())
|
|
330
303
|
transcripts = self.transcripts
|
|
331
304
|
else:
|
|
332
305
|
transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
|
|
@@ -347,6 +320,7 @@ class mcDETECT:
|
|
|
347
320
|
# construct spatial transcriptome profile
|
|
348
321
|
adata = anndata.AnnData(X = np.transpose(X), obs = synapse)
|
|
349
322
|
adata.obs["synapse_id"] = ["syn_{}".format(i) for i in range(synapse.shape[0])]
|
|
323
|
+
adata.obs["synapse_id"] = adata.obs["synapse_id"].astype(str)
|
|
350
324
|
adata.obs.rename(columns = {"sphere_x": "global_x", "sphere_y": "global_y", "sphere_z": "global_z"}, inplace = True)
|
|
351
325
|
adata.var["genes"] = genes
|
|
352
326
|
adata.var_names = genes
|
|
@@ -358,7 +332,7 @@ class mcDETECT:
|
|
|
358
332
|
def spot_expression(self, grid_len, genes = None):
|
|
359
333
|
|
|
360
334
|
if genes is None:
|
|
361
|
-
genes = list(
|
|
335
|
+
genes = list(self.transcripts["target"].unique())
|
|
362
336
|
transcripts = self.transcripts
|
|
363
337
|
else:
|
|
364
338
|
transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
|
|
@@ -402,35 +376,226 @@ class mcDETECT:
|
|
|
402
376
|
adata.var_keys = genes
|
|
403
377
|
return adata
|
|
404
378
|
|
|
379
|
+
|
|
380
|
+
# [MAIN] anndata, spot-level neuron metadata
|
|
381
|
+
def spot_neuron(adata_neuron, spot, grid_len = 50, neuron_loc_key = ["global_x", "global_y"], spot_loc_key = ["global_x", "global_y"]):
    """Count the neurons that fall inside each square spot.

    Parameters
    ----------
    adata_neuron : object exposing ``.obs`` (presumably an AnnData -- confirm)
        ``.obs`` must contain the two columns named in ``neuron_loc_key``.
        It is only read, never modified.
    spot : object exposing ``.copy()`` and ``.obs``
        ``.obs`` must contain the spot-centre columns named in ``spot_loc_key``.
    grid_len : number
        Side length of a spot; each spot covers ``centre +/- grid_len / 2``.
    neuron_loc_key, spot_loc_key : sequence of two column names
        x and y column names for neurons and spots respectively.

    Returns
    -------
    A copy of ``spot`` whose ``.obs`` gains ``"indicator"`` (1 if the spot
    holds at least one neuron, else 0) and ``"neuron_count"``.
    """
    spot = spot.copy()
    half_len = grid_len / 2

    # pull the coordinate columns out once instead of filtering a DataFrame per spot
    neuron_x = adata_neuron.obs[neuron_loc_key[0]].to_numpy()
    neuron_y = adata_neuron.obs[neuron_loc_key[1]].to_numpy()
    spot_x = spot.obs[spot_loc_key[0]].to_numpy()
    spot_y = spot.obs[spot_loc_key[1]].to_numpy()

    neuron_count = []
    for x, y in zip(spot_x, spot_y):
        # half-open window [centre - half, centre + half), the same rule as
        # spot_granule, so a point on a shared tile edge belongs to exactly one spot
        inside = ((neuron_x >= x - half_len) & (neuron_x < x + half_len)
                  & (neuron_y >= y - half_len) & (neuron_y < y + half_len))
        neuron_count.append(int(inside.sum()))

    spot.obs["indicator"] = [int(count > 0) for count in neuron_count]
    spot.obs["neuron_count"] = neuron_count
    return spot
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# [MAIN] anndata, spot-level granule metadata
|
|
405
|
+
def spot_granule(granule, spot, grid_len = 50, gnl_loc_key = ["sphere_x", "sphere_y"], spot_loc_key = ["global_x", "global_y"]):
    """Summarise the granules that fall inside each square spot.

    Parameters
    ----------
    granule : pandas.DataFrame
        Must contain the two columns named in ``gnl_loc_key`` plus
        ``"sphere_r"``, ``"size"`` and ``"in_nucleus"``. Only read.
    spot : object exposing ``.copy()`` and ``.obs``
        ``.obs`` must contain the spot-centre columns named in ``spot_loc_key``.
    grid_len : number
        Side length of a spot; each spot covers the half-open window
        ``[centre - grid_len / 2, centre + grid_len / 2)`` on both axes.

    Returns
    -------
    A copy of ``spot`` whose ``.obs`` gains ``"indicator"``, ``"gnl_count"``,
    ``"gnl_radius"``, ``"gnl_size"`` and ``"gnl_score"``. The last three are
    NaN-ignoring means over the granules in the spot, or 0 for an empty spot.
    """
    spot = spot.copy()
    half_len = grid_len / 2

    # extract every needed column once; the per-spot work is then pure array masking
    gnl_x = granule[gnl_loc_key[0]].to_numpy()
    gnl_y = granule[gnl_loc_key[1]].to_numpy()
    radius = granule["sphere_r"].to_numpy(dtype = float)
    size = granule["size"].to_numpy(dtype = float)
    score = granule["in_nucleus"].to_numpy(dtype = float)

    granule_count, granule_radius, granule_size, granule_score = [], [], [], []

    for x, y in zip(spot.obs[spot_loc_key[0]].to_numpy(), spot.obs[spot_loc_key[1]].to_numpy()):
        inside = ((gnl_x >= x - half_len) & (gnl_x < x + half_len)
                  & (gnl_y >= y - half_len) & (gnl_y < y + half_len))
        n_inside = int(inside.sum())
        granule_count.append(n_inside)

        if n_inside == 0:
            # empty spots are reported as 0 rather than NaN
            granule_radius.append(0)
            granule_size.append(0)
            granule_score.append(0)
        else:
            granule_radius.append(np.nanmean(radius[inside]))
            granule_size.append(np.nanmean(size[inside]))
            granule_score.append(np.nanmean(score[inside]))

    spot.obs["indicator"] = [int(count > 0) for count in granule_count]
    spot.obs["gnl_count"] = granule_count
    spot.obs["gnl_radius"] = granule_radius
    spot.obs["gnl_size"] = granule_size
    spot.obs["gnl_score"] = granule_score
    return spot
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
# [MAIN] anndata, neuron-granule colocalization
|
|
440
|
+
def neighbor_granule(adata_neuron, granule_adata, radius = 10, sigma = None, loc_key = ["global_x", "global_y"]):
    """Attach neighbouring-granule statistics to every neuron.

    Parameters
    ----------
    adata_neuron : AnnData-like
        Must expose ``.copy()``, ``.obs`` (with the ``loc_key`` columns),
        ``.uns``, ``.obsm``, ``.n_obs`` and ``.obs_names``.
    granule_adata : AnnData-like
        Must expose ``.obs`` (with the ``loc_key`` columns) and ``.X`` with one
        row per granule. Only read.
    radius : number
        Search radius around each neuron.
    sigma : number or None
        Gaussian kernel width for the distance weights; defaults to ``radius / 2``.
    loc_key : list of column names
        Coordinate columns shared by both ``.obs`` tables.

    Returns
    -------
    (adata_neuron_copy, spatial_df)
        The copy gains ``obs["neighbor_gnl_count"]``,
        ``uns["neighbor_gnl_indices"]`` and ``obsm["weighted_gnl_expression"]``
        (Gaussian-weighted mean granule expression; zeros when a neuron has no
        neighbour). ``spatial_df`` holds one row of geometric features per neuron.
    """
    adata_neuron = adata_neuron.copy()

    if sigma is None:
        sigma = radius / 2

    # neuron and granule coordinates
    neuron_coords = np.asarray(adata_neuron.obs[loc_key].values, dtype = float)
    gnl_coords = np.asarray(granule_adata.obs[loc_key].values, dtype = float)

    # build the tree straight from the coordinate matrix so any number of
    # coordinate columns works, not just two
    tree = cKDTree(gnl_coords)
    neighbor_indices = tree.query_ball_point(neuron_coords, r = radius)

    # record count and indices
    adata_neuron.obs["neighbor_gnl_count"] = np.array([len(indices) for indices in neighbor_indices])
    adata_neuron.uns["neighbor_gnl_indices"] = neighbor_indices

    # the averaged rows come from the granule matrix, so its width sets the output width
    granule_expr = granule_adata.X
    weighted_expr = np.zeros((adata_neuron.n_obs, granule_expr.shape[1]))

    features = []
    for i, indices in enumerate(neighbor_indices):
        feats = {"n_granules": len(indices)}

        if len(indices) == 0:
            feats.update({"mean_distance": np.nan, "std_distance": np.nan, "radius_max": np.nan, "radius_min": np.nan, "density": 0, "center_offset_norm": np.nan, "anisotropy_ratio": np.nan})
            features.append(feats)
            continue

        gnl_pos = gnl_coords[indices]
        neuron_pos = neuron_coords[i]
        dists = np.linalg.norm(gnl_pos - neuron_pos, axis = 1)

        # ---------- neighboring granule expression ---------- #
        weights = np.exp(- (dists ** 2) / (2 * sigma ** 2))
        weights = weights / weights.sum()
        rows = granule_expr[indices]
        if hasattr(rows, "toarray"):
            # sparse matrices must be densified before np.average
            rows = rows.toarray()
        weighted_expr[i] = np.average(np.asarray(rows), axis = 0, weights = weights)

        # ---------- neighboring granule spatial feature ---------- #
        feats["mean_distance"] = dists.mean()
        feats["std_distance"] = dists.std()
        feats["radius_max"] = dists.max()
        feats["radius_min"] = dists.min()
        feats["density"] = len(indices) / (np.pi * radius ** 2)
        feats["center_offset_norm"] = np.linalg.norm(gnl_pos.mean(axis = 0) - neuron_pos)

        if len(indices) < 2:
            # a single point has no sample covariance; skip the degenerate eigen-decomposition
            feats["anisotropy_ratio"] = np.nan
        else:
            eigvals = np.linalg.eigvalsh(np.cov((gnl_pos - neuron_pos).T))
            if np.min(eigvals) > 0:
                feats["anisotropy_ratio"] = np.max(eigvals) / np.min(eigvals)
            else:
                feats["anisotropy_ratio"] = np.nan

        features.append(feats)

    adata_neuron.obsm["weighted_gnl_expression"] = weighted_expr
    spatial_df = pd.DataFrame(features, index = adata_neuron.obs_names)
    return adata_neuron, spatial_df
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# [MAIN] numpy array, neuron embeddings based on neighboring granules
|
|
513
|
+
def neuron_embedding_one_hot(adata_neuron, granule_adata, k = 10, radius = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
    """One-hot encode the subtypes of each neuron's k nearest granules.

    Parameters
    ----------
    adata_neuron, granule_adata : AnnData-like
        Both must expose ``.obs`` with the ``loc_key`` columns;
        ``granule_adata.obs`` must also contain ``gnl_subtype_key``. Only read.
    k : int
        Number of nearest-neighbour slots per neuron.
    radius : number
        Granules at or beyond this distance are ignored.
    padding_value : str
        Category used for slots with no granule within ``radius``. It is
        appended to the category list if no granule already carries it.

    Returns
    -------
    (embeddings, categories)
        ``embeddings`` has shape ``(n_neurons, k, n_categories)``; slot ``j`` of
        neuron ``i`` is the one-hot vector of its j-th nearest granule's subtype.
        ``categories`` is the array of category labels in column order (sorted
        subtypes, then ``padding_value`` if it had to be added).
    """
    # neuron and granule coordinates, granule subtypes
    neuron_coords = np.asarray(adata_neuron.obs[loc_key].to_numpy(), dtype = float)
    granule_coords = np.asarray(granule_adata.obs[loc_key].to_numpy(), dtype = float)
    granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()

    # integer-code the subtypes; the padding category goes last when it is new,
    # so the codes returned by np.unique stay valid
    categories, granule_codes = np.unique(granule_subtypes, return_inverse = True)
    categories = categories.tolist()
    if padding_value not in categories:
        categories.append(padding_value)
    padding_code = categories.index(padding_value)
    n_categories = len(categories)

    n_neurons = neuron_coords.shape[0]
    n_granules = granule_coords.shape[0]

    # k-d tree
    tree = cKDTree(granule_coords)
    distances, indices = tree.query(neuron_coords, k = k, distance_upper_bound = radius)
    # query() drops the neighbour axis when k == 1; restore it
    distances = np.reshape(distances, (n_neurons, k))
    indices = np.reshape(indices, (n_neurons, k))

    # missing neighbours are reported as index == n_granules with infinite distance
    found = (indices < n_granules) & np.isfinite(distances)
    codes = np.full((n_neurons, k), padding_code, dtype = int)
    codes[found] = granule_codes[indices[found]]

    # row lookup in the identity matrix turns every code into its one-hot vector
    embeddings = np.eye(n_categories, dtype = float)[codes]

    return embeddings, np.array(categories)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# [MAIN] numpy array, distance-weighted neuron embeddings based on neighboring granules
|
|
555
|
+
def neuron_embedding_spatial_weight(adata_neuron, granule_adata, radius = 10, sigma = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
    """Distance-weighted subtype composition of the granules around each neuron.

    Parameters
    ----------
    adata_neuron, granule_adata : AnnData-like
        Both must expose ``.obs`` with the ``loc_key`` columns;
        ``granule_adata.obs`` must also contain ``gnl_subtype_key``. Only read.
    radius : number
        Granules within this distance of a neuron contribute to its embedding.
    sigma : number
        Decay length of the exponential weight ``exp(-distance / sigma)``.
    padding_value : str
        Category assigned with weight 1 to neurons without any neighbour. It is
        appended to the category list if no granule already carries it.

    Returns
    -------
    (embeddings, categories)
        ``embeddings`` has shape ``(n_neurons, n_categories)`` and each row sums
        to 1. ``categories`` is the array of category labels in column order.
    """
    # neuron and granule coordinates, granule subtypes
    neuron_coords = np.asarray(adata_neuron.obs[loc_key].to_numpy(), dtype = float)
    granule_coords = np.asarray(granule_adata.obs[loc_key].to_numpy(), dtype = float)
    granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()

    # integer-code the subtypes; the padding category goes last when it is new
    categories, granule_codes = np.unique(granule_subtypes, return_inverse = True)
    categories = categories.tolist()
    if padding_value not in categories:
        categories.append(padding_value)
    padding_code = categories.index(padding_value)
    n_categories = len(categories)

    # k-d tree
    tree = cKDTree(granule_coords)
    all_neighbors = tree.query_ball_point(neuron_coords, r = radius)

    # initialize output
    n_neurons = neuron_coords.shape[0]
    embeddings = np.zeros((n_neurons, n_categories), dtype = float)

    for i, neighbor_indices in enumerate(all_neighbors):
        if not neighbor_indices:
            # no neighbors, assign to padding subtype
            embeddings[i, padding_code] = 1.0
            continue

        dists = np.linalg.norm(neuron_coords[i] - granule_coords[neighbor_indices], axis = 1)
        weights = np.exp(- dists / sigma)

        # summing the weights per subtype code equals a weighted sum of one-hot rows
        weighted_sum = np.bincount(granule_codes[neighbor_indices], weights = weights, minlength = n_categories)

        # normalize to make it a composition vector
        embeddings[i] = weighted_sum / weights.sum()

    return embeddings, np.array(categories)
|
mcDETECT/utils.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import seaborn as sns
|
|
5
|
+
from matplotlib import colors as mcolors
|
|
6
|
+
from rtree import index
|
|
7
|
+
from scipy.spatial import cKDTree
|
|
8
|
+
from scipy.stats import rankdata
|
|
9
|
+
from shapely.geometry import Point
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def find_threshold_index(cumsum_list, threshold = 0.99):
    """Return the first index whose cumulative value reaches a share of the total.

    ``cumsum_list`` is a sequence of cumulative sums; its last element is taken
    as the total. Returns the index of the first element that is
    ``>= threshold * total``, or ``None`` when the sequence is empty or no
    element reaches that level.
    """
    if len(cumsum_list) == 0:
        return None

    # the cut-off does not change inside the loop, so compute it once
    cutoff = threshold * cumsum_list[-1]
    for i, value in enumerate(cumsum_list):
        if value >= cutoff:
            return i
    return None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def closest(lst, K):
    """Return the element of ``lst`` with the smallest absolute difference to ``K``.

    On ties the earliest such element is returned. Raises ``ValueError`` when
    ``lst`` is empty.
    """
    return min(lst, key = lambda value: abs(value - K))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_tree(d1 = None, d2 = None, d3 = None):
    """Build a cKDTree from up to three coordinate arrays.

    Each of ``d1``, ``d2``, ``d3`` that is not ``None`` is flattened and used as
    one coordinate column, in argument order. All supplied arrays must flatten
    to the same length.

    Raises
    ------
    ValueError
        If every dimension is ``None``.
    """
    columns = [np.asarray(dimension).ravel() for dimension in (d1, d2, d3) if dimension is not None]
    if not columns:
        raise ValueError("make_tree requires at least one of d1, d2, d3")
    return cKDTree(np.column_stack(columns))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def make_rtree(spheres):
    """Build an rtree index over the 2-D bounding boxes of the given spheres.

    ``spheres`` must support ``itertuples()`` and provide the fields
    ``sphere_x``, ``sphere_y`` and ``sphere_r``. Each sphere is inserted under
    its positional order ``i`` with the box
    ``(x - r, y - r, x + r, y + r)``.
    """
    idx = index.Index(properties = index.Property())
    for i, sphere in enumerate(spheres.itertuples()):
        # read the centre directly; no geometry object is needed to form the box
        x, y, r = float(sphere.sphere_x), float(sphere.sphere_y), sphere.sphere_r
        idx.insert(i, (x - r, y - r, x + r, y + r))
    return idx
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def scale(array, max = 1):
    """Min-max rescale ``array`` onto the range ``[0, max]``.

    The smallest value maps to 0 and the largest to ``max``. When every value
    is identical the range is zero, and an all-zero result is returned rather
    than dividing by zero.
    """
    lowest = np.min(array)
    span = np.max(array) - lowest
    shifted = array - lowest
    if span == 0:
        # constant input: every element sits at the minimum
        return shifted * 0.0
    return shifted / span * max
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def weighted_corr(estimated, actual, weights):
    """Weighted Pearson correlation between ``estimated`` and ``actual``.

    All three arguments are array-likes of equal length. Means, covariance and
    variances are weighted by ``weights``. Returns NaN when either input has
    zero weighted variance.
    """
    estimated = np.asarray(estimated)
    actual = np.asarray(actual)
    weights = np.asarray(weights)

    # deviations from the weighted means
    dev_estimated = estimated - np.average(estimated, weights = weights)
    dev_actual = actual - np.average(actual, weights = weights)

    # the 1 / sum(weights) factor cancels between numerator and denominator,
    # so the plain weighted sums are enough
    cov_w = np.sum(weights * dev_estimated * dev_actual)
    var_estimated = np.sum(weights * dev_estimated ** 2)
    var_actual = np.sum(weights * dev_actual ** 2)

    denominator = np.sqrt(var_estimated * var_actual)
    if denominator == 0:
        # correlation is undefined for a constant input
        return np.nan
    return cov_w / denominator
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def weighted_spearmanr(A, B, weights):
    """Weighted Spearman correlation between ``A`` and ``B``.

    Both inputs are ranked with ``scipy.stats.rankdata`` (ties share their
    average rank), then the weighted Pearson correlation of the ranks is
    returned. Returns NaN when either rank vector has zero weighted variance.
    """
    weights = np.asarray(weights)

    # rank the data
    R_A = rankdata(np.asarray(A))
    R_B = rankdata(np.asarray(B))

    # deviations from the weighted mean ranks
    dev_A = R_A - np.average(R_A, weights = weights)
    dev_B = R_B - np.average(R_B, weights = weights)

    # the 1 / sum(weights) factor cancels between numerator and denominator
    cov_w = np.sum(weights * dev_A * dev_B)
    var_A = np.sum(weights * dev_A ** 2)
    var_B = np.sum(weights * dev_B ** 2)

    denominator = np.sqrt(var_A * var_B)
    if denominator == 0:
        # correlation is undefined when all ranks of one input coincide
        return np.nan
    return cov_w / denominator
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def assign_palette_to_adata(adata, obs_key = "granule_expr_cluster_hierarchical", cmap_name = "tab10"):
    """Return a copy of ``adata`` with one hex colour per category of ``obs_key``.

    ``adata.obs[obs_key]`` is converted to a categorical column if it is not one
    already, and the colours are stored in ``adata.uns[f"{obs_key}_colors"]`` in
    category order. Colours come from the matplotlib colormap ``cmap_name``.
    When that colormap has no discrete colour list, or has fewer colours than
    there are categories, ``seaborn.color_palette`` is asked for exactly the
    number needed.
    """
    adata = adata.copy()

    # ensure the column is categorical
    if not isinstance(adata.obs[obs_key].dtype, pd.CategoricalDtype):
        adata.obs[obs_key] = adata.obs[obs_key].astype("category")

    n_categories = len(adata.obs[obs_key].cat.categories)

    # only listed colormaps carry a `.colors` attribute
    base_colors = getattr(plt.get_cmap(cmap_name), "colors", None)
    if base_colors is None or n_categories > len(base_colors):
        color_palette = sns.color_palette(cmap_name, n_categories)
    else:
        color_palette = base_colors[:n_categories]

    # convert to hex and assign
    adata.uns[f"{obs_key}_colors"] = [mcolors.to_hex(c) for c in color_palette]

    return adata
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def p_val_to_star(p):
    """Map a p-value to a significance label.

    Returns ``"***"`` for ``p <= 0.001``, ``"**"`` for ``p <= 0.01``, ``"*"``
    for ``p <= 0.05`` and ``"ns"`` otherwise. A NaN p-value fails every
    comparison and is therefore reported as ``"ns"``.
    """
    if p <= 0.001:
        return "***"
    elif p <= 0.01:
        return "**"
    elif p <= 0.05:
        return "*"
    else:
        return "ns"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def top_columns_above_threshold(row, threshold=0.5):
    """Return the labels of the largest entries whose running sum first exceeds ``threshold``.

    ``row`` is a pandas Series. Entries are sorted in descending order and
    accumulated; the labels up to and including the entry at which the
    cumulative sum becomes strictly greater than ``threshold`` are returned.
    If the cumulative sum never exceeds ``threshold``, all labels are returned
    in descending-value order.
    """
    sorted_row = row.sort_values(ascending=False)
    exceeds = (sorted_row.cumsum() > threshold).to_numpy()

    if not exceeds.any():
        # even the full row does not reach the threshold
        return sorted_row.index.tolist()

    # slice by position so duplicate labels cannot widen the slice
    n_keep = int(exceeds.argmax()) + 1
    return sorted_row.index[:n_keep].tolist()
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mcDETECT
|
|
3
|
-
Version:
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Uncovering the dark transcriptome in polarized neuronal compartments with mcDETECT
|
|
5
5
|
Home-page: https://github.com/chen-yang-yuan/mcDETECT
|
|
6
6
|
Author: Chenyang Yuan
|
|
7
7
|
Author-email: chenyang.yuan@emory.edu
|
|
@@ -26,6 +26,7 @@ Dynamic: classifier
|
|
|
26
26
|
Dynamic: description
|
|
27
27
|
Dynamic: description-content-type
|
|
28
28
|
Dynamic: home-page
|
|
29
|
+
Dynamic: license-file
|
|
29
30
|
Dynamic: requires-dist
|
|
30
31
|
Dynamic: requires-python
|
|
31
32
|
Dynamic: summary
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
mcDETECT/__init__.py,sha256=WaU6MqSrj7mF0VntYIJy2N0DNoZcnO_c7XGdt5JrLRc,174
|
|
2
|
+
mcDETECT/model.py,sha256=5fMqJoirE1U2Cb7fd7bkPP5C8ofM1A5Tp5tKU7Rl5_A,28045
|
|
3
|
+
mcDETECT/utils.py,sha256=kKw7KnrS-0llqtT32S_PDkag1jk4CWYkSm-FZeIJFAw,4510
|
|
4
|
+
mcdetect-2.0.0.dist-info/licenses/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
|
|
5
|
+
mcdetect-2.0.0.dist-info/METADATA,sha256=hUztmpphcWDcu2eoE294q6-QCKd_Oh5EmRAAyZk1ynk,2818
|
|
6
|
+
mcdetect-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
mcdetect-2.0.0.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
|
|
8
|
+
mcdetect-2.0.0.dist-info/RECORD,,
|
mcdetect-1.0.11.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
mcDETECT/__init__.py,sha256=tf3P7OfH-LTbgFaCTkG4itCia54VCCsR6yQ0v6mVWEE,59
|
|
2
|
-
mcDETECT/model.py,sha256=_xTupdmK0LJrgaFF34Chj3JEl45iKs7T8j-ianfyOcE,21375
|
|
3
|
-
mcdetect-1.0.11.dist-info/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
|
|
4
|
-
mcdetect-1.0.11.dist-info/METADATA,sha256=eWBTNSQE9kJACbjBo5zrOM58qotLLkRN1iEC4hBVKe8,2820
|
|
5
|
-
mcdetect-1.0.11.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
6
|
-
mcdetect-1.0.11.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
|
|
7
|
-
mcdetect-1.0.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|