mcDETECT 1.0.12__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


mcDETECT/__init__.py CHANGED
@@ -1,2 +1,3 @@
- __version__ = "1.0.12"
- from .model import closest, mcDETECT
+ __version__ = "2.0.1"
+ from .utils import *
+ from .model import mcDETECT, spot_neuron, spot_granule, neighbor_granule, neuron_embedding_one_hot, neuron_embedding_spatial_weight
mcDETECT/model.py CHANGED
@@ -9,34 +9,9 @@ from scipy.spatial import cKDTree
  from scipy.stats import poisson
  from shapely.geometry import Point
  from sklearn.cluster import DBSCAN
+ from sklearn.preprocessing import OneHotEncoder

-
- def closest(lst, K):
-     return lst[min(range(len(lst)), key = lambda i: abs(lst[i] - K))]
-
-
- def make_tree(d1 = None, d2 = None, d3 = None):
-     active_dimensions = [dimension for dimension in [d1, d2, d3] if dimension is not None]
-     if len(active_dimensions) == 1:
-         points = np.c_[active_dimensions[0].ravel()]
-     elif len(active_dimensions) == 2:
-         points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel()]
-     elif len(active_dimensions) == 3:
-         points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel(), active_dimensions[2].ravel()]
-     return cKDTree(points)
-
-
- def make_rtree(spheres):
-     p = index.Property()
-     idx = index.Index(properties = p)
-     for i, sphere in enumerate(spheres.itertuples()):
-         center = Point(sphere.sphere_x, sphere.sphere_y)
-         bounds = (center.x - sphere.sphere_r,
-                   center.y - sphere.sphere_r,
-                   center.x + sphere.sphere_r,
-                   center.y + sphere.sphere_r)
-         idx.insert(i, bounds)
-     return idx
+ from .utils import *


  class mcDETECT:
@@ -100,7 +75,7 @@ class mcDETECT:
      def dbscan(self, target_names = None, write_csv = False, write_path = "./"):

          if self.type != "Xenium":
-             z_grid = list(np.unique(self.transcripts["global_z"]))
+             z_grid = list(self.transcripts["global_z"].unique())
              z_grid.sort()

          if target_names is None:
@@ -148,7 +123,7 @@ class mcDETECT:
  other_trans = others.iloc[other_idx]
  other_in_nucleus = np.sum(other_trans["overlaps_nucleus"])
  other_size = other_trans.shape[0]
- other_comp = len(np.unique(other_trans["target"]))
+ other_comp = len(other_trans["target"].unique())
  total_size = temp_size + other_size
  total_comp = 1 + other_comp
  local_score = (temp_in_nucleus + other_in_nucleus) / total_size
@@ -324,7 +299,7 @@ class mcDETECT:
      def profile(self, synapse, genes = None, print_itr = False):

          if genes is None:
-             genes = list(np.unique(self.transcripts["target"]))
+             genes = list(self.transcripts["target"].unique())
              transcripts = self.transcripts
          else:
              transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
@@ -357,7 +332,7 @@ class mcDETECT:
      def spot_expression(self, grid_len, genes = None):

          if genes is None:
-             genes = list(np.unique(self.transcripts["target"]))
+             genes = list(self.transcripts["target"].unique())
              transcripts = self.transcripts
          else:
              transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
@@ -401,35 +376,226 @@ class mcDETECT:
          adata.var_keys = genes
          return adata

+
+ # [MAIN] anndata, spot-level neuron metadata
+ def spot_neuron(adata_neuron, spot, grid_len = 50, neuron_loc_key = ["global_x", "global_y"], spot_loc_key = ["global_x", "global_y"]):
+
+     adata_neuron = adata_neuron.copy()
+     neurons = adata_neuron.obs
+     spot = spot.copy()
+
+     half_len = grid_len / 2

-     # [MAIN] anndata, spot-level synapse metadata
-     def spot_synapse(self, synapse, spot):
+     indicator, neuron_count = [], []
+
+     for _, row in spot.obs.iterrows():

-         x_grid, y_grid = list(np.unique(spot.obs["global_x"])), list(np.unique(spot.obs["global_y"]))
-         diameter = x_grid[1] - x_grid[0]
+         x = row[spot_loc_key[0]]
+         y = row[spot_loc_key[1]]
+         neuron_temp = neurons[(neurons[neuron_loc_key[0]] > x - half_len) & (neurons[neuron_loc_key[0]] < x + half_len) & (neurons[neuron_loc_key[1]] > y - half_len) & (neurons[neuron_loc_key[1]] < y + half_len)]
+         indicator.append(int(len(neuron_temp) > 0))
+         neuron_count.append(len(neuron_temp))
+
+     spot.obs["indicator"] = indicator
+     spot.obs["neuron_count"] = neuron_count
+     return spot
+
+
+ # [MAIN] anndata, spot-level granule metadata
+ def spot_granule(granule, spot, grid_len = 50, gnl_loc_key = ["sphere_x", "sphere_y"], spot_loc_key = ["global_x", "global_y"]):
+
+     granule = granule.copy()
+     spot = spot.copy()
+
+     half_len = grid_len / 2
+
+     indicator, granule_count, granule_radius, granule_size, granule_score = [], [], [], [], []
+
+     for _, row in spot.obs.iterrows():

-         indicator, synapse_count, synapse_radius, synapse_size, synapse_score = [], [], [], [], []
-         for i in x_grid:
-             x_min_temp = i
-             x_max_temp = i + diameter
-             for j in y_grid:
-                 y_min_temp = j
-                 y_max_temp = j + diameter
-                 syn_temp = synapse[(synapse["sphere_x"] > x_min_temp) & (synapse["sphere_x"] < x_max_temp) & (synapse["sphere_y"] > y_min_temp) & (synapse["sphere_y"] < y_max_temp)]
-                 indicator.append(int(syn_temp.shape[0] > 0))
-                 synapse_count.append(syn_temp.shape[0])
-                 if syn_temp.shape[0] == 0:
-                     synapse_radius.append(0)
-                     synapse_size.append(0)
-                     synapse_score.append(0)
-                 else:
-                     synapse_radius.append(np.nanmean(syn_temp["sphere_r"]))
-                     synapse_size.append(np.nanmean(syn_temp["size"]))
-                     synapse_score.append(np.nanmean(syn_temp["in_nucleus"]))
+         x = row[spot_loc_key[0]]
+         y = row[spot_loc_key[1]]
+         gnl_temp = granule[(granule[gnl_loc_key[0]] >= x - half_len) & (granule[gnl_loc_key[0]] < x + half_len) & (granule[gnl_loc_key[1]] >= y - half_len) & (granule[gnl_loc_key[1]] < y + half_len)]
+         indicator.append(int(len(gnl_temp) > 0))
+         granule_count.append(len(gnl_temp))
+
+         if len(gnl_temp) == 0:
+             granule_radius.append(0)
+             granule_size.append(0)
+             granule_score.append(0)
+         else:
+             granule_radius.append(np.nanmean(gnl_temp["sphere_r"]))
+             granule_size.append(np.nanmean(gnl_temp["size"]))
+             granule_score.append(np.nanmean(gnl_temp["in_nucleus"]))
+
+     spot.obs["indicator"] = indicator
+     spot.obs["gnl_count"] = granule_count
+     spot.obs["gnl_radius"] = granule_radius
+     spot.obs["gnl_size"] = granule_size
+     spot.obs["gnl_score"] = granule_score
+     return spot
+
+
+ # [Main] anndata, neuron-granule colocalization
+ def neighbor_granule(adata_neuron, granule_adata, radius = 10, sigma = None, loc_key = ["global_x", "global_y"]):
+
+     adata_neuron = adata_neuron.copy()
+     granule_adata = granule_adata.copy()
+
+     if sigma is None:
+         sigma = radius / 2
+
+     # neuron and granule coordinates
+     neuron_coords = adata_neuron.obs[loc_key].values
+     gnl_coords = granule_adata.obs[loc_key].values
+
+     # make tree
+     tree = make_tree(d1 = gnl_coords[:, 0], d2 = gnl_coords[:, 1])
+
+     # query neighboring granules for each neuron
+     neighbor_indices = tree.query_ball_point(neuron_coords, r = radius)
+
+     # record count and indices
+     granule_counts = np.array([len(indices) for indices in neighbor_indices])
+     adata_neuron.obs["neighbor_gnl_count"] = granule_counts
+     adata_neuron.uns["neighbor_gnl_indices"] = neighbor_indices
+
+     # ---------- neighboring granule expression matrix ---------- #
+     n_neurons, n_genes = adata_neuron.n_obs, adata_neuron.n_vars
+     weighted_expr = np.zeros((n_neurons, n_genes))
+
+     for i, indices in enumerate(neighbor_indices):
+         if len(indices) == 0:
+             continue
+         distances = np.linalg.norm(gnl_coords[indices] - neuron_coords[i], axis = 1)
+         weights = np.exp(- (distances ** 2) / (2 * sigma ** 2))
+         weights = weights / weights.sum()
+         weighted_expr[i] = np.average(granule_adata.X[indices], axis = 0, weights = weights)
+
+     adata_neuron.obsm["weighted_gnl_expression"] = weighted_expr
+
+     # ---------- neighboring granule spatial feature ---------- #
+     features = []
+
+     for i, gnl_idx in enumerate(neighbor_indices):

-         spot.obs["indicator"] = indicator
-         spot.obs["syn_count"] = synapse_count
-         spot.obs["syn_radius"] = synapse_radius
-         spot.obs["syn_size"] = synapse_size
-         spot.obs["syn_score"] = synapse_score
-         return spot
+         feats = {}
+         feats["n_granules"] = len(gnl_idx)
+
+         if len(gnl_idx) == 0:
+             feats.update({"mean_distance": np.nan, "std_distance": np.nan, "radius_max": np.nan, "radius_min": np.nan, "density": 0, "center_offset_norm": np.nan, "anisotropy_ratio": np.nan})
+         else:
+             gnl_pos = gnl_coords[gnl_idx]
+             neuron_pos = neuron_coords[i]
+             dists = np.linalg.norm(gnl_pos - neuron_pos, axis = 1)
+             feats["mean_distance"] = dists.mean()
+             feats["std_distance"] = dists.std()
+             feats["radius_max"] = dists.max()
+             feats["radius_min"] = dists.min()
+             feats["density"] = len(gnl_idx) / (np.pi * radius ** 2)
+             centroid = gnl_pos.mean(axis = 0)
+             offset = centroid - neuron_pos
+             feats["center_offset_norm"] = np.linalg.norm(offset)
+             cov = np.cov((gnl_pos - neuron_pos).T)
+             eigvals = np.linalg.eigvalsh(cov)
+             if np.min(eigvals) > 0:
+                 feats["anisotropy_ratio"] = np.max(eigvals) / np.min(eigvals)
+             else:
+                 feats["anisotropy_ratio"] = np.nan
+
+         features.append(feats)
+
+     spatial_df = pd.DataFrame(features, index = adata_neuron.obs_names)
+     return adata_neuron, spatial_df
+
+
+ # [MAIN] numpy array, neuron embeddings based on neighboring granules
+ def neuron_embedding_one_hot(adata_neuron, granule_adata, k = 10, radius = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
+
+     adata_neuron = adata_neuron.copy()
+     granule_adata = granule_adata.copy()
+
+     # neuron and granule coordinates, granule subtypes
+     neuron_coords = adata_neuron.obs[loc_key].to_numpy()
+     granule_coords = granule_adata.obs[loc_key].to_numpy()
+     granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()
+
+     # include padding category
+     unique_subtypes = np.unique(granule_subtypes).tolist()
+     if padding_value not in unique_subtypes:
+         unique_subtypes.append(padding_value)
+
+     encoder = OneHotEncoder(categories = [unique_subtypes], sparse = False, handle_unknown = "ignore")
+     encoder.fit(np.array(unique_subtypes).reshape(-1, 1))
+     S = len(unique_subtypes)
+
+     # k-d tree
+     tree = make_tree(d1 = granule_coords[:, 0], d2 = granule_coords[:, 1])
+     distances, indices = tree.query(neuron_coords, k = k, distance_upper_bound = radius)
+
+     # initialize output
+     n_neurons = neuron_coords.shape[0]
+     embeddings = np.zeros((n_neurons, k, S), dtype = float)
+
+     for i in range(n_neurons):
+         for j in range(k):
+             idx = indices[i, j]
+             dist = distances[i, j]
+             if idx == granule_coords.shape[0] or np.isinf(dist):
+                 subtype = padding_value
+             else:
+                 subtype = granule_subtypes[idx]
+             onehot = encoder.transform([[subtype]])[0]
+             embeddings[i, j, :] = onehot
+
+     return embeddings, encoder.categories_[0]
+
+
+ # [MAIN] numpy array, neuron embeddings based on neighboring granules
+ def neuron_embedding_spatial_weight(adata_neuron, granule_adata, radius = 10, sigma = 10, loc_key = ["global_x", "global_y"], gnl_subtype_key = "granule_subtype_kmeans", padding_value = "Others"):
+
+     adata_neuron = adata_neuron.copy()
+     granule_adata = granule_adata.copy()
+
+     # neuron and granule coordinates, granule subtypes
+     neuron_coords = adata_neuron.obs[loc_key].to_numpy()
+     granule_coords = granule_adata.obs[loc_key].to_numpy()
+     granule_subtypes = granule_adata.obs[gnl_subtype_key].astype(str).to_numpy()
+
+     # include padding category
+     unique_subtypes = np.unique(granule_subtypes).tolist()
+     if padding_value not in unique_subtypes:
+         unique_subtypes.append(padding_value)
+
+     encoder = OneHotEncoder(categories = [unique_subtypes], sparse = False, handle_unknown = "ignore")
+     encoder.fit(np.array(unique_subtypes).reshape(-1, 1))
+     S = len(unique_subtypes)
+
+     # k-d tree
+     tree = make_tree(d1 = granule_coords[:, 0], d2 = granule_coords[:, 1])
+     all_neighbors = tree.query_ball_point(neuron_coords, r = radius)
+
+     # initialize output
+     n_neurons = neuron_coords.shape[0]
+     embeddings = np.zeros((n_neurons, S), dtype = float)
+
+     for i, neighbor_indices in enumerate(all_neighbors):
+         if not neighbor_indices:
+             # no neighbors, assign to padding subtype
+             embeddings[i] = encoder.transform([[padding_value]])[0]
+             continue
+
+         # get neighbor subtypes and distances
+         neighbor_coords = granule_coords[neighbor_indices]
+         dists = np.linalg.norm(neuron_coords[i] - neighbor_coords, axis = 1)
+         weights = np.exp(- dists / sigma)
+
+         # encode subtypes to one-hot and weight them
+         subtypes = granule_subtypes[neighbor_indices]
+         onehots = encoder.transform(subtypes.reshape(-1, 1))
+         weighted_sum = (weights[:, np.newaxis] * onehots).sum(axis = 0)
+
+         # normalize to make it a composition vector
+         embeddings[i] = weighted_sum / weights.sum()
+
+     return embeddings, encoder.categories_[0]
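
As a quick orientation for the new 2.0.1 surface, the sketch below wires the added module-level helpers together. It is an illustration only: the `.h5ad` paths and the exact AnnData layouts (coordinate columns in `.obs`, a granule table carrying `sphere_x`/`sphere_y`/`sphere_r`/`size`/`in_nucleus`, and a subtype column such as the default `granule_subtype_kmeans`) are assumptions about the caller's data, not something fixed by the package.

```python
# Hypothetical wiring of the new helpers; input files and column layouts are assumed.
import anndata as ad
from mcDETECT import (spot_neuron, spot_granule, neighbor_granule,
                      neuron_embedding_spatial_weight)

adata_neuron = ad.read_h5ad("neurons.h5ad")    # assumed: .obs has global_x / global_y
adata_granule = ad.read_h5ad("granules.h5ad")  # assumed: .obs has sphere_x/y/r, size, in_nucleus, subtype
adata_spot = ad.read_h5ad("spots.h5ad")        # assumed: .obs has global_x / global_y spot centers

# spot-level metadata: presence and counts of neurons / granules per grid spot
adata_spot = spot_neuron(adata_neuron, adata_spot, grid_len = 50)
adata_spot = spot_granule(adata_granule.obs, adata_spot, grid_len = 50)

# neuron-granule colocalization, then a distance-weighted subtype composition per neuron
adata_neuron, spatial_df = neighbor_granule(adata_neuron, adata_granule, radius = 10)
embeddings, subtype_order = neuron_embedding_spatial_weight(adata_neuron, adata_granule,
                                                            radius = 10, sigma = 10)
```

Passing the granule table (here `.obs`) to `spot_granule` matches how that function indexes plain columns, whereas `neighbor_granule` and the embedding helpers take AnnData objects directly.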
mcDETECT/utils.py ADDED
@@ -0,0 +1,145 @@
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import pandas as pd
+ import seaborn as sns
+ from matplotlib import colors as mcolors
+ from rtree import index
+ from scipy.spatial import cKDTree
+ from scipy.stats import rankdata
+ from shapely.geometry import Point
+
+
+ def find_threshold_index(cumsum_list, threshold = 0.99):
+     total = cumsum_list[-1]
+     for i, value in enumerate(cumsum_list):
+         if value >= threshold * total:
+             return i
+     return None
+
+
+ def closest(lst, K):
+     return lst[min(range(len(lst)), key = lambda i: abs(lst[i] - K))]
+
+
+ def make_tree(d1 = None, d2 = None, d3 = None):
+     active_dimensions = [dimension for dimension in [d1, d2, d3] if dimension is not None]
+     if len(active_dimensions) == 1:
+         points = np.c_[active_dimensions[0].ravel()]
+     elif len(active_dimensions) == 2:
+         points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel()]
+     elif len(active_dimensions) == 3:
+         points = np.c_[active_dimensions[0].ravel(), active_dimensions[1].ravel(), active_dimensions[2].ravel()]
+     return cKDTree(points)
+
+
+ def make_rtree(spheres):
+     p = index.Property()
+     idx = index.Index(properties = p)
+     for i, sphere in enumerate(spheres.itertuples()):
+         center = Point(sphere.sphere_x, sphere.sphere_y)
+         bounds = (center.x - sphere.sphere_r,
+                   center.y - sphere.sphere_r,
+                   center.x + sphere.sphere_r,
+                   center.y + sphere.sphere_r)
+         idx.insert(i, bounds)
+     return idx
+
+
+ def scale(array, max = 1):
+     new_array = (array - np.min(array)) / (np.max(array) - np.min(array)) * max
+     return new_array
+
+
+ def weighted_corr(estimated, actual, weights):
+
+     estimated = np.array(estimated)
+     actual = np.array(actual)
+     weights = np.array(weights)
+
+     # weighted mean
+     mean_estimated = np.average(estimated, weights = weights)
+     mean_actual = np.average(actual, weights = weights)
+
+     # weighted covariance
+     cov_w = np.sum(weights * (estimated - mean_estimated) * (actual - mean_actual)) / np.sum(weights)
+
+     # weighted variances
+     var_estimated = np.sum(weights * (estimated - mean_estimated) ** 2) / np.sum(weights)
+     var_actual = np.sum(weights * (actual - mean_actual) ** 2) / np.sum(weights)
+
+     # weighted correlation coefficient
+     weighted_corr = cov_w / np.sqrt(var_estimated * var_actual)
+
+     return weighted_corr
+
+
+ def weighted_spearmanr(A, B, weights):
+
+     A = np.array(A)
+     B = np.array(B)
+     weights = np.array(weights)
+
+     # rank the data
+     R_A = rankdata(A)
+     R_B = rankdata(B)
+
+     # weighted mean
+     mean_R_A_w = np.average(R_A, weights=weights)
+     mean_R_B_w = np.average(R_B, weights=weights)
+
+     # weighted covariance
+     cov_w = np.sum(weights * (R_A - mean_R_A_w) * (R_B - mean_R_B_w)) / np.sum(weights)
+
+     # weighted variances
+     var_R_A_w = np.sum(weights * (R_A - mean_R_A_w)**2) / np.sum(weights)
+     var_R_B_w = np.sum(weights * (R_B - mean_R_B_w)**2) / np.sum(weights)
+
+     # weighted Spearman correlation coefficient
+     weighted_spearman_corr = cov_w / np.sqrt(var_R_A_w * var_R_B_w)
+
+     return weighted_spearman_corr
+
+
+ def assign_palette_to_adata(adata, obs_key = "granule_expr_cluster_hierarchical", cmap_name = "tab10"):
+
+     adata = adata.copy()
+
+     # ensure the column is categorical
+     if not pd.api.types.is_categorical_dtype(adata.obs[obs_key]):
+         adata.obs[obs_key] = adata.obs[obs_key].astype("category")
+
+     # extract categories and number of levels
+     categories = adata.obs[obs_key].cat.categories
+     n_categories = len(categories)
+
+     # choose or extend the colormap
+     base_colors = plt.get_cmap(cmap_name).colors
+     if n_categories > len(base_colors):
+         color_palette = sns.color_palette(cmap_name, n_categories)
+     else:
+         color_palette = base_colors[:n_categories]
+
+     # convert to hex and assign
+     adata.uns[f"{obs_key}_colors"] = [mcolors.to_hex(c) for c in color_palette]
+
+     return adata
+
+
+ def p_val_to_star(p):
+     if p > 0.05:
+         return "ns"
+     elif p > 0.01:
+         return "*"
+     elif p > 0.001:
+         return "**"
+     else:
+         return "***"
+
+
+ def top_columns_above_threshold(row, threshold=0.5):
+     sorted_row = row.sort_values(ascending=False)
+     cumsum = sorted_row.cumsum()
+     # Find how many top columns are needed to exceed the threshold
+     n = (cumsum > threshold).idxmax()
+     # Slice up to and including the index that crosses the threshold
+     return sorted_row.loc[:n].index.tolist()
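
For reference, `weighted_corr` above evaluates the standard weighted Pearson correlation, and `weighted_spearmanr` applies the same formula to `rankdata(A)` and `rankdata(B)`; the common 1/Σw factors in the covariance and the variances cancel in the ratio:

```latex
% weighted Pearson correlation as computed by weighted_corr
r_w = \frac{\sum_i w_i (x_i - \bar{x}_w)(y_i - \bar{y}_w)}
           {\sqrt{\sum_i w_i (x_i - \bar{x}_w)^2}\,\sqrt{\sum_i w_i (y_i - \bar{y}_w)^2}},
\qquad
\bar{x}_w = \frac{\sum_i w_i x_i}{\sum_i w_i}, \quad
\bar{y}_w = \frac{\sum_i w_i y_i}{\sum_i w_i}
```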
mcdetect-2.0.1.dist-info/METADATA ADDED
@@ -0,0 +1,40 @@
+ Metadata-Version: 2.4
+ Name: mcDETECT
+ Version: 2.0.1
+ Summary: Uncovering the dark transcriptome in polarized neuronal compartments with mcDETECT
+ Home-page: https://github.com/chen-yang-yuan/mcDETECT
+ Author: Chenyang Yuan
+ Author-email: chenyang.yuan@emory.edu
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.6
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: anndata
+ Requires-Dist: miniball
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: rtree
+ Requires-Dist: scanpy
+ Requires-Dist: scikit-learn
+ Requires-Dist: scipy
+ Requires-Dist: shapely
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # mcDETECT
+
+ ## Uncovering the dark transcriptome in polarized neuronal compartments with mcDETECT
+
+ #### Chenyang Yuan, Krupa Patel, Hongshun Shi, Hsiao-Lin V. Wang, Feng Wang, Ronghua Li, Yangping Li, Victor G. Corces, Hailing Shi, Sulagna Das, Jindan Yu, Peng Jin, Bing Yao* and Jian Hu*
+
+ mcDETECT is a computational framework designed to study the dark transcriptome related to polarized compartments in the brain using *in situ* spatial transcriptomics (iST) data. It begins by examining the subcellular distribution of mRNAs in an iST sample. Each mRNA molecule is treated as a distinct point with its own 3D spatial coordinates, accounting for the thickness of the sample. Unlike many cell-type marker genes, which are typically found within the nucleus or soma, compartmentalized mRNAs often form small aggregates outside the soma. mcDETECT uses a density-based clustering approach to identify these extrasomatic aggregates. This involves calculating the Euclidean distance between mRNA points and defining the neighborhood of each point within a specified search radius. Points are then categorized as core points, border points, or noise points based on their reachability from neighboring points. mcDETECT recognizes each connected bundle of core and border points as an mRNA aggregate. To minimize false positives, it excludes aggregates that substantially overlap with somata, which are estimated by dilating the nuclear masks derived from DAPI staining. mcDETECT then repeats this process for multiple granule markers, merging aggregates from different markers that exhibit high spatial overlap. After aggregating across all markers, an additional filtering step removes aggregates containing mRNAs from negative control genes, which are known to be enriched exclusively in nuclei and somata. The remaining aggregates are considered individual RNA granules. mcDETECT then computes the minimum enclosing sphere for each aggregate to connect neighboring mRNA molecules from all measured genes and summarizes their counts, thereby defining the spatial transcriptome profile of individual RNA granules.
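
The detection recipe described above (density-based clustering of one marker's transcripts in 3D, then an enclosing sphere per aggregate) can be pictured with a minimal sketch. This is not mcDETECT's internal pipeline: the `eps` and `min_samples` values are placeholders, and a centroid-based bounding sphere stands in for the true minimum enclosing sphere.

```python
# Minimal sketch of the idea described above (not the package's actual implementation):
# cluster one marker's transcripts in 3D with DBSCAN, drop noise points, and
# summarize each remaining aggregate with a simple bounding sphere.
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

def sketch_aggregates(transcripts: pd.DataFrame, eps: float = 2.0, min_samples: int = 4) -> pd.DataFrame:
    # transcripts: molecules of a single marker gene with global_x / global_y / global_z columns
    coords = transcripts[["global_x", "global_y", "global_z"]].to_numpy()
    labels = DBSCAN(eps = eps, min_samples = min_samples).fit_predict(coords)

    spheres = []
    for label in set(labels):
        if label == -1:                      # -1 marks DBSCAN noise points
            continue
        points = coords[labels == label]
        center = points.mean(axis = 0)       # centroid as an approximate sphere center
        radius = np.linalg.norm(points - center, axis = 1).max()
        spheres.append({"sphere_x": center[0], "sphere_y": center[1], "sphere_z": center[2],
                        "sphere_r": radius, "size": len(points)})
    return pd.DataFrame(spheres)
```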
mcdetect-2.0.1.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+ mcDETECT/__init__.py,sha256=o9fQTRgcHPisSCcv0Cy-AtdiTSWrj-ITBK_FQxfnmKE,174
+ mcDETECT/model.py,sha256=-r2_Ve0wxOALxiFk0REa58WjMea495yLZ6oXT-WWekw,28046
+ mcDETECT/utils.py,sha256=kKw7KnrS-0llqtT32S_PDkag1jk4CWYkSm-FZeIJFAw,4510
+ mcdetect-2.0.1.dist-info/licenses/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
+ mcdetect-2.0.1.dist-info/METADATA,sha256=qMO7hrWgabHHp1_UxlDvsLmQYaBC7Nf85RPNxyBvA8o,3016
+ mcdetect-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mcdetect-2.0.1.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
+ mcdetect-2.0.1.dist-info/RECORD,,
mcdetect-2.0.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (76.1.0)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

mcdetect-1.0.12.dist-info/METADATA DELETED
@@ -1,39 +0,0 @@
- Metadata-Version: 2.2
- Name: mcDETECT
- Version: 1.0.12
- Summary: mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
- Home-page: https://github.com/chen-yang-yuan/mcDETECT
- Author: Chenyang Yuan
- Author-email: chenyang.yuan@emory.edu
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.6
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: anndata
- Requires-Dist: miniball
- Requires-Dist: numpy
- Requires-Dist: pandas
- Requires-Dist: rtree
- Requires-Dist: scanpy
- Requires-Dist: scikit-learn
- Requires-Dist: scipy
- Requires-Dist: shapely
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
-
- # mcDETECT
-
- ## mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
-
- #### Chenyang Yuan, Krupa Patel, Hongshun Shi, Hsiao-Lin V. Wang, Feng Wang, Ronghua Li, Yangping Li, Victor G. Corces, Hailing Shi, Sulagna Das, Jindan Yu, Peng Jin, Bing Yao* and Jian Hu*
-
- mcDETECT is a computational framework designed to identify and profile individual synapses using *in situ* spatial transcriptomics (iST) data. It starts by examining the subcellular distribution of synaptic mRNAs in an iST sample. Unlike cell-type specific marker genes, which are typically found within nuclei, mRNAs of synaptic markers often form small aggregations outside the nuclei. mcDETECT uses a density-based clustering approach to identify these extranuclear aggregations. This involves calculating the Euclidean distance between mRNA points and defining the neighborhood of each point within a specified search radius. Points are then categorized into core points, border points, and noise points based on their reachability from neighboring points. mcDETECT recognizes each bundle of core and border points as a synaptic aggregation. To minimize false positives, it excludes aggregations that significantly overlap with nuclei identified by DAPI staining. Subsequently, mcDETECT repeats this process for multiple synaptic markers, merging aggregations from different markers with high overlaps. After encompassing all markers, an additional filtering step is performed to remove aggregations that contain mRNAs from negative control genes, which are known to be enriched only in nuclei. The remaining aggregations are considered individual synaptic aggregations. mcDETECT then uses the minimum enclosing sphere of each aggregation to gather all mRNA molecules and summarizes their counts for all measured genes to define the spatial transcriptome profile of individual synapses.
mcdetect-1.0.12.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- mcDETECT/__init__.py,sha256=8DC3jJ35kT7b51bP9HtbDsCRc8_vT6nUaXCZBaSM5Tg,59
- mcDETECT/model.py,sha256=pl6BOByor3Czj1UbxQX7_VzBUyNhz1tG_z7IGz2nR80,21462
- mcdetect-1.0.12.dist-info/LICENSE,sha256=uxq-shEWOGTIGVnQLmpElILmfCkuUhFZRAMnZUiKvtg,1070
- mcdetect-1.0.12.dist-info/METADATA,sha256=AJjMolAwV98Px9PioTv0U_iJl0ypTKMFRgSvCQbBkAg,2820
- mcdetect-1.0.12.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
- mcdetect-1.0.12.dist-info/top_level.txt,sha256=WwzBojt5U-T2hZ8llO6XgpM9OFIBkWQQldQKu19O8EY,9
- mcdetect-1.0.12.dist-info/RECORD,,