mcDETECT 1.0.9__tar.gz → 1.0.11__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of mcDETECT might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mcDETECT
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
5
5
  Home-page: https://github.com/chen-yang-yuan/mcDETECT
6
6
  Author: Chenyang Yuan
@@ -0,0 +1,2 @@
1
+ __version__ = "1.0.11"
2
+ from .model import closest, mcDETECT
@@ -68,8 +68,8 @@ class mcDETECT:
68
68
  def construct_grid(self, grid_len = None):
69
69
  if grid_len is None:
70
70
  grid_len = self.grid_len
71
- x_min, x_max = np.min(self.transcripts['global_x']), np.max(self.transcripts['global_x'])
72
- y_min, y_max = np.min(self.transcripts['global_y']), np.max(self.transcripts['global_y'])
71
+ x_min, x_max = np.min(self.transcripts["global_x"]), np.max(self.transcripts["global_x"])
72
+ y_min, y_max = np.min(self.transcripts["global_y"]), np.max(self.transcripts["global_y"])
73
73
  x_min = np.floor(x_min / grid_len) * grid_len
74
74
  x_max = np.ceil(x_max / grid_len) * grid_len
75
75
  y_min = np.floor(y_min / grid_len) * grid_len
@@ -82,14 +82,14 @@ class mcDETECT:
82
82
  # [INNER] calculate tissue area, input for poisson_select()
83
83
  def tissue_area(self):
84
84
  x_bins, y_bins = self.construct_grid(grid_len = None)
85
- hist, _, _ = np.histogram2d(self.transcripts['global_x'], self.transcripts['global_y'], bins = [x_bins, y_bins])
85
+ hist, _, _ = np.histogram2d(self.transcripts["global_x"], self.transcripts["global_y"], bins = [x_bins, y_bins])
86
86
  area = np.count_nonzero(hist) * (self.grid_len ** 2)
87
87
  return area
88
88
 
89
89
 
90
90
  # [INNER] calculate optimal min_samples, input for dbscan()
91
91
  def poisson_select(self, gene_name):
92
- num_trans = np.sum(self.transcripts['target'] == gene_name)
92
+ num_trans = np.sum(self.transcripts["target"] == gene_name)
93
93
  bg_density = num_trans / self.tissue_area()
94
94
  cutoff_density = poisson.ppf(self.cutoff_prob, mu = self.alpha * bg_density * (np.pi * self.eps ** 2))
95
95
  optimal_m = int(max(cutoff_density, self.low_bound))
@@ -97,32 +97,32 @@ class mcDETECT:
97
97
 
98
98
 
99
99
  # [INTERMEDIATE] dictionary, low- and high-in-nucleus spheres for each synaptic marker
100
- def dbscan(self, target_names = None, write_csv = False, write_path = './'):
100
+ def dbscan(self, target_names = None, write_csv = False, write_path = "./"):
101
101
 
102
- if self.type != 'Xenium':
103
- z_grid = list(np.unique(self.transcripts['global_z']))
102
+ if self.type != "Xenium":
103
+ z_grid = list(np.unique(self.transcripts["global_z"]))
104
104
  z_grid.sort()
105
105
 
106
106
  if target_names is None:
107
107
  target_names = self.syn_genes
108
- transcripts = self.transcripts[self.transcripts['target'].isin(target_names)]
108
+ transcripts = self.transcripts[self.transcripts["target"].isin(target_names)]
109
109
 
110
110
  num_individual, data_low, data_high = [], {}, {}
111
111
 
112
112
  for j in target_names:
113
113
 
114
114
  # split transcripts
115
- target = transcripts[transcripts['target'] == j]
116
- others = transcripts[transcripts['target'] != j]
117
- tree = make_tree(d1 = np.array(others['global_x']), d2 = np.array(others['global_y']), d3 = np.array(others['global_z']))
115
+ target = transcripts[transcripts["target"] == j]
116
+ others = transcripts[transcripts["target"] != j]
117
+ tree = make_tree(d1 = np.array(others["global_x"]), d2 = np.array(others["global_y"]), d3 = np.array(others["global_z"]))
118
118
 
119
119
  # 3D DBSCAN
120
120
  if self.minspl is None:
121
121
  min_spl = self.poisson_select(j)
122
122
  else:
123
123
  min_spl = self.minspl
124
- X = np.array(target[['global_x', 'global_y', 'global_z']])
125
- db = DBSCAN(eps = self.eps, min_samples = min_spl, algorithm = 'kd_tree').fit(X)
124
+ X = np.array(target[["global_x", "global_y", "global_z"]])
125
+ db = DBSCAN(eps = self.eps, min_samples = min_spl, algorithm = "kd_tree").fit(X)
126
126
  labels = db.labels_
127
127
  n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
128
128
 
@@ -133,12 +133,12 @@ class mcDETECT:
133
133
 
134
134
  # find minimum enclosing spheres
135
135
  temp = target[labels == k]
136
- temp_in_nucleus = np.sum(temp['overlaps_nucleus'])
136
+ temp_in_nucleus = np.sum(temp["overlaps_nucleus"])
137
137
  temp_size = temp.shape[0]
138
- temp = temp[['global_x', 'global_y', 'global_z']]
138
+ temp = temp[["global_x", "global_y", "global_z"]]
139
139
  temp = temp.drop_duplicates()
140
140
  center, r2 = miniball.get_bounding_ball(np.array(temp), epsilon=1e-8)
141
- if self.type != 'Xenium':
141
+ if self.type != "Xenium":
142
142
  closest_z = closest(z_grid, center[2])
143
143
  else:
144
144
  closest_z = center[2]
@@ -146,9 +146,9 @@ class mcDETECT:
146
146
  # calculate size, composition, and in-nucleus score
147
147
  other_idx = tree.query_ball_point([center[0], center[1], center[2]], np.sqrt(r2))
148
148
  other_trans = others.iloc[other_idx]
149
- other_in_nucleus = np.sum(other_trans['overlaps_nucleus'])
149
+ other_in_nucleus = np.sum(other_trans["overlaps_nucleus"])
150
150
  other_size = other_trans.shape[0]
151
- other_comp = len(np.unique(other_trans['target']))
151
+ other_comp = len(np.unique(other_trans["target"]))
152
152
  total_size = temp_size + other_size
153
153
  total_comp = 1 + other_comp
154
154
  local_score = (temp_in_nucleus + other_in_nucleus) / total_size
@@ -165,16 +165,19 @@ class mcDETECT:
165
165
 
166
166
  # basic features for all spheres from each synaptic marker
167
167
  sphere = pd.DataFrame(list(zip(sphere_x, sphere_y, sphere_z, layer_z, sphere_r, sphere_size, sphere_comp, sphere_score)),
168
- columns = ['sphere_x', 'sphere_y', 'sphere_z', 'layer_z', 'sphere_r', 'size', 'comp', 'in_nucleus'])
169
- sphere['gene'] = [j] * sphere.shape[0]
168
+ columns = ["sphere_x", "sphere_y", "sphere_z", "layer_z", "sphere_r", "size", "comp", "in_nucleus"])
169
+ sphere["gene"] = [j] * sphere.shape[0]
170
+ sphere["gene"] = sphere["gene"].astype(str)
171
+ sphere["size"] = pd.to_numeric(sphere["size"])
172
+ sphere["comp"] = pd.to_numeric(sphere["comp"])
170
173
 
171
174
  # split low- and high-in-nucleus spheres
172
- sphere_low = sphere[(sphere['sphere_r'] < self.size_thr) & (sphere['in_nucleus'] < self.in_nucleus_thr[0])]
173
- sphere_high = sphere[(sphere['sphere_r'] < self.size_thr) & (sphere['in_nucleus'] > self.in_nucleus_thr[1])]
175
+ sphere_low = sphere[(sphere["sphere_r"] < self.size_thr) & (sphere["in_nucleus"] < self.in_nucleus_thr[0])]
176
+ sphere_high = sphere[(sphere["sphere_r"] < self.size_thr) & (sphere["in_nucleus"] > self.in_nucleus_thr[1])]
174
177
 
175
178
  if write_csv:
176
- sphere_low.to_csv(write_path + j + ' sphere.csv', index=0)
177
- sphere_high.to_csv(write_path + j + ' sphere_high.csv', index=0)
179
+ sphere_low.to_csv(write_path + j + " sphere.csv", index=0)
180
+ sphere_high.to_csv(write_path + j + " sphere_high.csv", index=0)
178
181
 
179
182
  num_individual.append(sphere_low.shape[0])
180
183
  data_low[target_names.index(j)] = sphere_low
@@ -186,16 +189,16 @@ class mcDETECT:
186
189
 
187
190
  # [INNER] merge points from two overlapped spheres, input for remove_overlaps()
188
191
  def find_points(self, sphere_a, sphere_b):
189
- transcripts = self.transcripts[self.transcripts['target'].isin(self.syn_genes)]
190
- tree_temp = make_tree(d1 = np.array(transcripts['global_x']), d2 = np.array(transcripts['global_y']), d3 = np.array(transcripts['global_z']))
191
- idx_a = tree_temp.query_ball_point([sphere_a['sphere_x'], sphere_a['sphere_y'], sphere_a['sphere_z']], sphere_a['sphere_r'])
192
+ transcripts = self.transcripts[self.transcripts["target"].isin(self.syn_genes)]
193
+ tree_temp = make_tree(d1 = np.array(transcripts["global_x"]), d2 = np.array(transcripts["global_y"]), d3 = np.array(transcripts["global_z"]))
194
+ idx_a = tree_temp.query_ball_point([sphere_a["sphere_x"], sphere_a["sphere_y"], sphere_a["sphere_z"]], sphere_a["sphere_r"])
192
195
  points_a = transcripts.iloc[idx_a]
193
- points_a = points_a[points_a['target'] == sphere_a['gene']]
194
- idx_b = tree_temp.query_ball_point([sphere_b['sphere_x'], sphere_b['sphere_y'], sphere_b['sphere_z']], sphere_b['sphere_r'])
196
+ points_a = points_a[points_a["target"] == sphere_a["gene"]]
197
+ idx_b = tree_temp.query_ball_point([sphere_b["sphere_x"], sphere_b["sphere_y"], sphere_b["sphere_z"]], sphere_b["sphere_r"])
195
198
  points_b = transcripts.iloc[idx_b]
196
- points_b = points_b[points_b['target'] == sphere_b['gene']]
199
+ points_b = points_b[points_b["target"] == sphere_b["gene"]]
197
200
  points = pd.concat([points_a, points_b])
198
- points = points[['global_x', 'global_y', 'global_z']]
201
+ points = points[["global_x", "global_y", "global_z"]]
199
202
  return points
200
203
 
201
204
 
@@ -239,10 +242,10 @@ class mcDETECT:
239
242
  elif not c1 and c2_1: # replace A with new sphere and remove B
240
243
  points_union = np.array(self.find_points(sphere_a, sphere_b))
241
244
  new_center, new_radius = miniball.get_bounding_ball(points_union, epsilon=1e-8)
242
- set_a.loc[i, 'sphere_x'] = new_center[0]
243
- set_a.loc[i, 'sphere_y'] = new_center[1]
244
- set_a.loc[i, 'sphere_z'] = new_center[2]
245
- set_a.loc[i, 'sphere_r'] = self.s * new_radius
245
+ set_a.loc[i, "sphere_x"] = new_center[0]
246
+ set_a.loc[i, "sphere_y"] = new_center[1]
247
+ set_a.loc[i, "sphere_z"] = new_center[2]
248
+ set_a.loc[i, "sphere_r"] = self.s * new_radius
246
249
  set_b.drop(index = j, inplace = True)
247
250
 
248
251
  set_a = set_a.reset_index(drop = True)
@@ -268,36 +271,36 @@ class mcDETECT:
268
271
  adata_low = self.profile(sphere_low, self.nc_genes)
269
272
  adata_high = self.profile(sphere_high, self.nc_genes)
270
273
  adata = anndata.concat([adata_low, adata_high], axis = 0, merge = "same")
271
- adata.var['genes'] = adata.var.index
274
+ adata.var["genes"] = adata.var.index
272
275
  adata.obs_keys = list(np.arange(adata.shape[0]))
273
- adata.obs['type'] = ['low'] * adata_low.shape[0] + ['high'] * adata_high.shape[0]
274
- adata.obs['type'] = pd.Categorical(adata.obs['type'], categories = ["low", "high"], ordered = True)
276
+ adata.obs["type"] = ["low"] * adata_low.shape[0] + ["high"] * adata_high.shape[0]
277
+ adata.obs["type"] = pd.Categorical(adata.obs["type"], categories = ["low", "high"], ordered = True)
275
278
 
276
279
  # DE analysis of negative control genes
277
- sc.tl.rank_genes_groups(adata, 'type', method = 't-test')
278
- names = adata.uns['rank_genes_groups']['names']
280
+ sc.tl.rank_genes_groups(adata, "type", method = "t-test")
281
+ names = adata.uns["rank_genes_groups"]["names"]
279
282
  names = pd.DataFrame(names)
280
- logfc = adata.uns['rank_genes_groups']['logfoldchanges']
283
+ logfc = adata.uns["rank_genes_groups"]["logfoldchanges"]
281
284
  logfc = pd.DataFrame(logfc)
282
- pvals = adata.uns['rank_genes_groups']['pvals']
285
+ pvals = adata.uns["rank_genes_groups"]["pvals"]
283
286
  pvals = pd.DataFrame(pvals)
284
287
 
285
288
  # select top upregulated negative control genes
286
- df = pd.DataFrame({'names': names["high"], 'logfc': logfc["high"], 'pvals': pvals["high"]})
287
- df = df[df['logfc'] >= 0]
288
- df = df.sort_values(by = ['pvals'], ascending = True)
289
- nc_genes_final = list(df['names'].head(self.nc_top))
289
+ df = pd.DataFrame({"names": names["high"], "logfc": logfc["high"], "pvals": pvals["high"]})
290
+ df = df[df["logfc"] >= 0]
291
+ df = df.sort_values(by = ["pvals"], ascending = True)
292
+ nc_genes_final = list(df["names"].head(self.nc_top))
290
293
 
291
294
  # negative control filtering
292
- nc_transcripts_final = self.transcripts[self.transcripts['target'].isin(nc_genes_final)]
293
- tree = make_tree(d1 = np.array(nc_transcripts_final['global_x']), d2 = np.array(nc_transcripts_final['global_y']), d3 = np.array(nc_transcripts_final['global_z']))
295
+ nc_transcripts_final = self.transcripts[self.transcripts["target"].isin(nc_genes_final)]
296
+ tree = make_tree(d1 = np.array(nc_transcripts_final["global_x"]), d2 = np.array(nc_transcripts_final["global_y"]), d3 = np.array(nc_transcripts_final["global_z"]))
294
297
  pass_idx = [0] * sphere_low.shape[0]
295
298
  for i in range(sphere_low.shape[0]):
296
299
  temp = sphere_low.iloc[i]
297
- nc_idx = tree.query_ball_point([temp['sphere_x'], temp['sphere_y'], temp['sphere_z']], temp['sphere_r'])
300
+ nc_idx = tree.query_ball_point([temp["sphere_x"], temp["sphere_y"], temp["sphere_z"]], temp["sphere_r"])
298
301
  if len(nc_idx) == 0:
299
302
  pass_idx[i] = 1
300
- elif len(nc_idx) / temp['size'] < self.nc_thr:
303
+ elif len(nc_idx) / temp["size"] < self.nc_thr:
301
304
  pass_idx[i] = 2
302
305
  sphere = sphere_low[np.array(pass_idx) != 0]
303
306
  sphere = sphere.reset_index(drop = True)
@@ -323,29 +326,29 @@ class mcDETECT:
323
326
  def profile(self, synapse, genes = None, print_itr = False):
324
327
 
325
328
  if genes is None:
326
- genes = list(np.unique(self.transcripts['target']))
329
+ genes = list(np.unique(self.transcripts["target"]))
327
330
  transcripts = self.transcripts
328
331
  else:
329
- transcripts = self.transcripts[self.transcripts['target'].isin(genes)]
330
- tree = make_tree(d1 = np.array(transcripts['global_x']), d2 = np.array(transcripts['global_y']), d3 = np.array(transcripts['global_z']))
332
+ transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
333
+ tree = make_tree(d1 = np.array(transcripts["global_x"]), d2 = np.array(transcripts["global_y"]), d3 = np.array(transcripts["global_z"]))
331
334
 
332
335
  # construct gene count matrix
333
336
  X = np.zeros((len(genes), synapse.shape[0]))
334
337
  for i in range(synapse.shape[0]):
335
338
  temp = synapse.iloc[i]
336
- target_idx = tree.query_ball_point([temp['sphere_x'], temp['sphere_y'], temp['layer_z']], temp['sphere_r'])
339
+ target_idx = tree.query_ball_point([temp["sphere_x"], temp["sphere_y"], temp["layer_z"]], temp["sphere_r"])
337
340
  target_trans = transcripts.iloc[target_idx]
338
- target_gene = list(target_trans['target'])
341
+ target_gene = list(target_trans["target"])
339
342
  for j in np.unique(target_gene):
340
343
  X[genes.index(j), i] = target_gene.count(j)
341
344
  if (print_itr) & (i % 5000 == 0):
342
- print('{} out of {} synapses profiled!'.format(i, synapse.shape[0]))
345
+ print("{} out of {} synapses profiled!".format(i, synapse.shape[0]))
343
346
 
344
347
  # construct spatial transcriptome profile
345
348
  adata = anndata.AnnData(X = np.transpose(X), obs = synapse)
346
- adata.obs['synapse_id'] = ['syn_{}'.format(i) for i in range(synapse.shape[0])]
347
- adata.obs.rename(columns = {'sphere_x': 'global_x', 'sphere_y': 'global_y', 'sphere_z': 'global_z'}, inplace = True)
348
- adata.var['genes'] = genes
349
+ adata.obs["synapse_id"] = ["syn_{}".format(i) for i in range(synapse.shape[0])]
350
+ adata.obs.rename(columns = {"sphere_x": "global_x", "sphere_y": "global_y", "sphere_z": "global_z"}, inplace = True)
351
+ adata.var["genes"] = genes
349
352
  adata.var_names = genes
350
353
  adata.var_keys = genes
351
354
  return adata
@@ -355,10 +358,10 @@ class mcDETECT:
355
358
  def spot_expression(self, grid_len, genes = None):
356
359
 
357
360
  if genes is None:
358
- genes = list(np.unique(self.transcripts['target']))
361
+ genes = list(np.unique(self.transcripts["target"]))
359
362
  transcripts = self.transcripts
360
363
  else:
361
- transcripts = self.transcripts[self.transcripts['target'].isin(genes)]
364
+ transcripts = self.transcripts[self.transcripts["target"].isin(genes)]
362
365
 
363
366
  # construct bins
364
367
  x_bins, y_bins = self.construct_grid(grid_len = grid_len)
@@ -377,8 +380,8 @@ class mcDETECT:
377
380
 
378
381
  # count matrix
379
382
  for k_idx, k in enumerate(genes):
380
- target_gene = transcripts[transcripts['target'] == k]
381
- count_gene, _, _ = np.histogram2d(target_gene['global_x'], target_gene['global_y'], bins = [x_bins, y_bins])
383
+ target_gene = transcripts[transcripts["target"] == k]
384
+ count_gene, _, _ = np.histogram2d(target_gene["global_x"], target_gene["global_y"], bins = [x_bins, y_bins])
382
385
  X[k_idx, :] = count_gene.flatten()
383
386
  if k_idx % 100 == 0:
384
387
  print("{} out of {} genes profiled!".format(k_idx, len(genes)))
@@ -386,15 +389,15 @@ class mcDETECT:
386
389
  # spot id
387
390
  spot_id = []
388
391
  for i in range(len(global_x)):
389
- id = 'spot_' + str(i)
392
+ id = "spot_" + str(i)
390
393
  spot_id.append(id)
391
394
 
392
395
  # assemble data
393
396
  adata = anndata.AnnData(X = np.transpose(X))
394
- adata.obs['spot_id'] = spot_id
395
- adata.obs['global_x'] = global_x
396
- adata.obs['global_y'] = global_y
397
- adata.var['genes'] = genes
397
+ adata.obs["spot_id"] = spot_id
398
+ adata.obs["global_x"] = global_x
399
+ adata.obs["global_y"] = global_y
400
+ adata.var["genes"] = genes
398
401
  adata.var_names = genes
399
402
  adata.var_keys = genes
400
403
  return adata
@@ -403,7 +406,7 @@ class mcDETECT:
403
406
  # [MAIN] anndata, spot-level synapse metadata
404
407
  def spot_synapse(self, synapse, spot):
405
408
 
406
- x_grid, y_grid = list(np.unique(spot.obs['global_x'])), list(np.unique(spot.obs['global_y']))
409
+ x_grid, y_grid = list(np.unique(spot.obs["global_x"])), list(np.unique(spot.obs["global_y"]))
407
410
  diameter = x_grid[1] - x_grid[0]
408
411
 
409
412
  indicator, synapse_count, synapse_radius, synapse_size, synapse_score = [], [], [], [], []
@@ -413,7 +416,7 @@ class mcDETECT:
413
416
  for j in y_grid:
414
417
  y_min_temp = j
415
418
  y_max_temp = j + diameter
416
- syn_temp = synapse[(synapse['sphere_x'] > x_min_temp) & (synapse['sphere_x'] < x_max_temp) & (synapse['sphere_y'] > y_min_temp) & (synapse['sphere_y'] < y_max_temp)]
419
+ syn_temp = synapse[(synapse["sphere_x"] > x_min_temp) & (synapse["sphere_x"] < x_max_temp) & (synapse["sphere_y"] > y_min_temp) & (synapse["sphere_y"] < y_max_temp)]
417
420
  indicator.append(int(syn_temp.shape[0] > 0))
418
421
  synapse_count.append(syn_temp.shape[0])
419
422
  if syn_temp.shape[0] == 0:
@@ -421,13 +424,13 @@ class mcDETECT:
421
424
  synapse_size.append(0)
422
425
  synapse_score.append(0)
423
426
  else:
424
- synapse_radius.append(np.nanmean(syn_temp['sphere_r']))
425
- synapse_size.append(np.nanmean(syn_temp['size']))
426
- synapse_score.append(np.nanmean(syn_temp['in_nucleus']))
427
+ synapse_radius.append(np.nanmean(syn_temp["sphere_r"]))
428
+ synapse_size.append(np.nanmean(syn_temp["size"]))
429
+ synapse_score.append(np.nanmean(syn_temp["in_nucleus"]))
427
430
 
428
- spot.obs['indicator'] = indicator
429
- spot.obs['syn_count'] = synapse_count
430
- spot.obs['syn_radius'] = synapse_radius
431
- spot.obs['syn_size'] = synapse_size
432
- spot.obs['syn_score'] = synapse_score
431
+ spot.obs["indicator"] = indicator
432
+ spot.obs["syn_count"] = synapse_count
433
+ spot.obs["syn_radius"] = synapse_radius
434
+ spot.obs["syn_size"] = synapse_size
435
+ spot.obs["syn_score"] = synapse_score
433
436
  return spot
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mcDETECT
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: mcDETECT: Decoding 3D Spatial Synaptic Transcriptomes with Subcellular-Resolution Spatial Transcriptomics
5
5
  Home-page: https://github.com/chen-yang-yuan/mcDETECT
6
6
  Author: Chenyang Yuan
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name = "mcDETECT",
5
- version = "1.0.9",
5
+ version = "1.0.11",
6
6
  packages = find_packages(),
7
7
  install_requires = ["anndata", "miniball", "numpy", "pandas", "rtree", "scanpy", "scikit-learn", "scipy", "shapely"],
8
8
  author = "Chenyang Yuan",
@@ -1,2 +0,0 @@
1
- __version__ = "1.0.9"
2
- from .model import mcDETECT
File without changes
File without changes
File without changes
File without changes