geney 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/oncosplice.py CHANGED
@@ -415,20 +415,29 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
415
415
  return report
416
416
 
417
417
 
418
+ import asyncio
419
+ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai'):
420
+ import sys, os
421
+ from pathlib import Path
422
+ needed_path = Path('/tamir2/yoramzar/Projects/Cancer_mut/Utils')
423
+ needed_file1 = needed_path / 'rest_api_utils.py'
424
+ needed_file2 = needed_path / 'uniprot_utils.py'
425
+
426
+ if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.X_OK)) and (needed_file2.is_file() and os.access(needed_file2, os.X_OK)):
427
+ sys.path.append(str(needed_path))
428
+ import uniprot_utils as uput
418
429
 
430
+ else:
431
+ raise SystemError("Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
419
432
 
420
- def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
421
- import requests
422
- import threading
433
+ # Define async functions
434
+ async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
435
+ return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
423
436
 
424
- def background_request(url, result):
425
- return {'data': 'success'}
426
437
 
427
438
  gene = Gene(mut_id.split(':')[0], organism=organism)
428
-
429
- domains = {}
430
- request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
431
- request_thread.start()
439
+ # request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
440
+ # request_thread.start()
432
441
 
433
442
  mutation = get_mutation(mut_id, rev=gene.rev)
434
443
 
@@ -441,6 +450,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
441
450
  results.append({'transcript_id': transcript.transcript_id})
442
451
  continue
443
452
 
453
+ task1 = asyncio.create_task(background_request(transcript.transcript_id))
454
+
444
455
  transcript.generate_pre_mrna()
445
456
  transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
446
457
  transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
@@ -451,7 +462,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
451
462
 
452
463
  missplicing = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine), threshold=splicing_threshold)
453
464
  transcript.pre_mrna += mutation
454
-
465
+ result1 = await task1
466
+ print(result1)
455
467
  for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
456
468
  transcript.acceptors = new_boundaries['acceptors']
457
469
  transcript.donors = new_boundaries['donors']
geney/pangolin_utils.py CHANGED
@@ -73,7 +73,8 @@ def pangolin_predict_probs(true_seq, models):
73
73
 
74
74
  scores.append(np.mean(score, axis=0))
75
75
 
76
- splicing_pred = np.array(scores).max(axis=0)
76
+ # splicing_pred = np.array(scores).max(axis=0)
77
+ splicing_pred = np.array(scores).mean(axis=0)
77
78
  donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
78
79
  acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
79
80
  # print(acceptor_probs)
geney/seqmat_utils.py CHANGED
@@ -140,6 +140,29 @@ class SeqMat:
140
140
  end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
141
141
  return self.seqmat[:, start_pos:end_pos]
142
142
 
143
+ def asymmetric_subseq(self, center, left_context, right_context, padding='$'):
144
+ center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
145
+ start_idx = center_idx - left_context
146
+ end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
147
+ left_padding = max(0, -start_idx)
148
+ right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
149
+ valid_start_idx = max(0, start_idx)
150
+ valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
151
+ valid_subseq = self.seq[valid_start_idx:valid_end_idx]
152
+ padded_subseq = (padding * left_padding) + valid_subseq + (padding * right_padding)
153
+ return padded_subseq
154
+
155
+ def asymmetric_indices(self, center, left_context, right_context):
156
+ center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
157
+ start_idx = center_idx - left_context
158
+ end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
159
+ left_padding = max(0, -start_idx)
160
+ right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
161
+ valid_start_idx = max(0, start_idx)
162
+ valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
163
+ valid_subseq = self.indices[valid_start_idx:valid_end_idx]
164
+ return valid_subseq
165
+
143
166
  def subseq_suffix(self, start):
144
167
  start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
145
168
  return self.seqmat[:, start_pos:]
@@ -166,7 +189,7 @@ class SeqMat:
166
189
  # if seq_length % 3 != 0:
167
190
  # temp.seqmat = temp.seqmat[:, :-(seq_length % 3)] # Trim the extra nucleotides
168
191
 
169
- if temp.seq[:3] == 'ATG':
192
+ if temp.seq[1:3] == 'TG':
170
193
  for i in range(3, len(temp.seq), 3):
171
194
  codon = temp.seq[i:i + 3]
172
195
  if codon in ['TAA', 'TAG', 'TGA']:
@@ -179,6 +202,11 @@ class SeqMat:
179
202
  else:
180
203
  return SeqMat('ATG')
181
204
 
205
+ def translate(self, tis_index):
206
+ from Bio import Seq
207
+ return Seq(self.orf_seqmat(tis_index).seq).translate()
208
+
209
+
182
210
  class Gene:
183
211
  def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
184
212
  gene_files = list((config[organism]['MRNA_PATH'] / 'protein_coding').glob(f'*_{gene_name}.pkl'))
@@ -419,7 +447,11 @@ class Transcript:
419
447
 
420
448
  def generate_protein(self, inplace=True, domains=None):
421
449
  protein = str(Seq(self.orf.seq).translate()).replace('*', '')
422
- cons_vector = self.cons_vector
450
+ if hasattr(self, 'cons_vector'):
451
+ cons_vector = self.cons_vector
452
+ else:
453
+ cons_vector = np.ones(len(protein))
454
+
423
455
  if domains is not None and np.all(np.isin(domains, np.arange(0, len(protein)))):
424
456
  all_indices = np.arange(cons_vector.size)
425
457
  mask = ~np.isin(all_indices, domains)
geney/utils.py CHANGED
@@ -16,10 +16,11 @@ def is_monotonic(A):
16
16
 
17
17
 
18
18
  def available_genes(organism='hg38'):
19
- from geney import config_setup
20
- annotation_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
19
+ from geney import config
20
+ annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
21
21
  return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
22
22
 
23
+
23
24
  def contains(a, x):
24
25
  """returns true if sorted sequence `a` contains `x`"""
25
26
  i = bisect_left(a, x)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.2.30
3
+ Version: 1.2.32
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -6,20 +6,20 @@ geney/graphic_utils.py,sha256=tjm6IDQ1BdfSeuPYzjlqAUHFQoDYH9jXTzJjKFS4Hh4,11078
6
6
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
7
7
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
8
8
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
9
- geney/oncosplice.py,sha256=zOOLdY_9tHpwwMQxTS358MCUlhQ6x-XmwlwlIrqwIwc,20902
10
- geney/pangolin_utils.py,sha256=MP4wGgiw36NAPfwpaXJ5mD4Q-DTbkL3xHcSlYtuZODw,2939
9
+ geney/oncosplice.py,sha256=7wf0_-Gkc_G9HhUXjORHk3buZ66JzVzSFVQ4EZOtUAE,21787
10
+ geney/pangolin_utils.py,sha256=ETTGpuaQgdZ1v8H0NP8sbTEfGWu0VXUFUS7wsURsTc4,2991
11
11
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
12
- geney/seqmat_utils.py,sha256=fawiPa4PPhmbx6wPynt8SG6eowZKUZ2yN32r6B8Ba-g,16802
12
+ geney/seqmat_utils.py,sha256=TDWhE5oVTGJceaO6YmE7I_BEWRxWLT74_3rkmY1M0Fs,18368
13
13
  geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
14
14
  geney/splicing_utils.py,sha256=q47EdcsHrp4aLIPVWvkGBJSzS3l3DKiD9DNDsPpZdHk,16075
15
15
  geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
16
16
  geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
17
- geney/utils.py,sha256=WbV1DBllQyvzoDiYkidRiTX5MBpQGr99M4hTUQ0BKo8,2185
17
+ geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
18
18
  geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
20
20
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
21
21
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
22
- geney-1.2.30.dist-info/METADATA,sha256=T8wma9mdUQjDbYAvMviVcdGPFJId-piDzxGpIVdcXMo,948
23
- geney-1.2.30.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
- geney-1.2.30.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
- geney-1.2.30.dist-info/RECORD,,
22
+ geney-1.2.32.dist-info/METADATA,sha256=aHeSBHWq3b1li4G_CI2ClUEHJc5SfWHowqKrkZbQPGk,948
23
+ geney-1.2.32.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
+ geney-1.2.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
+ geney-1.2.32.dist-info/RECORD,,
File without changes