geney 1.2.30__py2.py3-none-any.whl → 1.2.31__py2.py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/oncosplice.py +22 -10
- geney/pangolin_utils.py +2 -1
- geney/seqmat_utils.py +24 -2
- geney/utils.py +3 -2
- {geney-1.2.30.dist-info → geney-1.2.31.dist-info}/METADATA +1 -1
- {geney-1.2.30.dist-info → geney-1.2.31.dist-info}/RECORD +8 -8
- {geney-1.2.30.dist-info → geney-1.2.31.dist-info}/WHEEL +0 -0
- {geney-1.2.30.dist-info → geney-1.2.31.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -415,20 +415,29 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
|
|
|
415
415
|
return report
|
|
416
416
|
|
|
417
417
|
|
|
418
|
+
import asyncio
|
|
419
|
+
async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai'):
|
|
420
|
+
import sys, os
|
|
421
|
+
from pathlib import Path
|
|
422
|
+
needed_path = Path('/tamir2/yoramzar/Projects/Cancer_mut/Utils')
|
|
423
|
+
needed_file1 = needed_path / 'rest_api_utils.py'
|
|
424
|
+
needed_file2 = needed_path / 'uniprot_utils.py'
|
|
425
|
+
|
|
426
|
+
if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.X_OK)) and (needed_file2.is_file() and os.access(needed_file2, os.X_OK)):
|
|
427
|
+
sys.path.append(str(needed_path))
|
|
428
|
+
import uniprot_utils as uput
|
|
418
429
|
|
|
430
|
+
else:
|
|
431
|
+
raise SystemError("Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
|
|
419
432
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
433
|
+
# Define async functions
|
|
434
|
+
async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
|
|
435
|
+
return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
|
|
423
436
|
|
|
424
|
-
def background_request(url, result):
|
|
425
|
-
return {'data': 'success'}
|
|
426
437
|
|
|
427
438
|
gene = Gene(mut_id.split(':')[0], organism=organism)
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
|
|
431
|
-
request_thread.start()
|
|
439
|
+
# request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
|
|
440
|
+
# request_thread.start()
|
|
432
441
|
|
|
433
442
|
mutation = get_mutation(mut_id, rev=gene.rev)
|
|
434
443
|
|
|
@@ -441,6 +450,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
|
|
|
441
450
|
results.append({'transcript_id': transcript.transcript_id})
|
|
442
451
|
continue
|
|
443
452
|
|
|
453
|
+
task1 = asyncio.create_task(background_request(transcript.transcript_id))
|
|
454
|
+
|
|
444
455
|
transcript.generate_pre_mrna()
|
|
445
456
|
transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
|
|
446
457
|
transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
|
|
@@ -451,7 +462,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
|
|
|
451
462
|
|
|
452
463
|
missplicing = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine), threshold=splicing_threshold)
|
|
453
464
|
transcript.pre_mrna += mutation
|
|
454
|
-
|
|
465
|
+
result1 = await task1
|
|
466
|
+
print(result1)
|
|
455
467
|
for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
|
|
456
468
|
transcript.acceptors = new_boundaries['acceptors']
|
|
457
469
|
transcript.donors = new_boundaries['donors']
|
geney/pangolin_utils.py
CHANGED
|
@@ -73,7 +73,8 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
73
73
|
|
|
74
74
|
scores.append(np.mean(score, axis=0))
|
|
75
75
|
|
|
76
|
-
splicing_pred = np.array(scores).max(axis=0)
|
|
76
|
+
# splicing_pred = np.array(scores).max(axis=0)
|
|
77
|
+
splicing_pred = np.array(scores).mean(axis=0)
|
|
77
78
|
donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
|
|
78
79
|
acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
|
|
79
80
|
# print(acceptor_probs)
|
geney/seqmat_utils.py
CHANGED
|
@@ -140,6 +140,19 @@ class SeqMat:
|
|
|
140
140
|
end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
|
|
141
141
|
return self.seqmat[:, start_pos:end_pos]
|
|
142
142
|
|
|
143
|
+
def asymmetric_subseq(self, center, left_context, right_context, padding='$'):
|
|
144
|
+
center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
|
|
145
|
+
start_idx = center_idx - left_context
|
|
146
|
+
end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
|
|
147
|
+
left_padding = max(0, -start_idx)
|
|
148
|
+
right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
|
|
149
|
+
valid_start_idx = max(0, start_idx)
|
|
150
|
+
valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
|
|
151
|
+
valid_subseq = self.seq[valid_start_idx:valid_end_idx]
|
|
152
|
+
padded_subseq = (padding * left_padding) + valid_subseq + (padding * right_padding)
|
|
153
|
+
return padded_subseq
|
|
154
|
+
|
|
155
|
+
|
|
143
156
|
def subseq_suffix(self, start):
|
|
144
157
|
start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
|
|
145
158
|
return self.seqmat[:, start_pos:]
|
|
@@ -166,7 +179,7 @@ class SeqMat:
|
|
|
166
179
|
# if seq_length % 3 != 0:
|
|
167
180
|
# temp.seqmat = temp.seqmat[:, :-(seq_length % 3)] # Trim the extra nucleotides
|
|
168
181
|
|
|
169
|
-
if temp.seq[:3] == '
|
|
182
|
+
if temp.seq[1:3] == 'TG':
|
|
170
183
|
for i in range(3, len(temp.seq), 3):
|
|
171
184
|
codon = temp.seq[i:i + 3]
|
|
172
185
|
if codon in ['TAA', 'TAG', 'TGA']:
|
|
@@ -179,6 +192,11 @@ class SeqMat:
|
|
|
179
192
|
else:
|
|
180
193
|
return SeqMat('ATG')
|
|
181
194
|
|
|
195
|
+
def translate(self, tis_index):
|
|
196
|
+
from Bio import Seq
|
|
197
|
+
return Seq(self.orf_seqmat(tis_index).seq).translate()
|
|
198
|
+
|
|
199
|
+
|
|
182
200
|
class Gene:
|
|
183
201
|
def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
|
|
184
202
|
gene_files = list((config[organism]['MRNA_PATH'] / 'protein_coding').glob(f'*_{gene_name}.pkl'))
|
|
@@ -419,7 +437,11 @@ class Transcript:
|
|
|
419
437
|
|
|
420
438
|
def generate_protein(self, inplace=True, domains=None):
|
|
421
439
|
protein = str(Seq(self.orf.seq).translate()).replace('*', '')
|
|
422
|
-
|
|
440
|
+
if hasattr(self, 'cons_vector'):
|
|
441
|
+
cons_vector = self.cons_vector
|
|
442
|
+
else:
|
|
443
|
+
cons_vector = np.ones(len(protein))
|
|
444
|
+
|
|
423
445
|
if domains is not None and np.all(np.isin(domains, np.arange(0, len(protein)))):
|
|
424
446
|
all_indices = np.arange(cons_vector.size)
|
|
425
447
|
mask = ~np.isin(all_indices, domains)
|
geney/utils.py
CHANGED
|
@@ -16,10 +16,11 @@ def is_monotonic(A):
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def available_genes(organism='hg38'):
|
|
19
|
-
from geney import
|
|
20
|
-
annotation_path =
|
|
19
|
+
from geney import config
|
|
20
|
+
annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
|
|
21
21
|
return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
|
|
22
22
|
|
|
23
|
+
|
|
23
24
|
def contains(a, x):
|
|
24
25
|
"""returns true if sorted sequence `a` contains `x`"""
|
|
25
26
|
i = bisect_left(a, x)
|
|
@@ -6,20 +6,20 @@ geney/graphic_utils.py,sha256=tjm6IDQ1BdfSeuPYzjlqAUHFQoDYH9jXTzJjKFS4Hh4,11078
|
|
|
6
6
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
7
7
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
8
8
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
9
|
-
geney/oncosplice.py,sha256=
|
|
10
|
-
geney/pangolin_utils.py,sha256=
|
|
9
|
+
geney/oncosplice.py,sha256=7wf0_-Gkc_G9HhUXjORHk3buZ66JzVzSFVQ4EZOtUAE,21787
|
|
10
|
+
geney/pangolin_utils.py,sha256=ETTGpuaQgdZ1v8H0NP8sbTEfGWu0VXUFUS7wsURsTc4,2991
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
|
-
geney/seqmat_utils.py,sha256=
|
|
12
|
+
geney/seqmat_utils.py,sha256=jTW2LZEAh5ftgWp0sblq_6AfI21uC4_bPu6zxvLhPWA,17755
|
|
13
13
|
geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
|
|
14
14
|
geney/splicing_utils.py,sha256=q47EdcsHrp4aLIPVWvkGBJSzS3l3DKiD9DNDsPpZdHk,16075
|
|
15
15
|
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
16
16
|
geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
|
|
17
|
-
geney/utils.py,sha256=
|
|
17
|
+
geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
|
|
18
18
|
geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
|
|
20
20
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
21
21
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
22
|
-
geney-1.2.
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
22
|
+
geney-1.2.31.dist-info/METADATA,sha256=vRYAE-ITb5427-B42evqLtx6LOrfhZ7wlGONrlje9vo,948
|
|
23
|
+
geney-1.2.31.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
24
|
+
geney-1.2.31.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
25
|
+
geney-1.2.31.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|