geney 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/oncosplice.py +22 -10
- geney/pangolin_utils.py +2 -1
- geney/seqmat_utils.py +34 -2
- geney/utils.py +3 -2
- {geney-1.2.30.dist-info → geney-1.2.32.dist-info}/METADATA +1 -1
- {geney-1.2.30.dist-info → geney-1.2.32.dist-info}/RECORD +8 -8
- {geney-1.2.30.dist-info → geney-1.2.32.dist-info}/WHEEL +0 -0
- {geney-1.2.30.dist-info → geney-1.2.32.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -415,20 +415,29 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
|
|
|
415
415
|
return report
|
|
416
416
|
|
|
417
417
|
|
|
418
|
+
import asyncio
|
|
419
|
+
async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai'):
|
|
420
|
+
import sys, os
|
|
421
|
+
from pathlib import Path
|
|
422
|
+
needed_path = Path('/tamir2/yoramzar/Projects/Cancer_mut/Utils')
|
|
423
|
+
needed_file1 = needed_path / 'rest_api_utils.py'
|
|
424
|
+
needed_file2 = needed_path / 'uniprot_utils.py'
|
|
425
|
+
|
|
426
|
+
if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.X_OK)) and (needed_file2.is_file() and os.access(needed_file2, os.X_OK)):
|
|
427
|
+
sys.path.append(str(needed_path))
|
|
428
|
+
import uniprot_utils as uput
|
|
418
429
|
|
|
430
|
+
else:
|
|
431
|
+
raise SystemError("Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
|
|
419
432
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
433
|
+
# Define async functions
|
|
434
|
+
async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
|
|
435
|
+
return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
|
|
423
436
|
|
|
424
|
-
def background_request(url, result):
|
|
425
|
-
return {'data': 'success'}
|
|
426
437
|
|
|
427
438
|
gene = Gene(mut_id.split(':')[0], organism=organism)
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
|
|
431
|
-
request_thread.start()
|
|
439
|
+
# request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
|
|
440
|
+
# request_thread.start()
|
|
432
441
|
|
|
433
442
|
mutation = get_mutation(mut_id, rev=gene.rev)
|
|
434
443
|
|
|
@@ -441,6 +450,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
|
|
|
441
450
|
results.append({'transcript_id': transcript.transcript_id})
|
|
442
451
|
continue
|
|
443
452
|
|
|
453
|
+
task1 = asyncio.create_task(background_request(transcript.transcript_id))
|
|
454
|
+
|
|
444
455
|
transcript.generate_pre_mrna()
|
|
445
456
|
transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
|
|
446
457
|
transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
|
|
@@ -451,7 +462,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
|
|
|
451
462
|
|
|
452
463
|
missplicing = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine), threshold=splicing_threshold)
|
|
453
464
|
transcript.pre_mrna += mutation
|
|
454
|
-
|
|
465
|
+
result1 = await task1
|
|
466
|
+
print(result1)
|
|
455
467
|
for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
|
|
456
468
|
transcript.acceptors = new_boundaries['acceptors']
|
|
457
469
|
transcript.donors = new_boundaries['donors']
|
geney/pangolin_utils.py
CHANGED
|
@@ -73,7 +73,8 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
73
73
|
|
|
74
74
|
scores.append(np.mean(score, axis=0))
|
|
75
75
|
|
|
76
|
-
splicing_pred = np.array(scores).max(axis=0)
|
|
76
|
+
# splicing_pred = np.array(scores).max(axis=0)
|
|
77
|
+
splicing_pred = np.array(scores).mean(axis=0)
|
|
77
78
|
donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
|
|
78
79
|
acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
|
|
79
80
|
# print(acceptor_probs)
|
geney/seqmat_utils.py
CHANGED
|
@@ -140,6 +140,29 @@ class SeqMat:
|
|
|
140
140
|
end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
|
|
141
141
|
return self.seqmat[:, start_pos:end_pos]
|
|
142
142
|
|
|
143
|
+
def asymmetric_subseq(self, center, left_context, right_context, padding='$'):
|
|
144
|
+
center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
|
|
145
|
+
start_idx = center_idx - left_context
|
|
146
|
+
end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
|
|
147
|
+
left_padding = max(0, -start_idx)
|
|
148
|
+
right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
|
|
149
|
+
valid_start_idx = max(0, start_idx)
|
|
150
|
+
valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
|
|
151
|
+
valid_subseq = self.seq[valid_start_idx:valid_end_idx]
|
|
152
|
+
padded_subseq = (padding * left_padding) + valid_subseq + (padding * right_padding)
|
|
153
|
+
return padded_subseq
|
|
154
|
+
|
|
155
|
+
def asymmetric_indices(self, center, left_context, right_context):
|
|
156
|
+
center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
|
|
157
|
+
start_idx = center_idx - left_context
|
|
158
|
+
end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
|
|
159
|
+
left_padding = max(0, -start_idx)
|
|
160
|
+
right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
|
|
161
|
+
valid_start_idx = max(0, start_idx)
|
|
162
|
+
valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
|
|
163
|
+
valid_subseq = self.indices[valid_start_idx:valid_end_idx]
|
|
164
|
+
return valid_subseq
|
|
165
|
+
|
|
143
166
|
def subseq_suffix(self, start):
|
|
144
167
|
start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
|
|
145
168
|
return self.seqmat[:, start_pos:]
|
|
@@ -166,7 +189,7 @@ class SeqMat:
|
|
|
166
189
|
# if seq_length % 3 != 0:
|
|
167
190
|
# temp.seqmat = temp.seqmat[:, :-(seq_length % 3)] # Trim the extra nucleotides
|
|
168
191
|
|
|
169
|
-
if temp.seq[:3] == '
|
|
192
|
+
if temp.seq[1:3] == 'TG':
|
|
170
193
|
for i in range(3, len(temp.seq), 3):
|
|
171
194
|
codon = temp.seq[i:i + 3]
|
|
172
195
|
if codon in ['TAA', 'TAG', 'TGA']:
|
|
@@ -179,6 +202,11 @@ class SeqMat:
|
|
|
179
202
|
else:
|
|
180
203
|
return SeqMat('ATG')
|
|
181
204
|
|
|
205
|
+
def translate(self, tis_index):
|
|
206
|
+
from Bio import Seq
|
|
207
|
+
return Seq(self.orf_seqmat(tis_index).seq).translate()
|
|
208
|
+
|
|
209
|
+
|
|
182
210
|
class Gene:
|
|
183
211
|
def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
|
|
184
212
|
gene_files = list((config[organism]['MRNA_PATH'] / 'protein_coding').glob(f'*_{gene_name}.pkl'))
|
|
@@ -419,7 +447,11 @@ class Transcript:
|
|
|
419
447
|
|
|
420
448
|
def generate_protein(self, inplace=True, domains=None):
|
|
421
449
|
protein = str(Seq(self.orf.seq).translate()).replace('*', '')
|
|
422
|
-
|
|
450
|
+
if hasattr(self, 'cons_vector'):
|
|
451
|
+
cons_vector = self.cons_vector
|
|
452
|
+
else:
|
|
453
|
+
cons_vector = np.ones(len(protein))
|
|
454
|
+
|
|
423
455
|
if domains is not None and np.all(np.isin(domains, np.arange(0, len(protein)))):
|
|
424
456
|
all_indices = np.arange(cons_vector.size)
|
|
425
457
|
mask = ~np.isin(all_indices, domains)
|
geney/utils.py
CHANGED
|
@@ -16,10 +16,11 @@ def is_monotonic(A):
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def available_genes(organism='hg38'):
|
|
19
|
-
from geney import
|
|
20
|
-
annotation_path =
|
|
19
|
+
from geney import config
|
|
20
|
+
annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
|
|
21
21
|
return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
|
|
22
22
|
|
|
23
|
+
|
|
23
24
|
def contains(a, x):
|
|
24
25
|
"""returns true if sorted sequence `a` contains `x`"""
|
|
25
26
|
i = bisect_left(a, x)
|
|
@@ -6,20 +6,20 @@ geney/graphic_utils.py,sha256=tjm6IDQ1BdfSeuPYzjlqAUHFQoDYH9jXTzJjKFS4Hh4,11078
|
|
|
6
6
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
7
7
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
8
8
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
9
|
-
geney/oncosplice.py,sha256=
|
|
10
|
-
geney/pangolin_utils.py,sha256=
|
|
9
|
+
geney/oncosplice.py,sha256=7wf0_-Gkc_G9HhUXjORHk3buZ66JzVzSFVQ4EZOtUAE,21787
|
|
10
|
+
geney/pangolin_utils.py,sha256=ETTGpuaQgdZ1v8H0NP8sbTEfGWu0VXUFUS7wsURsTc4,2991
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
|
-
geney/seqmat_utils.py,sha256=
|
|
12
|
+
geney/seqmat_utils.py,sha256=TDWhE5oVTGJceaO6YmE7I_BEWRxWLT74_3rkmY1M0Fs,18368
|
|
13
13
|
geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
|
|
14
14
|
geney/splicing_utils.py,sha256=q47EdcsHrp4aLIPVWvkGBJSzS3l3DKiD9DNDsPpZdHk,16075
|
|
15
15
|
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
16
16
|
geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
|
|
17
|
-
geney/utils.py,sha256=
|
|
17
|
+
geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
|
|
18
18
|
geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
|
|
20
20
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
21
21
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
22
|
-
geney-1.2.
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
22
|
+
geney-1.2.32.dist-info/METADATA,sha256=aHeSBHWq3b1li4G_CI2ClUEHJc5SfWHowqKrkZbQPGk,948
|
|
23
|
+
geney-1.2.32.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
24
|
+
geney-1.2.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
25
|
+
geney-1.2.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|