geney-1.3.30-py2.py3-none-any.whl → geney-1.3.32-py2.py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- geney/oncosplice.py +11 -8
- geney/splicing_utils.py +6 -1
- {geney-1.3.30.dist-info → geney-1.3.32.dist-info}/METADATA +1 -1
- {geney-1.3.30.dist-info → geney-1.3.32.dist-info}/RECORD +6 -6
- {geney-1.3.30.dist-info → geney-1.3.32.dist-info}/WHEEL +0 -0
- {geney-1.3.30.dist-info → geney-1.3.32.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -5,7 +5,7 @@ from tqdm import tqdm
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import numpy as np
|
|
7
7
|
from .SeqMats import SeqMat, MutSeqMat
|
|
8
|
-
from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
|
|
8
|
+
from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing, Missplicing
|
|
9
9
|
from .Gene import Gene
|
|
10
10
|
import copy
|
|
11
11
|
from . import config
|
|
@@ -305,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
|
|
|
305
305
|
|
|
306
306
|
|
|
307
307
|
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
|
|
308
|
-
window_length=13, organism='hg38',
|
|
308
|
+
window_length=13, organism='hg38', splicing_engine=None):
|
|
309
309
|
gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
310
310
|
reference_gene_proteins = {
|
|
311
311
|
transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
|
|
@@ -335,10 +335,13 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
335
335
|
assert len(reference_transcript.protein) == len(
|
|
336
336
|
reference_transcript.cons_vector), f"Protein ({len(reference_transcript.protein)}) and conservation vector ({len(reference_transcript.cons_vector)}) must be same length."
|
|
337
337
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
338
|
+
if splicing_engine is None:
|
|
339
|
+
missplicing = Missplicing()
|
|
340
|
+
else:
|
|
341
|
+
missplicing = find_transcript_missplicing_seqs(
|
|
342
|
+
reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
|
|
343
|
+
mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
|
|
344
|
+
reference_transcript.acceptors, threshold=splicing_threshold, engine=splicing_engine)
|
|
342
345
|
alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing.aberrant_splicing)
|
|
343
346
|
|
|
344
347
|
for i, new_boundaries in enumerate(alternative_splicing_paths):
|
|
@@ -359,7 +362,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
359
362
|
|
|
360
363
|
report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
|
|
361
364
|
report['mut_id'] = mut_id
|
|
362
|
-
report['
|
|
365
|
+
report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
|
|
363
366
|
report['oncosplice_score'] = affected_cons_scores
|
|
364
367
|
report['percentile'] = percentile
|
|
365
368
|
report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
|
@@ -376,7 +379,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
376
379
|
['mut_id', 'transcript_id', 'isoform_id', 'primary_transcript', 'missplicing', 'full_missplicing',
|
|
377
380
|
'exon_changes', 'splicing_codes', 'affected_exon', 'affected_intron', 'mutation_distance_from_5',
|
|
378
381
|
'mutation_distance_from_3', 'engine', 'reference_resemblance', 'oncosplice_score', 'percentile',
|
|
379
|
-
'isoform_prevalence', 'reference_protein', 'variant_protein']]
|
|
382
|
+
'isoform_prevalence', 'reference_protein', 'variant_protein', 'splicing_engine']]
|
|
380
383
|
|
|
381
384
|
#
|
|
382
385
|
# import asyncio
|
geney/splicing_utils.py
CHANGED
|
@@ -403,6 +403,7 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
403
403
|
m2.get(k, {}).get('reference') or
|
|
404
404
|
mb.get(k, {}).get('reference')
|
|
405
405
|
)
|
|
406
|
+
|
|
406
407
|
if ref_val is None:
|
|
407
408
|
ref_val = 0
|
|
408
409
|
|
|
@@ -460,7 +461,7 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
460
461
|
|
|
461
462
|
|
|
462
463
|
class Missplicing:
|
|
463
|
-
def __init__(self, splicing_dict, threshold=0.5):
|
|
464
|
+
def __init__(self, splicing_dict=None, threshold=0.5):
|
|
464
465
|
"""
|
|
465
466
|
Initialize a Missplicing object.
|
|
466
467
|
|
|
@@ -475,6 +476,8 @@ class Missplicing:
|
|
|
475
476
|
}
|
|
476
477
|
threshold (float): The threshold above which a delta is considered significant.
|
|
477
478
|
"""
|
|
479
|
+
if splicing_dict is None:
|
|
480
|
+
splicing_dict = {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
|
|
478
481
|
self.missplicing = splicing_dict
|
|
479
482
|
self.threshold = threshold
|
|
480
483
|
|
|
@@ -521,6 +524,8 @@ class Missplicing:
|
|
|
521
524
|
"""
|
|
522
525
|
if threshold is None:
|
|
523
526
|
threshold = self.threshold
|
|
527
|
+
if threshold is None:
|
|
528
|
+
threshold = 0
|
|
524
529
|
|
|
525
530
|
return {
|
|
526
531
|
event: {
|
|
@@ -11,12 +11,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
11
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
12
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
13
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
14
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=_nHLzzFRzxSWm-lYA9cQHTpiA1d8DMQIlch2CI4qZZg,22554
|
|
15
15
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
18
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
19
|
+
geney/splicing_utils.py,sha256=H79ISQXr7LIxNwxtUZ0pMjgnjTCi65nGgNo7T-Rt-zo,38708
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
22
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.32.dist-info/METADATA,sha256=Li7y-k33A_VcI9h0zQWAVtUpaswrecnnjx-Kqqvgrw8,990
|
|
29
|
+
geney-1.3.32.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|