geney 1.3.29__py2.py3-none-any.whl → 1.3.31__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/Gene.py +9 -0
- geney/oncosplice.py +11 -8
- geney/splicing_utils.py +3 -1
- {geney-1.3.29.dist-info → geney-1.3.31.dist-info}/METADATA +1 -1
- {geney-1.3.29.dist-info → geney-1.3.31.dist-info}/RECORD +7 -7
- {geney-1.3.29.dist-info → geney-1.3.31.dist-info}/WHEEL +0 -0
- {geney-1.3.29.dist-info → geney-1.3.31.dist-info}/top_level.txt +0 -0
geney/Gene.py
CHANGED
|
@@ -138,6 +138,9 @@ class Gene:
|
|
|
138
138
|
if tid is None:
|
|
139
139
|
tid = self.primary_transcript
|
|
140
140
|
|
|
141
|
+
if tid is None:
|
|
142
|
+
return Transcript()
|
|
143
|
+
|
|
141
144
|
if tid not in self.transcripts:
|
|
142
145
|
raise AttributeError(f"Transcript '{tid}' not found in gene '{self.gene_name}'.")
|
|
143
146
|
|
|
@@ -170,6 +173,12 @@ class Gene:
|
|
|
170
173
|
self._primary_transcript = protein_coding[0]
|
|
171
174
|
return self._primary_transcript
|
|
172
175
|
|
|
176
|
+
# # Fallback 2: find a protein-coding transcript that is not fully defined
|
|
177
|
+
# protein_coding = [k for k, v in self.transcripts.items() if v.get('transcript_biotype') == 'protein_coding_CDS_not_defined']
|
|
178
|
+
# if protein_coding:
|
|
179
|
+
# self._primary_transcript = protein_coding[0]
|
|
180
|
+
# return self._primary_transcript
|
|
181
|
+
|
|
173
182
|
# No primary or protein-coding transcript found
|
|
174
183
|
self._primary_transcript = None
|
|
175
184
|
return None
|
geney/oncosplice.py
CHANGED
|
@@ -5,7 +5,7 @@ from tqdm import tqdm
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import numpy as np
|
|
7
7
|
from .SeqMats import SeqMat, MutSeqMat
|
|
8
|
-
from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
|
|
8
|
+
from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing, Missplicing
|
|
9
9
|
from .Gene import Gene
|
|
10
10
|
import copy
|
|
11
11
|
from . import config
|
|
@@ -305,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
|
|
|
305
305
|
|
|
306
306
|
|
|
307
307
|
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
|
|
308
|
-
window_length=13, organism='hg38',
|
|
308
|
+
window_length=13, organism='hg38', splicing_engine=None):
|
|
309
309
|
gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
310
310
|
reference_gene_proteins = {
|
|
311
311
|
transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
|
|
@@ -335,10 +335,13 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
335
335
|
assert len(reference_transcript.protein) == len(
|
|
336
336
|
reference_transcript.cons_vector), f"Protein ({len(reference_transcript.protein)}) and conservation vector ({len(reference_transcript.cons_vector)}) must be same length."
|
|
337
337
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
338
|
+
if splicing_engine is None:
|
|
339
|
+
missplicing = Missplicing()
|
|
340
|
+
else:
|
|
341
|
+
missplicing = find_transcript_missplicing_seqs(
|
|
342
|
+
reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
|
|
343
|
+
mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
|
|
344
|
+
reference_transcript.acceptors, threshold=splicing_threshold, engine=splicing_engine)
|
|
342
345
|
alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing.aberrant_splicing)
|
|
343
346
|
|
|
344
347
|
for i, new_boundaries in enumerate(alternative_splicing_paths):
|
|
@@ -359,7 +362,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
359
362
|
|
|
360
363
|
report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
|
|
361
364
|
report['mut_id'] = mut_id
|
|
362
|
-
report['
|
|
365
|
+
report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
|
|
363
366
|
report['oncosplice_score'] = affected_cons_scores
|
|
364
367
|
report['percentile'] = percentile
|
|
365
368
|
report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
|
@@ -376,7 +379,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
376
379
|
['mut_id', 'transcript_id', 'isoform_id', 'primary_transcript', 'missplicing', 'full_missplicing',
|
|
377
380
|
'exon_changes', 'splicing_codes', 'affected_exon', 'affected_intron', 'mutation_distance_from_5',
|
|
378
381
|
'mutation_distance_from_3', 'engine', 'reference_resemblance', 'oncosplice_score', 'percentile',
|
|
379
|
-
'isoform_prevalence', 'reference_protein', 'variant_protein']]
|
|
382
|
+
'isoform_prevalence', 'reference_protein', 'variant_protein', 'splicing_engine']]
|
|
380
383
|
|
|
381
384
|
#
|
|
382
385
|
# import asyncio
|
geney/splicing_utils.py
CHANGED
|
@@ -460,7 +460,7 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
460
460
|
|
|
461
461
|
|
|
462
462
|
class Missplicing:
|
|
463
|
-
def __init__(self, splicing_dict, threshold=0.5):
|
|
463
|
+
def __init__(self, splicing_dict=None, threshold=0.5):
|
|
464
464
|
"""
|
|
465
465
|
Initialize a Missplicing object.
|
|
466
466
|
|
|
@@ -475,6 +475,8 @@ class Missplicing:
|
|
|
475
475
|
}
|
|
476
476
|
threshold (float): The threshold above which a delta is considered significant.
|
|
477
477
|
"""
|
|
478
|
+
if splicing_dict is None:
|
|
479
|
+
splicing_dict = {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
|
|
478
480
|
self.missplicing = splicing_dict
|
|
479
481
|
self.threshold = threshold
|
|
480
482
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
|
-
geney/Gene.py,sha256=
|
|
2
|
+
geney/Gene.py,sha256=1pqKI3hGZndi7VY4j66ObhJjnz8YiNtBLGJTIHWVujA,6670
|
|
3
3
|
geney/SeqMats.py,sha256=aLpqd7RJSEU07jdPXpbtZPeb2D9BxrZuW6BTkcXpNE4,18819
|
|
4
4
|
geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
@@ -11,12 +11,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
11
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
12
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
13
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
14
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=_nHLzzFRzxSWm-lYA9cQHTpiA1d8DMQIlch2CI4qZZg,22554
|
|
15
15
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
18
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
19
|
+
geney/splicing_utils.py,sha256=Fd-T5KC3swuUzrxg0YKnUYQX-J98yWuvrsyzJHM3Rus,38651
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
22
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.31.dist-info/METADATA,sha256=4B9E4JtQM_t7CiVr02rbZmZgnFPbJE-REz9441hDGI8,990
|
|
29
|
+
geney-1.3.31.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.31.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.31.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|