geney 1.3.33__py2.py3-none-any.whl → 1.3.35__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/Transcript.py +1 -1
- geney/oncosplice.py +17 -9
- {geney-1.3.33.dist-info → geney-1.3.35.dist-info}/METADATA +1 -1
- {geney-1.3.33.dist-info → geney-1.3.35.dist-info}/RECORD +6 -6
- {geney-1.3.33.dist-info → geney-1.3.35.dist-info}/WHEEL +0 -0
- {geney-1.3.33.dist-info → geney-1.3.35.dist-info}/top_level.txt +0 -0
geney/Transcript.py
CHANGED
|
@@ -374,6 +374,6 @@ class Transcript:
|
|
|
374
374
|
protein = str(Seq(self.orf.seq).translate()).replace('*', '')
|
|
375
375
|
|
|
376
376
|
# Use existing cons_vector or default to an array of ones
|
|
377
|
-
self.cons_vector = self.cons_vector if hasattr(self, 'cons_vector') else np.ones(len(protein))
|
|
377
|
+
self.cons_vector = self.cons_vector if hasattr(self, 'cons_vector') and len(self.cons_vector) == len(protein) else np.ones(len(protein))
|
|
378
378
|
self.protein = protein
|
|
379
379
|
return self
|
geney/oncosplice.py
CHANGED
|
@@ -305,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
|
|
|
305
305
|
|
|
306
306
|
|
|
307
307
|
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
|
|
308
|
-
window_length=13, organism='hg38', splicing_engine=None):
|
|
308
|
+
window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False):
|
|
309
309
|
gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
310
310
|
reference_gene_proteins = {
|
|
311
311
|
transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
|
|
@@ -313,7 +313,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
313
313
|
|
|
314
314
|
mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
|
|
315
315
|
results = []
|
|
316
|
-
for reference_transcript in tqdm(gene):
|
|
316
|
+
for reference_transcript in tqdm(gene, desc=f'Processing {mut_id}...'):
|
|
317
317
|
if (cons_required and not reference_transcript.cons_available) or (
|
|
318
318
|
protein_coding and not reference_transcript.transcript_biotype == 'protein_coding'):
|
|
319
319
|
continue
|
|
@@ -338,12 +338,20 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
338
338
|
if splicing_engine is None:
|
|
339
339
|
missplicing = Missplicing()
|
|
340
340
|
else:
|
|
341
|
-
missplicing =
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
341
|
+
missplicing, no_splicing_record = None, True
|
|
342
|
+
if splicing_db is not None:
|
|
343
|
+
missplicing = splicing_db.get_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene, transcript_id=reference_transcript.transcript_id)
|
|
344
|
+
no_splicing_record = missplicing is None
|
|
345
|
+
|
|
346
|
+
if missplicing is None:
|
|
347
|
+
missplicing = find_transcript_missplicing_seqs(
|
|
348
|
+
reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
|
|
349
|
+
mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
|
|
350
|
+
reference_transcript.acceptors, threshold=splicing_threshold, engine=splicing_engine)
|
|
351
|
+
if no_splicing_record and splicing_db is not None:
|
|
352
|
+
splicing_db.get_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene, transcript_id=reference_transcript.transcript_id, data=missplicing.missplicing)
|
|
346
353
|
|
|
354
|
+
alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing.aberrant_splicing)
|
|
347
355
|
for i, new_boundaries in enumerate(alternative_splicing_paths):
|
|
348
356
|
mutated_transcript.acceptors = new_boundaries['acceptors']
|
|
349
357
|
mutated_transcript.donors = new_boundaries['donors']
|
|
@@ -368,7 +376,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
368
376
|
report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
|
369
377
|
report['isoform_prevalence'] = new_boundaries['path_weight']
|
|
370
378
|
report['full_missplicing'] = missplicing.aberrant_splicing
|
|
371
|
-
report['missplicing'] = missplicing.max_delta
|
|
379
|
+
report['missplicing'] = missplicing.max_delta
|
|
372
380
|
report['reference_resemblance'] = reference_gene_proteins.get(mutated_transcript.protein, None)
|
|
373
381
|
results.append(report)
|
|
374
382
|
|
|
@@ -378,7 +386,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
378
386
|
return pd.DataFrame(results)[
|
|
379
387
|
['mut_id', 'transcript_id', 'isoform_id', 'primary_transcript', 'missplicing', 'full_missplicing',
|
|
380
388
|
'exon_changes', 'splicing_codes', 'affected_exon', 'affected_intron', 'mutation_distance_from_5',
|
|
381
|
-
'mutation_distance_from_3', '
|
|
389
|
+
'mutation_distance_from_3', 'reference_resemblance', 'oncosplice_score', 'percentile',
|
|
382
390
|
'isoform_prevalence', 'reference_protein', 'variant_protein', 'splicing_engine']]
|
|
383
391
|
|
|
384
392
|
#
|
|
{geney-1.3.33.dist-info → geney-1.3.35.dist-info}/RECORD
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
2
|
geney/Gene.py,sha256=1pqKI3hGZndi7VY4j66ObhJjnz8YiNtBLGJTIHWVujA,6670
|
|
3
3
|
geney/SeqMats.py,sha256=aLpqd7RJSEU07jdPXpbtZPeb2D9BxrZuW6BTkcXpNE4,18819
|
|
4
|
-
geney/Transcript.py,sha256=
|
|
4
|
+
geney/Transcript.py,sha256=s2QtC5IGAJNZ1Uf3S-eTCzTXy6lrDzXbxb9pcLMwbQg,14485
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
7
7
|
geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -11,7 +11,7 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
11
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
12
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
13
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
14
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=3_XAUbzpCTYBI9W1uag75ed93lsNw8UuvnM6LjwTXZY,23221
|
|
15
15
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.35.dist-info/METADATA,sha256=yhGQQBVx7g7hjOBXPgMZMb5-R9gfnnpjFVNWnRJuPwQ,990
|
|
29
|
+
geney-1.3.35.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.35.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.35.dist-info/RECORD,,
|
|
{geney-1.3.33.dist-info → geney-1.3.35.dist-info}/WHEEL: File without changes
|
|
{geney-1.3.33.dist-info → geney-1.3.35.dist-info}/top_level.txt: File without changes
|