geney 1.3.79__py2.py3-none-any.whl → 1.4.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/Gene.py +9 -10
- geney/Oncosplice.py +400 -0
- geney/SpliceSimulator.py +407 -0
- geney/Transcript.py +54 -56
- geney/__init__.py +47 -19
- geney/_config_setup.py +16 -0
- geney/_graphic_utils.py +269 -0
- geney/_gtex_utils.py +68 -0
- geney/_immune_utils.py +125 -0
- geney/{oncosplice.py → _oncosplice.py} +199 -156
- geney/_splicing_utils.py +693 -0
- geney/_survival_utils.py +143 -0
- geney/_tcga_utils.py +405 -0
- geney/_tis_utils.py +172 -0
- geney/immune_utils.py +1 -1
- geney/pipelines.py +66 -0
- geney/power_utils.py +1 -1
- geney/utils/Fasta_segment.py +260 -0
- geney/utils/SeqMats.py +423 -0
- geney/utils/TranscriptLibrary.py +55 -0
- geney/utils/__init__.py +20 -0
- geney/utils/mutation_utils.py +104 -0
- geney/utils/pangolin_utils.py +173 -0
- geney/utils/spliceai_utils.py +123 -0
- geney/utils/splicing_utils.py +525 -0
- geney/utils/utils.py +89 -0
- {geney-1.3.79.dist-info → geney-1.4.0.dist-info}/METADATA +1 -1
- geney-1.4.0.dist-info/RECORD +51 -0
- {geney-1.3.79.dist-info → geney-1.4.0.dist-info}/WHEEL +1 -1
- geney-1.3.79.dist-info/RECORD +0 -31
- {geney-1.3.79.dist-info → geney-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,22 +1,17 @@
|
|
|
1
1
|
from Bio import pairwise2
|
|
2
2
|
import re
|
|
3
|
-
import hashlib
|
|
4
3
|
from datetime import datetime
|
|
5
4
|
from tqdm import tqdm
|
|
6
5
|
import pandas as pd
|
|
7
6
|
import numpy as np
|
|
8
|
-
from .SeqMats import
|
|
9
|
-
from .
|
|
7
|
+
from geney.utils.SeqMats import MutSeqMat
|
|
8
|
+
from ._splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing, Missplicing
|
|
10
9
|
from .Gene import Gene
|
|
11
|
-
import copy
|
|
12
|
-
from . import config
|
|
13
10
|
|
|
14
|
-
from .tis_utils import find_tis
|
|
15
11
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
return full_hash[:length]
|
|
12
|
+
##############################################################################################################################
|
|
13
|
+
######################################## ONCOSPLICE CALCULATIONS #############################################################
|
|
14
|
+
##############################################################################################################################
|
|
20
15
|
|
|
21
16
|
def find_continuous_gaps(sequence):
|
|
22
17
|
"""Find continuous gap sequences in an alignment."""
|
|
@@ -221,6 +216,23 @@ def moving_average_conv(vector, window_size, factor=1):
|
|
|
221
216
|
|
|
222
217
|
return np.convolve(vector, np.ones(window_size), mode='same') / window_size
|
|
223
218
|
|
|
219
|
+
|
|
220
|
+
def oncosplice_score(reference_protein, variant_protein, conservation_vector, window_length=13):
|
|
221
|
+
alignment = get_logical_alignment(reference_protein, variant_protein)
|
|
222
|
+
deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
223
|
+
modified_positions = find_modified_positions(len(reference_protein), deleted, inserted)
|
|
224
|
+
temp_cons = np.convolve(conservation_vector * modified_positions,
|
|
225
|
+
np.ones(window_length)) / window_length
|
|
226
|
+
percentile = (
|
|
227
|
+
sorted(conservation_vector).index(
|
|
228
|
+
next(x for x in sorted(conservation_vector) if x >= max(temp_cons))) / len(
|
|
229
|
+
conservation_vector))
|
|
230
|
+
return max(temp_cons), percentile
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
##############################################################################################################################
|
|
234
|
+
#################################### ANNOTATION FUNCTIONS ####################################################################
|
|
235
|
+
##############################################################################################################################
|
|
224
236
|
def find_splice_site_proximity(pos, transcript):
|
|
225
237
|
for i, (ex_start, ex_end) in enumerate(transcript.exons):
|
|
226
238
|
if min(ex_start, ex_end) <= pos <= max(ex_start, ex_end):
|
|
@@ -231,7 +243,6 @@ def find_splice_site_proximity(pos, transcript):
|
|
|
231
243
|
return None, i + 1, abs(pos - in_end), abs(pos - in_start)
|
|
232
244
|
|
|
233
245
|
return None, None, np.inf, np.inf
|
|
234
|
-
|
|
235
246
|
def define_missplicing_events(ref, var):
|
|
236
247
|
ref_introns, ref_exons = ref.introns, ref.exons
|
|
237
248
|
var_introns, var_exons = var.introns, var.exons
|
|
@@ -300,149 +311,166 @@ def missense_effect(r, v):
|
|
|
300
311
|
nt_changes += f'{r.orf.seq[p:p+3]}{p}{v.orf.seq[p:p+3]},'
|
|
301
312
|
return aa_changes, nt_changes
|
|
302
313
|
|
|
303
|
-
# Annotating
|
|
304
|
-
def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attributes=[], var_attributes=[]):
|
|
305
|
-
affected_exon, affected_intron, distance_from_5, distance_from_3 = find_splice_site_proximity(np.floor(mut.indices[0]),
|
|
306
|
-
reference_transcript)
|
|
307
|
-
|
|
308
|
-
report = {}
|
|
309
|
-
report['primary_transcript'] = reference_transcript.primary_transcript
|
|
310
|
-
report['transcript_id'] = reference_transcript.transcript_id
|
|
311
|
-
report['reference_protein'] = reference_transcript.protein
|
|
312
|
-
report['variant_protein'] = variant_transcript.protein
|
|
313
|
-
report['variant_protein_length'] = len(variant_transcript.protein)
|
|
314
|
-
descriptions = define_missplicing_events(reference_transcript, variant_transcript)
|
|
315
|
-
report['exon_changes'] = '|'.join([v for v in descriptions if v])
|
|
316
|
-
report['splicing_codes'] = summarize_missplicing_event(*descriptions)
|
|
317
|
-
report['affected_exon'] = affected_exon
|
|
318
|
-
report['affected_intron'] = affected_intron
|
|
319
|
-
report['mutation_distance_from_5'] = distance_from_5
|
|
320
|
-
report['mutation_distance_from_3'] = distance_from_3
|
|
321
|
-
aa_c, c_c = missense_effect(reference_transcript, variant_transcript)
|
|
322
|
-
report['missense_effect'] = aa_c
|
|
323
|
-
report['codon_change'] = c_c
|
|
324
|
-
return report
|
|
325
314
|
|
|
315
|
+
def oncosplice_score(reference_protein, variant_protein, conservation_vector, window_length=13):
|
|
316
|
+
alignment = get_logical_alignment(reference_protein, variant_protein)
|
|
317
|
+
deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
318
|
+
modified_positions = find_modified_positions(len(reference_protein), deleted, inserted)
|
|
319
|
+
temp_cons = np.convolve(conservation_vector * modified_positions,
|
|
320
|
+
np.ones(window_length)) / window_length
|
|
321
|
+
percentile = (
|
|
322
|
+
sorted(conservation_vector).index(
|
|
323
|
+
next(x for x in sorted(conservation_vector) if x >= max(temp_cons))) / len(
|
|
324
|
+
conservation_vector))
|
|
325
|
+
return max(temp_cons), percentile
|
|
326
326
|
|
|
327
|
-
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
328
|
-
window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False,
|
|
329
|
-
tis_engine=None, target_transcripts=None):
|
|
330
327
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
328
|
+
# # Annotating
|
|
329
|
+
# def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attributes=[], var_attributes=[]):
|
|
330
|
+
# affected_exon, affected_intron, distance_from_5, distance_from_3 = find_splice_site_proximity(np.floor(mut.indices[0]),
|
|
331
|
+
# reference_transcript)
|
|
332
|
+
#
|
|
333
|
+
# report = {}
|
|
334
|
+
# report['primary_transcript'] = reference_transcript.primary_transcript
|
|
335
|
+
# report['transcript_id'] = reference_transcript.transcript_id
|
|
336
|
+
# report['reference_protein'] = reference_transcript.protein
|
|
337
|
+
# report['variant_protein'] = variant_transcript.protein
|
|
338
|
+
# report['variant_protein_length'] = len(variant_transcript.protein)
|
|
339
|
+
# descriptions = define_missplicing_events(reference_transcript, variant_transcript)
|
|
340
|
+
# report['exon_changes'] = '|'.join([v for v in descriptions if v])
|
|
341
|
+
# report['splicing_codes'] = summarize_missplicing_event(*descriptions)
|
|
342
|
+
# report['affected_exon'] = affected_exon
|
|
343
|
+
# report['affected_intron'] = affected_intron
|
|
344
|
+
# report['mutation_distance_from_5'] = distance_from_5
|
|
345
|
+
# report['mutation_distance_from_3'] = distance_from_3
|
|
346
|
+
# aa_c, c_c = missense_effect(reference_transcript, variant_transcript)
|
|
347
|
+
# report['missense_effect'] = aa_c
|
|
348
|
+
# report['codon_change'] = c_c
|
|
349
|
+
# return report
|
|
350
|
+
#
|
|
351
|
+
# ##############################################################################################################################
|
|
352
|
+
# ######################################## ONCOSPLICE PIPELINES ################################################################
|
|
353
|
+
# ##############################################################################################################################
|
|
354
|
+
#
|
|
355
|
+
# def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
356
|
+
# window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False,
|
|
357
|
+
# tis_engine=None, target_transcripts=None):
|
|
358
|
+
#
|
|
359
|
+
# gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
360
|
+
# reference_gene_proteins = {
|
|
361
|
+
# transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
|
|
362
|
+
# transcript in gene if transcript.transcript_biotype == 'protein_coding'}
|
|
363
|
+
#
|
|
364
|
+
# mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
|
|
365
|
+
# if gene.rev:
|
|
366
|
+
# mutations = [m.reverse_complement() for m in mutations[::-1]]
|
|
367
|
+
#
|
|
368
|
+
# results = []
|
|
369
|
+
# for reference_transcript in tqdm(gene, desc=f'Processing {mut_id}...'):
|
|
370
|
+
# if target_transcripts is not None and reference_transcript.transcript_id not in target_transcripts:
|
|
371
|
+
# continue
|
|
372
|
+
#
|
|
373
|
+
# # if (cons_required and not reference_transcript.cons_available) or (
|
|
374
|
+
# # protein_coding and not reference_transcript.transcript_biotype == 'protein_coding'):
|
|
375
|
+
# if protein_coding and not reference_transcript.transcript_biotype == 'protein_coding':
|
|
376
|
+
# print("Not protein coding...")
|
|
377
|
+
# continue
|
|
378
|
+
#
|
|
379
|
+
# current_mutations = [m for m in mutations if m in reference_transcript]
|
|
380
|
+
# if len(current_mutations) == 0:
|
|
381
|
+
# print(f"No mutations within transcript ({reference_transcript.transcript_start} > {reference_transcript.transcript_end})...")
|
|
382
|
+
# continue
|
|
383
|
+
#
|
|
384
|
+
# center = np.mean([m.indices[0] for m in current_mutations]) // 1
|
|
385
|
+
#
|
|
386
|
+
# mutated_transcript = reference_transcript.clone()
|
|
387
|
+
#
|
|
388
|
+
# for mutation in current_mutations:
|
|
389
|
+
# mutated_transcript.mutate(mutation, inplace=True)
|
|
390
|
+
#
|
|
391
|
+
# reference_transcript.generate_mature_mrna().generate_protein()
|
|
392
|
+
#
|
|
393
|
+
# if len(reference_transcript.protein) < window_length:
|
|
394
|
+
# print(f"> Window length issue {reference_transcript.transcript_id}")
|
|
395
|
+
# continue
|
|
396
|
+
#
|
|
397
|
+
# reference_transcript.cons_vector = transform_conservation_vector(reference_transcript.cons_vector,
|
|
398
|
+
# window=window_length)
|
|
399
|
+
#
|
|
400
|
+
# assert len(reference_transcript.protein) == len(
|
|
401
|
+
# reference_transcript.cons_vector), f"Protein ({len(reference_transcript.protein)}) and conservation vector ({len(reference_transcript.cons_vector)}) must be same length."
|
|
402
|
+
#
|
|
403
|
+
# if splicing_engine is None:
|
|
404
|
+
# missplicing = Missplicing()
|
|
405
|
+
# else:
|
|
406
|
+
# missplicing, no_splicing_record = None, True
|
|
407
|
+
# if splicing_db is not None:
|
|
408
|
+
# missplicing = Missplicing(splicing_db.get_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene.gene_name, transcript_id=reference_transcript.transcript_id))
|
|
409
|
+
# no_splicing_record = missplicing is None
|
|
410
|
+
#
|
|
411
|
+
# if missplicing is None:
|
|
412
|
+
# missplicing = find_transcript_missplicing_seqs(
|
|
413
|
+
# reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
|
|
414
|
+
# mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
|
|
415
|
+
# reference_transcript.acceptors, threshold=splicing_threshold, engine=splicing_engine)
|
|
416
|
+
# if no_splicing_record and splicing_db is not None:
|
|
417
|
+
# splicing_db.store_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene.gene_name, transcript_id=reference_transcript.transcript_id, data=missplicing.missplicing)
|
|
418
|
+
#
|
|
419
|
+
# alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing) #.missplicing)
|
|
420
|
+
# for i, new_boundaries in enumerate(alternative_splicing_paths):
|
|
421
|
+
# print("iterating through new boundaries...")
|
|
422
|
+
#
|
|
423
|
+
# mutated_transcript.acceptors = new_boundaries['acceptors']
|
|
424
|
+
# mutated_transcript.donors = new_boundaries['donors']
|
|
425
|
+
# mutated_transcript.generate_mature_mrna().generate_protein()
|
|
426
|
+
#
|
|
427
|
+
# ### Experimental
|
|
428
|
+
# # mutated_transcript.generate_mature_mrna()
|
|
429
|
+
# # if tis_engine is None:
|
|
430
|
+
# # tis_candidates = [(mutated_transcript.tis, 1)]
|
|
431
|
+
# # else:
|
|
432
|
+
# # from tis_utils import tis_predictor
|
|
433
|
+
# # tis_candidates = tis_predictor(mutated_transcript.mature_mrna)
|
|
434
|
+
# #
|
|
435
|
+
# # for tis_candidate, tis_score in tis_candidates:
|
|
436
|
+
# # mutated_transcript.generate_protein(tis_candidate)
|
|
437
|
+
# ######
|
|
438
|
+
#
|
|
439
|
+
# alignment = get_logical_alignment(reference_transcript.protein, mutated_transcript.protein)
|
|
440
|
+
# deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
441
|
+
# modified_positions = find_modified_positions(len(reference_transcript.protein), deleted, inserted)
|
|
442
|
+
# temp_cons = np.convolve(reference_transcript.cons_vector * modified_positions,
|
|
443
|
+
# np.ones(window_length)) / window_length
|
|
444
|
+
# affected_cons_scores = max(temp_cons)
|
|
445
|
+
# percentile = (
|
|
446
|
+
# sorted(reference_transcript.cons_vector).index(
|
|
447
|
+
# next(x for x in sorted(reference_transcript.cons_vector) if x >= affected_cons_scores)) / len(
|
|
448
|
+
# reference_transcript.cons_vector))
|
|
449
|
+
#
|
|
450
|
+
# report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
|
|
451
|
+
# report['mut_id'] = mut_id
|
|
452
|
+
# report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
|
|
453
|
+
# # report['tis_engine'] = tis_engine if tis_engine is not None else 'None'
|
|
454
|
+
# # report['tis_pos'] = tis_candidate
|
|
455
|
+
# # report['tis_score'] = tis_score
|
|
456
|
+
# report['oncosplice_score'] = affected_cons_scores
|
|
457
|
+
# report['percentile'] = percentile
|
|
458
|
+
# report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
|
459
|
+
# report['isoform_prevalence'] = new_boundaries['path_weight']
|
|
460
|
+
# report['full_missplicing'] = missplicing.aberrant_splicing
|
|
461
|
+
# report['missplicing'] = missplicing.max_delta
|
|
462
|
+
# report['reference_resemblance'] = reference_gene_proteins.get(mutated_transcript.protein, None)
|
|
463
|
+
# results.append(report)
|
|
464
|
+
#
|
|
465
|
+
# if len(results) == 0:
|
|
466
|
+
# # print("Nothing...")
|
|
467
|
+
# return pd.DataFrame()
|
|
468
|
+
#
|
|
469
|
+
# return pd.DataFrame(results)[
|
|
470
|
+
# ['mut_id', 'transcript_id', 'isoform_id', 'primary_transcript', 'missplicing', 'full_missplicing',
|
|
471
|
+
# 'exon_changes', 'splicing_codes', 'affected_exon', 'affected_intron', 'mutation_distance_from_5',
|
|
472
|
+
# 'mutation_distance_from_3', 'missense_effect', 'codon_change', 'missense_position', 'reference_resemblance',
|
|
473
|
+
# 'oncosplice_score', 'percentile', 'isoform_prevalence', 'reference_protein', 'variant_protein', 'splicing_engine']]
|
|
446
474
|
|
|
447
475
|
|
|
448
476
|
def process_splicing_path(new_boundaries, reference_transcript, mutated_transcript,
|
|
@@ -451,11 +479,23 @@ def process_splicing_path(new_boundaries, reference_transcript, mutated_transcri
|
|
|
451
479
|
"""
|
|
452
480
|
Processes a single alternative splicing path and returns an annotation report.
|
|
453
481
|
"""
|
|
482
|
+
|
|
483
|
+
base_results = {'mut_id': mut_id,
|
|
484
|
+
'transcript_id': transcript_id,
|
|
485
|
+
'execution_time': start_time,
|
|
486
|
+
'status': '',
|
|
487
|
+
'splicing_engine': splicing_engine if splicing_engine is not None else 'None',
|
|
488
|
+
'isoform_id': short_hash_of_list(mutated_transcript.exons),
|
|
489
|
+
}
|
|
454
490
|
# Update acceptors and donors
|
|
455
491
|
mutated_transcript.acceptors = new_boundaries['acceptors']
|
|
456
492
|
mutated_transcript.donors = new_boundaries['donors']
|
|
457
493
|
mutated_transcript.generate_mature_mrna().generate_protein()
|
|
458
494
|
|
|
495
|
+
if len(mutated_transcript.protein) <= 1:
|
|
496
|
+
base_results['status'] = 'Variant protein not viable.'
|
|
497
|
+
return base_results
|
|
498
|
+
|
|
459
499
|
# Align reference and mutated proteins
|
|
460
500
|
alignment = get_logical_alignment(reference_transcript.protein, mutated_transcript.protein)
|
|
461
501
|
deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
@@ -490,7 +530,7 @@ def process_splicing_path(new_boundaries, reference_transcript, mutated_transcri
|
|
|
490
530
|
return report
|
|
491
531
|
|
|
492
532
|
|
|
493
|
-
def
|
|
533
|
+
def oncosplice(row, splicing_threshold=0.5, window_length=13,
|
|
494
534
|
organism='hg38', splicing_engine='spliceai'):
|
|
495
535
|
"""
|
|
496
536
|
Process a given mutation-transcript pair to analyze alternative splicing events
|
|
@@ -512,7 +552,11 @@ def oncosplice_df(row, splicing_threshold=0.5, window_length=13,
|
|
|
512
552
|
"""
|
|
513
553
|
|
|
514
554
|
start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # Log function start time
|
|
515
|
-
mut_id
|
|
555
|
+
mut_id = row.mut_id
|
|
556
|
+
if 'transcript_id' not in row:
|
|
557
|
+
transcript_id = None
|
|
558
|
+
else:
|
|
559
|
+
transcript_id = row.transcript_id
|
|
516
560
|
|
|
517
561
|
# Default response template (to ensure all IDs are included)
|
|
518
562
|
base_result = {
|
|
@@ -528,7 +572,7 @@ def oncosplice_df(row, splicing_threshold=0.5, window_length=13,
|
|
|
528
572
|
base_result['status'] = 'Gene not found'
|
|
529
573
|
return pd.DataFrame([base_result])
|
|
530
574
|
|
|
531
|
-
mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
|
|
575
|
+
# mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
|
|
532
576
|
reference_transcript = gene.transcript(transcript_id)
|
|
533
577
|
|
|
534
578
|
if reference_transcript is None:
|
|
@@ -548,7 +592,7 @@ def oncosplice_df(row, splicing_threshold=0.5, window_length=13,
|
|
|
548
592
|
return pd.DataFrame([base_result])
|
|
549
593
|
|
|
550
594
|
# Filter mutations relevant to this transcript
|
|
551
|
-
current_mutations = [m for m in
|
|
595
|
+
current_mutations = [m for m in mut_id.split('|') if m in reference_transcript]
|
|
552
596
|
if not current_mutations:
|
|
553
597
|
base_result['status'] = 'No relevant mutations'
|
|
554
598
|
return pd.DataFrame([base_result])
|
|
@@ -602,7 +646,6 @@ def oncosplice_df(row, splicing_threshold=0.5, window_length=13,
|
|
|
602
646
|
]
|
|
603
647
|
|
|
604
648
|
|
|
605
|
-
#
|
|
606
649
|
# import asyncio
|
|
607
650
|
# async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
608
651
|
# window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
|