geney 1.2.53__py2.py3-none-any.whl → 1.2.55__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/oncosplice.py +57 -63
- geney/pangolin_utils.py +3 -5
- geney/splicing_utils.py +116 -119
- geney/survival_utils.py +1 -1
- {geney-1.2.53.dist-info → geney-1.2.55.dist-info}/METADATA +1 -1
- {geney-1.2.53.dist-info → geney-1.2.55.dist-info}/RECORD +8 -8
- {geney-1.2.53.dist-info → geney-1.2.55.dist-info}/WHEEL +0 -0
- {geney-1.2.53.dist-info → geney-1.2.55.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -361,67 +361,64 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut):
|
|
|
361
361
|
return report
|
|
362
362
|
|
|
363
363
|
|
|
364
|
-
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
return report
|
|
364
|
+
# def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
|
|
365
|
+
# gene = Gene(mut_id.split(':')[0], organism=organism)
|
|
366
|
+
# reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
|
|
367
|
+
# mutations = [get_mutation(m, rev=gene.rev) for m in mut_id.split('|')]
|
|
368
|
+
#
|
|
369
|
+
# results = []
|
|
370
|
+
# for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
|
|
371
|
+
# if cons_required and not transcript.cons_available:
|
|
372
|
+
# continue
|
|
373
|
+
#
|
|
374
|
+
# if all(mutation not in transcript for mutation in mutations):
|
|
375
|
+
# # results.append({'transcript_id': transcript.transcript_id})
|
|
376
|
+
# continue
|
|
377
|
+
#
|
|
378
|
+
# transcript.generate_pre_mrna()
|
|
379
|
+
# transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
|
|
380
|
+
# transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
|
|
381
|
+
# ref_protein, cons_vector = transcript.protein, transcript.cons_vector
|
|
382
|
+
# reference_transcript = copy.deepcopy(transcript)
|
|
383
|
+
#
|
|
384
|
+
# assert len(ref_protein) == len(cons_vector), f"Protein ({len(ref_protein)}) and conservation vector ({len(cons_vector)}) must be same length. {ref_protein}, \n>{cons_vector}\n>{transcript.cons_seq}"
|
|
385
|
+
#
|
|
386
|
+
# missplicing = Missplicing(find_transcript_missplicing(transcript, mutations, engine=engine, threshold=splicing_threshold), threshold=splicing_threshold)
|
|
387
|
+
# for mutation in mutations:
|
|
388
|
+
# transcript.pre_mrna += mutation
|
|
389
|
+
#
|
|
390
|
+
# for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
|
|
391
|
+
# transcript.acceptors = new_boundaries['acceptors']
|
|
392
|
+
# transcript.donors = new_boundaries['donors']
|
|
393
|
+
# transcript.generate_mature_mrna().generate_protein()
|
|
394
|
+
#
|
|
395
|
+
# alignment = get_logical_alignment(reference_transcript.protein, transcript.protein)
|
|
396
|
+
# deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
397
|
+
# modified_positions = find_modified_positions(len(ref_protein), deleted, inserted)
|
|
398
|
+
# temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
|
|
399
|
+
# affected_cons_scores = max(temp_cons)
|
|
400
|
+
# percentile = (
|
|
401
|
+
# sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
|
|
402
|
+
# cons_vector))
|
|
403
|
+
#
|
|
404
|
+
# report = OncospliceAnnotator(reference_transcript, transcript, mutation)
|
|
405
|
+
# report['mut_id'] = mut_id
|
|
406
|
+
# report['oncosplice_score'] = affected_cons_scores
|
|
407
|
+
# report['percentile'] = percentile
|
|
408
|
+
# report['isoform_id'] = i
|
|
409
|
+
# report['isoform_prevalence'] = new_boundaries['path_weight']
|
|
410
|
+
# report['full_missplicing'] = missplicing.aberrant_splicing
|
|
411
|
+
# report['missplicing'] = max(missplicing)
|
|
412
|
+
# report['reference_resemblance'] = reference_gene_proteins.get(transcript.protein, None)
|
|
413
|
+
# results.append(report)
|
|
414
|
+
#
|
|
415
|
+
# report = pd.DataFrame(results)
|
|
416
|
+
# return report
|
|
418
417
|
|
|
419
418
|
|
|
420
419
|
import asyncio
|
|
421
|
-
|
|
422
|
-
|
|
423
420
|
async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
424
|
-
window_length=13, organism='hg38', engine='spliceai', use_cons=True):
|
|
421
|
+
window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
|
|
425
422
|
import sys, os
|
|
426
423
|
needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
|
|
427
424
|
needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
|
|
@@ -452,20 +449,17 @@ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=Tr
|
|
|
452
449
|
|
|
453
450
|
gene = Gene(mut_id.split(':')[0], organism=organism)
|
|
454
451
|
reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
|
|
455
|
-
|
|
456
452
|
mutations = [get_mutation(mut_id, rev=gene.rev) for mut_id in mut_id.split('|')]
|
|
457
|
-
|
|
458
453
|
results = []
|
|
459
454
|
for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
|
|
460
|
-
if not transcript.cons_available:
|
|
455
|
+
if require_cons and not transcript.cons_available:
|
|
461
456
|
continue
|
|
462
457
|
|
|
463
458
|
if all(mutation not in transcript for mutation in mutations):
|
|
464
|
-
results.append({'transcript_id': transcript.transcript_id})
|
|
459
|
+
# results.append({'transcript_id': transcript.transcript_id})
|
|
465
460
|
continue
|
|
466
461
|
|
|
467
462
|
task1 = asyncio.create_task(background_request(tid))
|
|
468
|
-
|
|
469
463
|
transcript.generate_pre_mrna()
|
|
470
464
|
transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
|
|
471
465
|
transcript.generate_mature_mrna().generate_protein(inplace=True)
|
|
@@ -475,7 +469,7 @@ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=Tr
|
|
|
475
469
|
cons_vector = np.ones(len(ref_protein))
|
|
476
470
|
|
|
477
471
|
if sum(cons_vector) == 0:
|
|
478
|
-
cons_vector = np.ones(len(ref_protein))
|
|
472
|
+
cons_vector = np.ones(len(ref_protein)) #/len(ref_protein)
|
|
479
473
|
|
|
480
474
|
reference_transcript = copy.deepcopy(transcript)
|
|
481
475
|
|
geney/pangolin_utils.py
CHANGED
|
@@ -52,12 +52,10 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
52
52
|
model_nums = [0, 1, 2, 3, 4, 5, 6]
|
|
53
53
|
INDEX_MAP = {0: 1, 1: 2, 2: 4, 3: 5, 4: 7, 5: 8, 6: 10, 7: 11}
|
|
54
54
|
|
|
55
|
-
# seq = 'N'*5000 + true_seq + 'N'*5000
|
|
56
55
|
seq = true_seq
|
|
57
56
|
true_seq = true_seq[5000:-5000]
|
|
58
|
-
acceptor_dinucleotide = np.array([true_seq[i - 2:i] == 'AG' for i in range(len(true_seq))])
|
|
59
|
-
|
|
60
|
-
donor_dinucleotide = np.array([true_seq[i -2:i] == 'GT' for i in range(len(true_seq))])
|
|
57
|
+
acceptor_dinucleotide = np.array([true_seq[i - 2:i] == 'AG' for i in range(len(true_seq))]) # np.ones(len(true_seq)) #
|
|
58
|
+
donor_dinucleotide = np.array([true_seq[i+1:i+3] == 'GT' for i in range(len(true_seq))]) #np.ones(len(true_seq)) #
|
|
61
59
|
|
|
62
60
|
seq = pang_one_hot_encode(seq).T
|
|
63
61
|
seq = torch.from_numpy(np.expand_dims(seq, axis=0)).float()
|
|
@@ -78,4 +76,4 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
78
76
|
splicing_pred = np.array(scores).max(axis=0)
|
|
79
77
|
donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
|
|
80
78
|
acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
|
|
81
|
-
return donor_probs, acceptor_probs
|
|
79
|
+
return donor_probs, acceptor_probs
|
geney/splicing_utils.py
CHANGED
|
@@ -1,128 +1,125 @@
|
|
|
1
|
-
import networkx as nx
|
|
2
1
|
import numpy as np
|
|
3
2
|
from .mutation_utils import get_mutation
|
|
4
3
|
from .seqmat_utils import Gene
|
|
5
4
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate_adjacency_list(acceptors, donors, transcript_start, transcript_end, max_distance=50, rev=False):
|
|
9
|
+
# Append the transcript end to donors to allow connection to the end point
|
|
10
|
+
donors.append((transcript_end, 1))
|
|
11
|
+
acceptors = sorted(acceptors, key=lambda x: (x[0], x[1] if not rev else -x[1]), reverse=rev)
|
|
12
|
+
donors = sorted(donors, key=lambda x: (x[0], x[1] if not rev else -x[1]), reverse=rev)
|
|
13
|
+
|
|
14
|
+
# Initialize adjacency list to store downstream connections
|
|
15
|
+
adjacency_list = defaultdict(list)
|
|
16
|
+
|
|
17
|
+
# Connect each donor to the nearest acceptor(s) within the distance threshold
|
|
18
|
+
for d_pos, d_prob in donors:
|
|
19
|
+
running_prob = 1
|
|
20
|
+
for a_pos, a_prob in acceptors:
|
|
21
|
+
correct_orientation = (a_pos > d_pos and not rev) or (a_pos < d_pos and rev)
|
|
22
|
+
distance_valid = abs(a_pos - d_pos) <= max_distance
|
|
23
|
+
if correct_orientation and distance_valid:
|
|
24
|
+
in_between_acceptors = sum([d_pos < a < a_pos for a, _ in acceptors]) if not rev else sum([a_pos < a < d_pos for a, _ in acceptors])
|
|
25
|
+
in_between_donors = sum([d_pos < d < a_pos for d, _ in donors]) if not rev else sum([a_pos < d < d_pos for d, _ in donors])
|
|
26
|
+
in_between_naturals = 0
|
|
27
|
+
if in_between_donors == 0 or in_between_acceptors == 0:
|
|
28
|
+
adjacency_list[(d_pos, 'donor')].append((a_pos, 'acceptor', a_prob))
|
|
29
|
+
running_prob -= a_prob
|
|
30
|
+
|
|
31
|
+
else:
|
|
32
|
+
if running_prob > 0:
|
|
33
|
+
adjacency_list[(d_pos, 'donor')].append((a_pos, 'acceptor', a_prob*running_prob))
|
|
34
|
+
running_prob -= a_prob
|
|
35
|
+
else:
|
|
36
|
+
break
|
|
37
|
+
|
|
38
|
+
# Connect each acceptor to the nearest donor(s) within the distance threshold
|
|
39
|
+
for a_pos, a_prob in acceptors:
|
|
40
|
+
running_prob = 1
|
|
41
|
+
for d_pos, d_prob in donors:
|
|
42
|
+
correct_orientation = (d_pos > a_pos and not rev) or (d_pos < a_pos and rev)
|
|
43
|
+
distance_valid = abs(d_pos - a_pos) <= max_distance
|
|
44
|
+
if correct_orientation and distance_valid:
|
|
45
|
+
in_between_acceptors = sum([a_pos < a < d_pos for a, _ in acceptors]) if not rev else sum([d_pos < a < a_pos for a, _ in acceptors])
|
|
46
|
+
in_between_donors = sum([a_pos < d < d_pos for d, _ in donors]) if not rev else sum([d_pos < d < a_pos for d, _ in donors])
|
|
47
|
+
in_between_naturals = 0
|
|
48
|
+
tag = 'donor' if d_pos != transcript_end else 'transcript_end'
|
|
49
|
+
|
|
50
|
+
if in_between_acceptors == 0:
|
|
51
|
+
adjacency_list[(a_pos, 'acceptor')].append((d_pos, tag, d_prob))
|
|
52
|
+
running_prob -= d_prob
|
|
53
|
+
else:
|
|
54
|
+
if running_prob > 0:
|
|
55
|
+
adjacency_list[(a_pos, 'acceptor')].append((d_pos, tag, d_prob*running_prob))
|
|
56
|
+
running_prob -= d_prob
|
|
57
|
+
else:
|
|
58
|
+
break
|
|
59
|
+
|
|
60
|
+
# Connect the transcript start to the nearest donor(s) within the distance threshold
|
|
61
|
+
running_prob = 1
|
|
62
|
+
for d_pos, d_prob in donors:
|
|
63
|
+
if ((d_pos > transcript_start and not rev) or (d_pos < transcript_start and rev)) and abs(
|
|
64
|
+
d_pos - transcript_start) <= max_distance:
|
|
65
|
+
adjacency_list[(transcript_start, 'transcript_start')].append((d_pos, 'donor', d_prob))
|
|
66
|
+
running_prob -= d_prob
|
|
67
|
+
if running_prob <= 0:
|
|
68
|
+
break
|
|
69
|
+
|
|
70
|
+
# Normalize probabilities to ensure they sum up to 1 for each list of connections
|
|
71
|
+
for k, next_nodes in adjacency_list.items():
|
|
72
|
+
prob_sum = sum([c for a, b, c in next_nodes])
|
|
73
|
+
adjacency_list[k] = [(a, b, round(c / prob_sum, 3)) for a, b, c in next_nodes] if prob_sum > 0 else next_nodes
|
|
74
|
+
|
|
75
|
+
return adjacency_list
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def find_all_paths(graph, start, end, path=[], probability=1.0):
|
|
79
|
+
path = path + [start] # Add current node to the path
|
|
80
|
+
if start == end:
|
|
81
|
+
yield path, probability # If end is reached, yield the path and its cumulative probability
|
|
82
|
+
return
|
|
83
|
+
if start not in graph:
|
|
84
|
+
return # If the start node has no outgoing edges, return
|
|
85
|
+
|
|
86
|
+
for (next_node, node_type, prob) in graph[start]:
|
|
87
|
+
# Recur for each connected node, updating the probability
|
|
88
|
+
yield from find_all_paths(graph, (next_node, node_type), end, path, probability * prob)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def prepare_splice_sites(acceptors, donors, aberrant_splicing):
|
|
92
|
+
acceptors = {p: 1 for p in acceptors}
|
|
93
|
+
donors = {p: 1 for p in donors}
|
|
94
|
+
|
|
95
|
+
for p, v in aberrant_splicing[f'missed_donors'].items():
|
|
96
|
+
donors[p] = v['absolute']
|
|
97
|
+
|
|
98
|
+
for p, v in aberrant_splicing[f'discovered_donors'].items():
|
|
99
|
+
donors[p] = v['absolute']
|
|
100
|
+
|
|
101
|
+
for p, v in aberrant_splicing[f'missed_acceptors'].items():
|
|
102
|
+
acceptors[p] = v['absolute']
|
|
103
|
+
|
|
104
|
+
for p, v in aberrant_splicing[f'discovered_acceptors'].items():
|
|
105
|
+
acceptors[p] = v['absolute']
|
|
106
|
+
|
|
107
|
+
acceptors = {int(k): v for k, v in acceptors.items()}
|
|
108
|
+
donors = {int(k): v for k, v in donors.items()}
|
|
109
|
+
return list(acceptors.items()), list(donors.items())
|
|
41
110
|
|
|
42
111
|
|
|
43
112
|
def develop_aberrant_splicing(transcript, aberrant_splicing):
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
nodes = [s for s in nodes if s.prob > 0]
|
|
55
|
-
|
|
56
|
-
# Sort nodes based on position, respecting transcript direction
|
|
57
|
-
nodes.sort(key=lambda x: x.pos, reverse=transcript.rev)
|
|
58
|
-
|
|
59
|
-
# Create the directed graph
|
|
60
|
-
G = create_splice_graph(nodes, transcript.rev)
|
|
61
|
-
|
|
62
|
-
# Compute new paths and their probabilities sequentially
|
|
63
|
-
new_paths, prob_sum = compute_paths_sequential(G, transcript, exon_starts, exon_ends)
|
|
64
|
-
|
|
65
|
-
# Normalize probabilities and filter based on threshold
|
|
66
|
-
new_paths = normalize_and_filter_paths(new_paths, prob_sum)
|
|
67
|
-
|
|
68
|
-
return list(new_paths.values())
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def prepare_splice_sites(transcript_sites, transcript_boundary, aberrant_splicing, site_type):
|
|
72
|
-
"""
|
|
73
|
-
Prepare and return a dictionary of splice sites (acceptors or donors) including transcript boundaries
|
|
74
|
-
and aberrant splicing information.
|
|
75
|
-
"""
|
|
76
|
-
site_dict = {v: 1 for v in transcript_sites}
|
|
77
|
-
site_dict.update({transcript_boundary: 1})
|
|
78
|
-
site_dict.update({s: v['absolute'] for s, v in aberrant_splicing[f'missed_{site_type}'].items()})
|
|
79
|
-
site_dict.update({s: v['absolute'] for s, v in aberrant_splicing[f'discovered_{site_type}'].items()})
|
|
80
|
-
return site_dict
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def create_splice_graph(nodes, reverse_direction):
|
|
84
|
-
"""
|
|
85
|
-
Create and return a directed graph with splice sites as nodes and edges based on splice site type
|
|
86
|
-
and probability of occurrence.
|
|
87
|
-
"""
|
|
88
|
-
G = nx.DiGraph()
|
|
89
|
-
G.add_nodes_from([n.pos for n in nodes])
|
|
90
|
-
|
|
91
|
-
for i in range(len(nodes)):
|
|
92
|
-
trailing_prob = 0
|
|
93
|
-
in_between = set()
|
|
94
|
-
curr_node = nodes[i]
|
|
95
|
-
|
|
96
|
-
for j in range(i + 1, len(nodes)):
|
|
97
|
-
next_node = nodes[j]
|
|
98
|
-
in_between.add(next_node.ss_type)
|
|
99
|
-
|
|
100
|
-
if curr_node.ss_type != next_node.ss_type:
|
|
101
|
-
new_prob = next_node.prob - trailing_prob
|
|
102
|
-
if new_prob > 0:
|
|
103
|
-
G.add_edge(curr_node.pos, next_node.pos, weight=new_prob)
|
|
104
|
-
trailing_prob += next_node.prob
|
|
105
|
-
return G
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def normalize_and_filter_paths(new_paths, prob_sum):
|
|
109
|
-
"""
|
|
110
|
-
Normalize path probabilities and filter out paths with a probability less than 0.01.
|
|
111
|
-
"""
|
|
112
|
-
for i, d in new_paths.items():
|
|
113
|
-
d['path_weight'] = round(d['path_weight'] / prob_sum, 3)
|
|
114
|
-
new_paths = {k: v for k, v in new_paths.items() if v['path_weight'] > 0.00001}
|
|
115
|
-
return new_paths
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def path_weight_mult(G, path, weight):
|
|
119
|
-
"""
|
|
120
|
-
Calculate the multiplicative weight of the path.
|
|
121
|
-
"""
|
|
122
|
-
cost = 1
|
|
123
|
-
for node, nbr in zip(path[:-1], path[1:]):
|
|
124
|
-
cost *= G[node][nbr][weight]
|
|
125
|
-
return cost
|
|
113
|
+
all_acceptors, all_donors = prepare_splice_sites(transcript.acceptors, transcript.donors, aberrant_splicing)
|
|
114
|
+
adj_list = generate_adjacency_list(all_acceptors, all_donors, transcript_start=transcript.transcript_start,
|
|
115
|
+
transcript_end=transcript.transcript_end, rev=transcript.rev,
|
|
116
|
+
max_distance=100000)
|
|
117
|
+
end_node = (transcript.transcript_end, 'transcript_end')
|
|
118
|
+
start_node = (transcript.transcript_start, 'transcript_start')
|
|
119
|
+
for path, prob in find_all_paths(adj_list, start_node, end_node):
|
|
120
|
+
yield {'acceptors': [p[0] for p in path if p[1] == 'acceptor'],
|
|
121
|
+
'donors': [p[0] for p in path if p[1] == 'donor'], 'path_weight': prob}
|
|
122
|
+
|
|
126
123
|
|
|
127
124
|
|
|
128
125
|
# Missplicing Detection
|
|
@@ -176,7 +173,7 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
176
173
|
ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
|
|
177
174
|
var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
|
|
178
175
|
|
|
179
|
-
print(ref_seq)
|
|
176
|
+
# print(ref_seq)
|
|
180
177
|
|
|
181
178
|
if engine == 'spliceai':
|
|
182
179
|
from .spliceai_utils import sai_predict_probs, sai_models
|
geney/survival_utils.py
CHANGED
|
@@ -2,7 +2,7 @@ import pandas as pd
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import matplotlib.pyplot as plt
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from scipy.integrate import
|
|
5
|
+
# from scipy.integrate import trapezoid
|
|
6
6
|
from geney.utils import unload_pickle, unload_json, contains
|
|
7
7
|
from lifelines.exceptions import ConvergenceError
|
|
8
8
|
from lifelines import KaplanMeierFitter
|
|
@@ -6,13 +6,13 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
6
6
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
7
7
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
8
8
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
9
|
-
geney/oncosplice.py,sha256
|
|
10
|
-
geney/pangolin_utils.py,sha256=
|
|
9
|
+
geney/oncosplice.py,sha256=-_b0ZSxWa-bSYDoVMt605lJlx8-rXf0WsKsFrMoF6Vg,23707
|
|
10
|
+
geney/pangolin_utils.py,sha256=NJEdY43L_2lielY1hZOjlak0baHqXTa1ITrvx8Tkg5o,2878
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
12
|
geney/seqmat_utils.py,sha256=2cRXT_Ox4IdzCM8x3H2HexxFZzjo5WHs0HZiUQv8fBM,18347
|
|
13
13
|
geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
|
|
14
|
-
geney/splicing_utils.py,sha256=
|
|
15
|
-
geney/survival_utils.py,sha256=
|
|
14
|
+
geney/splicing_utils.py,sha256=t0vE5KTAdYOYJLa9wjaSJ1jqiHhsDxZs64OxrgR-Sqc,16811
|
|
15
|
+
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
16
16
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
17
17
|
geney/tis_utils.py,sha256=vA2ci4gNfwwQZlCjPpO5ehvL2NRVeM7lHI_VyfT-_10,8049
|
|
18
18
|
geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
|
|
@@ -20,7 +20,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
20
20
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
21
21
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
22
22
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
26
|
-
geney-1.2.
|
|
23
|
+
geney-1.2.55.dist-info/METADATA,sha256=bMKlTktE8jhYNpbxWMnp6Z168gk4NafThjukv45vYI4,948
|
|
24
|
+
geney-1.2.55.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
25
|
+
geney-1.2.55.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
26
|
+
geney-1.2.55.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|