geney 1.3.63__py2.py3-none-any.whl → 1.3.65__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/SeqMats.py +1 -0
- geney/oncosplice.py +1 -1
- geney/spliceai_utils.py +1 -1
- geney/splicing_utils.py +64 -14
- {geney-1.3.63.dist-info → geney-1.3.65.dist-info}/METADATA +1 -1
- {geney-1.3.63.dist-info → geney-1.3.65.dist-info}/RECORD +8 -8
- {geney-1.3.63.dist-info → geney-1.3.65.dist-info}/WHEEL +0 -0
- {geney-1.3.63.dist-info → geney-1.3.65.dist-info}/top_level.txt +0 -0
geney/SeqMats.py
CHANGED
geney/oncosplice.py
CHANGED
|
@@ -368,7 +368,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
368
368
|
if no_splicing_record and splicing_db is not None:
|
|
369
369
|
splicing_db.store_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene.gene_name, transcript_id=reference_transcript.transcript_id, data=missplicing.missplicing)
|
|
370
370
|
|
|
371
|
-
alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing
|
|
371
|
+
alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing) #.missplicing)
|
|
372
372
|
for i, new_boundaries in enumerate(alternative_splicing_paths):
|
|
373
373
|
print("iterating through new boundaries...")
|
|
374
374
|
|
geney/spliceai_utils.py
CHANGED
|
@@ -73,4 +73,4 @@ def run_spliceai_seq(seq, indices, threshold=0):
|
|
|
73
73
|
ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
|
|
74
74
|
acceptor_indices = {a: b for a, b in list(zip(indices, ref_seq_acceptor_probs)) if b >= threshold}
|
|
75
75
|
donor_indices = {a: b for a, b in list(zip(indices, ref_seq_donor_probs)) if b >= threshold}
|
|
76
|
-
return
|
|
76
|
+
return donor_indices, acceptor_indices
|
geney/splicing_utils.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import pandas as pd
|
|
3
|
+
from tensorflow.python.framework.test_ops import none_eager_fallback
|
|
4
|
+
|
|
3
5
|
from .Gene import Gene
|
|
4
6
|
from .SeqMats import MutSeqMat
|
|
5
7
|
from collections import defaultdict
|
|
@@ -119,15 +121,19 @@ def prepare_splice_sites(acceptors, donors, aberrant_splicing):
|
|
|
119
121
|
|
|
120
122
|
|
|
121
123
|
def develop_aberrant_splicing(transcript, aberrant_splicing):
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
124
|
+
if not aberrant_splicing:
|
|
125
|
+
yield {'acceptors': transcript.acceptors, 'donors': transcript.donors, 'path_weight': 1}
|
|
126
|
+
|
|
127
|
+
else:
|
|
128
|
+
all_acceptors, all_donors = prepare_splice_sites(transcript.acceptors, transcript.donors, aberrant_splicing.missplicing)
|
|
129
|
+
adj_list = generate_adjacency_list(all_acceptors, all_donors, transcript_start=transcript.transcript_start,
|
|
130
|
+
transcript_end=transcript.transcript_end, rev=transcript.rev,
|
|
131
|
+
max_distance=100000)
|
|
132
|
+
end_node = (transcript.transcript_end, 'transcript_end')
|
|
133
|
+
start_node = (transcript.transcript_start, 'transcript_start')
|
|
134
|
+
for path, prob in find_all_paths(adj_list, start_node, end_node):
|
|
135
|
+
yield {'acceptors': [p[0] for p in path if p[1] == 'acceptor'],
|
|
136
|
+
'donors': [p[0] for p in path if p[1] == 'donor'], 'path_weight': prob}
|
|
131
137
|
|
|
132
138
|
|
|
133
139
|
|
|
@@ -193,7 +199,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
193
199
|
# Prepare reference sequence with padding
|
|
194
200
|
ref_indices = transcript.indices
|
|
195
201
|
ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
|
|
196
|
-
|
|
202
|
+
ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq, engine)
|
|
197
203
|
|
|
198
204
|
# Verify lengths
|
|
199
205
|
assert len(ref_seq_donor_probs) == len(ref_indices), (
|
|
@@ -206,10 +212,10 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
206
212
|
)
|
|
207
213
|
|
|
208
214
|
# Create dictionaries and sort them by probability in descending order
|
|
209
|
-
donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
210
|
-
key=lambda item: item[1], reverse=True)
|
|
211
|
-
acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
|
|
212
|
-
key=lambda item: item[1], reverse=True)
|
|
215
|
+
donor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
216
|
+
key=lambda item: item[1], reverse=True))
|
|
217
|
+
acceptor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
|
|
218
|
+
key=lambda item: item[1], reverse=True))
|
|
213
219
|
|
|
214
220
|
return donor_probs, acceptor_probs
|
|
215
221
|
|
|
@@ -456,6 +462,50 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
456
462
|
return pd.concat(results)
|
|
457
463
|
|
|
458
464
|
|
|
465
|
+
def process_pairwise_epistasis_explicit(mid, engine='spliceai'):
|
|
466
|
+
donor_probs, acceptor_probs = {}, {}
|
|
467
|
+
lower_pos, upper_pos = int(mid.split(':')[2]), int(mid.split(':')[6])
|
|
468
|
+
g = Gene.from_file(mid.split(':')[0]).transcript().generate_pre_mrna()
|
|
469
|
+
print(g.rev)
|
|
470
|
+
if g.rev:
|
|
471
|
+
lower_pos, upper_pos, factor = upper_pos, lower_pos, -1
|
|
472
|
+
else:
|
|
473
|
+
factor = 1
|
|
474
|
+
|
|
475
|
+
lb, ub = lower_pos - (factor * 7500), upper_pos + (factor * 7500)
|
|
476
|
+
|
|
477
|
+
for m in ['wild_type'] + mid.split('|') + [mid]:
|
|
478
|
+
transcript = g.clone().pre_mrna
|
|
479
|
+
if m != 'wild_type':
|
|
480
|
+
mutations = [MutSeqMat.from_mutid(cm) for cm in m.split('|')]
|
|
481
|
+
if g.rev:
|
|
482
|
+
mutations = [m.reverse_complement() for m in mutations]
|
|
483
|
+
for mutation in mutations:
|
|
484
|
+
if mutation in transcript:
|
|
485
|
+
transcript.mutate(mutation, inplace=True)
|
|
486
|
+
|
|
487
|
+
donors, acceptors = find_transcript_splicing(transcript[lb:ub], engine=engine)
|
|
488
|
+
donor_probs[m] = donors
|
|
489
|
+
acceptor_probs[m] = acceptors
|
|
490
|
+
|
|
491
|
+
acceptors = pd.DataFrame.from_dict(acceptor_probs).T
|
|
492
|
+
donors = pd.DataFrame.from_dict(donor_probs).T
|
|
493
|
+
|
|
494
|
+
acceptors = acceptors.map(lambda x: 0 if x < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
|
|
495
|
+
acceptors = acceptors.loc[:, acceptors.nunique() > 1]
|
|
496
|
+
donors = donors.map(lambda x: 0 if abs(x) < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
|
|
497
|
+
donors = donors.loc[:, donors.nunique() > 1]
|
|
498
|
+
|
|
499
|
+
donors.loc['residual'] = (donors.iloc[3] - donors.iloc[0]) - (
|
|
500
|
+
(donors.iloc[1] - donors.iloc[0]) + (donors.iloc[2] - donors.iloc[0]))
|
|
501
|
+
acceptors.loc['residual'] = (acceptors.iloc[3] - acceptors.iloc[0]) - (
|
|
502
|
+
(acceptors.iloc[1] - acceptors.iloc[0]) + (acceptors.iloc[2] - acceptors.iloc[0]))
|
|
503
|
+
|
|
504
|
+
donors = donors.loc[:, donors.loc['residual'].abs() > 0.1]
|
|
505
|
+
acceptors = acceptors.loc[:, acceptors.loc['residual'].abs() > 0.1]
|
|
506
|
+
|
|
507
|
+
return acceptors, donors
|
|
508
|
+
|
|
459
509
|
|
|
460
510
|
class Missplicing:
|
|
461
511
|
def __init__(self, splicing_dict=None, threshold=0.5):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
2
|
geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
|
|
3
|
-
geney/SeqMats.py,sha256=
|
|
3
|
+
geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
|
|
4
4
|
geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
@@ -11,12 +11,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
11
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
12
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
13
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
14
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=YZvAnbe8gj9fPvs2DldeQpqhhe_QR9xBLe_0tcm9tdg,24793
|
|
15
15
|
geney/pangolin_utils.py,sha256=9jdBXlOcRaUdfi-UpUxHA0AkTMZkUF-Lt7HVZ1nEm3s,2973
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
|
-
geney/spliceai_utils.py,sha256=
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
18
|
+
geney/spliceai_utils.py,sha256=tVY0T6F6l3fNoaktpn7Kq0oH5ZM0ThFYt9nPi_lfakw,3077
|
|
19
|
+
geney/splicing_utils.py,sha256=W-N0ENZJv1PdnVlHuaN_2az2-7Zl6cHYe_CYR1G41U4,40766
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
22
|
geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.65.dist-info/METADATA,sha256=L-doIh0XdJuxs4gg1Dhs5mLoa_1zI8_bboq4cnlfvfA,990
|
|
29
|
+
geney-1.3.65.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.65.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.65.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|