geney 1.3.64__py2.py3-none-any.whl → 1.3.65__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/SeqMats.py +1 -0
- geney/spliceai_utils.py +1 -1
- geney/splicing_utils.py +49 -5
- {geney-1.3.64.dist-info → geney-1.3.65.dist-info}/METADATA +1 -1
- {geney-1.3.64.dist-info → geney-1.3.65.dist-info}/RECORD +7 -7
- {geney-1.3.64.dist-info → geney-1.3.65.dist-info}/WHEEL +0 -0
- {geney-1.3.64.dist-info → geney-1.3.65.dist-info}/top_level.txt +0 -0
geney/SeqMats.py
CHANGED
geney/spliceai_utils.py
CHANGED
|
@@ -73,4 +73,4 @@ def run_spliceai_seq(seq, indices, threshold=0):
|
|
|
73
73
|
ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
|
|
74
74
|
acceptor_indices = {a: b for a, b in list(zip(indices, ref_seq_acceptor_probs)) if b >= threshold}
|
|
75
75
|
donor_indices = {a: b for a, b in list(zip(indices, ref_seq_donor_probs)) if b >= threshold}
|
|
76
|
-
return
|
|
76
|
+
return donor_indices, acceptor_indices
|
geney/splicing_utils.py
CHANGED
|
@@ -199,7 +199,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
199
199
|
# Prepare reference sequence with padding
|
|
200
200
|
ref_indices = transcript.indices
|
|
201
201
|
ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
|
|
202
|
-
|
|
202
|
+
ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq, engine)
|
|
203
203
|
|
|
204
204
|
# Verify lengths
|
|
205
205
|
assert len(ref_seq_donor_probs) == len(ref_indices), (
|
|
@@ -212,10 +212,10 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
212
212
|
)
|
|
213
213
|
|
|
214
214
|
# Create dictionaries and sort them by probability in descending order
|
|
215
|
-
donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
216
|
-
key=lambda item: item[1], reverse=True)
|
|
217
|
-
acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
|
|
218
|
-
key=lambda item: item[1], reverse=True)
|
|
215
|
+
donor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
216
|
+
key=lambda item: item[1], reverse=True))
|
|
217
|
+
acceptor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
|
|
218
|
+
key=lambda item: item[1], reverse=True))
|
|
219
219
|
|
|
220
220
|
return donor_probs, acceptor_probs
|
|
221
221
|
|
|
@@ -462,6 +462,50 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
462
462
|
return pd.concat(results)
|
|
463
463
|
|
|
464
464
|
|
|
465
|
+
def _zero_small_scores(scores, floor=0.01, ndigits=2):
    """Zero entries whose magnitude is below ``floor`` and round the rest.

    NaN entries are left as NaN: ``NaN < floor`` evaluates to False, so the
    mask never selects them.
    """
    return scores.mask(scores.abs() < floor, 0).round(ndigits)


def _with_epistasis_residual(scores, min_abs_residual=0.1):
    """Append a ``'residual'`` row and keep only columns where it is large.

    The first four rows of ``scores`` must be, in order: wild type, first
    single mutant, second single mutant, double mutant. The residual is the
    double-mutant change minus the sum of the two single-mutant changes.
    """
    wild_type, first, second, double = (scores.iloc[i] for i in range(4))
    scores.loc['residual'] = (double - wild_type) - (
        (first - wild_type) + (second - wild_type))
    return scores.loc[:, scores.loc['residual'].abs() > min_abs_residual]


def process_pairwise_epistasis_explicit(mid, engine='spliceai', flank=7500):
    """Compare splice-site scores for a mutation pair against its single mutants.

    Splice-site predictions are computed for four variants of the same
    pre-mRNA window: wild type, each single mutant, and the double mutant.
    For every position the non-additive residual
    ``(double - wt) - ((first - wt) + (second - wt))`` is appended as a
    ``'residual'`` row.

    Args:
        mid: Pair identifier made of two mutation ids joined by ``'|'``.
            Each id is ``':'``-separated, with the gene name in field 0 and
            the integer position in field 2.
        engine: Splicing engine name forwarded to ``find_transcript_splicing``.
        flank: Distance added beyond each mutation position when slicing the
            transcript window.

    Returns:
        ``(acceptors, donors)``: two DataFrames with one row per variant plus
        the ``'residual'`` row, restricted to columns whose scores differ
        between variants and whose absolute residual exceeds 0.1. Note the
        order is acceptors first, the reverse of ``find_transcript_splicing``.

    Raises:
        ValueError: If ``mid`` does not contain exactly two distinct
            mutation ids; the residual is only defined for a pair.
    """
    single_ids = mid.split('|')
    if len(single_ids) != 2 or single_ids[0] == single_ids[1]:
        raise ValueError(
            f"Expected exactly two distinct mutation ids joined by '|', got: {mid!r}")

    # Field 2 of each id is its position (previously read as fields 2 and 6
    # of the whole pair string split on ':').
    first_pos, second_pos = (int(single.split(':')[2]) for single in single_ids)

    g = Gene.from_file(mid.split(':')[0]).transcript().generate_pre_mrna()

    # On the reverse strand the window bounds are swapped and extended in the
    # opposite direction.
    # NOTE(review): this assumes the first id has the smaller position — confirm
    # against callers, otherwise the window shrinks instead of growing.
    if g.rev:
        lb, ub = second_pos + flank, first_pos - flank
    else:
        lb, ub = first_pos - flank, second_pos + flank

    donor_probs, acceptor_probs = {}, {}
    for label in ['wild_type', *single_ids, mid]:
        transcript = g.clone().pre_mrna
        if label != 'wild_type':
            mutations = [MutSeqMat.from_mutid(cm) for cm in label.split('|')]
            if g.rev:
                mutations = [mut.reverse_complement() for mut in mutations]
            for mutation in mutations:
                # Mutations not contained in the transcript are skipped silently.
                if mutation in transcript:
                    transcript.mutate(mutation, inplace=True)

        donors, acceptors = find_transcript_splicing(transcript[lb:ub], engine=engine)
        donor_probs[label] = donors
        acceptor_probs[label] = acceptors

    # One row per variant, one column per position.
    acceptors = pd.DataFrame.from_dict(acceptor_probs).T
    donors = pd.DataFrame.from_dict(donor_probs).T

    # Suppress sub-threshold noise, then drop positions that never change.
    acceptors = _zero_small_scores(acceptors)
    acceptors = acceptors.loc[:, acceptors.nunique() > 1]
    donors = _zero_small_scores(donors)
    donors = donors.loc[:, donors.nunique() > 1]

    donors = _with_epistasis_residual(donors)
    acceptors = _with_epistasis_residual(acceptors)

    return acceptors, donors
|
|
508
|
+
|
|
465
509
|
|
|
466
510
|
class Missplicing:
|
|
467
511
|
def __init__(self, splicing_dict=None, threshold=0.5):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
2
|
geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
|
|
3
|
-
geney/SeqMats.py,sha256=
|
|
3
|
+
geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
|
|
4
4
|
geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
@@ -15,8 +15,8 @@ geney/oncosplice.py,sha256=YZvAnbe8gj9fPvs2DldeQpqhhe_QR9xBLe_0tcm9tdg,24793
|
|
|
15
15
|
geney/pangolin_utils.py,sha256=9jdBXlOcRaUdfi-UpUxHA0AkTMZkUF-Lt7HVZ1nEm3s,2973
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
|
-
geney/spliceai_utils.py,sha256=
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
18
|
+
geney/spliceai_utils.py,sha256=tVY0T6F6l3fNoaktpn7Kq0oH5ZM0ThFYt9nPi_lfakw,3077
|
|
19
|
+
geney/splicing_utils.py,sha256=W-N0ENZJv1PdnVlHuaN_2az2-7Zl6cHYe_CYR1G41U4,40766
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
22
|
geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.65.dist-info/METADATA,sha256=L-doIh0XdJuxs4gg1Dhs5mLoa_1zI8_bboq4cnlfvfA,990
|
|
29
|
+
geney-1.3.65.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.65.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.65.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|