geney 1.3.64__py2.py3-none-any.whl → 1.3.66__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/SeqMats.py +1 -0
- geney/spliceai_utils.py +1 -1
- geney/splicing_utils.py +53 -8
- {geney-1.3.64.dist-info → geney-1.3.66.dist-info}/METADATA +1 -1
- {geney-1.3.64.dist-info → geney-1.3.66.dist-info}/RECORD +7 -7
- {geney-1.3.64.dist-info → geney-1.3.66.dist-info}/WHEEL +0 -0
- {geney-1.3.64.dist-info → geney-1.3.66.dist-info}/top_level.txt +0 -0
geney/SeqMats.py
CHANGED
geney/spliceai_utils.py
CHANGED
|
@@ -73,4 +73,4 @@ def run_spliceai_seq(seq, indices, threshold=0):
|
|
|
73
73
|
ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
|
|
74
74
|
acceptor_indices = {a: b for a, b in list(zip(indices, ref_seq_acceptor_probs)) if b >= threshold}
|
|
75
75
|
donor_indices = {a: b for a, b in list(zip(indices, ref_seq_donor_probs)) if b >= threshold}
|
|
76
|
-
return
|
|
76
|
+
return donor_indices, acceptor_indices
|
geney/splicing_utils.py
CHANGED
|
@@ -166,7 +166,7 @@ def run_splicing_engine(seq, engine='spliceai'):
|
|
|
166
166
|
match engine:
|
|
167
167
|
case 'spliceai':
|
|
168
168
|
from .spliceai_utils import sai_predict_probs, sai_models
|
|
169
|
-
|
|
169
|
+
acceptor_probs, donor_probs = sai_predict_probs(seq, models=sai_models)
|
|
170
170
|
|
|
171
171
|
case 'pangolin':
|
|
172
172
|
from .pangolin_utils import pangolin_predict_probs, pang_models
|
|
@@ -199,7 +199,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
199
199
|
# Prepare reference sequence with padding
|
|
200
200
|
ref_indices = transcript.indices
|
|
201
201
|
ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
|
|
202
|
-
|
|
202
|
+
ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq, engine)
|
|
203
203
|
|
|
204
204
|
# Verify lengths
|
|
205
205
|
assert len(ref_seq_donor_probs) == len(ref_indices), (
|
|
@@ -212,10 +212,11 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
|
|
|
212
212
|
)
|
|
213
213
|
|
|
214
214
|
# Create dictionaries and sort them by probability in descending order
|
|
215
|
-
donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
216
|
-
key=lambda item: item[1], reverse=True)
|
|
217
|
-
|
|
218
|
-
|
|
215
|
+
donor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
|
|
216
|
+
key=lambda item: item[1], reverse=True))
|
|
217
|
+
|
|
218
|
+
acceptor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
|
|
219
|
+
key=lambda item: item[1], reverse=True))
|
|
219
220
|
|
|
220
221
|
return donor_probs, acceptor_probs
|
|
221
222
|
|
|
@@ -332,8 +333,8 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
|
|
|
332
333
|
if ref_seq.seq == var_seq.seq:
|
|
333
334
|
return Missplicing({'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}})
|
|
334
335
|
|
|
335
|
-
|
|
336
|
-
|
|
336
|
+
ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq.seq, engine)
|
|
337
|
+
mut_seq_donor_probs, mut_seq_acceptor_probs = run_splicing_engine(var_seq.seq, engine)
|
|
337
338
|
ref_indices = ref_seq.indices[5000:-5000]
|
|
338
339
|
mut_indices = var_seq.indices[5000:-5000]
|
|
339
340
|
visible_donors = np.intersect1d(donors, ref_indices)
|
|
@@ -462,6 +463,50 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
|
|
|
462
463
|
return pd.concat(results)
|
|
463
464
|
|
|
464
465
|
|
|
466
|
+
def _round_probability(value):
    """Zero out negligible probabilities and round the rest to two decimals.

    Numeric cells whose magnitude is below 0.01 become ``0``; other numeric
    cells are rounded to two decimals.  Non-numeric cells are returned
    untouched.  The type check runs before the comparison so that a
    non-numeric cell can never raise ``TypeError``.
    """
    if isinstance(value, (int, float, np.number)):
        return 0 if abs(value) < 0.01 else round(value, 2)
    return value


def _epistasis_table(probs_by_label):
    """Build the per-site table and its epistasis residual row.

    Args:
        probs_by_label: Mapping of label -> {site index: probability}, in the
            insertion order wild type, first single, second single, double.

    Returns:
        DataFrame with one row per label plus a ``'residual'`` row, limited
        to the columns whose absolute residual exceeds 0.1.
    """
    table = pd.DataFrame.from_dict(probs_by_label).T
    table = table.map(_round_probability).round(2)

    # Sites whose value is identical in every condition carry no signal.
    table = table.loc[:, table.nunique() > 1]

    # Rows follow the insertion order of ``probs_by_label``.
    wild_type, first, second, double = (table.iloc[row] for row in range(4))

    # Residual = effect of the double mutant minus the sum of the two
    # single-mutant effects, each measured against wild type.
    table.loc['residual'] = (double - wild_type) - (
        (first - wild_type) + (second - wild_type))

    return table.loc[:, table.loc['residual'].abs() > 0.1]


def process_pairwise_epistasis_explicit(mid, engine='spliceai'):
    """Compare splice-site predictions of a mutation pair against wild type.

    The pre-mRNA of the gene named in ``mid`` is scored four times: wild
    type, each single mutation on its own, and both mutations together.
    Scoring is restricted to a window reaching 7,500 positions beyond the
    two mutation positions on either side.

    Args:
        mid: Identifier of exactly two mutations joined by ``'|'``.  Each
            mutation is colon-separated; field 0 is read as the gene name
            and field 2 as the integer position (presumably
            ``GENE:CHROM:POS:REF:ALT`` -- confirm against
            ``MutSeqMat.from_mutid``).
        engine: Name of the splicing engine forwarded to
            ``find_transcript_splicing``.

    Returns:
        Tuple ``(acceptors, donors)`` of DataFrames.  Rows are
        ``'wild_type'``, the two single-mutation ids, ``mid`` and
        ``'residual'``; columns are the site indices whose absolute
        residual exceeds 0.1.

    Raises:
        ValueError: If ``mid`` does not contain exactly two mutations; the
            residual is only defined for a pair.
    """
    single_ids = mid.split('|')
    if len(single_ids) != 2:
        raise ValueError(
            f"expected exactly two '|'-separated mutations, got {len(single_ids)}: {mid!r}")

    lower_pos, upper_pos = (int(single.split(':')[2]) for single in single_ids)
    g = Gene.from_file(mid.split(':')[0]).transcript().generate_pre_mrna()

    # On the reverse strand the window bounds are swapped and grow in the
    # opposite direction.
    if g.rev:
        lower_pos, upper_pos, factor = upper_pos, lower_pos, -1
    else:
        factor = 1

    lb, ub = lower_pos - (factor * 7500), upper_pos + (factor * 7500)

    donor_probs, acceptor_probs = {}, {}
    for label in ['wild_type'] + single_ids + [mid]:
        # Start every condition from an unmodified copy.
        transcript = g.clone().pre_mrna
        if label != 'wild_type':
            mutations = [MutSeqMat.from_mutid(cm) for cm in label.split('|')]
            if g.rev:
                mutations = [mutation.reverse_complement() for mutation in mutations]
            for mutation in mutations:
                # Mutations not contained in the transcript are skipped.
                if mutation in transcript:
                    transcript.mutate(mutation, inplace=True)

        donors, acceptors = find_transcript_splicing(transcript[lb:ub], engine=engine)
        donor_probs[label] = donors
        acceptor_probs[label] = acceptors

    return _epistasis_table(acceptor_probs), _epistasis_table(donor_probs)
|
|
509
|
+
|
|
465
510
|
|
|
466
511
|
class Missplicing:
|
|
467
512
|
def __init__(self, splicing_dict=None, threshold=0.5):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
2
|
geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
|
|
3
|
-
geney/SeqMats.py,sha256=
|
|
3
|
+
geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
|
|
4
4
|
geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
@@ -15,8 +15,8 @@ geney/oncosplice.py,sha256=YZvAnbe8gj9fPvs2DldeQpqhhe_QR9xBLe_0tcm9tdg,24793
|
|
|
15
15
|
geney/pangolin_utils.py,sha256=9jdBXlOcRaUdfi-UpUxHA0AkTMZkUF-Lt7HVZ1nEm3s,2973
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
|
-
geney/spliceai_utils.py,sha256=
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
18
|
+
geney/spliceai_utils.py,sha256=tVY0T6F6l3fNoaktpn7Kq0oH5ZM0ThFYt9nPi_lfakw,3077
|
|
19
|
+
geney/splicing_utils.py,sha256=_nXLCK41GhcrkXHXAqkhNV2IcwFltSxrR-rm8fUIrfE,40767
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
22
|
geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
|
|
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.66.dist-info/METADATA,sha256=bl8lWCBcJsbfBPJmkoY8xG0n6G7z7X1C-6jA1bSevCk,990
|
|
29
|
+
geney-1.3.66.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.66.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.66.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|