geney-1.2.51-py2.py3-none-any.whl → geney-1.2.53-py2.py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.
- geney/oncosplice.py +5 -1
- geney/pangolin_utils.py +2 -1
- geney/splicing_utils.py +2 -13
- {geney-1.2.51.dist-info → geney-1.2.53.dist-info}/METADATA +1 -1
- {geney-1.2.51.dist-info → geney-1.2.53.dist-info}/RECORD +7 -7
- {geney-1.2.51.dist-info → geney-1.2.53.dist-info}/WHEEL +0 -0
- {geney-1.2.51.dist-info → geney-1.2.53.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -421,7 +421,7 @@ import asyncio
|
|
|
421
421
|
|
|
422
422
|
|
|
423
423
|
async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
424
|
-
window_length=13, organism='hg38', engine='spliceai'):
|
|
424
|
+
window_length=13, organism='hg38', engine='spliceai', use_cons=True):
|
|
425
425
|
import sys, os
|
|
426
426
|
needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
|
|
427
427
|
needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
|
|
@@ -470,6 +470,10 @@ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=Tr
|
|
|
470
470
|
transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
|
|
471
471
|
transcript.generate_mature_mrna().generate_protein(inplace=True)
|
|
472
472
|
ref_protein, cons_vector = transcript.protein, transcript.cons_vector
|
|
473
|
+
|
|
474
|
+
if not use_cons:
|
|
475
|
+
cons_vector = np.ones(len(ref_protein))
|
|
476
|
+
|
|
473
477
|
if sum(cons_vector) == 0:
|
|
474
478
|
cons_vector = np.ones(len(ref_protein))/len(ref_protein)
|
|
475
479
|
|
geney/pangolin_utils.py
CHANGED
|
@@ -56,7 +56,8 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
56
56
|
seq = true_seq
|
|
57
57
|
true_seq = true_seq[5000:-5000]
|
|
58
58
|
acceptor_dinucleotide = np.array([true_seq[i - 2:i] == 'AG' for i in range(len(true_seq))])
|
|
59
|
-
donor_dinucleotide = np.array([true_seq[i + 1:i + 3] == 'GT' for i in range(len(true_seq))])
|
|
59
|
+
# donor_dinucleotide = np.array([true_seq[i + 1:i + 3] == 'GT' for i in range(len(true_seq))])
|
|
60
|
+
donor_dinucleotide = np.array([true_seq[i -2:i] == 'GT' for i in range(len(true_seq))])
|
|
60
61
|
|
|
61
62
|
seq = pang_one_hot_encode(seq).T
|
|
62
63
|
seq = torch.from_numpy(np.expand_dims(seq, axis=0)).float()
|
geney/splicing_utils.py
CHANGED
|
@@ -150,17 +150,10 @@ def find_ss_changes(ref_dct, mut_dct, known_splice_sites, threshold=0.5):
|
|
|
150
150
|
|
|
151
151
|
def find_transcript_missplicing(transcript, mutations, context=5000, window=2500, threshold=0.5, engine='spliceai'):
|
|
152
152
|
from functools import reduce
|
|
153
|
-
|
|
154
153
|
ref = transcript.pre_mrna
|
|
155
|
-
# for mutation in mutations:
|
|
156
|
-
# var = ref + mutation
|
|
157
|
-
|
|
158
154
|
var = reduce(lambda acc, mutation: acc + mutation, mutations, ref)
|
|
159
|
-
|
|
160
155
|
center = int(np.mean([mutation.position for mutation in mutations]) // 1)
|
|
161
|
-
|
|
162
156
|
total_context = context + window
|
|
163
|
-
|
|
164
157
|
length = ref.seqmat.shape[-1]
|
|
165
158
|
center_index = ref.rel_pos(center)
|
|
166
159
|
ref_start_pad = max(0, total_context - center_index)
|
|
@@ -173,9 +166,6 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
173
166
|
|
|
174
167
|
ref = ref.inspect(center, context=total_context)
|
|
175
168
|
var = var.inspect(center, context=total_context)
|
|
176
|
-
#
|
|
177
|
-
# ref_indices = np.concatenate([np.zeros(ref_start_pad), ref.inspect(center, context = window).indices, np.zeros(ref_end_pad)])
|
|
178
|
-
# mut_indices = np.concatenate([np.zeros(var_start_pad), var.inspect(center, context = window).indices, np.zeros(var_end_pad)])
|
|
179
169
|
|
|
180
170
|
ref_indices = np.concatenate([np.zeros(ref_start_pad), ref.indices, np.zeros(ref_end_pad)])
|
|
181
171
|
mut_indices = np.concatenate([np.zeros(var_start_pad), var.indices, np.zeros(var_end_pad)])
|
|
@@ -186,13 +176,12 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
186
176
|
ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
|
|
187
177
|
var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
|
|
188
178
|
|
|
189
|
-
|
|
179
|
+
print(ref_seq)
|
|
180
|
+
|
|
190
181
|
if engine == 'spliceai':
|
|
191
182
|
from .spliceai_utils import sai_predict_probs, sai_models
|
|
192
183
|
ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, models=sai_models)
|
|
193
184
|
mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq, models=sai_models)
|
|
194
|
-
# ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
|
|
195
|
-
# mut_seq_acceptor_probs, mut_seq_donor_probs = mut_seq_probs_temp[0, :], mut_seq_probs_temp[1, :]
|
|
196
185
|
|
|
197
186
|
elif engine == 'pangolin':
|
|
198
187
|
from .pangolin_utils import pangolin_predict_probs, pang_models
|
|
@@ -6,12 +6,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
6
6
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
7
7
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
8
8
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
9
|
-
geney/oncosplice.py,sha256=
|
|
10
|
-
geney/pangolin_utils.py,sha256=
|
|
9
|
+
geney/oncosplice.py,sha256=hPmB9sEPs9lr22BlPGKpQUOd59vUjAttXZ6QKf4A-kg,23534
|
|
10
|
+
geney/pangolin_utils.py,sha256=rVi_U23nhw6wCc44fBeD3sv-FshLTGE1UMMtIYwgr9U,2967
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
12
|
geney/seqmat_utils.py,sha256=2cRXT_Ox4IdzCM8x3H2HexxFZzjo5WHs0HZiUQv8fBM,18347
|
|
13
13
|
geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
|
|
14
|
-
geney/splicing_utils.py,sha256=
|
|
14
|
+
geney/splicing_utils.py,sha256=GpzvdXKBawTXp8SUJiC9aGV3_RX7LzkkUfk4_kdu5rI,15562
|
|
15
15
|
geney/survival_utils.py,sha256=FKcXGL7FNp9yuVyNaUudVgn-JBuZfrhvS1c1NZE-tZ4,6880
|
|
16
16
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
17
17
|
geney/tis_utils.py,sha256=vA2ci4gNfwwQZlCjPpO5ehvL2NRVeM7lHI_VyfT-_10,8049
|
|
@@ -20,7 +20,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
20
20
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
21
21
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
22
22
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
26
|
-
geney-1.2.
|
|
23
|
+
geney-1.2.53.dist-info/METADATA,sha256=o8uT_lpsOsrQyXfD84OkN-EFUaVJGmo18IOQK75cUjw,948
|
|
24
|
+
geney-1.2.53.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
25
|
+
geney-1.2.53.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
26
|
+
geney-1.2.53.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|