geney-1.3.1-py2.py3-none-any.whl → geney-1.3.3-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/oncosplice.py +1 -1
- geney/splicing_utils.py +89 -35
- {geney-1.3.1.dist-info → geney-1.3.3.dist-info}/METADATA +1 -1
- {geney-1.3.1.dist-info → geney-1.3.3.dist-info}/RECORD +6 -6
- {geney-1.3.1.dist-info → geney-1.3.3.dist-info}/WHEEL +0 -0
- {geney-1.3.1.dist-info → geney-1.3.3.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -4,7 +4,7 @@ import hashlib
|
|
|
4
4
|
from tqdm import tqdm
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import numpy as np
|
|
7
|
-
from
|
|
7
|
+
from .SeqMats import SeqMat, MutSeqMat
|
|
8
8
|
from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
|
|
9
9
|
from .tis_utils import find_tis
|
|
10
10
|
|
geney/splicing_utils.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
-
from .
|
|
3
|
-
from .
|
|
2
|
+
from ._mutation_utils import get_mutation
|
|
3
|
+
from .Gene import Gene
|
|
4
4
|
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
|
|
@@ -176,6 +176,7 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
176
176
|
center_index = var.rel_pos(center)
|
|
177
177
|
if center_index is None:
|
|
178
178
|
raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
|
|
179
|
+
|
|
179
180
|
var_start_pad = max(0, total_context - center_index)
|
|
180
181
|
var_end_pad = max(0, total_context - (length - center_index))
|
|
181
182
|
|
|
@@ -238,6 +239,59 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
238
239
|
temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
|
|
239
240
|
return temp
|
|
240
241
|
|
|
242
|
+
def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai', just_ss=False):
    """Compare splice-site predictions for a reference and a variant sequence.

    Both sequences are scored with the selected engine, the per-position
    acceptor/donor probabilities are compared with ``find_ss_changes``, and
    annotated sites that no longer exist in the variant's index space are
    reported as fully lost.

    Args:
        ref_seq: Reference sequence object exposing ``.seq`` and ``.indices``.
        var_seq: Variant sequence object exposing ``.seq`` and ``.indices``.
        donors: Annotated donor positions; only those present in the
            reference window are considered.
        acceptors: Annotated acceptor positions; only those present in the
            reference window are considered.
        threshold: Forwarded to ``Missplicing`` for the returned object.
        engine: ``'spliceai'`` or ``'pangolin'``.
        just_ss: Forwarded to ``pangolin_predict_probs``; unused by the
            ``'spliceai'`` engine.

    Returns:
        A plain dict with four empty sub-dicts when both sequences are
        identical; otherwise a ``Missplicing`` instance wrapping the
        ``missed_*`` / ``discovered_*`` dictionaries.

    Raises:
        ValueError: If ``engine`` is not one of the supported engines.
        AssertionError: If the number of trimmed indices does not match the
            number of predicted probabilities.
    """
    if ref_seq.seq == var_seq.seq:
        # NOTE(review): this path returns a plain dict while the path below
        # returns a Missplicing object -- confirm callers handle both.
        return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}

    # Engine modules are imported lazily so only the selected one is loaded.
    if engine == 'spliceai':
        from .spliceai_utils import sai_predict_probs, sai_models
        ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq.seq, models=sai_models)
        mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq.seq, models=sai_models)

    elif engine == 'pangolin':
        from .pangolin_utils import pangolin_predict_probs, pang_models
        # The pangolin result is unpacked as (donor, acceptor), the reverse
        # of the spliceai unpacking above.
        ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq.seq, models=pang_models, just_ss=just_ss)
        mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq.seq, models=pang_models, just_ss=just_ss)

    else:
        raise ValueError(f"{engine} not implemented")

    # presumably the predictors drop this many flanking positions on each
    # side of the input -- TODO confirm against the predictor helpers.
    flank = 5000
    ref_indices = ref_seq.indices[flank:-flank]
    mut_indices = var_seq.indices[flank:-flank]
    visible_donors = np.intersect1d(donors, ref_indices)
    visible_acceptors = np.intersect1d(acceptors, ref_indices)

    assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
    assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'

    # NOTE(review): the 0.1 cut-off handed to find_ss_changes is independent
    # of the ``threshold`` argument, which only reaches Missplicing below.
    iap, dap = find_ss_changes(dict(zip(ref_indices, ref_seq_acceptor_probs)),
                               dict(zip(mut_indices, mut_seq_acceptor_probs)),
                               visible_acceptors,
                               threshold=0.1)

    assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
    assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'

    idp, ddp = find_ss_changes(dict(zip(ref_indices, ref_seq_donor_probs)),
                               dict(zip(mut_indices, mut_seq_donor_probs)),
                               visible_donors,
                               threshold=0.1)

    ref_acceptors = dict(zip(ref_indices, ref_seq_acceptor_probs))
    ref_donors = dict(zip(ref_indices, ref_seq_donor_probs))

    # Build the membership set once instead of scanning the index sequence
    # for every annotated site.
    mut_index_set = set(mut_indices)

    # Annotated sites absent from the variant's indices (and not already
    # reported as missed) are recorded with zero absolute probability.
    lost_acceptors = {
        int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)}
        for p in visible_acceptors
        if p not in mut_index_set and p not in dap
    }
    lost_donors = {
        int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)}
        for p in visible_donors
        if p not in mut_index_set and p not in ddp
    }
    dap.update(lost_acceptors)
    ddp.update(lost_donors)

    missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
    temp = {
        outk: {_as_site_key(k): v for k, v in outv.items()}
        for outk, outv in missplicing.items()
    }
    return Missplicing(temp, threshold=threshold)


def _as_site_key(position):
    """Return ``position`` as an int when it is integral, else as a float."""
    as_float = float(position)
    return int(as_float) if as_float.is_integer() else as_float
|
|
294
|
+
|
|
241
295
|
|
|
242
296
|
class Missplicing:
|
|
243
297
|
def __init__(self, splicing_dict, threshold=0.5):
|
|
@@ -378,36 +432,36 @@ def missplicing(mut_id, splicing_threshold=0.5, primary_transcript=True, organis
|
|
|
378
432
|
import sqlite3
|
|
379
433
|
import json
|
|
380
434
|
|
|
381
|
-
def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
435
|
+
# def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
|
|
436
|
+
# conn = sqlite3.connect(os.path.join(tool_parameters['splicing_db'], 'mutation_data.db'))
|
|
437
|
+
# cursor = conn.cursor()
|
|
438
|
+
# # Create table if it doesn't exist
|
|
439
|
+
# cursor.execute('''CREATE TABLE IF NOT EXISTS mutations (
|
|
440
|
+
# tool TEXT,
|
|
441
|
+
# gene TEXT,
|
|
442
|
+
# mutation_id TEXT,
|
|
443
|
+
# transcript_id TEXT,
|
|
444
|
+
# data TEXT,
|
|
445
|
+
# PRIMARY KEY (tool, gene, mutation_id, transcript_id)
|
|
446
|
+
# )''')
|
|
447
|
+
#
|
|
448
|
+
# # Look up entry
|
|
449
|
+
# cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
|
|
450
|
+
# (tool, gene, mutation_id, transcript_id))
|
|
451
|
+
# row = cursor.fetchone()
|
|
452
|
+
#
|
|
453
|
+
# # If entry is found and force_recompute is False, return data
|
|
454
|
+
# if row and not force_recompute:
|
|
455
|
+
# return json.loads(row[0])
|
|
456
|
+
#
|
|
457
|
+
# # Otherwise, compute the dictionary
|
|
458
|
+
# computed_data = find_transcript_missplicing(tool, mutation_id, transcript_id) # Replace with actual function
|
|
459
|
+
#
|
|
460
|
+
# # Store in the database
|
|
461
|
+
# data_json = json.dumps(computed_data)
|
|
462
|
+
# cursor.execute('REPLACE INTO mutations (tool, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?)',
|
|
463
|
+
# (tool, mutation_id, transcript_id, data_json))
|
|
464
|
+
# conn.commit()
|
|
465
|
+
# conn.close()
|
|
466
|
+
#
|
|
467
|
+
# return computed_data
|
|
@@ -10,12 +10,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
10
10
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
11
11
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
12
12
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
13
|
-
geney/oncosplice.py,sha256=
|
|
13
|
+
geney/oncosplice.py,sha256=FdvuROk2G7wwLoB5lLzYia8Smw9hHZeVs-J2MUoAwlU,22106
|
|
14
14
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
15
15
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
16
16
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
17
17
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
18
|
-
geney/splicing_utils.py,sha256=
|
|
18
|
+
geney/splicing_utils.py,sha256=GK0ZzI-eBVxUvuI_bV7-WckvXTmhO83Yx0wS83tyI-w,22524
|
|
19
19
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
20
20
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
21
21
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -24,7 +24,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
24
24
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
25
25
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
26
26
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
27
|
-
geney-1.3.
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
27
|
+
geney-1.3.3.dist-info/METADATA,sha256=aEuwIsdWgmM-Gmdk4jON64JBr0vj7ify5PBQul57RmQ,994
|
|
28
|
+
geney-1.3.3.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
29
|
+
geney-1.3.3.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
30
|
+
geney-1.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|