geney 1.3.1__py2.py3-none-any.whl → 1.3.3__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/oncosplice.py CHANGED
@@ -4,7 +4,7 @@ import hashlib
4
4
  from tqdm import tqdm
5
5
  import pandas as pd
6
6
  import numpy as np
7
- from seqmats import SeqMat, MutSeqMat
7
+ from .SeqMats import SeqMat, MutSeqMat
8
8
  from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
9
9
  from .tis_utils import find_tis
10
10
 
geney/splicing_utils.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
- from .mutation_utils import get_mutation
3
- from .seqmat_utils import Gene
2
+ from ._mutation_utils import get_mutation
3
+ from .Gene import Gene
4
4
 
5
5
  from collections import defaultdict
6
6
 
@@ -176,6 +176,7 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
176
176
  center_index = var.rel_pos(center)
177
177
  if center_index is None:
178
178
  raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
179
+
179
180
  var_start_pad = max(0, total_context - center_index)
180
181
  var_end_pad = max(0, total_context - (length - center_index))
181
182
 
@@ -238,6 +239,59 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
238
239
  temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
239
240
  return temp
240
241
 
242
+ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai', just_ss=False):
243
+ if ref_seq.seq == var_seq.seq:
244
+ return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
245
+
246
+ if engine == 'spliceai':
247
+ from .spliceai_utils import sai_predict_probs, sai_models
248
+ ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq.seq, models=sai_models)
249
+ mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq.seq, models=sai_models)
250
+
251
+ elif engine == 'pangolin':
252
+ from .pangolin_utils import pangolin_predict_probs, pang_models
253
+ ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq.seq , models=pang_models, just_ss=just_ss)
254
+ mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq.seq, models=pang_models, just_ss=just_ss)
255
+
256
+ else:
257
+ raise ValueError(f"{engine} not implemented")
258
+
259
+ ref_indices = ref_seq.indices[5000:-5000]
260
+ mut_indices = var_seq.indices[5000:-5000]
261
+ visible_donors = np.intersect1d(donors, ref_indices)
262
+ visible_acceptors = np.intersect1d(acceptors, ref_indices)
263
+
264
+ assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
265
+ assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
266
+
267
+ iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
268
+ {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
269
+ visible_acceptors,
270
+ threshold=0.1)
271
+
272
+ assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
273
+ assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
274
+
275
+ idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
276
+ {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
277
+ visible_donors,
278
+ threshold=0.1)
279
+
280
+ ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
281
+ ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
282
+
283
+ lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
284
+ visible_acceptors if p not in mut_indices and p not in dap}
285
+ lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
286
+ if p not in mut_indices and p not in ddp}
287
+ dap.update(lost_acceptors)
288
+ ddp.update(lost_donors)
289
+
290
+ missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
291
+ missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
292
+ temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
293
+ return Missplicing(temp, threshold=threshold)
294
+
241
295
 
242
296
  class Missplicing:
243
297
  def __init__(self, splicing_dict, threshold=0.5):
@@ -378,36 +432,36 @@ def missplicing(mut_id, splicing_threshold=0.5, primary_transcript=True, organis
378
432
  import sqlite3
379
433
  import json
380
434
 
381
- def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
382
- conn = sqlite3.connect(os.path.join(tool_parameters['splicing_db'], 'mutation_data.db'))
383
- cursor = conn.cursor()
384
- # Create table if it doesn't exist
385
- cursor.execute('''CREATE TABLE IF NOT EXISTS mutations (
386
- tool TEXT,
387
- gene TEXT,
388
- mutation_id TEXT,
389
- transcript_id TEXT,
390
- data TEXT,
391
- PRIMARY KEY (tool, gene, mutation_id, transcript_id)
392
- )''')
393
-
394
- # Look up entry
395
- cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
396
- (tool, gene, mutation_id, transcript_id))
397
- row = cursor.fetchone()
398
-
399
- # If entry is found and force_recompute is False, return data
400
- if row and not force_recompute:
401
- return json.loads(row[0])
402
-
403
- # Otherwise, compute the dictionary
404
- computed_data = find_transcript_missplicing(tool, mutation_id, transcript_id) # Replace with actual function
405
-
406
- # Store in the database
407
- data_json = json.dumps(computed_data)
408
- cursor.execute('REPLACE INTO mutations (tool, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?)',
409
- (tool, mutation_id, transcript_id, data_json))
410
- conn.commit()
411
- conn.close()
412
-
413
- return computed_data
435
+ # def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
436
+ # conn = sqlite3.connect(os.path.join(tool_parameters['splicing_db'], 'mutation_data.db'))
437
+ # cursor = conn.cursor()
438
+ # # Create table if it doesn't exist
439
+ # cursor.execute('''CREATE TABLE IF NOT EXISTS mutations (
440
+ # tool TEXT,
441
+ # gene TEXT,
442
+ # mutation_id TEXT,
443
+ # transcript_id TEXT,
444
+ # data TEXT,
445
+ # PRIMARY KEY (tool, gene, mutation_id, transcript_id)
446
+ # )''')
447
+ #
448
+ # # Look up entry
449
+ # cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
450
+ # (tool, gene, mutation_id, transcript_id))
451
+ # row = cursor.fetchone()
452
+ #
453
+ # # If entry is found and force_recompute is False, return data
454
+ # if row and not force_recompute:
455
+ # return json.loads(row[0])
456
+ #
457
+ # # Otherwise, compute the dictionary
458
+ # computed_data = find_transcript_missplicing(tool, mutation_id, transcript_id) # Replace with actual function
459
+ #
460
+ # # Store in the database
461
+ # data_json = json.dumps(computed_data)
462
+ # cursor.execute('REPLACE INTO mutations (tool, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?)',
463
+ # (tool, mutation_id, transcript_id, data_json))
464
+ # conn.commit()
465
+ # conn.close()
466
+ #
467
+ # return computed_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.1
3
+ Version: 1.3.3
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -10,12 +10,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
10
10
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
11
11
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
12
12
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
13
- geney/oncosplice.py,sha256=kzXeZ-4aYo1O1v83PkQZcnDa1JR6BqVeQaJ3Vg54jzU,22105
13
+ geney/oncosplice.py,sha256=FdvuROk2G7wwLoB5lLzYia8Smw9hHZeVs-J2MUoAwlU,22106
14
14
  geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
15
15
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
16
16
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
17
17
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
18
- geney/splicing_utils.py,sha256=4xYXy_dIbqdbVfxsEj_OCuM-MsQ24gi4fIv0vQjAYcQ,19215
18
+ geney/splicing_utils.py,sha256=GK0ZzI-eBVxUvuI_bV7-WckvXTmhO83Yx0wS83tyI-w,22524
19
19
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
20
20
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
21
21
  geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -24,7 +24,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
24
24
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
25
25
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
26
26
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
27
- geney-1.3.1.dist-info/METADATA,sha256=uXhxk4kbe7lBIGqOVvUNuIpUgTrhcUm9lz6uv9N9AGw,994
28
- geney-1.3.1.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
- geney-1.3.1.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
- geney-1.3.1.dist-info/RECORD,,
27
+ geney-1.3.3.dist-info/METADATA,sha256=aEuwIsdWgmM-Gmdk4jON64JBr0vj7ify5PBQul57RmQ,994
28
+ geney-1.3.3.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
+ geney-1.3.3.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
+ geney-1.3.3.dist-info/RECORD,,
File without changes