geney 1.3.30__py2.py3-none-any.whl → 1.3.32__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/oncosplice.py CHANGED
@@ -5,7 +5,7 @@ from tqdm import tqdm
5
5
  import pandas as pd
6
6
  import numpy as np
7
7
  from .SeqMats import SeqMat, MutSeqMat
8
- from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
8
+ from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing, Missplicing
9
9
  from .Gene import Gene
10
10
  import copy
11
11
  from . import config
@@ -305,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
305
305
 
306
306
 
307
307
  def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
308
- window_length=13, organism='hg38', engine='spliceai'):
308
+ window_length=13, organism='hg38', splicing_engine=None):
309
309
  gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
310
310
  reference_gene_proteins = {
311
311
  transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
@@ -335,10 +335,13 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
335
335
  assert len(reference_transcript.protein) == len(
336
336
  reference_transcript.cons_vector), f"Protein ({len(reference_transcript.protein)}) and conservation vector ({len(reference_transcript.cons_vector)}) must be same length."
337
337
 
338
- missplicing = find_transcript_missplicing_seqs(
339
- reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
340
- mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
341
- reference_transcript.acceptors, threshold=splicing_threshold, engine=engine)
338
+ if splicing_engine is None:
339
+ missplicing = Missplicing()
340
+ else:
341
+ missplicing = find_transcript_missplicing_seqs(
342
+ reference_transcript.pre_mrna.get_context(center, context=7500, padding='N'),
343
+ mutated_transcript.pre_mrna.get_context(center, context=7500, padding='N'), reference_transcript.donors,
344
+ reference_transcript.acceptors, threshold=splicing_threshold, engine=splicing_engine)
342
345
  alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing.aberrant_splicing)
343
346
 
344
347
  for i, new_boundaries in enumerate(alternative_splicing_paths):
@@ -359,7 +362,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
359
362
 
360
363
  report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
361
364
  report['mut_id'] = mut_id
362
- report['engine'] = engine
365
+ report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
363
366
  report['oncosplice_score'] = affected_cons_scores
364
367
  report['percentile'] = percentile
365
368
  report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
@@ -376,7 +379,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
376
379
  ['mut_id', 'transcript_id', 'isoform_id', 'primary_transcript', 'missplicing', 'full_missplicing',
377
380
  'exon_changes', 'splicing_codes', 'affected_exon', 'affected_intron', 'mutation_distance_from_5',
378
381
  'mutation_distance_from_3', 'engine', 'reference_resemblance', 'oncosplice_score', 'percentile',
379
- 'isoform_prevalence', 'reference_protein', 'variant_protein']]
382
+ 'isoform_prevalence', 'reference_protein', 'variant_protein', 'splicing_engine']]
380
383
 
381
384
  #
382
385
  # import asyncio
geney/splicing_utils.py CHANGED
@@ -403,6 +403,7 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
403
403
  m2.get(k, {}).get('reference') or
404
404
  mb.get(k, {}).get('reference')
405
405
  )
406
+
406
407
  if ref_val is None:
407
408
  ref_val = 0
408
409
 
@@ -460,7 +461,7 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
460
461
 
461
462
 
462
463
  class Missplicing:
463
- def __init__(self, splicing_dict, threshold=0.5):
464
+ def __init__(self, splicing_dict=None, threshold=0.5):
464
465
  """
465
466
  Initialize a Missplicing object.
466
467
 
@@ -475,6 +476,8 @@ class Missplicing:
475
476
  }
476
477
  threshold (float): The threshold above which a delta is considered significant.
477
478
  """
479
+ if splicing_dict is None:
480
+ splicing_dict = {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
478
481
  self.missplicing = splicing_dict
479
482
  self.threshold = threshold
480
483
 
@@ -521,6 +524,8 @@ class Missplicing:
521
524
  """
522
525
  if threshold is None:
523
526
  threshold = self.threshold
527
+ if threshold is None:
528
+ threshold = 0
524
529
 
525
530
  return {
526
531
  event: {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.30
3
+ Version: 1.3.32
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -11,12 +11,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
11
11
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
12
12
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
13
13
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
14
- geney/oncosplice.py,sha256=LOgpdsTC1JotC2qFnqNMefl0t77vvWCJ5XctZsbrLNA,22343
14
+ geney/oncosplice.py,sha256=_nHLzzFRzxSWm-lYA9cQHTpiA1d8DMQIlch2CI4qZZg,22554
15
15
  geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
16
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
17
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
18
18
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
19
- geney/splicing_utils.py,sha256=elu5KtkcNGTWhviimIRt0DRWNcjwZrshDphM5XFPImA,38485
19
+ geney/splicing_utils.py,sha256=H79ISQXr7LIxNwxtUZ0pMjgnjTCi65nGgNo7T-Rt-zo,38708
20
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
21
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
22
22
  geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
25
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
26
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
27
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
28
- geney-1.3.30.dist-info/METADATA,sha256=ZS6Ft5TJYV5WFTOM47ny7m7eSA76C2EmCEMD2yuBhcQ,990
29
- geney-1.3.30.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
- geney-1.3.30.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
- geney-1.3.30.dist-info/RECORD,,
28
+ geney-1.3.32.dist-info/METADATA,sha256=Li7y-k33A_VcI9h0zQWAVtUpaswrecnnjx-Kqqvgrw8,990
29
+ geney-1.3.32.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.32.dist-info/RECORD,,
File without changes