geney 1.3.63__py2.py3-none-any.whl → 1.3.65__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/SeqMats.py CHANGED
@@ -8,6 +8,7 @@ class SeqMat:
8
8
  ROW_INDS = 1
9
9
  ROW_SUPERINDS = 2
10
10
  ROW_MUTATED = 3
11
+ ROW_ANNOTATION = 4
11
12
 
12
13
  def __init__(self, seqmat, alphabet=None):
13
14
  self.seqmat = seqmat
geney/oncosplice.py CHANGED
@@ -368,7 +368,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
368
368
  if no_splicing_record and splicing_db is not None:
369
369
  splicing_db.store_mutation_data(engine=splicing_engine, mut_id=mut_id, gene=gene.gene_name, transcript_id=reference_transcript.transcript_id, data=missplicing.missplicing)
370
370
 
371
- alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing.missplicing)
371
+ alternative_splicing_paths = develop_aberrant_splicing(reference_transcript, missplicing) #.missplicing)
372
372
  for i, new_boundaries in enumerate(alternative_splicing_paths):
373
373
  print("iterating through new boundaries...")
374
374
 
geney/spliceai_utils.py CHANGED
@@ -73,4 +73,4 @@ def run_spliceai_seq(seq, indices, threshold=0):
73
73
  ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
74
74
  acceptor_indices = {a: b for a, b in list(zip(indices, ref_seq_acceptor_probs)) if b >= threshold}
75
75
  donor_indices = {a: b for a, b in list(zip(indices, ref_seq_donor_probs)) if b >= threshold}
76
- return acceptor_indices, donor_indices
76
+ return donor_indices, acceptor_indices
geney/splicing_utils.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ from tensorflow.python.framework.test_ops import none_eager_fallback
4
+
3
5
  from .Gene import Gene
4
6
  from .SeqMats import MutSeqMat
5
7
  from collections import defaultdict
@@ -119,15 +121,19 @@ def prepare_splice_sites(acceptors, donors, aberrant_splicing):
119
121
 
120
122
 
121
123
  def develop_aberrant_splicing(transcript, aberrant_splicing):
122
- all_acceptors, all_donors = prepare_splice_sites(transcript.acceptors, transcript.donors, aberrant_splicing)
123
- adj_list = generate_adjacency_list(all_acceptors, all_donors, transcript_start=transcript.transcript_start,
124
- transcript_end=transcript.transcript_end, rev=transcript.rev,
125
- max_distance=100000)
126
- end_node = (transcript.transcript_end, 'transcript_end')
127
- start_node = (transcript.transcript_start, 'transcript_start')
128
- for path, prob in find_all_paths(adj_list, start_node, end_node):
129
- yield {'acceptors': [p[0] for p in path if p[1] == 'acceptor'],
130
- 'donors': [p[0] for p in path if p[1] == 'donor'], 'path_weight': prob}
124
+ if not aberrant_splicing:
125
+ yield {'acceptors': transcript.acceptors, 'donors': transcript.donors, 'path_weight': 1}
126
+
127
+ else:
128
+ all_acceptors, all_donors = prepare_splice_sites(transcript.acceptors, transcript.donors, aberrant_splicing.missplicing)
129
+ adj_list = generate_adjacency_list(all_acceptors, all_donors, transcript_start=transcript.transcript_start,
130
+ transcript_end=transcript.transcript_end, rev=transcript.rev,
131
+ max_distance=100000)
132
+ end_node = (transcript.transcript_end, 'transcript_end')
133
+ start_node = (transcript.transcript_start, 'transcript_start')
134
+ for path, prob in find_all_paths(adj_list, start_node, end_node):
135
+ yield {'acceptors': [p[0] for p in path if p[1] == 'acceptor'],
136
+ 'donors': [p[0] for p in path if p[1] == 'donor'], 'path_weight': prob}
131
137
 
132
138
 
133
139
 
@@ -193,7 +199,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
193
199
  # Prepare reference sequence with padding
194
200
  ref_indices = transcript.indices
195
201
  ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
196
- ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq, engine)
202
+ ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq, engine)
197
203
 
198
204
  # Verify lengths
199
205
  assert len(ref_seq_donor_probs) == len(ref_indices), (
@@ -206,10 +212,10 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
206
212
  )
207
213
 
208
214
  # Create dictionaries and sort them by probability in descending order
209
- donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
210
- key=lambda item: item[1], reverse=True)
211
- acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
212
- key=lambda item: item[1], reverse=True)
215
+ donor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
216
+ key=lambda item: item[1], reverse=True))
217
+ acceptor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
218
+ key=lambda item: item[1], reverse=True))
213
219
 
214
220
  return donor_probs, acceptor_probs
215
221
 
@@ -456,6 +462,50 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
456
462
  return pd.concat(results)
457
463
 
458
464
 
465
+ def process_pairwise_epistasis_explicit(mid, engine='spliceai'):
466
+ donor_probs, acceptor_probs = {}, {}
467
+ lower_pos, upper_pos = int(mid.split(':')[2]), int(mid.split(':')[6])
468
+ g = Gene.from_file(mid.split(':')[0]).transcript().generate_pre_mrna()
469
+ print(g.rev)
470
+ if g.rev:
471
+ lower_pos, upper_pos, factor = upper_pos, lower_pos, -1
472
+ else:
473
+ factor = 1
474
+
475
+ lb, ub = lower_pos - (factor * 7500), upper_pos + (factor * 7500)
476
+
477
+ for m in ['wild_type'] + mid.split('|') + [mid]:
478
+ transcript = g.clone().pre_mrna
479
+ if m != 'wild_type':
480
+ mutations = [MutSeqMat.from_mutid(cm) for cm in m.split('|')]
481
+ if g.rev:
482
+ mutations = [m.reverse_complement() for m in mutations]
483
+ for mutation in mutations:
484
+ if mutation in transcript:
485
+ transcript.mutate(mutation, inplace=True)
486
+
487
+ donors, acceptors = find_transcript_splicing(transcript[lb:ub], engine=engine)
488
+ donor_probs[m] = donors
489
+ acceptor_probs[m] = acceptors
490
+
491
+ acceptors = pd.DataFrame.from_dict(acceptor_probs).T
492
+ donors = pd.DataFrame.from_dict(donor_probs).T
493
+
494
+ acceptors = acceptors.map(lambda x: 0 if x < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
495
+ acceptors = acceptors.loc[:, acceptors.nunique() > 1]
496
+ donors = donors.map(lambda x: 0 if abs(x) < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
497
+ donors = donors.loc[:, donors.nunique() > 1]
498
+
499
+ donors.loc['residual'] = (donors.iloc[3] - donors.iloc[0]) - (
500
+ (donors.iloc[1] - donors.iloc[0]) + (donors.iloc[2] - donors.iloc[0]))
501
+ acceptors.loc['residual'] = (acceptors.iloc[3] - acceptors.iloc[0]) - (
502
+ (acceptors.iloc[1] - acceptors.iloc[0]) + (acceptors.iloc[2] - acceptors.iloc[0]))
503
+
504
+ donors = donors.loc[:, donors.loc['residual'].abs() > 0.1]
505
+ acceptors = acceptors.loc[:, acceptors.loc['residual'].abs() > 0.1]
506
+
507
+ return acceptors, donors
508
+
459
509
 
460
510
  class Missplicing:
461
511
  def __init__(self, splicing_dict=None, threshold=0.5):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.63
3
+ Version: 1.3.65
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -1,6 +1,6 @@
1
1
  geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
2
2
  geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
3
- geney/SeqMats.py,sha256=ksS1JW3vDj-CRBpcy-K28UFZSfJaMuc-JNNM26T09us,19259
3
+ geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
4
4
  geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
5
5
  geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
6
6
  geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
@@ -11,12 +11,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
11
11
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
12
12
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
13
13
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
14
- geney/oncosplice.py,sha256=5O8oXnh73SEdweuaCQs0gmIja0CDjEj8KNL0aXg5lQQ,24790
14
+ geney/oncosplice.py,sha256=YZvAnbe8gj9fPvs2DldeQpqhhe_QR9xBLe_0tcm9tdg,24793
15
15
  geney/pangolin_utils.py,sha256=9jdBXlOcRaUdfi-UpUxHA0AkTMZkUF-Lt7HVZ1nEm3s,2973
16
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
17
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
18
- geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
19
- geney/splicing_utils.py,sha256=GDSuuy8kWBh2_4UDwqjclF9gKnnCoLc_neOEsSrNWQM,38512
18
+ geney/spliceai_utils.py,sha256=tVY0T6F6l3fNoaktpn7Kq0oH5ZM0ThFYt9nPi_lfakw,3077
19
+ geney/splicing_utils.py,sha256=W-N0ENZJv1PdnVlHuaN_2az2-7Zl6cHYe_CYR1G41U4,40766
20
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
21
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
22
22
  geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
25
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
26
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
27
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
28
- geney-1.3.63.dist-info/METADATA,sha256=lis1MZ3LovEMzTuqZthCmPxQ26p4XbeXxdhbtb4qgPo,990
29
- geney-1.3.63.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
- geney-1.3.63.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
- geney-1.3.63.dist-info/RECORD,,
28
+ geney-1.3.65.dist-info/METADATA,sha256=L-doIh0XdJuxs4gg1Dhs5mLoa_1zI8_bboq4cnlfvfA,990
29
+ geney-1.3.65.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.65.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.65.dist-info/RECORD,,
File without changes