geney 1.3.64__py2.py3-none-any.whl → 1.3.66__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic (the link to further details was not preserved in this text export).

geney/SeqMats.py CHANGED
@@ -8,6 +8,7 @@ class SeqMat:
8
8
  ROW_INDS = 1
9
9
  ROW_SUPERINDS = 2
10
10
  ROW_MUTATED = 3
11
+ ROW_ANNOTATION = 4
11
12
 
12
13
  def __init__(self, seqmat, alphabet=None):
13
14
  self.seqmat = seqmat
geney/spliceai_utils.py CHANGED
@@ -73,4 +73,4 @@ def run_spliceai_seq(seq, indices, threshold=0):
73
73
  ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
74
74
  acceptor_indices = {a: b for a, b in list(zip(indices, ref_seq_acceptor_probs)) if b >= threshold}
75
75
  donor_indices = {a: b for a, b in list(zip(indices, ref_seq_donor_probs)) if b >= threshold}
76
- return acceptor_indices, donor_indices
76
+ return donor_indices, acceptor_indices
geney/splicing_utils.py CHANGED
@@ -166,7 +166,7 @@ def run_splicing_engine(seq, engine='spliceai'):
166
166
  match engine:
167
167
  case 'spliceai':
168
168
  from .spliceai_utils import sai_predict_probs, sai_models
169
- donor_probs, acceptor_probs = sai_predict_probs(seq, models=sai_models)
169
+ acceptor_probs, donor_probs = sai_predict_probs(seq, models=sai_models)
170
170
 
171
171
  case 'pangolin':
172
172
  from .pangolin_utils import pangolin_predict_probs, pang_models
@@ -199,7 +199,7 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
199
199
  # Prepare reference sequence with padding
200
200
  ref_indices = transcript.indices
201
201
  ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
202
- ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq, engine)
202
+ ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq, engine)
203
203
 
204
204
  # Verify lengths
205
205
  assert len(ref_seq_donor_probs) == len(ref_indices), (
@@ -212,10 +212,11 @@ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict
212
212
  )
213
213
 
214
214
  # Create dictionaries and sort them by probability in descending order
215
- donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
216
- key=lambda item: item[1], reverse=True)
217
- acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
218
- key=lambda item: item[1], reverse=True)
215
+ donor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
216
+ key=lambda item: item[1], reverse=True))
217
+
218
+ acceptor_probs = dict(sorted(((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
219
+ key=lambda item: item[1], reverse=True))
219
220
 
220
221
  return donor_probs, acceptor_probs
221
222
 
@@ -332,8 +333,8 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
332
333
  if ref_seq.seq == var_seq.seq:
333
334
  return Missplicing({'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}})
334
335
 
335
- ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq.seq, engine)
336
- mut_seq_acceptor_probs, mut_seq_donor_probs = run_splicing_engine(var_seq.seq, engine)
336
+ ref_seq_donor_probs, ref_seq_acceptor_probs = run_splicing_engine(ref_seq.seq, engine)
337
+ mut_seq_donor_probs, mut_seq_acceptor_probs = run_splicing_engine(var_seq.seq, engine)
337
338
  ref_indices = ref_seq.indices[5000:-5000]
338
339
  mut_indices = var_seq.indices[5000:-5000]
339
340
  visible_donors = np.intersect1d(donors, ref_indices)
@@ -462,6 +463,50 @@ def process_pairwise_epistasis(mids, engine='pangolin', fprint=False, db=None):
462
463
  return pd.concat(results)
463
464
 
464
465
 
466
+ def process_pairwise_epistasis_explicit(mid, engine='spliceai'):
467
+ donor_probs, acceptor_probs = {}, {}
468
+ lower_pos, upper_pos = int(mid.split(':')[2]), int(mid.split(':')[6])
469
+ g = Gene.from_file(mid.split(':')[0]).transcript().generate_pre_mrna()
470
+ print(g.rev)
471
+ if g.rev:
472
+ lower_pos, upper_pos, factor = upper_pos, lower_pos, -1
473
+ else:
474
+ factor = 1
475
+
476
+ lb, ub = lower_pos - (factor * 7500), upper_pos + (factor * 7500)
477
+
478
+ for m in ['wild_type'] + mid.split('|') + [mid]:
479
+ transcript = g.clone().pre_mrna
480
+ if m != 'wild_type':
481
+ mutations = [MutSeqMat.from_mutid(cm) for cm in m.split('|')]
482
+ if g.rev:
483
+ mutations = [m.reverse_complement() for m in mutations]
484
+ for mutation in mutations:
485
+ if mutation in transcript:
486
+ transcript.mutate(mutation, inplace=True)
487
+
488
+ donors, acceptors = find_transcript_splicing(transcript[lb:ub], engine=engine)
489
+ donor_probs[m] = donors
490
+ acceptor_probs[m] = acceptors
491
+
492
+ acceptors = pd.DataFrame.from_dict(acceptor_probs).T
493
+ donors = pd.DataFrame.from_dict(donor_probs).T
494
+
495
+ acceptors = acceptors.map(lambda x: 0 if x < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
496
+ acceptors = acceptors.loc[:, acceptors.nunique() > 1]
497
+ donors = donors.map(lambda x: 0 if abs(x) < 0.01 else round(x, 2) if isinstance(x, (int, float)) else x).round(2)
498
+ donors = donors.loc[:, donors.nunique() > 1]
499
+
500
+ donors.loc['residual'] = (donors.iloc[3] - donors.iloc[0]) - (
501
+ (donors.iloc[1] - donors.iloc[0]) + (donors.iloc[2] - donors.iloc[0]))
502
+ acceptors.loc['residual'] = (acceptors.iloc[3] - acceptors.iloc[0]) - (
503
+ (acceptors.iloc[1] - acceptors.iloc[0]) + (acceptors.iloc[2] - acceptors.iloc[0]))
504
+
505
+ donors = donors.loc[:, donors.loc['residual'].abs() > 0.1]
506
+ acceptors = acceptors.loc[:, acceptors.loc['residual'].abs() > 0.1]
507
+
508
+ return acceptors, donors
509
+
465
510
 
466
511
  class Missplicing:
467
512
  def __init__(self, splicing_dict=None, threshold=0.5):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.64
3
+ Version: 1.3.66
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -1,6 +1,6 @@
1
1
  geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
2
2
  geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
3
- geney/SeqMats.py,sha256=ksS1JW3vDj-CRBpcy-K28UFZSfJaMuc-JNNM26T09us,19259
3
+ geney/SeqMats.py,sha256=9-eJnfU2w3LGc0XvVvFEO_QrBneTkC6xkZKDfTcEw5o,19282
4
4
  geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
5
5
  geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
6
6
  geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
@@ -15,8 +15,8 @@ geney/oncosplice.py,sha256=YZvAnbe8gj9fPvs2DldeQpqhhe_QR9xBLe_0tcm9tdg,24793
15
15
  geney/pangolin_utils.py,sha256=9jdBXlOcRaUdfi-UpUxHA0AkTMZkUF-Lt7HVZ1nEm3s,2973
16
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
17
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
18
- geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
19
- geney/splicing_utils.py,sha256=cVLXArVWJIxGaaWttm_dxJQbo6OBBkry4OFozaw-M2U,38768
18
+ geney/spliceai_utils.py,sha256=tVY0T6F6l3fNoaktpn7Kq0oH5ZM0ThFYt9nPi_lfakw,3077
19
+ geney/splicing_utils.py,sha256=_nXLCK41GhcrkXHXAqkhNV2IcwFltSxrR-rm8fUIrfE,40767
20
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
21
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
22
22
  geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
@@ -25,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
25
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
26
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
27
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
28
- geney-1.3.64.dist-info/METADATA,sha256=UBUHpebp0LZs-uSCEwFF0snqM-FoUX8mC2-1HaBx3cY,990
29
- geney-1.3.64.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
- geney-1.3.64.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
- geney-1.3.64.dist-info/RECORD,,
28
+ geney-1.3.66.dist-info/METADATA,sha256=bl8lWCBcJsbfBPJmkoY8xG0n6G7z7X1C-6jA1bSevCk,990
29
+ geney-1.3.66.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.66.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.66.dist-info/RECORD,,
File without changes