geney 1.2.24__py2.py3-none-any.whl → 1.2.26__py2.py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
geney/seqmat_utils.py CHANGED
@@ -140,6 +140,14 @@ class SeqMat:
140
140
  end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
141
141
  return self.seqmat[:, start_pos:end_pos]
142
142
 
143
+ def subseq_suffix(self, start):
144
+ start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
145
+ return self.seqmat[:, start_pos:]
146
+
147
+ def subseq_prefix(self, end):
148
+ end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0]
149
+ return self.seqmat[:, :end_pos]
150
+
143
151
  def inspect(self, pos, context=500):
144
152
  condition = np.where(self.seqmat[1, :] == pos)[0][0]
145
153
  return SeqMat().set_seqmat(self.seqmat[:, max(0, condition - context):min(self.seqmat.shape[-1], condition + context + 1)])
@@ -147,6 +155,29 @@ class SeqMat:
147
155
  def rel_pos(self, pos):
148
156
  return np.where(self.seqmat[1, :] == pos)[0][0]
149
157
 
158
+ def orf_seqmat(self, tis_index):
159
+ if tis_index not in self.seqmat[1, :]:
160
+ return SeqMat('ATG')
161
+
162
+ temp = SeqMat().set_seqmat(self.subseq_suffix(tis_index))
163
+ # Ensure the sequence length is divisible by 3
164
+ # seq_length = len(temp.seq)
165
+
166
+ # if seq_length % 3 != 0:
167
+ # temp.seqmat = temp.seqmat[:, :-(seq_length % 3)] # Trim the extra nucleotides
168
+
169
+ if temp.seq[:3] == 'ATG':
170
+ for i in range(3, len(temp.seq), 3):
171
+ codon = temp.seq[i:i + 3]
172
+ if codon in ['TAA', 'TAG', 'TGA']:
173
+ index = temp.seqmat[1, i-3]
174
+ return SeqMat().set_seqmat(temp.subseq_prefix(index)) # Not include the stop codon
175
+
176
+ # If no stop codon is found, return the full sequence
177
+ return SeqMat().set_seqmat(temp.seq)
178
+
179
+ else:
180
+ return SeqMat('ATG')
150
181
 
151
182
  class Gene:
152
183
  def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
@@ -371,22 +402,20 @@ class Transcript:
371
402
 
372
403
  return mature_mrna
373
404
 
374
- # def find_end_codon(self):
375
- # first_stop_index = next((i for i in range(0, len(orf) - 2, 3) if orf[i:i + 3] in {"TAG", "TAA", "TGA"}),
376
- # len(orf) - 3)
377
- # while first_stop_index % 3 != 0:
378
- # first_stop_index -= 1
379
- #
380
- # orf = orf[:first_stop_index + 3]
381
- # return None
405
+ def find_end_codon(self, orf):
406
+ first_stop_index = next((i for i in range(0, len(orf) - 2, 3) if orf[i:i + 3] in {"TAG", "TAA", "TGA"}),
407
+ len(orf) - 3)
408
+ return first_stop_index
382
409
 
383
410
  @property
384
411
  def orf(self):
385
412
  if not (hasattr(self, 'TIS') and hasattr(self, 'TTS')):
386
413
  print("Cannot create protein without set TIS and TTS values.")
387
414
  return self
415
+ # If self.TIS not in seqmat, then no orf and no protein
416
+ return self.mature_mrna.orf_seqmat(self.TIS)
388
417
 
389
- return SeqMat().set_seqmat(self.mature_mrna.raw_subseq(self.TIS, self.TTS))
418
+ # return SeqMat().set_seqmat(self.mature_mrna.raw_subseq(self.TIS, self.TTS))
390
419
 
391
420
  def generate_protein(self, inplace=True, domains=None):
392
421
  protein = str(Seq(self.orf.seq).translate()).replace('*', '')
geney/spliceai_utils.py CHANGED
@@ -1,11 +1,10 @@
1
1
 
2
2
  #### SpliceAI Modules
3
- import tensorflow as tf
4
3
  from keras.models import load_model
5
4
  from pkg_resources import resource_filename
6
5
  from spliceai.utils import one_hot_encode
7
6
  import numpy as np
8
- import tensorflow
7
+ import tensorflow as tf
9
8
 
10
9
  # Check if GPU is available
11
10
  if tf.config.list_physical_devices('GPU'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.2.24
3
+ Version: 1.2.26
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -9,8 +9,8 @@ geney/mutation_utils.py,sha256=C-K8F8wyN5joI3ZuP-d7IMYTI43YPDXUc3IgAJ07o8Q,1546
9
9
  geney/oncosplice.py,sha256=3jJc1-CWubH2ElHEjyQtsr9JYVmfPQEpq7EX-IfY-t8,20806
10
10
  geney/pangolin_utils.py,sha256=S2uMjQnnxqWSnfuMaEjo-wq52DVKFiXt__L5VPdtzyU,2939
11
11
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
12
- geney/seqmat_utils.py,sha256=4MiN6rGeQMfWK6bXOHGddxBffx8v4sT1THkZe-AceXE,15611
13
- geney/spliceai_utils.py,sha256=BiTRIfrovX9qo9xup6bFWp0qkvmW9NVPY98Zw8-OaL0,1891
12
+ geney/seqmat_utils.py,sha256=xFpUPVYrCOubHHsYKxU8ZtqSkKKg7rG6CrSG3If39YY,16804
13
+ geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
14
14
  geney/splicing_utils.py,sha256=pS3jZEpmnDkbT1jjaJh-O6I--Xm22e5dj-GQu7IAZSQ,15943
15
15
  geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
16
16
  geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
@@ -19,7 +19,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
19
19
  geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
20
20
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
21
21
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
22
- geney-1.2.24.dist-info/METADATA,sha256=sJGrKawFcaFyF1QwLOHn7dNYeznt5f2fKL7NDZvqqq8,948
23
- geney-1.2.24.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
- geney-1.2.24.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
- geney-1.2.24.dist-info/RECORD,,
22
+ geney-1.2.26.dist-info/METADATA,sha256=6OhOJDeX1vCw3qo1MbO6pXecr_vBwpwngLG_3fvwGrQ,948
23
+ geney-1.2.26.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
+ geney-1.2.26.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
+ geney-1.2.26.dist-info/RECORD,,
File without changes