geney 1.2.24__py2.py3-none-any.whl → 1.2.25__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/seqmat_utils.py +38 -9
- geney/spliceai_utils.py +1 -2
- {geney-1.2.24.dist-info → geney-1.2.25.dist-info}/METADATA +1 -1
- {geney-1.2.24.dist-info → geney-1.2.25.dist-info}/RECORD +6 -6
- {geney-1.2.24.dist-info → geney-1.2.25.dist-info}/WHEEL +0 -0
- {geney-1.2.24.dist-info → geney-1.2.25.dist-info}/top_level.txt +0 -0
geney/seqmat_utils.py
CHANGED
|
@@ -140,6 +140,14 @@ class SeqMat:
|
|
|
140
140
|
end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
|
|
141
141
|
return self.seqmat[:, start_pos:end_pos]
|
|
142
142
|
|
|
143
|
+
def subseq_suffix(self, start):
    """Return the columns of ``seqmat`` from ``start`` to the end.

    Args:
        start: Value to look up in the index row (``self.ROW_INDS``) of
            ``self.seqmat``. The first matching column is the first
            column of the returned slice.

    Returns:
        A slice of ``self.seqmat`` containing every row and all columns
        from the matching column onwards.

    Raises:
        IndexError: If ``start`` does not occur in the index row.
    """
    index_row = self.seqmat[self.ROW_INDS]
    matching_columns = np.nonzero(index_row == start)[0]
    first_column = matching_columns[0]
    return self.seqmat[:, first_column:]
|
|
146
|
+
|
|
147
|
+
def subseq_prefix(self, end):
    """Return the columns of ``seqmat`` that precede ``end``.

    Args:
        end: Value to look up in the index row (``self.ROW_INDS``) of
            ``self.seqmat``. The first matching column marks the end of
            the returned slice and is itself excluded.

    Returns:
        A slice of ``self.seqmat`` containing every row and all columns
        before the matching column.

    Raises:
        IndexError: If ``end`` does not occur in the index row.
    """
    # Translate the index-row value into a column offset. Slicing with the
    # raw value (as the previous implementation did) treats a coordinate as
    # if it were a column number and returns the wrong span.
    end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0]
    return self.seqmat[:, :end_pos]
|
|
150
|
+
|
|
143
151
|
def inspect(self, pos, context=500):
    """Return a window of this matrix centred on ``pos``.

    Args:
        pos: Value to look up in row 1 of ``self.seqmat``.
        context: Number of columns to keep on each side of the matching
            column. The window is clipped at both ends of the matrix.

    Returns:
        The result of ``SeqMat().set_seqmat(...)`` applied to the window.

    Raises:
        IndexError: If ``pos`` does not occur in row 1.
    """
    centre = np.where(self.seqmat[1, :] == pos)[0][0]
    n_columns = self.seqmat.shape[-1]
    left = max(0, centre - context)
    right = min(n_columns, centre + context + 1)
    window = self.seqmat[:, left:right]
    return SeqMat().set_seqmat(window)
|
|
@@ -147,6 +155,29 @@ class SeqMat:
|
|
|
147
155
|
def rel_pos(self, pos):
    """Return the column offset of ``pos`` within this matrix.

    Args:
        pos: Value to look up in row 1 of ``self.seqmat``.

    Returns:
        The offset of the first column whose row-1 value equals ``pos``.

    Raises:
        IndexError: If ``pos`` does not occur in row 1.
    """
    index_row = self.seqmat[1, :]
    matching_columns = np.where(index_row == pos)[0]
    return matching_columns[0]
|
|
149
157
|
|
|
158
|
+
def orf_seqmat(self, tis_index):
    """Extract the open reading frame that starts at ``tis_index``.

    The frame runs from the column whose row-1 value equals ``tis_index``
    up to, but not including, the first in-frame stop codon (``TAA``,
    ``TAG`` or ``TGA``). Trailing nucleotides that do not fill a complete
    codon are dropped before scanning.

    Args:
        tis_index: Row-1 value of the first nucleotide of the start codon.

    Returns:
        A ``SeqMat`` holding the reading frame without its stop codon. If
        no in-frame stop codon exists, everything from the start codon
        onwards (trimmed to whole codons) is returned. ``SeqMat('ATG')``
        is returned as a placeholder when ``tis_index`` is not present in
        this matrix or the sequence at that position does not begin with
        ``ATG``.
    """
    stop_codons = ('TAA', 'TAG', 'TGA')

    if tis_index not in self.seqmat[1, :]:
        return SeqMat('ATG')

    temp = SeqMat().set_seqmat(self.subseq_suffix(tis_index))

    # Ensure the sequence length is divisible by 3.
    overhang = len(temp.seq) % 3
    if overhang:
        temp.seqmat = temp.seqmat[:, :-overhang]  # Trim the extra nucleotides

    # NOTE(review): assumes ``seq`` reflects the trimmed ``seqmat`` and that
    # character k of ``seq`` corresponds to column k of ``seqmat`` -- the
    # original code relied on the same correspondence. TODO confirm.
    seq = temp.seq
    if seq[:3] != 'ATG':
        return SeqMat('ATG')

    for i in range(3, len(seq), 3):
        if seq[i:i + 3] in stop_codons:
            # Column i holds the first base of the stop codon, so slicing
            # up to i keeps every coding codon and drops the stop codon.
            # The previous version converted offset i - 3 to a coordinate
            # and delegated to subseq_prefix, which did not end the frame
            # at the stop codon.
            return SeqMat().set_seqmat(temp.seqmat[:, :i])

    # No stop codon found: return the full (codon-trimmed) frame. The matrix
    # is passed, as at every other set_seqmat call site, not the string.
    return SeqMat().set_seqmat(temp.seqmat)
|
|
150
181
|
|
|
151
182
|
class Gene:
|
|
152
183
|
def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
|
|
@@ -371,22 +402,20 @@ class Transcript:
|
|
|
371
402
|
|
|
372
403
|
return mature_mrna
|
|
373
404
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
# first_stop_index -= 1
|
|
379
|
-
#
|
|
380
|
-
# orf = orf[:first_stop_index + 3]
|
|
381
|
-
# return None
|
|
405
|
+
def find_end_codon(self, orf):
    """Return the offset of the first in-frame stop codon in ``orf``.

    Codons are read in steps of three starting at offset 0.

    Args:
        orf: Sequence to scan; sliced three characters at a time.

    Returns:
        The offset of the first codon equal to ``TAG``, ``TAA`` or ``TGA``.
        When no such codon is found, ``len(orf) - 3`` is returned.
    """
    stop_codons = {"TAG", "TAA", "TGA"}
    for offset in range(0, len(orf) - 2, 3):
        if orf[offset:offset + 3] in stop_codons:
            return offset
    # Fallback when the scan finds nothing.
    return len(orf) - 3
|
|
382
409
|
|
|
383
410
|
@property
def orf(self):
    """Open reading frame of the mature mRNA, starting at ``self.TIS``.

    Returns:
        The result of ``self.mature_mrna.orf_seqmat(self.TIS)`` when both
        ``TIS`` and ``TTS`` attributes are set. ``TTS`` must exist but is
        not used to delimit the frame. When either attribute is missing a
        message is printed and ``self`` is returned unchanged.
    """
    if hasattr(self, 'TIS') and hasattr(self, 'TTS'):
        # If self.TIS not in seqmat, then no orf and no protein
        return self.mature_mrna.orf_seqmat(self.TIS)
        # return SeqMat().set_seqmat(self.mature_mrna.raw_subseq(self.TIS, self.TTS))

    print("Cannot create protein without set TIS and TTS values.")
    return self
|
|
390
419
|
|
|
391
420
|
def generate_protein(self, inplace=True, domains=None):
|
|
392
421
|
protein = str(Seq(self.orf.seq).translate()).replace('*', '')
|
geney/spliceai_utils.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
|
|
2
2
|
#### SpliceAI Modules
|
|
3
|
-
import tensorflow as tf
|
|
4
3
|
from keras.models import load_model
|
|
5
4
|
from pkg_resources import resource_filename
|
|
6
5
|
from spliceai.utils import one_hot_encode
|
|
7
6
|
import numpy as np
|
|
8
|
-
import tensorflow
|
|
7
|
+
import tensorflow as tf
|
|
9
8
|
|
|
10
9
|
# Check if GPU is available
|
|
11
10
|
if tf.config.list_physical_devices('GPU'):
|
|
@@ -9,8 +9,8 @@ geney/mutation_utils.py,sha256=C-K8F8wyN5joI3ZuP-d7IMYTI43YPDXUc3IgAJ07o8Q,1546
|
|
|
9
9
|
geney/oncosplice.py,sha256=3jJc1-CWubH2ElHEjyQtsr9JYVmfPQEpq7EX-IfY-t8,20806
|
|
10
10
|
geney/pangolin_utils.py,sha256=S2uMjQnnxqWSnfuMaEjo-wq52DVKFiXt__L5VPdtzyU,2939
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
|
-
geney/seqmat_utils.py,sha256=
|
|
13
|
-
geney/spliceai_utils.py,sha256=
|
|
12
|
+
geney/seqmat_utils.py,sha256=JZk-dY7SjHESK1NaXfWuDpZGLJkeT_03ZfZS6gHBEr0,16796
|
|
13
|
+
geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
|
|
14
14
|
geney/splicing_utils.py,sha256=pS3jZEpmnDkbT1jjaJh-O6I--Xm22e5dj-GQu7IAZSQ,15943
|
|
15
15
|
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
16
16
|
geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
|
|
@@ -19,7 +19,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
19
19
|
geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
|
|
20
20
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
21
21
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
22
|
-
geney-1.2.
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
22
|
+
geney-1.2.25.dist-info/METADATA,sha256=RUiiqgqgJ0Qr9_iJvEds3xzIrFhPiW9qd-DBkdb2SxI,948
|
|
23
|
+
geney-1.2.25.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
24
|
+
geney-1.2.25.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
25
|
+
geney-1.2.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|