geney 1.2.59__py2.py3-none-any.whl → 1.2.61__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/pangolin_utils.py +2 -1
- geney/seqmat_utils.py +9 -2
- geney/splicing_utils.py +38 -1
- {geney-1.2.59.dist-info → geney-1.2.61.dist-info}/METADATA +1 -1
- {geney-1.2.59.dist-info → geney-1.2.61.dist-info}/RECORD +7 -7
- {geney-1.2.59.dist-info → geney-1.2.61.dist-info}/WHEEL +0 -0
- {geney-1.2.59.dist-info → geney-1.2.61.dist-info}/top_level.txt +0 -0
geney/pangolin_utils.py
CHANGED
|
@@ -5,7 +5,7 @@ from pangolin.model import *
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import sys
|
|
7
7
|
|
|
8
|
-
pang_model_nums = [0, 1, 2, 3, 4, 5, 6]
|
|
8
|
+
pang_model_nums = [0, 1, 2, 3, 4, 5, 6, 7]
|
|
9
9
|
pang_models = []
|
|
10
10
|
|
|
11
11
|
device = torch.device('cpu')
|
|
@@ -52,6 +52,7 @@ def pangolin_predict_probs(true_seq, models, just_ss=False):
|
|
|
52
52
|
model_nums = [0, 2, 4, 6]
|
|
53
53
|
else:
|
|
54
54
|
model_nums = [0, 1, 2, 3, 4, 5, 6, 7]
|
|
55
|
+
|
|
55
56
|
INDEX_MAP = {0: 1, 1: 2, 2: 4, 3: 5, 4: 7, 5: 8, 6: 10, 7: 11}
|
|
56
57
|
|
|
57
58
|
seq = true_seq
|
geney/seqmat_utils.py
CHANGED
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
|
3
3
|
import copy
|
|
4
4
|
from Bio.Seq import Seq
|
|
5
5
|
|
|
6
|
-
NT_ALPHABET = ['A', 'T', 'G', 'C']
|
|
6
|
+
NT_ALPHABET = ['A', 'T', 'G', 'C', 'N']
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
'''
|
|
@@ -119,7 +119,6 @@ class SeqMat:
|
|
|
119
119
|
|
|
120
120
|
@property
def seq(self):
    """Return the sequence row of ``seqmat`` rendered as a plain string.

    Row ``ROW_SEQ`` of ``self.seqmat`` is passed through
    ``self.vectorized_map_v2c``, the resulting items are concatenated, and
    every ``'-'`` character is removed from the joined string.
    NOTE(review): ``'-'`` presumably marks gaps/deleted positions -- confirm.
    """
    to_chars = self.vectorized_map_v2c
    sequence_row = self.seqmat[self.ROW_SEQ, :]
    joined = ''.join(to_chars(sequence_row))
    return joined.replace('-', '')
|
|
124
123
|
|
|
125
124
|
@property
|
|
@@ -242,6 +241,14 @@ class Gene:
|
|
|
242
241
|
key = list(self.transcripts.keys())[index]
|
|
243
242
|
return Transcript(self.transcripts[key])
|
|
244
243
|
|
|
244
|
+
def splice_sites(self):
    """Count acceptor and donor entries over all transcripts of this gene.

    Every value in ``self.transcripts`` is indexed with the keys
    ``'acceptors'`` and ``'donors'``; the items found under each key are
    tallied across transcripts.

    Returns:
        A ``(acceptor_counts, donor_counts)`` tuple of ``collections.Counter``
        objects mapping each entry to the number of times it occurs.
    """
    from collections import Counter

    acceptor_counts, donor_counts = Counter(), Counter()
    for record in self.transcripts.values():
        # Counter.update adds one count per item, same as counting a merged list.
        acceptor_counts.update(record['acceptors'])
        donor_counts.update(record['donors'])
    return acceptor_counts, donor_counts
|
|
251
|
+
|
|
245
252
|
def transcript(self, tid=None):
|
|
246
253
|
if tid is None:
|
|
247
254
|
tid = self.primary_transcript
|
geney/splicing_utils.py
CHANGED
|
@@ -173,7 +173,7 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
|
|
|
173
173
|
ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
|
|
174
174
|
var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
|
|
175
175
|
|
|
176
|
-
|
|
176
|
+
print(len(ref_seq))
|
|
177
177
|
|
|
178
178
|
if engine == 'spliceai':
|
|
179
179
|
from .spliceai_utils import sai_predict_probs, sai_models
|
|
@@ -358,3 +358,40 @@ def missplicing(mut_id, splicing_threshold=0.5, primary_transcript=True, organis
|
|
|
358
358
|
|
|
359
359
|
return results
|
|
360
360
|
|
|
361
|
+
|
|
362
|
+
import sqlite3
|
|
363
|
+
import json
|
|
364
|
+
|
|
365
|
+
def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False,
                            *, compute=None, db_path=None):
    """Return the splicing result for a mutation, using an SQLite cache.

    Results are stored as JSON text in a ``mutations`` table keyed by
    ``(tool, gene, mutation_id, transcript_id)``. A cached row is returned
    unless ``force_recompute`` is true; otherwise the result is computed,
    written to the table (replacing any row with the same key), and returned.

    Args:
        tool: First component of the cache key; also passed to ``compute``.
        gene: Second component of the cache key.
        mutation_id: Third component of the cache key; passed to ``compute``.
        transcript_id: Fourth component of the cache key; passed to ``compute``.
        force_recompute: When true, skip the cache lookup and overwrite the
            stored entry.
        compute: Optional callable invoked as
            ``compute(tool, mutation_id, transcript_id)``. Defaults to
            ``find_transcript_missplicing``.
        db_path: Optional path of the SQLite file. Defaults to
            ``mutation_data.db`` inside ``tool_parameters['splicing_db']``.

    Returns:
        The JSON-decoded cached value on a cache hit, otherwise the object
        returned by ``compute`` (which must be JSON-serialisable).
    """
    from contextlib import closing

    if db_path is None:
        db_path = os.path.join(tool_parameters['splicing_db'], 'mutation_data.db')
    if compute is None:
        # NOTE(review): the original call was marked "Replace with actual
        # function"; it passes (tool, mutation_id, transcript_id) positionally.
        # Confirm this matches find_transcript_missplicing's signature.
        compute = find_transcript_missplicing

    key = (tool, gene, mutation_id, transcript_id)

    # closing() guarantees the connection is released on the cache-hit return
    # and when compute() or a query raises.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.execute('''CREATE TABLE IF NOT EXISTS mutations (
                            tool TEXT,
                            gene TEXT,
                            mutation_id TEXT,
                            transcript_id TEXT,
                            data TEXT,
                            PRIMARY KEY (tool, gene, mutation_id, transcript_id)
                        )''')

        if not force_recompute:
            row = conn.execute(
                'SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                key,
            ).fetchone()
            if row:
                return json.loads(row[0])

        computed_data = compute(tool, mutation_id, transcript_id)

        # gene must be written too: without it the row is stored with a NULL
        # gene, the lookup above never matches, and REPLACE never overwrites.
        conn.execute(
            'REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
            key + (json.dumps(computed_data),),
        )
        conn.commit()

    return computed_data
|
|
@@ -7,11 +7,11 @@ geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
|
7
7
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
8
8
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
9
9
|
geney/oncosplice.py,sha256=eWgY2Lcj894UBFnIVhbxiVz5oqASHg-Ot1wFbjlJbI8,21857
|
|
10
|
-
geney/pangolin_utils.py,sha256=
|
|
10
|
+
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
11
11
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
12
|
-
geney/seqmat_utils.py,sha256=
|
|
12
|
+
geney/seqmat_utils.py,sha256=f-4dtufdKHc52jf_xZqbwBu-DXCa2Tl0Wsigguus6-E,18661
|
|
13
13
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
14
|
-
geney/splicing_utils.py,sha256=
|
|
14
|
+
geney/splicing_utils.py,sha256=H4X_J8tlZwJkidL5QVvSXFoQL3DsyY6QhOyHhczSLuQ,18324
|
|
15
15
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
16
16
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
17
17
|
geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
|
|
@@ -20,7 +20,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
20
20
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
21
21
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
22
22
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
23
|
-
geney-1.2.
|
|
24
|
-
geney-1.2.
|
|
25
|
-
geney-1.2.
|
|
26
|
-
geney-1.2.
|
|
23
|
+
geney-1.2.61.dist-info/METADATA,sha256=QaodEhA1cBBwdnPunohDs6niKp2PEHToPX-Cdudd6oY,948
|
|
24
|
+
geney-1.2.61.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
|
|
25
|
+
geney-1.2.61.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
26
|
+
geney-1.2.61.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|