geney 1.3.43__py2.py3-none-any.whl → 1.3.45__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/SeqMats.py +6 -2
- geney/oncosplice.py +22 -1
- geney/splicing_utils.py +1 -1
- geney/tis_utils.py +10 -1
- {geney-1.3.43.dist-info → geney-1.3.45.dist-info}/METADATA +1 -1
- {geney-1.3.43.dist-info → geney-1.3.45.dist-info}/RECORD +8 -8
- {geney-1.3.43.dist-info → geney-1.3.45.dist-info}/WHEEL +0 -0
- {geney-1.3.43.dist-info → geney-1.3.45.dist-info}/top_level.txt +0 -0
geney/SeqMats.py
CHANGED
|
@@ -311,8 +311,8 @@ class SeqMat:
|
|
|
311
311
|
### NEEDS some work to make sure that mutations can continue being added without issue...
|
|
312
312
|
|
|
313
313
|
# Ensure strand compatibility
|
|
314
|
-
if not self._is_same_strand(mut):
|
|
315
|
-
|
|
314
|
+
# if not self._is_same_strand(mut):
|
|
315
|
+
# raise ValueError("Mutation and sequence are not on the same strand.")
|
|
316
316
|
|
|
317
317
|
# something to make sure the mutation is contained as one deletion, insertion, or snp or indel
|
|
318
318
|
ref_seqmat = self.seqmat.copy()
|
|
@@ -351,6 +351,9 @@ class SeqMat:
|
|
|
351
351
|
return SeqMat(ref_seqmat)
|
|
352
352
|
|
|
353
353
|
def orf_seqmat(self, tis_index):
|
|
354
|
+
if tis_index not in self.indices:
|
|
355
|
+
return SeqMat.from_seq({'seq': ''})
|
|
356
|
+
|
|
354
357
|
temp = self.seqmat[:, self._rel_index(tis_index):]
|
|
355
358
|
temp = temp[:, temp[0, :] != 5]
|
|
356
359
|
temp = SeqMat(temp) # .drop_indices()
|
|
@@ -362,6 +365,7 @@ class SeqMat:
|
|
|
362
365
|
else:
|
|
363
366
|
stop_index = len(raw_seq)
|
|
364
367
|
end_index = stop_index
|
|
368
|
+
assert end_index % 3 == 0, f"{end_index} is not a multiple of 3"
|
|
365
369
|
return SeqMat(temp.seqmat[:, :end_index])
|
|
366
370
|
|
|
367
371
|
def translate(self, tis_index):
|
geney/oncosplice.py
CHANGED
|
@@ -305,13 +305,18 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
|
|
|
305
305
|
|
|
306
306
|
|
|
307
307
|
def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
|
|
308
|
-
window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False
|
|
308
|
+
window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False,
|
|
309
|
+
tis_engine=None, tis_db=None):
|
|
310
|
+
|
|
309
311
|
gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
|
|
310
312
|
reference_gene_proteins = {
|
|
311
313
|
transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
|
|
312
314
|
transcript in gene if transcript.transcript_biotype == 'protein_coding'}
|
|
313
315
|
|
|
314
316
|
mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
|
|
317
|
+
if gene.rev:
|
|
318
|
+
mutations = [m.reverse_complement(inplace=True) for m in mutations[::-1]]
|
|
319
|
+
|
|
315
320
|
results = []
|
|
316
321
|
for reference_transcript in tqdm(gene, desc=f'Processing {mut_id}...'):
|
|
317
322
|
if (cons_required and not reference_transcript.cons_available) or (
|
|
@@ -325,6 +330,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
325
330
|
center = np.mean([m.indices[0] for m in current_mutations]) // 1
|
|
326
331
|
|
|
327
332
|
mutated_transcript = reference_transcript.clone()
|
|
333
|
+
|
|
328
334
|
for mutation in current_mutations:
|
|
329
335
|
mutated_transcript.mutate(mutation, inplace=True)
|
|
330
336
|
|
|
@@ -361,6 +367,18 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
361
367
|
mutated_transcript.donors = new_boundaries['donors']
|
|
362
368
|
mutated_transcript.generate_mature_mrna().generate_protein()
|
|
363
369
|
|
|
370
|
+
### Experimental
|
|
371
|
+
# mutated_transcript.generate_mature_mrna()
|
|
372
|
+
# if tis_engine is None:
|
|
373
|
+
# tis_candidates = [(mutated_transcript.tis, 1)]
|
|
374
|
+
# else:
|
|
375
|
+
# from tis_utils import tis_predictor
|
|
376
|
+
# tis_candidates = tis_predictor(mutated_transcript.mature_mrna)
|
|
377
|
+
#
|
|
378
|
+
# for tis_candidate, tis_score in tis_candidates:
|
|
379
|
+
# mutated_transcript.generate_protein(tis_candidate)
|
|
380
|
+
######
|
|
381
|
+
|
|
364
382
|
alignment = get_logical_alignment(reference_transcript.protein, mutated_transcript.protein)
|
|
365
383
|
deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
|
|
366
384
|
modified_positions = find_modified_positions(len(reference_transcript.protein), deleted, inserted)
|
|
@@ -375,6 +393,9 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
|
|
|
375
393
|
report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
|
|
376
394
|
report['mut_id'] = mut_id
|
|
377
395
|
report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
|
|
396
|
+
# report['tis_engine'] = tis_engine if tis_engine is not None else 'None'
|
|
397
|
+
# report['tis_pos'] = tis_candidate
|
|
398
|
+
# report['tis_score'] = tis_score
|
|
378
399
|
report['oncosplice_score'] = affected_cons_scores
|
|
379
400
|
report['percentile'] = percentile
|
|
380
401
|
report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
|
geney/splicing_utils.py
CHANGED
|
@@ -324,7 +324,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
|
|
|
324
324
|
|
|
325
325
|
def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
|
|
326
326
|
if ref_seq.seq == var_seq.seq:
|
|
327
|
-
return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
|
|
327
|
+
return Missplicing({'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}})
|
|
328
328
|
|
|
329
329
|
ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq.seq, engine)
|
|
330
330
|
mut_seq_acceptor_probs, mut_seq_donor_probs = run_splicing_engine(var_seq.seq, engine)
|
geney/tis_utils.py
CHANGED
|
@@ -9,7 +9,16 @@ from geney import config
|
|
|
9
9
|
p = PairwiseAligner()
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def find_tis(
|
|
12
|
+
def find_tis(reference_mrna, mutated_mrna, ref_tis_pos, left_context=100, right_context=102):
|
|
13
|
+
'''
|
|
14
|
+
mature_mrna: row 0 --> encoded nucleotides
|
|
15
|
+
row 1 --> genomic indices
|
|
16
|
+
row 2 --> super positions (in case of insertions or deletions)
|
|
17
|
+
row1+row2 = cohesive & monotonic genomic indices
|
|
18
|
+
row 3 --> binary mutated position or not
|
|
19
|
+
mature_mrna.seq
|
|
20
|
+
mature_mrna.indices
|
|
21
|
+
'''
|
|
13
22
|
tis_coords = ref_seq.mature_mrna.asymmetric_indices(ref_seq.TIS, left_context=0, right_context=3)
|
|
14
23
|
ref_seq, mut_seq = ref_seq.mature_mrna, mut_seq.mature_mrna
|
|
15
24
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
|
|
2
2
|
geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
|
|
3
|
-
geney/SeqMats.py,sha256=
|
|
3
|
+
geney/SeqMats.py,sha256=hQcEYTcFm06g4dGJf25Lvo_xCHsj0-GGhP-O2fPrBlE,18987
|
|
4
4
|
geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
|
|
5
5
|
geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
|
|
6
6
|
geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
|
|
@@ -11,21 +11,21 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
|
|
|
11
11
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
12
12
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
13
13
|
geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
|
|
14
|
-
geney/oncosplice.py,sha256=
|
|
14
|
+
geney/oncosplice.py,sha256=q1W8k4nvRzQPH7LZsefTky6Nw2Kmx-DDXZ_UCty0Wog,24299
|
|
15
15
|
geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
|
|
16
16
|
geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
|
|
17
17
|
geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
|
|
18
18
|
geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
|
|
19
|
-
geney/splicing_utils.py,sha256=
|
|
19
|
+
geney/splicing_utils.py,sha256=WflxRPfc4DzeHuYOZqjpa-YD1nuZzs7h_WCsv-LX87A,38498
|
|
20
20
|
geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
|
|
21
21
|
geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
|
|
22
|
-
geney/tis_utils.py,sha256=
|
|
22
|
+
geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
|
|
23
23
|
geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
|
|
24
24
|
geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
|
|
26
26
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
27
27
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
28
|
-
geney-1.3.
|
|
29
|
-
geney-1.3.
|
|
30
|
-
geney-1.3.
|
|
31
|
-
geney-1.3.
|
|
28
|
+
geney-1.3.45.dist-info/METADATA,sha256=5YI3G03swzoNav06ijsq6XG8aLMzWlSCEZnY7Y4b3MM,990
|
|
29
|
+
geney-1.3.45.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
30
|
+
geney-1.3.45.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
31
|
+
geney-1.3.45.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|