geney 1.3.2__py2.py3-none-any.whl → 1.3.4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/oncosplice.py +109 -105
- geney/splicing_utils.py +317 -184
- {geney-1.3.2.dist-info → geney-1.3.4.dist-info}/METADATA +1 -1
- {geney-1.3.2.dist-info → geney-1.3.4.dist-info}/RECORD +6 -6
- {geney-1.3.2.dist-info → geney-1.3.4.dist-info}/WHEEL +0 -0
- {geney-1.3.2.dist-info → geney-1.3.4.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
@@ -6,6 +6,10 @@ import pandas as pd
 import numpy as np
 from .SeqMats import SeqMat, MutSeqMat
 from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
+from .Gene import Gene
+import copy
+from . import config
+
 from .tis_utils import find_tis

 def short_hash_of_list(numbers, length=5):
@@ -301,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri


 def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
-               window_length=13, organism='hg38', engine='spliceai'
+               window_length=13, organism='hg38', engine='spliceai'):
     gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
     reference_gene_proteins = {
         transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
@@ -374,110 +378,110 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
         'mutation_distance_from_3', 'engine', 'reference_resemblance', 'oncosplice_score', 'percentile',
         'isoform_prevalence', 'reference_protein', 'variant_protein']]

-
-import asyncio
-async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
- ... (old lines 380-480 not rendered in this diff view)
+#
+# import asyncio
+# async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
+#                                window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
+#     import sys, os
+#     needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
+#     needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
+#
+#     if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.R_OK)) and (
+#             needed_file2.is_file() and os.access(needed_file2, os.R_OK)):
+#         sys.path.append(str(config[organism]['yoram_path']))
+#         import uniprot_utils as uput
+#
+#     else:
+#         raise SystemError(
+#             "Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
+#
+#     from .tis_utils import find_tis
+#
+#     # Define async functions
+#     async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
+#         return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
+#
+#     def inspect_domain(row, modified_vector, conservation_vector):
+#         v1, v2 = modified_vector[row.start:row.end], conservation_vector[row.start:row.end]
+#         if sum(v2) == 0:
+#             return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', 0],
+#                              index=['domain_identifier', 'score'])
+#
+#         return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', sum(v1 * v2) / sum(v2)],
+#                          index=['domain_identifier', 'score'])
+#
+#     gene = Gene(mut_id.split(':')[0], organism=organism)
+#     reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
+#     mutations = [get_mutation(mut_id, rev=gene.rev) for mut_id in mut_id.split('|')]
+#     results = []
+#     for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
+#         if require_cons and not transcript.cons_available:
+#             continue
+#
+#         if all(mutation not in transcript for mutation in mutations):
+#             # results.append({'transcript_id': transcript.transcript_id})
+#             continue
+#
+#         task1 = asyncio.create_task(background_request(tid))
+#         transcript.generate_pre_mrna()
+#         transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
+#         transcript.generate_mature_mrna().generate_protein(inplace=True)
+#         ref_protein, cons_vector = transcript.protein, transcript.cons_vector
+#
+#         if not use_cons:
+#             cons_vector = np.ones(len(ref_protein))
+#
+#         if sum(cons_vector) == 0:
+#             cons_vector = np.ones(len(ref_protein)) #/len(ref_protein)
+#
+#         reference_transcript = copy.deepcopy(transcript)
+#
+#         assert len(ref_protein) == len(
+#             cons_vector), f"Protein ({len(ref_protein)}) and conservation vector ({len(cons_vector)} must be same length."
+#
+#         missplicing = Missplicing(find_transcript_missplicing(transcript, mutations, engine=engine, threshold=splicing_threshold),
+#                                   threshold=splicing_threshold)
+#         for mutation in mutations:
+#             transcript.pre_mrna += mutation
+#
+#         domains_df = await task1
+#         for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
+#             transcript.acceptors = new_boundaries['acceptors']
+#             transcript.donors = new_boundaries['donors']
+#             transcript.generate_mature_mrna()
+#             transcript.TIS = find_tis(ref_seq=reference_transcript, mut_seq=transcript)
+#             transcript.generate_protein()
+#
+#             alignment = get_logical_alignment(reference_transcript.protein, transcript.protein)
+#             deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
+#             modified_positions = find_modified_positions(len(ref_protein), deleted, inserted)
+#             temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
+#             affected_cons_scores = max(temp_cons)
+#             percentile = (
+#                 sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
+#                     cons_vector))
+#
+#             out = domains_df.apply(lambda row: inspect_domain(row, modified_positions, cons_vector), axis=1)
+#             domains_affected = '+'.join([f'{a}:{round(b, 3)}' for a, b in list(zip(out.domain_identifier, out.score))])
+#
+#             report = OncospliceAnnotator(reference_transcript, transcript, mutation)
+#             report['mut_id'] = mut_id
+#             report['oncosplice_score'] = affected_cons_scores
+#             report['cons_available'] = transcript.cons_available
+#             report['transcript_id'] = transcript.transcript_id
+#             report['percentile'] = percentile
+#             report['isoform_id'] = i
+#             report['isoform_prevalence'] = new_boundaries['path_weight']
+#             report['full_missplicing'] = missplicing.aberrant_splicing
+#             report['missplicing'] = max(missplicing)
+#             report['domains'] = domains_affected
+#             report['max_domain_score'] = out.score.max()
+#
+#             report['reference_resemblance'] = reference_gene_proteins.get(transcript.protein, None)
+#             results.append(pd.Series(report))
+#
+#     report = pd.concat(results, axis=1).T
+#     return report


 if __name__ == '__main__':
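Note on the oncosplice.py changes above: 1.3.4 restores the closing "):" on the `oncosplice` signature (the 1.3.2 line ended at `engine='spliceai'`, leaving the def unterminated) and moves the async `oncosplice_prototype` experiment into comments. Below is a minimal usage sketch of the public entry point; the mutation identifier is hypothetical (the diff only shows that the gene name precedes the first ':' and that multiple variants are joined with '|'), and the call assumes the local annotation files that `Gene.from_file` expects are available:

    from geney.oncosplice import oncosplice

    # Hypothetical mut_id; geney defines the real format elsewhere.
    mut_id = 'KRAS:12:25245350:C:T'
    report = oncosplice(mut_id, splicing_threshold=0.5, window_length=13,
                        organism='hg38', engine='spliceai')
    # Columns named in the diff's selection list:
    print(report[['oncosplice_score', 'percentile', 'isoform_prevalence']].head())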
geney/splicing_utils.py
CHANGED
@@ -1,7 +1,6 @@
 import numpy as np
-from .
-from .
-
+from .Gene import Gene
+from .SeqMats import MutSeqMat
 from collections import defaultdict

@@ -145,75 +144,185 @@ def find_ss_changes(ref_dct, mut_dct, known_splice_sites, threshold=0.5):
     return discovered_pos, deleted_pos


- ... (old lines 148-162 not rendered in this diff view)
-    if len(mutations) == 0:
-        return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
+from typing import Tuple, Dict
+
+def run_splicing_engine(seq, engine='spliceai'):
+    match engine:
+        case 'spliceai':
+            from .spliceai_utils import sai_predict_probs, sai_models
+            donor_probs, acceptor_probs = sai_predict_probs(seq, models=sai_models)
+
+        case 'pangolin':
+            from .pangolin_utils import pangolin_predict_probs, pang_models
+            donor_probs, acceptor_probs = pangolin_predict_probs(seq, models=pang_models)
+
+        case _:
+            raise ValueError(f"{engine} not implemented")
+    return donor_probs, acceptor_probs

-    var = reduce(lambda acc, mutation: acc + mutation, mutations, ref)
-    center = int(np.mean([mutation.position for mutation in mutations]) // 1)

- ... (old lines 169-173 not rendered in this diff view)
+def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict[int, float], Dict[int, float]]:
+    """
+    Predict splice site probabilities for a given transcript using the specified engine.
+    This function uses a padding of 5000 'N's on each side of the transcript sequence
+    to align with the model's required context length.
+
+    Args:
+        transcript: An object representing a transcript, expected to have:
+            - an `indices` attribute that returns a sequence of positions.
+            - a `seq` attribute that returns the sequence string.
+        engine (str): The prediction engine to use. Supported: 'spliceai', 'pangolin'.
+
+    Returns:
+        (donor_probs, acceptor_probs) as two dictionaries keyed by position with probability values.
+
+    Raises:
+        ValueError: If an unsupported engine is provided.
+        AssertionError: If the length of predicted probabilities does not match the length of indices.
+    """
+    # Prepare reference sequence with padding
+    ref_indices = transcript.indices
+    ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
+    ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq, engine)
+
+    # Verify lengths
+    assert len(ref_seq_donor_probs) == len(ref_indices), (
+        f"Donor probabilities length ({len(ref_seq_donor_probs)}) does not match "
+        f"indices length ({len(ref_indices)})."
+    )
+    assert len(ref_seq_acceptor_probs) == len(ref_indices), (
+        f"Acceptor probabilities length ({len(ref_seq_acceptor_probs)}) does not match "
+        f"indices length ({len(ref_indices)})."
+    )
+
+    # Create dictionaries and sort them by probability in descending order
+    donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
+                       key=lambda item: item[1], reverse=True)
+    acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
+                          key=lambda item: item[1], reverse=True)
+
+    return donor_probs, acceptor_probs

-    length = var.seqmat.shape[-1]
-    center_index = var.rel_pos(center)
-    if center_index is None:
-        raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
-    var_start_pad = max(0, total_context - center_index)
-    var_end_pad = max(0, total_context - (length - center_index))

- ... (old lines 182-183 not rendered in this diff view)
+def find_transcript_missplicing(mut_id, transcript='primary', threshold=0.5, engine='spliceai', organism='hg38'):
+    gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
+    reference_transcript = gene.transcript(transcript) if transcript is not None else gene.transcript()
+    variant_transcript = reference_transcript.clone()
+    mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
+    mutations = [m for m in mutations if m in reference_transcript]
+    if len(mutations) == 0:
+        return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}

- ... (old lines 185-186 not rendered in this diff view)
+    center = np.mean([m.indices[0] for m in mutations]) // 1
+    for mutation in mutations:
+        variant_transcript.mutate(mutation, inplace=True)

- ...
-    mut_indices = mut_indices[context:-context]
+    return find_transcript_missplicing_seqs(reference_transcript.get_context(center, 7500), variant_transcript.get_context(center, 7500), reference_transcript.donors, reference_transcript.acceptors, threshold=threshold, engine=engine)

-    ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
-    var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad

- ... (old lines 194-197 not rendered in this diff view)
+    # from functools import reduce
+    # ref = transcript.pre_mrna
+    # mutations = [mutation for mutation in mutations if mutation.position in ref.indices]
+    # if len(mutations) == 0:
+    #     return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
+    #
+    # var = reduce(lambda acc, mutation: acc + mutation, mutations, ref)
+    # center = int(np.mean([mutation.position for mutation in mutations]) // 1)
+    #
+    # total_context = context + window
+    # length = ref.seqmat.shape[-1]
+    # center_index = ref.rel_pos(center)
+    # ref_start_pad = max(0, total_context - center_index)
+    # ref_end_pad = max(0, total_context - (length - center_index))
+    #
+    # length = var.seqmat.shape[-1]
+    # center_index = var.rel_pos(center)
+    # if center_index is None:
+    #     raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
+    #
+    # var_start_pad = max(0, total_context - center_index)
+    # var_end_pad = max(0, total_context - (length - center_index))
+    #
+    # ref = ref.inspect(center, context=total_context)
+    # var = var.inspect(center, context=total_context)
+    #
+    # ref_indices = np.concatenate([np.zeros(ref_start_pad), ref.indices, np.zeros(ref_end_pad)])
+    # mut_indices = np.concatenate([np.zeros(var_start_pad), var.indices, np.zeros(var_end_pad)])
+    #
+    # ref_indices = ref_indices[context:-context]
+    # mut_indices = mut_indices[context:-context]
+    #
+    # ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
+    # var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
+    #
+    # if engine == 'spliceai':
+    #     from .spliceai_utils import sai_predict_probs, sai_models
+    #     ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, models=sai_models)
+    #     mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq, models=sai_models)
+    #
+    # elif engine == 'pangolin':
+    #     from .pangolin_utils import pangolin_predict_probs, pang_models
+    #     ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models, just_ss=just_ss)
+    #     mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq, models=pang_models, just_ss=just_ss)
+    #
+    # else:
+    #     raise ValueError(f"{engine} not implemented")
+    #
+    # visible_donors = np.intersect1d(transcript.donors, ref_indices)
+    # visible_acceptors = np.intersect1d(transcript.acceptors, ref_indices)
+    #
+    # assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
+    # assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
+    #
+    # iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
+    #                            {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
+    #                            visible_acceptors,
+    #                            threshold=threshold)
+    #
+    # assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
+    # assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
+    #
+    # idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
+    #                            {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
+    #                            visible_donors,
+    #                            threshold=threshold)
+    #
+    # ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
+    # ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
+    #
+    # lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
+    #                   visible_acceptors if p not in mut_indices and p not in dap}
+    # lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
+    #                if p not in mut_indices and p not in ddp}
+    # dap.update(lost_acceptors)
+    # ddp.update(lost_donors)
+    #
+    # missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
+    # missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
+    # temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
+    return temp

-    elif engine == 'pangolin':
-        from .pangolin_utils import pangolin_predict_probs, pang_models
-        ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models, just_ss=just_ss)
-        mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq, models=pang_models, just_ss=just_ss)

- ... (old lines 204-205 not rendered in this diff view)
+def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
+    if ref_seq.seq == var_seq.seq:
+        return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}

- ... (old lines 207-208 not rendered in this diff view)
+    ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq.seq, engine)
+    mut_seq_acceptor_probs, mut_seq_donor_probs = run_splicing_engine(var_seq.seq, engine)
+    ref_indices = ref_seq.indices[5000:-5000]
+    mut_indices = var_seq.indices[5000:-5000]
+    visible_donors = np.intersect1d(donors, ref_indices)
+    visible_acceptors = np.intersect1d(acceptors, ref_indices)

-    assert len(ref_indices) == len(
- ...
+    assert len(ref_indices) == len(
+        ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
+    assert len(mut_indices) == len(
+        mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'

     iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
                                {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
                                visible_acceptors,
-                               threshold=
+                               threshold=0.1)

     assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
     assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
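Two API notes on the hunk above: the engine dispatch now lives in `run_splicing_engine`, whose `match`/`case` statement requires Python 3.10 or newer, and the public `find_transcript_missplicing` signature changes from `(transcript, mutations, context=5000, window=2500, ...)` to `(mut_id, transcript='primary', threshold=0.5, engine='spliceai', organism='hg38')`. The snippet below is a self-contained sketch of the same dispatch pattern using invented stub predictors (the real ones come from `geney.spliceai_utils` and `geney.pangolin_utils`):

    def _stub_predict(seq):
        # Stand-in predictor: one donor and one acceptor probability per base.
        return [0.0] * len(seq), [0.0] * len(seq)

    def run_engine_sketch(seq, engine='spliceai'):
        # Same shape as run_splicing_engine: one case per engine, error otherwise.
        match engine:
            case 'spliceai':
                donor_probs, acceptor_probs = _stub_predict(seq)
            case 'pangolin':
                donor_probs, acceptor_probs = _stub_predict(seq)
            case _:
                raise ValueError(f"{engine} not implemented")
        return donor_probs, acceptor_probs

    donors, acceptors = run_engine_sketch('N' * 5000 + 'ACGT' + 'N' * 5000)
    print(len(donors), len(acceptors))  # both equal the padded sequence length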
@@ -221,113 +330,147 @@ def find_transcript_missplicing(transcript, mutations, context=5000, window=2500
     idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
                                {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
                                visible_donors,
-                               threshold=
+                               threshold=0.1)

     ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
     ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}

     lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
                       visible_acceptors if p not in mut_indices and p not in dap}
-    lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in
+    lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in
+                   visible_donors
                    if p not in mut_indices and p not in ddp}
     dap.update(lost_acceptors)
     ddp.update(lost_donors)

-    missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap,
+    missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap,
+                   'discovered_donors': idp}
     missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
-    temp =
- ...
+    temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in
+            missplicing.items()}
+    return Missplicing(temp, threshold=threshold)


 class Missplicing:
     def __init__(self, splicing_dict, threshold=0.5):
+        """
+        Initialize a Missplicing object.
+
+        Args:
+            splicing_dict (dict): Dictionary containing splicing events and their details.
+                Example:
+                    {
+                        "missed_acceptors": {100: {"absolute": 0.0, "delta": -0.3}, ...},
+                        "missed_donors": { ... },
+                        "discovered_acceptors": { ... },
+                        "discovered_donors": { ... }
+                    }
+            threshold (float): The threshold above which a delta is considered significant.
+        """
         self.missplicing = splicing_dict
         self.threshold = threshold

-    # def __repr__(self):
-    #     return f'Missplicing({self.modification.mut_id}) --> {self.missplicing}'
- ...
     def __str__(self):
- ...
+        """String representation displays the filtered splicing events passing the threshold."""
+        return str(self.significant_events)

     def __bool__(self):
- ... (old lines 254-256 not rendered in this diff view)
+        """
+        Boolean evaluation: True if any event surpasses the threshold, False otherwise.
+        """
+        return self.first_significant_event() is not None

     def __iter__(self):
- ... (old lines 259-266 not rendered in this diff view)
-        # return not flag
+        """
+        Iterate over all delta values from all events. The first yielded value is 0 (for compatibility),
+        followed by all deltas in self.missplicing.
+        """
+        yield 0
+        for details in self.missplicing.values():
+            for d in details.values():
+                yield d['delta']

     @property
-    def
- ...
+    def significant_events(self):
+        """
+        Returns a filtered version of missplicing events that meet or exceed the current threshold.
+        """
+        return self.filter_by_threshold(self.threshold)
+
+    def filter_by_threshold(self, threshold=None):
+        """
+        Filter self.missplicing to only include events where abs(delta) >= threshold.
+
+        Args:
+            threshold (float, optional): The threshold to apply. Defaults to self.threshold.
+
+        Returns:
+            dict: A new dictionary with filtered events.
+        """
+        if threshold is None:
+            threshold = self.threshold

- ... (old lines 273-275 not rendered in this diff view)
+        return {
+            event: {
+                pos: detail for pos, detail in details.items()
+                if abs(detail['delta']) >= threshold
+            }
+            for event, details in self.missplicing.items()
+        }
+
+    def first_significant_event(self, splicing_dict=None, threshold=None):
+        """
+        Check if there is any event surpassing a given threshold and return the dictionary if found.
+
+        Args:
+            splicing_dict (dict, optional): Dictionary to check. Defaults to self.missplicing.
+            threshold (float, optional): Threshold to apply. Defaults to self.threshold.
+
+        Returns:
+            dict or None: Returns the dictionary if a delta surpasses the threshold, otherwise None.
+        """
+        if splicing_dict is None:
+            splicing_dict = self.missplicing
+        if threshold is None:
             threshold = self.threshold

- ...
-        for
- ...
-            for e, d in details.items():
-                if abs(d['delta']) >= threshold:
-                    in_dict[e] = d
-            # return splicing_dict
-            new_dict[event] = in_dict
-        return new_dict
- ...
-    def apply_sai_threshold_alt(self, splicing_dict=None, threshold=None):
-        splicing_dict = self.missplicing if not splicing_dict else splicing_dict
-        threshold = self.threshold if not threshold else threshold
-        for event, details in splicing_dict.items():
-            for e, d in details.items():
-                if abs(d['delta']) >= threshold:
-                    return splicing_dict
+        # Check if any event meets the threshold
+        if any(abs(detail['delta']) >= threshold for details in splicing_dict.values() for detail in details.values()):
+            return splicing_dict
         return None

-    def
- ... (old lines 298-328 not rendered in this diff view)
-    acceptor_probs = dict(sorted(acceptor_probs.items(), key=lambda item: item[1], reverse=True))
-    return donor_probs, acceptor_probs
+    def max_delta(self):
+        """
+        Returns the maximum absolute delta found in all events.
+
+        Returns:
+            float: The maximum absolute delta, or 0 if no events.
+        """
+        deltas = [detail['delta'] for details in self.missplicing.values() for detail in details.values()]
+        return max(deltas, key=abs, default=0.0)
+
+
+# def find_transcript_splicing(transcript, engine='spliceai'):
+#     ref_indices = transcript.indices
+#     ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
+#     if engine == 'spliceai':
+#         from .spliceai_utils import sai_predict_probs, sai_models
+#         ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, sai_models)
+#
+#     elif engine == 'pangolin':
+#         from .pangolin_utils import pangolin_predict_probs, pang_models
+#         ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models)
+#
+#     else:
+#         raise ValueError(f"{engine} not implemented")
+#
+#     assert len(ref_seq_donor_probs) == len(ref_indices), f'{len(ref_seq_donor_probs)} vs. {len(ref_indices)}'
+#     donor_probs = {i: p for i, p in list(zip(ref_indices, ref_seq_donor_probs))}
+#     donor_probs = dict(sorted(donor_probs.items(), key=lambda item: item[1], reverse=True))
+#
+#     acceptor_probs = {i: p for i, p in list(zip(ref_indices, ref_seq_acceptor_probs))}
+#     acceptor_probs = dict(sorted(acceptor_probs.items(), key=lambda item: item[1], reverse=True))
+#     return donor_probs, acceptor_probs


 def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
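The expanded `Missplicing` class above gains a `significant_events` property plus `filter_by_threshold`, `first_significant_event`, and `max_delta`. A short usage sketch, assuming geney is installed, built from the toy event dictionary in the class docstring:

    from geney.splicing_utils import Missplicing

    events = {
        "missed_acceptors": {100: {"absolute": 0.0, "delta": -0.3}},
        "missed_donors": {},
        "discovered_acceptors": {},
        "discovered_donors": {},
    }

    ms = Missplicing(events, threshold=0.25)
    print(bool(ms))                     # True: |-0.3| >= 0.25
    print(ms.max_delta())               # -0.3 (largest delta by absolute value)
    print(ms.filter_by_threshold(0.5))  # every event dict comes back empty at this threshold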
@@ -337,7 +480,7 @@ def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
         return None, None

     transcript.generate_pre_mrna()
-    predicted_donor_sites, predicted_acceptor_sites = find_transcript_splicing(transcript, engine=engine)
+    predicted_donor_sites, predicted_acceptor_sites = find_transcript_splicing(transcript.pre_mrna, engine=engine)
     num_introns = len(transcript.introns)
     predicted_donors = list(predicted_donor_sites.keys())[:num_introns]
     predicted_acceptors = list(predicted_acceptor_sites.keys())[:num_introns]
@@ -346,68 +489,58 @@ def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
     return len(correct_donor_preds) / num_introns, len(correct_acceptor_preds) / num_introns, len(transcript.introns)


-def missplicing(mut_id, splicing_threshold=0.5, primary_transcript=True, organism='hg38', engine='spliceai'):
-    gene = Gene(mut_id.split(':')[0], organism=organism)
-    mutation = get_mutation(mut_id, rev=gene.rev)
-    results = {}
-
-    for tid, transcript in gene.run_transcripts():
-        # if not transcript.primary_transcript and primary_transcript:
-        #     continue
-        #
-        if mutation not in transcript:
-            continue
-
-        good_tid = tid
-
-        transcript.generate_pre_mrna()
-        results[tid] = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine),
-                                   threshold=splicing_threshold)
-
-    # if len(results) == 0:
-    #     return None
-    #
-    # if primary_transcript and good_tid in results:
-    #     return results[good_tid]
-    # else:
-    #     return None
-
-    return results
-
-
 import sqlite3
 import json
+import os
+
+# Global connection and cursor (adjust to your architecture)
+# Ideally, initialize this once in your application startup code.
+DB_PATH = os.path.join(config['splicing_db'], 'mutation_data.db')
+conn = sqlite3.connect(DB_PATH, isolation_level=None)  # autocommit mode
+cursor = conn.cursor()
+
+# Create table once at startup, not in the function
+cursor.execute('''
+    CREATE TABLE IF NOT EXISTS mutations (
+        tool TEXT,
+        gene TEXT,
+        mutation_id TEXT,
+        transcript_id TEXT,
+        data TEXT,
+        PRIMARY KEY (tool, gene, mutation_id, transcript_id)
+    )''')

 def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
- ... (old lines 382-394 not rendered in this diff view)
+    """
+    Retrieve computed splicing data for a given mutation from a database,
+    or compute and store it if not found or if force_recompute is True.
+
+    Args:
+        tool (str): Name of the tool used for computation.
+        gene (str): Gene name or identifier.
+        mutation_id (str): A unique identifier for the mutation.
+        transcript_id (str): ID for the transcript.
+        force_recompute (bool): If True, ignore cached value and recompute.
+
+    Returns:
+        dict: The computed splicing data.
+    """
+
+    # Lookup in the database
     cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                    (tool, gene, mutation_id, transcript_id))
     row = cursor.fetchone()

-    # If
+    # If found and no force recompute, return cached data
     if row and not force_recompute:
         return json.loads(row[0])

-    # Otherwise, compute the
-    computed_data = find_transcript_missplicing(
+    # Otherwise, compute the data
+    computed_data = find_transcript_missplicing(mutation_id, transcript_id=transcript_id, engine=tool)  # Replace with your actual function

-    # Store in
+    # Store computed data in DB
     data_json = json.dumps(computed_data)
-    cursor.execute('REPLACE INTO mutations (tool, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?)',
-                   (tool, mutation_id, transcript_id, data_json))
-    conn.commit()
-    conn.close()
+    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
+                   (tool, gene, mutation_id, transcript_id, data_json))

     return computed_data
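The module-level cache above opens one SQLite connection in autocommit mode and keys rows on (tool, gene, mutation_id, transcript_id); 1.3.4 also adds the gene column to the REPLACE INTO statement and stops closing the shared connection after each call. Below is a self-contained sketch of the same get-or-compute pattern against an in-memory database, with a stand-in compute function (only the table schema is taken from the diff; the rest is illustrative):

    import json
    import sqlite3

    conn = sqlite3.connect(':memory:', isolation_level=None)  # autocommit, like the module-level connection
    cur = conn.cursor()
    cur.execute('''CREATE TABLE IF NOT EXISTS mutations (
        tool TEXT, gene TEXT, mutation_id TEXT, transcript_id TEXT, data TEXT,
        PRIMARY KEY (tool, gene, mutation_id, transcript_id))''')

    def fake_compute(mutation_id):
        # Stand-in for find_transcript_missplicing; returns the empty-event shape used in the diff.
        return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}

    def get_or_compute(tool, gene, mutation_id, transcript_id, force_recompute=False):
        cur.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                    (tool, gene, mutation_id, transcript_id))
        row = cur.fetchone()
        if row and not force_recompute:
            return json.loads(row[0])          # cache hit
        data = fake_compute(mutation_id)       # cache miss: compute
        cur.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                    (tool, gene, mutation_id, transcript_id, json.dumps(data)))
        return data

    print(get_or_compute('spliceai', 'KRAS', 'KRAS:fake_mut', 'primary'))  # computed, then cached
    print(get_or_compute('spliceai', 'KRAS', 'KRAS:fake_mut', 'primary'))  # served from the cache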
{geney-1.3.2.dist-info → geney-1.3.4.dist-info}/RECORD
CHANGED

@@ -10,12 +10,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
 geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
 geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
-geney/oncosplice.py,sha256=
+geney/oncosplice.py,sha256=1xphL2LeAObwUKBXgcyyKbNO9bAryKDZesK7OpUpFfA,22336
 geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
 geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
 geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
 geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
-geney/splicing_utils.py,sha256=
+geney/splicing_utils.py,sha256=6TlSeNK8BWGnm5AqYKa_qObqHQVcgYtRY08JmfK9yZ8,26338
 geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
 geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
 geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396

@@ -24,7 +24,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
 geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
-geney-1.3.
-geney-1.3.
-geney-1.3.
-geney-1.3.
+geney-1.3.4.dist-info/METADATA,sha256=ONsBA4xTOrs0KaNJR9pBrwlHE06WC8YUuCFfH5vV2ag,994
+geney-1.3.4.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
+geney-1.3.4.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.3.4.dist-info/RECORD,,
{geney-1.3.2.dist-info → geney-1.3.4.dist-info}/WHEEL
File without changes

{geney-1.3.2.dist-info → geney-1.3.4.dist-info}/top_level.txt
File without changes