geney 1.3.3__py2.py3-none-any.whl → 1.3.5__py2.py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/oncosplice.py CHANGED
@@ -6,6 +6,10 @@ import pandas as pd
6
6
  import numpy as np
7
7
  from .SeqMats import SeqMat, MutSeqMat
8
8
  from .splicing_utils import find_transcript_missplicing_seqs, develop_aberrant_splicing
9
+ from .Gene import Gene
10
+ import copy
11
+ from . import config
12
+
9
13
  from .tis_utils import find_tis
10
14
 
11
15
  def short_hash_of_list(numbers, length=5):
@@ -301,7 +305,7 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
301
305
 
302
306
 
303
307
  def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
304
- window_length=13, organism='hg38', engine='spliceai', domains=None):
308
+ window_length=13, organism='hg38', engine='spliceai'):
305
309
  gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
306
310
  reference_gene_proteins = {
307
311
  transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
@@ -374,110 +378,110 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
374
378
  'mutation_distance_from_3', 'engine', 'reference_resemblance', 'oncosplice_score', 'percentile',
375
379
  'isoform_prevalence', 'reference_protein', 'variant_protein']]
376
380
 
377
-
378
- import asyncio
379
- async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
380
- window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
381
- import sys, os
382
- needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
383
- needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
384
-
385
- if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.R_OK)) and (
386
- needed_file2.is_file() and os.access(needed_file2, os.R_OK)):
387
- sys.path.append(str(config[organism]['yoram_path']))
388
- import uniprot_utils as uput
389
-
390
- else:
391
- raise SystemError(
392
- "Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
393
-
394
- from .tis_utils import find_tis
395
-
396
- # Define async functions
397
- async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
398
- return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
399
-
400
- def inspect_domain(row, modified_vector, conservation_vector):
401
- v1, v2 = modified_vector[row.start:row.end], conservation_vector[row.start:row.end]
402
- if sum(v2) == 0:
403
- return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', 0],
404
- index=['domain_identifier', 'score'])
405
-
406
- return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', sum(v1 * v2) / sum(v2)],
407
- index=['domain_identifier', 'score'])
408
-
409
- gene = Gene(mut_id.split(':')[0], organism=organism)
410
- reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
411
- mutations = [get_mutation(mut_id, rev=gene.rev) for mut_id in mut_id.split('|')]
412
- results = []
413
- for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
414
- if require_cons and not transcript.cons_available:
415
- continue
416
-
417
- if all(mutation not in transcript for mutation in mutations):
418
- # results.append({'transcript_id': transcript.transcript_id})
419
- continue
420
-
421
- task1 = asyncio.create_task(background_request(tid))
422
- transcript.generate_pre_mrna()
423
- transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
424
- transcript.generate_mature_mrna().generate_protein(inplace=True)
425
- ref_protein, cons_vector = transcript.protein, transcript.cons_vector
426
-
427
- if not use_cons:
428
- cons_vector = np.ones(len(ref_protein))
429
-
430
- if sum(cons_vector) == 0:
431
- cons_vector = np.ones(len(ref_protein)) #/len(ref_protein)
432
-
433
- reference_transcript = copy.deepcopy(transcript)
434
-
435
- assert len(ref_protein) == len(
436
- cons_vector), f"Protein ({len(ref_protein)}) and conservation vector ({len(cons_vector)} must be same length."
437
-
438
- missplicing = Missplicing(find_transcript_missplicing(transcript, mutations, engine=engine, threshold=splicing_threshold),
439
- threshold=splicing_threshold)
440
- for mutation in mutations:
441
- transcript.pre_mrna += mutation
442
-
443
- domains_df = await task1
444
- for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
445
- transcript.acceptors = new_boundaries['acceptors']
446
- transcript.donors = new_boundaries['donors']
447
- transcript.generate_mature_mrna()
448
- transcript.TIS = find_tis(ref_seq=reference_transcript, mut_seq=transcript)
449
- transcript.generate_protein()
450
-
451
- alignment = get_logical_alignment(reference_transcript.protein, transcript.protein)
452
- deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
453
- modified_positions = find_modified_positions(len(ref_protein), deleted, inserted)
454
- temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
455
- affected_cons_scores = max(temp_cons)
456
- percentile = (
457
- sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
458
- cons_vector))
459
-
460
- out = domains_df.apply(lambda row: inspect_domain(row, modified_positions, cons_vector), axis=1)
461
- domains_affected = '+'.join([f'{a}:{round(b, 3)}' for a, b in list(zip(out.domain_identifier, out.score))])
462
-
463
- report = OncospliceAnnotator(reference_transcript, transcript, mutation)
464
- report['mut_id'] = mut_id
465
- report['oncosplice_score'] = affected_cons_scores
466
- report['cons_available'] = transcript.cons_available
467
- report['transcript_id'] = transcript.transcript_id
468
- report['percentile'] = percentile
469
- report['isoform_id'] = i
470
- report['isoform_prevalence'] = new_boundaries['path_weight']
471
- report['full_missplicing'] = missplicing.aberrant_splicing
472
- report['missplicing'] = max(missplicing)
473
- report['domains'] = domains_affected
474
- report['max_domain_score'] = out.score.max()
475
-
476
- report['reference_resemblance'] = reference_gene_proteins.get(transcript.protein, None)
477
- results.append(pd.Series(report))
478
-
479
- report = pd.concat(results, axis=1).T
480
- return report
381
+ #
382
+ # import asyncio
383
+ # async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
384
+ # window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
385
+ # import sys, os
386
+ # needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
387
+ # needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
388
+ #
389
+ # if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.R_OK)) and (
390
+ # needed_file2.is_file() and os.access(needed_file2, os.R_OK)):
391
+ # sys.path.append(str(config[organism]['yoram_path']))
392
+ # import uniprot_utils as uput
393
+ #
394
+ # else:
395
+ # raise SystemError(
396
+ # "Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
397
+ #
398
+ # from .tis_utils import find_tis
399
+ #
400
+ # # Define async functions
401
+ # async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
402
+ # return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
403
+ #
404
+ # def inspect_domain(row, modified_vector, conservation_vector):
405
+ # v1, v2 = modified_vector[row.start:row.end], conservation_vector[row.start:row.end]
406
+ # if sum(v2) == 0:
407
+ # return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', 0],
408
+ # index=['domain_identifier', 'score'])
409
+ #
410
+ # return pd.Series([f'{row.type}|{row.start}|{row.end}|{row.description}', sum(v1 * v2) / sum(v2)],
411
+ # index=['domain_identifier', 'score'])
412
+ #
413
+ # gene = Gene(mut_id.split(':')[0], organism=organism)
414
+ # reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
415
+ # mutations = [get_mutation(mut_id, rev=gene.rev) for mut_id in mut_id.split('|')]
416
+ # results = []
417
+ # for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
418
+ # if require_cons and not transcript.cons_available:
419
+ # continue
420
+ #
421
+ # if all(mutation not in transcript for mutation in mutations):
422
+ # # results.append({'transcript_id': transcript.transcript_id})
423
+ # continue
424
+ #
425
+ # task1 = asyncio.create_task(background_request(tid))
426
+ # transcript.generate_pre_mrna()
427
+ # transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
428
+ # transcript.generate_mature_mrna().generate_protein(inplace=True)
429
+ # ref_protein, cons_vector = transcript.protein, transcript.cons_vector
430
+ #
431
+ # if not use_cons:
432
+ # cons_vector = np.ones(len(ref_protein))
433
+ #
434
+ # if sum(cons_vector) == 0:
435
+ # cons_vector = np.ones(len(ref_protein)) #/len(ref_protein)
436
+ #
437
+ # reference_transcript = copy.deepcopy(transcript)
438
+ #
439
+ # assert len(ref_protein) == len(
440
+ # cons_vector), f"Protein ({len(ref_protein)}) and conservation vector ({len(cons_vector)} must be same length."
441
+ #
442
+ # missplicing = Missplicing(find_transcript_missplicing(transcript, mutations, engine=engine, threshold=splicing_threshold),
443
+ # threshold=splicing_threshold)
444
+ # for mutation in mutations:
445
+ # transcript.pre_mrna += mutation
446
+ #
447
+ # domains_df = await task1
448
+ # for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
449
+ # transcript.acceptors = new_boundaries['acceptors']
450
+ # transcript.donors = new_boundaries['donors']
451
+ # transcript.generate_mature_mrna()
452
+ # transcript.TIS = find_tis(ref_seq=reference_transcript, mut_seq=transcript)
453
+ # transcript.generate_protein()
454
+ #
455
+ # alignment = get_logical_alignment(reference_transcript.protein, transcript.protein)
456
+ # deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
457
+ # modified_positions = find_modified_positions(len(ref_protein), deleted, inserted)
458
+ # temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
459
+ # affected_cons_scores = max(temp_cons)
460
+ # percentile = (
461
+ # sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
462
+ # cons_vector))
463
+ #
464
+ # out = domains_df.apply(lambda row: inspect_domain(row, modified_positions, cons_vector), axis=1)
465
+ # domains_affected = '+'.join([f'{a}:{round(b, 3)}' for a, b in list(zip(out.domain_identifier, out.score))])
466
+ #
467
+ # report = OncospliceAnnotator(reference_transcript, transcript, mutation)
468
+ # report['mut_id'] = mut_id
469
+ # report['oncosplice_score'] = affected_cons_scores
470
+ # report['cons_available'] = transcript.cons_available
471
+ # report['transcript_id'] = transcript.transcript_id
472
+ # report['percentile'] = percentile
473
+ # report['isoform_id'] = i
474
+ # report['isoform_prevalence'] = new_boundaries['path_weight']
475
+ # report['full_missplicing'] = missplicing.aberrant_splicing
476
+ # report['missplicing'] = max(missplicing)
477
+ # report['domains'] = domains_affected
478
+ # report['max_domain_score'] = out.score.max()
479
+ #
480
+ # report['reference_resemblance'] = reference_gene_proteins.get(transcript.protein, None)
481
+ # results.append(pd.Series(report))
482
+ #
483
+ # report = pd.concat(results, axis=1).T
484
+ # return report
481
485
 
482
486
 
483
487
  if __name__ == '__main__':
geney/splicing_utils.py CHANGED
@@ -1,9 +1,8 @@
1
1
  import numpy as np
2
- from ._mutation_utils import get_mutation
3
2
  from .Gene import Gene
4
-
3
+ from .SeqMats import MutSeqMat
5
4
  from collections import defaultdict
6
-
5
+ from . import config
7
6
 
8
7
  def generate_adjacency_list(acceptors, donors, transcript_start, transcript_end, max_distance=50, rev=False):
9
8
  # Append the transcript end to donors to allow connection to the end point
@@ -145,124 +144,180 @@ def find_ss_changes(ref_dct, mut_dct, known_splice_sites, threshold=0.5):
145
144
  return discovered_pos, deleted_pos
146
145
 
147
146
 
148
- def find_transcript_missplicing_mutid(mut_id):
149
- from geney.Gene import Gene
150
- transcript = Gene(mut_id.split(':')[0]).transcript().generate_mature_mrna()
151
- out = find_transcript_missplicing(transcript, [get_mutation(mut_id, rev=transcript.rev)], context=5000, window=2500, threshold=0.5, engine='spliceai', just_ss=True)
152
- best_delta = 0
153
- for k, v in out.items():
154
- for k1, v1 in v.items():
155
- if abs(v1['delta']) > abs(best_delta):
156
- best_delta = v1['delta']
157
- return out, best_delta
158
-
159
- def find_transcript_missplicing(transcript, mutations, context=5000, window=2500, threshold=0.5, engine='spliceai', just_ss=False):
160
- from functools import reduce
161
- ref = transcript.pre_mrna
162
- mutations = [mutation for mutation in mutations if mutation.position in ref.indices]
163
- if len(mutations) == 0:
164
- return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
165
-
166
- var = reduce(lambda acc, mutation: acc + mutation, mutations, ref)
167
- center = int(np.mean([mutation.position for mutation in mutations]) // 1)
168
-
169
- total_context = context + window
170
- length = ref.seqmat.shape[-1]
171
- center_index = ref.rel_pos(center)
172
- ref_start_pad = max(0, total_context - center_index)
173
- ref_end_pad = max(0, total_context - (length - center_index))
147
+ from typing import Tuple, Dict
174
148
 
175
- length = var.seqmat.shape[-1]
176
- center_index = var.rel_pos(center)
177
- if center_index is None:
178
- raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
149
+ def run_splicing_engine(seq, engine='spliceai'):
150
+ match engine:
151
+ case 'spliceai':
152
+ from .spliceai_utils import sai_predict_probs, sai_models
153
+ donor_probs, acceptor_probs = sai_predict_probs(seq, models=sai_models)
179
154
 
180
- var_start_pad = max(0, total_context - center_index)
181
- var_end_pad = max(0, total_context - (length - center_index))
155
+ case 'pangolin':
156
+ from .pangolin_utils import pangolin_predict_probs, pang_models
157
+ donor_probs, acceptor_probs = pangolin_predict_probs(seq, models=pang_models)
182
158
 
183
- ref = ref.inspect(center, context=total_context)
184
- var = var.inspect(center, context=total_context)
185
-
186
- ref_indices = np.concatenate([np.zeros(ref_start_pad), ref.indices, np.zeros(ref_end_pad)])
187
- mut_indices = np.concatenate([np.zeros(var_start_pad), var.indices, np.zeros(var_end_pad)])
188
-
189
- ref_indices = ref_indices[context:-context]
190
- mut_indices = mut_indices[context:-context]
191
-
192
- ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
193
- var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
159
+ case _:
160
+ raise ValueError(f"{engine} not implemented")
161
+ return donor_probs, acceptor_probs
194
162
 
195
- if engine == 'spliceai':
196
- from .spliceai_utils import sai_predict_probs, sai_models
197
- ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, models=sai_models)
198
- mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq, models=sai_models)
199
163
 
200
- elif engine == 'pangolin':
201
- from .pangolin_utils import pangolin_predict_probs, pang_models
202
- ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models, just_ss=just_ss)
203
- mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq, models=pang_models, just_ss=just_ss)
164
+ def find_transcript_splicing(transcript, engine: str = 'spliceai') -> Tuple[Dict[int, float], Dict[int, float]]:
165
+ """
166
+ Predict splice site probabilities for a given transcript using the specified engine.
167
+ This function uses a padding of 5000 'N's on each side of the transcript sequence
168
+ to align with the model's required context length.
169
+
170
+ Args:
171
+ transcript: An object representing a transcript, expected to have:
172
+ - an `indices` attribute that returns a sequence of positions.
173
+ - a `seq` attribute that returns the sequence string.
174
+ engine (str): The prediction engine to use. Supported: 'spliceai', 'pangolin'.
175
+
176
+ Returns:
177
+ (donor_probs, acceptor_probs) as two dictionaries keyed by position with probability values.
178
+
179
+ Raises:
180
+ ValueError: If an unsupported engine is provided.
181
+ AssertionError: If the length of predicted probabilities does not match the length of indices.
182
+ """
183
+ # Prepare reference sequence with padding
184
+ ref_indices = transcript.indices
185
+ ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000
186
+ ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq, engine)
187
+
188
+ # Verify lengths
189
+ assert len(ref_seq_donor_probs) == len(ref_indices), (
190
+ f"Donor probabilities length ({len(ref_seq_donor_probs)}) does not match "
191
+ f"indices length ({len(ref_indices)})."
192
+ )
193
+ assert len(ref_seq_acceptor_probs) == len(ref_indices), (
194
+ f"Acceptor probabilities length ({len(ref_seq_acceptor_probs)}) does not match "
195
+ f"indices length ({len(ref_indices)})."
196
+ )
197
+
198
+ # Create dictionaries and sort them by probability in descending order
199
+ donor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_donor_probs)),
200
+ key=lambda item: item[1], reverse=True)
201
+ acceptor_probs = dict(sorted((i, p) for i, p in zip(ref_indices, ref_seq_acceptor_probs)),
202
+ key=lambda item: item[1], reverse=True)
204
203
 
205
- else:
206
- raise ValueError(f"{engine} not implemented")
204
+ return donor_probs, acceptor_probs
207
205
 
208
- visible_donors = np.intersect1d(transcript.donors, ref_indices)
209
- visible_acceptors = np.intersect1d(transcript.acceptors, ref_indices)
210
206
 
211
- assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
212
- assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
207
+ def find_transcript_missplicing(mut_id, transcript='primary', threshold=0.5, engine='spliceai', organism='hg38'):
208
+ gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
209
+ reference_transcript = gene.transcript(transcript) if transcript is not None else gene.transcript()
210
+ variant_transcript = reference_transcript.clone()
211
+ mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
212
+ mutations = [m for m in mutations if m in reference_transcript]
213
+ if len(mutations) == 0:
214
+ return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
213
215
 
214
- iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
215
- {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
216
- visible_acceptors,
217
- threshold=threshold)
216
+ center = np.mean([m.indices[0] for m in mutations]) // 1
217
+ for mutation in mutations:
218
+ variant_transcript.mutate(mutation, inplace=True)
218
219
 
219
- assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
220
- assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
220
+ return find_transcript_missplicing_seqs(reference_transcript.get_context(center, 7500), variant_transcript.get_context(center, 7500), reference_transcript.donors, reference_transcript.acceptors, threshold=threshold, engine=engine)
221
221
 
222
- idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
223
- {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
224
- visible_donors,
225
- threshold=threshold)
226
222
 
227
- ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
228
- ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
229
-
230
- lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
231
- visible_acceptors if p not in mut_indices and p not in dap}
232
- lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
233
- if p not in mut_indices and p not in ddp}
234
- dap.update(lost_acceptors)
235
- ddp.update(lost_donors)
236
-
237
- missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
238
- missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
239
- temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
223
+ # from functools import reduce
224
+ # ref = transcript.pre_mrna
225
+ # mutations = [mutation for mutation in mutations if mutation.position in ref.indices]
226
+ # if len(mutations) == 0:
227
+ # return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
228
+ #
229
+ # var = reduce(lambda acc, mutation: acc + mutation, mutations, ref)
230
+ # center = int(np.mean([mutation.position for mutation in mutations]) // 1)
231
+ #
232
+ # total_context = context + window
233
+ # length = ref.seqmat.shape[-1]
234
+ # center_index = ref.rel_pos(center)
235
+ # ref_start_pad = max(0, total_context - center_index)
236
+ # ref_end_pad = max(0, total_context - (length - center_index))
237
+ #
238
+ # length = var.seqmat.shape[-1]
239
+ # center_index = var.rel_pos(center)
240
+ # if center_index is None:
241
+ # raise IndexError("Center index must not be none... Issue with mutations... They must not be within the transcript.")
242
+ #
243
+ # var_start_pad = max(0, total_context - center_index)
244
+ # var_end_pad = max(0, total_context - (length - center_index))
245
+ #
246
+ # ref = ref.inspect(center, context=total_context)
247
+ # var = var.inspect(center, context=total_context)
248
+ #
249
+ # ref_indices = np.concatenate([np.zeros(ref_start_pad), ref.indices, np.zeros(ref_end_pad)])
250
+ # mut_indices = np.concatenate([np.zeros(var_start_pad), var.indices, np.zeros(var_end_pad)])
251
+ #
252
+ # ref_indices = ref_indices[context:-context]
253
+ # mut_indices = mut_indices[context:-context]
254
+ #
255
+ # ref_seq = 'N'*ref_start_pad + ref.seq + 'N'*ref_end_pad
256
+ # var_seq = 'N'*var_start_pad + var.seq + 'N'*var_end_pad
257
+ #
258
+ # if engine == 'spliceai':
259
+ # from .spliceai_utils import sai_predict_probs, sai_models
260
+ # ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, models=sai_models)
261
+ # mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq, models=sai_models)
262
+ #
263
+ # elif engine == 'pangolin':
264
+ # from .pangolin_utils import pangolin_predict_probs, pang_models
265
+ # ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models, just_ss=just_ss)
266
+ # mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq, models=pang_models, just_ss=just_ss)
267
+ #
268
+ # else:
269
+ # raise ValueError(f"{engine} not implemented")
270
+ #
271
+ # visible_donors = np.intersect1d(transcript.donors, ref_indices)
272
+ # visible_acceptors = np.intersect1d(transcript.acceptors, ref_indices)
273
+ #
274
+ # assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
275
+ # assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
276
+ #
277
+ # iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
278
+ # {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
279
+ # visible_acceptors,
280
+ # threshold=threshold)
281
+ #
282
+ # assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
283
+ # assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
284
+ #
285
+ # idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
286
+ # {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
287
+ # visible_donors,
288
+ # threshold=threshold)
289
+ #
290
+ # ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
291
+ # ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
292
+ #
293
+ # lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
294
+ # visible_acceptors if p not in mut_indices and p not in dap}
295
+ # lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
296
+ # if p not in mut_indices and p not in ddp}
297
+ # dap.update(lost_acceptors)
298
+ # ddp.update(lost_donors)
299
+ #
300
+ # missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
301
+ # missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
302
+ # temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
240
303
  return temp
241
304
 
242
- def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai', just_ss=False):
305
+
306
+ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
243
307
  if ref_seq.seq == var_seq.seq:
244
308
  return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
245
309
 
246
- if engine == 'spliceai':
247
- from .spliceai_utils import sai_predict_probs, sai_models
248
- ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq.seq, models=sai_models)
249
- mut_seq_acceptor_probs, mut_seq_donor_probs = sai_predict_probs(var_seq.seq, models=sai_models)
250
-
251
- elif engine == 'pangolin':
252
- from .pangolin_utils import pangolin_predict_probs, pang_models
253
- ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq.seq , models=pang_models, just_ss=just_ss)
254
- mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(var_seq.seq, models=pang_models, just_ss=just_ss)
255
-
256
- else:
257
- raise ValueError(f"{engine} not implemented")
258
-
310
+ ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq.seq, engine)
311
+ mut_seq_acceptor_probs, mut_seq_donor_probs = run_splicing_engine(var_seq.seq, engine)
259
312
  ref_indices = ref_seq.indices[5000:-5000]
260
313
  mut_indices = var_seq.indices[5000:-5000]
261
314
  visible_donors = np.intersect1d(donors, ref_indices)
262
315
  visible_acceptors = np.intersect1d(acceptors, ref_indices)
263
316
 
264
- assert len(ref_indices) == len(ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
265
- assert len(mut_indices) == len(mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
317
+ assert len(ref_indices) == len(
318
+ ref_seq_acceptor_probs), f'Reference pos ({len(ref_indices)}) not the same as probs ({len(ref_seq_acceptor_probs)})'
319
+ assert len(mut_indices) == len(
320
+ mut_seq_acceptor_probs), f'Mut pos ({len(mut_indices)}) not the same as probs ({len(mut_seq_acceptor_probs)})'
266
321
 
267
322
  iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
268
323
  {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
@@ -282,106 +337,140 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
282
337
 
283
338
  lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
284
339
  visible_acceptors if p not in mut_indices and p not in dap}
285
- lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
340
+ lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in
341
+ visible_donors
286
342
  if p not in mut_indices and p not in ddp}
287
343
  dap.update(lost_acceptors)
288
344
  ddp.update(lost_donors)
289
345
 
290
- missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
346
+ missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap,
347
+ 'discovered_donors': idp}
291
348
  missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
292
- temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
349
+ temp = {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in
350
+ missplicing.items()}
293
351
  return Missplicing(temp, threshold=threshold)
294
352
 
295
353
 
296
354
  class Missplicing:
297
355
  def __init__(self, splicing_dict, threshold=0.5):
356
+ """
357
+ Initialize a Missplicing object.
358
+
359
+ Args:
360
+ splicing_dict (dict): Dictionary containing splicing events and their details.
361
+ Example:
362
+ {
363
+ "missed_acceptors": {100: {"absolute": 0.0, "delta": -0.3}, ...},
364
+ "missed_donors": { ... },
365
+ "discovered_acceptors": { ... },
366
+ "discovered_donors": { ... }
367
+ }
368
+ threshold (float): The threshold above which a delta is considered significant.
369
+ """
298
370
  self.missplicing = splicing_dict
299
371
  self.threshold = threshold
300
372
 
301
- # def __repr__(self):
302
- # return f'Missplicing({self.modification.mut_id}) --> {self.missplicing}'
303
-
304
373
  def __str__(self):
305
- return self.aberrant_splicing
374
+ """String representation displays the filtered splicing events passing the threshold."""
375
+ return str(self.significant_events)
306
376
 
307
377
  def __bool__(self):
308
- if self.apply_sai_threshold_alt() is not None:
309
- return True
310
- return False
378
+ """
379
+ Boolean evaluation: True if any event surpasses the threshold, False otherwise.
380
+ """
381
+ return self.first_significant_event() is not None
311
382
 
312
383
  def __iter__(self):
313
- vals = [0]
314
- for event, details in self.missplicing.items():
315
- for e, d in details.items():
316
- vals.append(d['delta'])
317
- return iter(vals)
318
-
319
- # def __eq__(self, alt_splicing):
320
- # flag, _ = self.check_splicing_difference(self.missplicing, alt_splicing, self.threshold)
321
- # return not flag
384
+ """
385
+ Iterate over all delta values from all events. The first yielded value is 0 (for compatibility),
386
+ followed by all deltas in self.missplicing.
387
+ """
388
+ yield 0
389
+ for details in self.missplicing.values():
390
+ for d in details.values():
391
+ yield d['delta']
322
392
 
323
393
  @property
324
- def aberrant_splicing(self):
325
- return self.apply_sai_threshold(self.threshold)
394
+ def significant_events(self):
395
+ """
396
+ Returns a filtered version of missplicing events that meet or exceed the current threshold.
397
+ """
398
+ return self.filter_by_threshold(self.threshold)
399
+
400
+ def filter_by_threshold(self, threshold=None):
401
+ """
402
+ Filter self.missplicing to only include events where abs(delta) >= threshold.
403
+
404
+ Args:
405
+ threshold (float, optional): The threshold to apply. Defaults to self.threshold.
406
+
407
+ Returns:
408
+ dict: A new dictionary with filtered events.
409
+ """
410
+ if threshold is None:
411
+ threshold = self.threshold
326
412
 
327
- def apply_sai_threshold(self, threshold=None):
328
- splicing_dict = self.missplicing
329
- if not threshold:
413
+ return {
414
+ event: {
415
+ pos: detail for pos, detail in details.items()
416
+ if abs(detail['delta']) >= threshold
417
+ }
418
+ for event, details in self.missplicing.items()
419
+ }
420
+
421
+ def first_significant_event(self, splicing_dict=None, threshold=None):
422
+ """
423
+ Check if there is any event surpassing a given threshold and return the dictionary if found.
424
+
425
+ Args:
426
+ splicing_dict (dict, optional): Dictionary to check. Defaults to self.missplicing.
427
+ threshold (float, optional): Threshold to apply. Defaults to self.threshold.
428
+
429
+ Returns:
430
+ dict or None: Returns the dictionary if a delta surpasses the threshold, otherwise None.
431
+ """
432
+ if splicing_dict is None:
433
+ splicing_dict = self.missplicing
434
+ if threshold is None:
330
435
  threshold = self.threshold
331
436
 
332
- new_dict = {}
333
- for event, details in self.missplicing.items():
334
- in_dict = {}
335
- for e, d in details.items():
336
- if abs(d['delta']) >= threshold:
337
- in_dict[e] = d
338
- # return splicing_dict
339
- new_dict[event] = in_dict
340
- return new_dict
341
-
342
- def apply_sai_threshold_alt(self, splicing_dict=None, threshold=None):
343
- splicing_dict = self.missplicing if not splicing_dict else splicing_dict
344
- threshold = self.threshold if not threshold else threshold
345
- for event, details in splicing_dict.items():
346
- for e, d in details.items():
347
- if abs(d['delta']) >= threshold:
348
- return splicing_dict
437
+ # Check if any event meets the threshold
438
+ if any(abs(detail['delta']) >= threshold for details in splicing_dict.values() for detail in details.values()):
439
+ return splicing_dict
349
440
  return None
350
441
 
351
- def get_max_missplicing_delta(self):
352
- max_delta = 0
353
- for event, details in self.missplicing.items():
354
- for e, d in details.items():
355
- if abs(d['delta']) > max_delta:
356
- max_delta = abs(d['delta'])
357
- return max_delta
358
-
359
-
360
- def find_transcript_splicing(transcript, engine='spliceai'):
361
- ref = transcript.pre_mrna
362
- ref_start_pad = 5000
363
- ref_end_pad = 5000
364
-
365
- ref_indices = ref.indices
366
- ref_seq = 'N' * ref_start_pad + ref.seq + 'N' * ref_end_pad
367
- if engine == 'spliceai':
368
- from .spliceai_utils import sai_predict_probs, sai_models
369
- ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, sai_models)
370
-
371
- elif engine == 'pangolin':
372
- from .pangolin_utils import pangolin_predict_probs, pang_models
373
- ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models)
442
+ def max_delta(self):
443
+ """
444
+ Returns the maximum absolute delta found in all events.
374
445
 
375
- else:
376
- raise ValueError(f"{engine} not implemented")
446
+ Returns:
447
+ float: The maximum absolute delta, or 0 if no events.
448
+ """
449
+ deltas = [detail['delta'] for details in self.missplicing.values() for detail in details.values()]
450
+ return max(deltas, key=abs, default=0.0)
377
451
 
378
- assert len(ref_seq_donor_probs) == len(ref_indices), f'{len(ref_seq_donor_probs)} vs. {len(ref_indices)}'
379
- donor_probs = {i: p for i, p in list(zip(ref_indices, ref_seq_donor_probs))}
380
- donor_probs = dict(sorted(donor_probs.items(), key=lambda item: item[1], reverse=True))
381
452
 
382
- acceptor_probs = {i: p for i, p in list(zip(ref_indices, ref_seq_acceptor_probs))}
383
- acceptor_probs = dict(sorted(acceptor_probs.items(), key=lambda item: item[1], reverse=True))
384
- return donor_probs, acceptor_probs
453
def find_transcript_splicing(transcript, engine='spliceai'):
    """Predict donor and acceptor probabilities for every position of a sequence object.

    Restored as live code because ``benchmark_splicing`` still calls this function
    (passing ``transcript.pre_mrna``); leaving it commented out makes that call a NameError.

    Args:
        transcript: Object exposing ``indices`` and ``seq``; ``seq`` is concatenated with
            plain strings below, so it must be a str. ``benchmark_splicing`` passes a pre-mRNA.
        engine (str): ``'spliceai'`` or ``'pangolin'``.

    Returns:
        tuple[dict, dict]: ``(donor_probs, acceptor_probs)``, each mapping an index from
        ``transcript.indices`` to its probability, ordered by descending probability.

    Raises:
        ValueError: If ``engine`` is not one of the supported names.
    """
    ref_indices = transcript.indices
    # Pad both ends with 5000 N; the assert below relies on the engines returning
    # exactly one probability per unpadded position.
    ref_seq = 'N' * 5000 + transcript.seq + 'N' * 5000

    # The engine modules are imported lazily, only for the branch that is used.
    if engine == 'spliceai':
        from .spliceai_utils import sai_predict_probs, sai_models
        ref_seq_acceptor_probs, ref_seq_donor_probs = sai_predict_probs(ref_seq, sai_models)

    elif engine == 'pangolin':
        from .pangolin_utils import pangolin_predict_probs, pang_models
        # Note the swapped order: this helper is unpacked as (donor, acceptor).
        ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models)

    else:
        raise ValueError(f"{engine} not implemented")

    assert len(ref_seq_donor_probs) == len(ref_indices), f'{len(ref_seq_donor_probs)} vs. {len(ref_indices)}'

    donor_probs = dict(zip(ref_indices, ref_seq_donor_probs))
    donor_probs = dict(sorted(donor_probs.items(), key=lambda item: item[1], reverse=True))

    acceptor_probs = dict(zip(ref_indices, ref_seq_acceptor_probs))
    acceptor_probs = dict(sorted(acceptor_probs.items(), key=lambda item: item[1], reverse=True))
    return donor_probs, acceptor_probs
385
474
 
386
475
 
387
476
  def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
@@ -391,7 +480,7 @@ def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
391
480
  return None, None
392
481
 
393
482
  transcript.generate_pre_mrna()
394
- predicted_donor_sites, predicted_acceptor_sites = find_transcript_splicing(transcript, engine=engine)
483
+ predicted_donor_sites, predicted_acceptor_sites = find_transcript_splicing(transcript.pre_mrna, engine=engine)
395
484
  num_introns = len(transcript.introns)
396
485
  predicted_donors = list(predicted_donor_sites.keys())[:num_introns]
397
486
  predicted_acceptors = list(predicted_acceptor_sites.keys())[:num_introns]
@@ -400,68 +489,58 @@ def benchmark_splicing(gene, organism='hg38', engine='spliceai'):
400
489
  return len(correct_donor_preds) / num_introns, len(correct_acceptor_preds) / num_introns, len(transcript.introns)
401
490
 
402
491
 
403
- def missplicing(mut_id, splicing_threshold=0.5, primary_transcript=True, organism='hg38', engine='spliceai'):
404
- gene = Gene(mut_id.split(':')[0], organism=organism)
405
- mutation = get_mutation(mut_id, rev=gene.rev)
406
- results = {}
407
-
408
- for tid, transcript in gene.run_transcripts():
409
- # if not transcript.primary_transcript and primary_transcript:
410
- # continue
411
- #
412
- if mutation not in transcript:
413
- continue
414
-
415
- good_tid = tid
416
-
417
- transcript.generate_pre_mrna()
418
- results[tid] = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine),
419
- threshold=splicing_threshold)
420
-
421
- # if len(results) == 0:
422
- # return None
423
- #
424
- # if primary_transcript and good_tid in results:
425
- # return results[good_tid]
426
- # else:
427
- # return None
428
-
429
- return results
430
-
431
-
432
492
  import sqlite3
433
493
  import json
434
-
435
- # def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
436
- # conn = sqlite3.connect(os.path.join(tool_parameters['splicing_db'], 'mutation_data.db'))
437
- # cursor = conn.cursor()
438
- # # Create table if it doesn't exist
439
- # cursor.execute('''CREATE TABLE IF NOT EXISTS mutations (
440
- # tool TEXT,
441
- # gene TEXT,
442
- # mutation_id TEXT,
443
- # transcript_id TEXT,
444
- # data TEXT,
445
- # PRIMARY KEY (tool, gene, mutation_id, transcript_id)
446
- # )''')
447
- #
448
- # # Look up entry
449
- # cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
450
- # (tool, gene, mutation_id, transcript_id))
451
- # row = cursor.fetchone()
452
- #
453
- # # If entry is found and force_recompute is False, return data
454
- # if row and not force_recompute:
455
- # return json.loads(row[0])
456
- #
457
- # # Otherwise, compute the dictionary
458
- # computed_data = find_transcript_missplicing(tool, mutation_id, transcript_id) # Replace with actual function
459
- #
460
- # # Store in the database
461
- # data_json = json.dumps(computed_data)
462
- # cursor.execute('REPLACE INTO mutations (tool, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?)',
463
- # (tool, mutation_id, transcript_id, data_json))
464
- # conn.commit()
465
- # conn.close()
466
- #
467
- # return computed_data
494
+ import os
495
+
496
+ # Global connection and cursor (adjust to your architecture)
497
+ # Ideally, initialize this once in your application startup code.
498
+ DB_PATH = os.path.join(config['splicing_db'], 'mutation_data.db')
499
+ conn = sqlite3.connect(DB_PATH, isolation_level=None) # autocommit mode
500
+ cursor = conn.cursor()
501
+
502
+ # Create table once at startup, not in the function
503
+ cursor.execute('''
504
+ CREATE TABLE IF NOT EXISTS mutations (
505
+ tool TEXT,
506
+ gene TEXT,
507
+ mutation_id TEXT,
508
+ transcript_id TEXT,
509
+ data TEXT,
510
+ PRIMARY KEY (tool, gene, mutation_id, transcript_id)
511
+ )''')
512
+
513
def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
    """Return splicing data for a mutation, using the ``mutations`` table as a cache.

    The row is keyed by ``(tool, gene, mutation_id, transcript_id)``. On a cache hit the stored
    JSON is decoded and returned. Otherwise the data is computed, written with ``REPLACE``
    (overwriting any previous row for the same key) and returned.

    Args:
        tool (str): Name of the prediction tool; also passed to the computation as ``engine``.
        gene (str): Gene name or identifier (used only as part of the cache key).
        mutation_id (str): Identifier of the mutation.
        transcript_id (str): Identifier of the transcript.
        force_recompute (bool): If True, skip the cache lookup and always recompute.

    Returns:
        The cached value decoded from JSON, or the freshly computed value exactly as returned by
        ``find_transcript_missplicing``. JSON object keys are always strings, so a cached
        result can differ in key type from a fresh one.
    """
    # Only query the cache when its answer may be used; a forced recompute
    # previously ran the SELECT and threw the row away.
    if not force_recompute:
        cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
                       (tool, gene, mutation_id, transcript_id))
        row = cursor.fetchone()
        if row:
            return json.loads(row[0])

    # NOTE(review): confirm that find_transcript_missplicing exists in this module with this
    # signature and returns something json.dumps can serialise - the original line carried a
    # "replace with your actual function" placeholder.
    computed_data = find_transcript_missplicing(mutation_id, transcript_id=transcript_id, engine=tool)

    # No explicit commit here: the module-level connection is opened with isolation_level=None.
    data_json = json.dumps(computed_data)
    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
                   (tool, gene, mutation_id, transcript_id, data_json))

    return computed_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.3
3
+ Version: 1.3.5
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -10,12 +10,12 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
10
10
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
11
11
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
12
12
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
13
- geney/oncosplice.py,sha256=FdvuROk2G7wwLoB5lLzYia8Smw9hHZeVs-J2MUoAwlU,22106
13
+ geney/oncosplice.py,sha256=1xphL2LeAObwUKBXgcyyKbNO9bAryKDZesK7OpUpFfA,22336
14
14
  geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
15
15
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
16
16
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
17
17
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
18
- geney/splicing_utils.py,sha256=GK0ZzI-eBVxUvuI_bV7-WckvXTmhO83Yx0wS83tyI-w,22524
18
+ geney/splicing_utils.py,sha256=Y-yqRSlP7aRaYP9mpHLOI_1fL8nEEkRmgpfqQPslD8I,26358
19
19
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
20
20
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
21
21
  geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -24,7 +24,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
24
24
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
25
25
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
26
26
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
27
- geney-1.3.3.dist-info/METADATA,sha256=aEuwIsdWgmM-Gmdk4jON64JBr0vj7ify5PBQul57RmQ,994
28
- geney-1.3.3.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
- geney-1.3.3.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
- geney-1.3.3.dist-info/RECORD,,
27
+ geney-1.3.5.dist-info/METADATA,sha256=XnLZYFIOc0OInDhB0TQOrp0rM07OatxiD8QcNNWMISg,994
28
+ geney-1.3.5.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
29
+ geney-1.3.5.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
30
+ geney-1.3.5.dist-info/RECORD,,
File without changes