geney 1.3.43__py2.py3-none-any.whl → 1.3.45__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/SeqMats.py CHANGED
@@ -311,8 +311,8 @@ class SeqMat:
311
311
  ### NEEDS some work to make sure that mutations can continue being added without issue...
312
312
 
313
313
  # Ensure strand compatibility
314
- if not self._is_same_strand(mut):
315
- raise ValueError("Mutation and sequence are not on the same strand.")
314
+ # if not self._is_same_strand(mut):
315
+ # raise ValueError("Mutation and sequence are not on the same strand.")
316
316
 
317
317
  # something to make sure the mutation is contained as one deletion, insertion, or snp or indel
318
318
  ref_seqmat = self.seqmat.copy()
@@ -351,6 +351,9 @@ class SeqMat:
351
351
  return SeqMat(ref_seqmat)
352
352
 
353
353
  def orf_seqmat(self, tis_index):
354
+ if tis_index not in self.indices:
355
+ return SeqMat.from_seq({'seq': ''})
356
+
354
357
  temp = self.seqmat[:, self._rel_index(tis_index):]
355
358
  temp = temp[:, temp[0, :] != 5]
356
359
  temp = SeqMat(temp) # .drop_indices()
@@ -362,6 +365,7 @@ class SeqMat:
362
365
  else:
363
366
  stop_index = len(raw_seq)
364
367
  end_index = stop_index
368
+ assert end_index % 3 == 0, f"{end_index} is not a multiple of 3"
365
369
  return SeqMat(temp.seqmat[:, :end_index])
366
370
 
367
371
  def translate(self, tis_index):
geney/oncosplice.py CHANGED
@@ -305,13 +305,18 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut, ref_attri
305
305
 
306
306
 
307
307
  def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False,
308
- window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False):
308
+ window_length=13, organism='hg38', splicing_engine=None, splicing_db=None, verbose=False,
309
+ tis_engine=None, tis_db=None):
310
+
309
311
  gene = Gene.from_file(mut_id.split(':')[0], organism=organism)
310
312
  reference_gene_proteins = {
311
313
  transcript.generate_pre_mrna().generate_mature_mrna().generate_protein().protein: transcript.transcript_id for
312
314
  transcript in gene if transcript.transcript_biotype == 'protein_coding'}
313
315
 
314
316
  mutations = [MutSeqMat.from_mutid(m) for m in mut_id.split('|')]
317
+ if gene.rev:
318
+ mutations = [m.reverse_complement(inplace=True) for m in mutations[::-1]]
319
+
315
320
  results = []
316
321
  for reference_transcript in tqdm(gene, desc=f'Processing {mut_id}...'):
317
322
  if (cons_required and not reference_transcript.cons_available) or (
@@ -325,6 +330,7 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
325
330
  center = np.mean([m.indices[0] for m in current_mutations]) // 1
326
331
 
327
332
  mutated_transcript = reference_transcript.clone()
333
+
328
334
  for mutation in current_mutations:
329
335
  mutated_transcript.mutate(mutation, inplace=True)
330
336
 
@@ -361,6 +367,18 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
361
367
  mutated_transcript.donors = new_boundaries['donors']
362
368
  mutated_transcript.generate_mature_mrna().generate_protein()
363
369
 
370
+ ### Experimental
371
+ # mutated_transcript.generate_mature_mrna()
372
+ # if tis_engine is None:
373
+ # tis_candidates = [(mutated_transcript.tis, 1)]
374
+ # else:
375
+ # from tis_utils import tis_predictor
376
+ # tis_candidates = tis_predictor(mutated_transcript.mature_mrna)
377
+ #
378
+ # for tis_candidate, tis_score in tis_candidates:
379
+ # mutated_transcript.generate_protein(tis_candidate)
380
+ ######
381
+
364
382
  alignment = get_logical_alignment(reference_transcript.protein, mutated_transcript.protein)
365
383
  deleted, inserted = find_indels_with_mismatches_as_deletions(alignment.seqA, alignment.seqB)
366
384
  modified_positions = find_modified_positions(len(reference_transcript.protein), deleted, inserted)
@@ -375,6 +393,9 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_require
375
393
  report = OncospliceAnnotator(reference_transcript, mutated_transcript, current_mutations[0])
376
394
  report['mut_id'] = mut_id
377
395
  report['splicing_engine'] = splicing_engine if splicing_engine is not None else 'None'
396
+ # report['tis_engine'] = tis_engine if tis_engine is not None else 'None'
397
+ # report['tis_pos'] = tis_candidate
398
+ # report['tis_score'] = tis_score
378
399
  report['oncosplice_score'] = affected_cons_scores
379
400
  report['percentile'] = percentile
380
401
  report['isoform_id'] = short_hash_of_list(mutated_transcript.exons)
geney/splicing_utils.py CHANGED
@@ -324,7 +324,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
324
324
 
325
325
  def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
326
326
  if ref_seq.seq == var_seq.seq:
327
- return {'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}}
327
+ return Missplicing({'missed_acceptors': {}, 'missed_donors': {}, 'discovered_acceptors': {}, 'discovered_donors': {}})
328
328
 
329
329
  ref_seq_acceptor_probs, ref_seq_donor_probs = run_splicing_engine(ref_seq.seq, engine)
330
330
  mut_seq_acceptor_probs, mut_seq_donor_probs = run_splicing_engine(var_seq.seq, engine)
geney/tis_utils.py CHANGED
@@ -9,7 +9,16 @@ from geney import config
9
9
  p = PairwiseAligner()
10
10
 
11
11
 
12
- def find_tis(ref_seq, mut_seq, left_context=100, right_context=102):
12
+ def find_tis(reference_mrna, mutated_mrna, ref_tis_pos, left_context=100, right_context=102):
13
+ '''
14
+ mature_mrna: row 0 --> encoded nucleotides
15
+ row 1 --> genomic indices
16
+ row 2 --> super positions (in case of insertions or deletions)
17
+ row1+row2 = cohesive & monotonic genomic indices
18
+ row 3 --> binary mutated position or not
19
+ mature_mrna.seq
20
+ mature_mrna.indices
21
+ '''
13
22
  tis_coords = ref_seq.mature_mrna.asymmetric_indices(ref_seq.TIS, left_context=0, right_context=3)
14
23
  ref_seq, mut_seq = ref_seq.mature_mrna, mut_seq.mature_mrna
15
24
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.3.43
3
+ Version: 1.3.45
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -1,6 +1,6 @@
1
1
  geney/Fasta_segment.py,sha256=99HxNGNh_MfdVW6hhtlb1vOn7eSmT7oFoEfHDFMxG8w,11275
2
2
  geney/Gene.py,sha256=nMWJjoQaiVFm2iRjoiq7ghZqnXtW0tJDcq2S0AyOIvY,6883
3
- geney/SeqMats.py,sha256=aLpqd7RJSEU07jdPXpbtZPeb2D9BxrZuW6BTkcXpNE4,18819
3
+ geney/SeqMats.py,sha256=hQcEYTcFm06g4dGJf25Lvo_xCHsj0-GGhP-O2fPrBlE,18987
4
4
  geney/Transcript.py,sha256=CpfxYkuCwFILozrtLuiWnlr1mRnMKn4o84HVJislgYs,14499
5
5
  geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
6
6
  geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
@@ -11,21 +11,21 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
11
11
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
12
12
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
13
13
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
14
- geney/oncosplice.py,sha256=6s0aW6vXtD-z7yToFBcFCS5M_npoQe4tgdf4g5TuR2o,23465
14
+ geney/oncosplice.py,sha256=q1W8k4nvRzQPH7LZsefTky6Nw2Kmx-DDXZ_UCty0Wog,24299
15
15
  geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
16
16
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
17
17
  geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
18
18
  geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
19
- geney/splicing_utils.py,sha256=TQsRhEegW4SW6t7dghHQ5vGgn9WdioTcai6EzPPcdKM,38485
19
+ geney/splicing_utils.py,sha256=WflxRPfc4DzeHuYOZqjpa-YD1nuZzs7h_WCsv-LX87A,38498
20
20
  geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
21
21
  geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
22
- geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
22
+ geney/tis_utils.py,sha256=la0CZroaKe5RgAyFd4Bf_DqQncklWgAY2823xVst98o,7813
23
23
  geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
24
24
  geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
26
26
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
27
27
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
28
- geney-1.3.43.dist-info/METADATA,sha256=kqekeyuXWKLb40n_ShUhUqBqqZAyIob5vEDY8dgiSxI,990
29
- geney-1.3.43.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
- geney-1.3.43.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
- geney-1.3.43.dist-info/RECORD,,
28
+ geney-1.3.45.dist-info/METADATA,sha256=5YI3G03swzoNav06ijsq6XG8aLMzWlSCEZnY7Y4b3MM,990
29
+ geney-1.3.45.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
30
+ geney-1.3.45.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
31
+ geney-1.3.45.dist-info/RECORD,,
File without changes