geney 1.2.4__py2.py3-none-any.whl → 1.2.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/oncosplice.py CHANGED
@@ -35,7 +35,7 @@ import torch
35
35
  from pkg_resources import resource_filename
36
36
  from pangolin.model import *
37
37
 
38
- pang_model_nums = [0, 2, 4, 6]
38
+ pang_model_nums = [0, 1, 2, 3, 4, 5, 6]
39
39
  pang_models = []
40
40
  for i in pang_model_nums:
41
41
  for j in range(1, 6):
@@ -752,7 +752,7 @@ def pangolin_predict_probs(true_seq, models):
752
752
  splicing_pred = np.array(scores).max(axis=0)
753
753
  donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
754
754
  acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
755
- return donor_probs, acceptor_probs
755
+ return donor_probs[5000:-5000], acceptor_probs[5000:-5000]
756
756
 
757
757
  def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, min_coverage=2500, sai_threshold=0.5, engine='spliceai'):
758
758
  positions = mutations.positions
@@ -803,9 +803,11 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
803
803
  mut_seq_acceptor_probs, mut_seq_donor_probs = mut_seq_probs_temp[0, :], mut_seq_probs_temp[1, :]
804
804
 
805
805
  elif engine == 'pangolin':
806
- ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models)
807
- mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models)
806
+ ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models=pang_models)
807
+ mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models=pang_models)
808
808
 
809
+ else:
810
+ raise ValueError(f"{engine} not implemented")
809
811
 
810
812
  assert len(ref_indices) == len(ref_seq_acceptor_probs), 'Reference pos not the same'
811
813
  assert len(mut_indices) == len(mut_seq_acceptor_probs), 'Mut pos not the same'
@@ -1400,18 +1402,15 @@ def moving_average_conv(vector, window_size, factor=1):
1400
1402
 
1401
1403
  return np.convolve(vector, np.ones(window_size), mode='same') / window_size
1402
1404
 
1403
- def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcript=False, per_transcript_missplicing=False, window_length=13, save_spliceai_results=False, force_spliceai=False):
1405
+ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, save_spliceai_results=False, force_spliceai=False, organism='hg38'):
1404
1406
  mutation = Variations(mut_id)
1405
- try:
1406
- reference_gene = Gene(mutation.gene)
1407
- except FileNotFoundError:
1408
- return pd.DataFrame()
1407
+ # try:
1408
+ reference_gene = Gene(mutation.gene, organism=organism)
1409
+ # except FileNotFoundError:
1410
+ # return pd.DataFrame()
1409
1411
 
1410
- reference_gene_proteines = {g.protein: g.transcript_id for g in reference_gene.run_transcripts()}
1411
- mutated_gene = Gene(mutation.gene, mut_id)
1412
- # if not per_transcript_missplicing:
1413
- # missplicing_obj = PredictSpliceAI(mutation, reference_gene, threshold=sai_threshold, force=True, save_results=False)
1414
- # missplicing = missplicing_obj.missplicing
1412
+ reference_gene_proteins = {g.protein: g.transcript_id for g in reference_gene.run_transcripts()}
1413
+ mutated_gene = Gene(mutation.gene, mut_id, organism=organism)
1415
1414
 
1416
1415
  results = []
1417
1416
  for variant in mutated_gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
@@ -1420,10 +1419,9 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
1420
1419
  continue
1421
1420
 
1422
1421
  cons_vector = transform_conservation_vector(reference.cons_vector, window=window_length)
1423
- # if per_transcript_missplicing:
1424
1422
  missplicing_obj = PredictSpliceAI(mutation, reference, threshold=sai_threshold, force=force_spliceai, save_results=save_spliceai_results)
1425
1423
  missplicing = missplicing_obj.apply_sai_threshold_primary(threshold=sai_threshold)
1426
- # print(missplicing)
1424
+
1427
1425
  for i, new_boundaries in enumerate(develop_aberrant_splicing(variant, missplicing)):
1428
1426
  variant_isoform = deepcopy(variant)
1429
1427
  variant_isoform.reset_acceptors(acceptors=new_boundaries['acceptors']).reset_donors(donors=new_boundaries['donors']).organize().generate_protein()
@@ -1432,9 +1430,6 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
1432
1430
  modified_positions = find_modified_positions(len(reference.protein), deleted, inserted)
1433
1431
  temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
1434
1432
  affected_cons_scores = max(temp_cons)
1435
- # temp_cons = np.convolve(cons_vector, np.ones(window_length))
1436
- # print(temp_cons)
1437
- # print(cons_vector)
1438
1433
  percentile = (
1439
1434
  sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
1440
1435
  cons_vector))
@@ -1449,7 +1444,7 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
1449
1444
  report['isoform_prevalence'] = new_boundaries['path_weight']
1450
1445
  report['full_missplicing'] = missplicing
1451
1446
  report['missplicing'] = max(missplicing_obj)
1452
- report['reference_resemblance'] = reference_gene_proteines.get(variant_isoform.protein, None)
1447
+ report['reference_resemblance'] = reference_gene_proteins.get(variant_isoform.protein, None)
1453
1448
  results.append(report)
1454
1449
 
1455
1450
  report = pd.DataFrame(results)
geney/power_utils.py CHANGED
@@ -38,7 +38,7 @@ def write_executors(folder_path, script='geney.power_utils', input_file='/tamir2
38
38
 
39
39
  def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
40
40
  walltime="24:00:00", dashboard_address=":23154",
41
- log_directory="dask-logs", slurm=False):
41
+ log_directory="dask-logs", slurm=False, organism='hg38'):
42
42
  """
43
43
  Launch a Dask cluster using PBS.
44
44
 
@@ -63,7 +63,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
63
63
  walltime='7200',
64
64
  scheduler_options={"dashboard_address": dashboard_address},
65
65
  log_directory=log_directory,
66
- job_script_prologue=[f"cd {config_setup['BASE']}"]
66
+ job_script_prologue=[f"cd {config_setup[organism]['BASE']}"]
67
67
  )
68
68
 
69
69
  else:
@@ -75,7 +75,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
75
75
  walltime=walltime,
76
76
  scheduler_options={"dashboard_address": dashboard_address},
77
77
  log_directory=log_directory,
78
- job_script_prologue=[f"cd {config_setup['BASE']}"]
78
+ job_script_prologue=[f"cd {config_setup[organism]['BASE']}"]
79
79
  )
80
80
 
81
81
  dask_cluster.scale(num_workers)
geney/tcga_utils.py CHANGED
@@ -363,8 +363,8 @@ class TCGAGene:
363
363
  # return cases
364
364
  #
365
365
  #
366
- # def create_mut_id(row):
367
- # return f"{row.Gene_name}:{row['Chromosome']}:{row['Start_Position']}:{row['Reference_Allele']}:{row['Tumor_Seq_Allele2']}"
366
+ def create_mut_id(row):
367
+ return f"{row.Gene_name}:{row['Chromosome']}:{row['Start_Position']}:{row['Reference_Allele']}:{row['Tumor_Seq_Allele2']}"
368
368
  #
369
369
  #
370
370
  # def is_in_exon(mut_id, tid):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.2.4
3
+ Version: 1.2.5
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -9,15 +9,15 @@ geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
9
9
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
10
10
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
11
11
  geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
12
- geney/oncosplice.py,sha256=xDzCLivFyurx-qlQo9cyrV-9KJ9VykYAb8lY9DDWl7Q,71810
12
+ geney/oncosplice.py,sha256=9oZs9W_bI6O5h3284WvatkerhSCaxMZWfs1xVc1lJO0,71524
13
13
  geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
14
14
  geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
15
15
  geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
16
- geney/power_utils.py,sha256=GtEvKAbz34S-ILQST6tabt3g0M4L8_aa50HIAQZ7byM,7266
16
+ geney/power_utils.py,sha256=nppfT1-bOC1dnvfRs55LipjoWDlRrOqWiuCMH0v1auU,7303
17
17
  geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
18
18
  geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
19
19
  geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
20
- geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
20
+ geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
21
21
  geney/utils.py,sha256=xJi7fk3g7DkR2rKOb8WePLQNM1ib83rcHecwRdwd5lA,2036
22
22
  geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
@@ -45,7 +45,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
45
45
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
46
46
  geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- geney-1.2.4.dist-info/METADATA,sha256=0oE3AHzIGNnpkmPiPK0wXpDRuxZbA6ZcdvpOloz82bQ,1198
49
- geney-1.2.4.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
50
- geney-1.2.4.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
- geney-1.2.4.dist-info/RECORD,,
48
+ geney-1.2.5.dist-info/METADATA,sha256=9UptuZVJWZvVN6Y9KgPUxrC4gnijFVW4CtkkESxrY9E,1198
49
+ geney-1.2.5.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
50
+ geney-1.2.5.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
+ geney-1.2.5.dist-info/RECORD,,
File without changes