geney 1.2.4__py2.py3-none-any.whl → 1.2.5__py2.py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/oncosplice.py +15 -20
- geney/power_utils.py +3 -3
- geney/tcga_utils.py +2 -2
- {geney-1.2.4.dist-info → geney-1.2.5.dist-info}/METADATA +1 -1
- {geney-1.2.4.dist-info → geney-1.2.5.dist-info}/RECORD +7 -7
- {geney-1.2.4.dist-info → geney-1.2.5.dist-info}/WHEEL +0 -0
- {geney-1.2.4.dist-info → geney-1.2.5.dist-info}/top_level.txt +0 -0
geney/oncosplice.py
CHANGED
|
@@ -35,7 +35,7 @@ import torch
|
|
|
35
35
|
from pkg_resources import resource_filename
|
|
36
36
|
from pangolin.model import *
|
|
37
37
|
|
|
38
|
-
pang_model_nums = [0, 2, 4, 6]
|
|
38
|
+
pang_model_nums = [0, 1, 2, 3, 4, 5, 6]
|
|
39
39
|
pang_models = []
|
|
40
40
|
for i in pang_model_nums:
|
|
41
41
|
for j in range(1, 6):
|
|
@@ -752,7 +752,7 @@ def pangolin_predict_probs(true_seq, models):
|
|
|
752
752
|
splicing_pred = np.array(scores).max(axis=0)
|
|
753
753
|
donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
|
|
754
754
|
acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
|
|
755
|
-
return donor_probs, acceptor_probs
|
|
755
|
+
return donor_probs[5000:-5000], acceptor_probs[5000:-5000]
|
|
756
756
|
|
|
757
757
|
def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, min_coverage=2500, sai_threshold=0.5, engine='spliceai'):
|
|
758
758
|
positions = mutations.positions
|
|
@@ -803,9 +803,11 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
|
|
|
803
803
|
mut_seq_acceptor_probs, mut_seq_donor_probs = mut_seq_probs_temp[0, :], mut_seq_probs_temp[1, :]
|
|
804
804
|
|
|
805
805
|
elif engine == 'pangolin':
|
|
806
|
-
ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models)
|
|
807
|
-
mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models)
|
|
806
|
+
ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models=pang_models)
|
|
807
|
+
mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models=pang_models)
|
|
808
808
|
|
|
809
|
+
else:
|
|
810
|
+
raise ValueError(f"{engine} not implemented")
|
|
809
811
|
|
|
810
812
|
assert len(ref_indices) == len(ref_seq_acceptor_probs), 'Reference pos not the same'
|
|
811
813
|
assert len(mut_indices) == len(mut_seq_acceptor_probs), 'Mut pos not the same'
|
|
@@ -1400,18 +1402,15 @@ def moving_average_conv(vector, window_size, factor=1):
|
|
|
1400
1402
|
|
|
1401
1403
|
return np.convolve(vector, np.ones(window_size), mode='same') / window_size
|
|
1402
1404
|
|
|
1403
|
-
def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcript=False,
|
|
1405
|
+
def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, save_spliceai_results=False, force_spliceai=False, organism='hg38'):
|
|
1404
1406
|
mutation = Variations(mut_id)
|
|
1405
|
-
try:
|
|
1406
|
-
|
|
1407
|
-
except FileNotFoundError:
|
|
1408
|
-
|
|
1407
|
+
# try:
|
|
1408
|
+
reference_gene = Gene(mutation.gene, organism=organism)
|
|
1409
|
+
# except FileNotFoundError:
|
|
1410
|
+
# return pd.DataFrame()
|
|
1409
1411
|
|
|
1410
|
-
|
|
1411
|
-
mutated_gene = Gene(mutation.gene, mut_id)
|
|
1412
|
-
# if not per_transcript_missplicing:
|
|
1413
|
-
# missplicing_obj = PredictSpliceAI(mutation, reference_gene, threshold=sai_threshold, force=True, save_results=False)
|
|
1414
|
-
# missplicing = missplicing_obj.missplicing
|
|
1412
|
+
reference_gene_proteins = {g.protein: g.transcript_id for g in reference_gene.run_transcripts()}
|
|
1413
|
+
mutated_gene = Gene(mutation.gene, mut_id, organism=organism)
|
|
1415
1414
|
|
|
1416
1415
|
results = []
|
|
1417
1416
|
for variant in mutated_gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
|
|
@@ -1420,10 +1419,9 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
|
|
|
1420
1419
|
continue
|
|
1421
1420
|
|
|
1422
1421
|
cons_vector = transform_conservation_vector(reference.cons_vector, window=window_length)
|
|
1423
|
-
# if per_transcript_missplicing:
|
|
1424
1422
|
missplicing_obj = PredictSpliceAI(mutation, reference, threshold=sai_threshold, force=force_spliceai, save_results=save_spliceai_results)
|
|
1425
1423
|
missplicing = missplicing_obj.apply_sai_threshold_primary(threshold=sai_threshold)
|
|
1426
|
-
|
|
1424
|
+
|
|
1427
1425
|
for i, new_boundaries in enumerate(develop_aberrant_splicing(variant, missplicing)):
|
|
1428
1426
|
variant_isoform = deepcopy(variant)
|
|
1429
1427
|
variant_isoform.reset_acceptors(acceptors=new_boundaries['acceptors']).reset_donors(donors=new_boundaries['donors']).organize().generate_protein()
|
|
@@ -1432,9 +1430,6 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
|
|
|
1432
1430
|
modified_positions = find_modified_positions(len(reference.protein), deleted, inserted)
|
|
1433
1431
|
temp_cons = np.convolve(cons_vector * modified_positions, np.ones(window_length)) / window_length
|
|
1434
1432
|
affected_cons_scores = max(temp_cons)
|
|
1435
|
-
# temp_cons = np.convolve(cons_vector, np.ones(window_length))
|
|
1436
|
-
# print(temp_cons)
|
|
1437
|
-
# print(cons_vector)
|
|
1438
1433
|
percentile = (
|
|
1439
1434
|
sorted(cons_vector).index(next(x for x in sorted(cons_vector) if x >= affected_cons_scores)) / len(
|
|
1440
1435
|
cons_vector))
|
|
@@ -1449,7 +1444,7 @@ def oncosplice(mut_id, sai_threshold=0.5, protein_coding=True, primary_transcrip
|
|
|
1449
1444
|
report['isoform_prevalence'] = new_boundaries['path_weight']
|
|
1450
1445
|
report['full_missplicing'] = missplicing
|
|
1451
1446
|
report['missplicing'] = max(missplicing_obj)
|
|
1452
|
-
report['reference_resemblance'] =
|
|
1447
|
+
report['reference_resemblance'] = reference_gene_proteins.get(variant_isoform.protein, None)
|
|
1453
1448
|
results.append(report)
|
|
1454
1449
|
|
|
1455
1450
|
report = pd.DataFrame(results)
|
geney/power_utils.py
CHANGED
|
@@ -38,7 +38,7 @@ def write_executors(folder_path, script='geney.power_utils', input_file='/tamir2
|
|
|
38
38
|
|
|
39
39
|
def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
|
|
40
40
|
walltime="24:00:00", dashboard_address=":23154",
|
|
41
|
-
log_directory="dask-logs", slurm=False):
|
|
41
|
+
log_directory="dask-logs", slurm=False, organism='hg38'):
|
|
42
42
|
"""
|
|
43
43
|
Launch a Dask cluster using PBS.
|
|
44
44
|
|
|
@@ -63,7 +63,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
|
|
|
63
63
|
walltime='7200',
|
|
64
64
|
scheduler_options={"dashboard_address": dashboard_address},
|
|
65
65
|
log_directory=log_directory,
|
|
66
|
-
job_script_prologue=[f"cd {config_setup['BASE']}"]
|
|
66
|
+
job_script_prologue=[f"cd {config_setup[organism]['BASE']}"]
|
|
67
67
|
)
|
|
68
68
|
|
|
69
69
|
else:
|
|
@@ -75,7 +75,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
|
|
|
75
75
|
walltime=walltime,
|
|
76
76
|
scheduler_options={"dashboard_address": dashboard_address},
|
|
77
77
|
log_directory=log_directory,
|
|
78
|
-
job_script_prologue=[f"cd {config_setup['BASE']}"]
|
|
78
|
+
job_script_prologue=[f"cd {config_setup[organism]['BASE']}"]
|
|
79
79
|
)
|
|
80
80
|
|
|
81
81
|
dask_cluster.scale(num_workers)
|
geney/tcga_utils.py
CHANGED
|
@@ -363,8 +363,8 @@ class TCGAGene:
|
|
|
363
363
|
# return cases
|
|
364
364
|
#
|
|
365
365
|
#
|
|
366
|
-
|
|
367
|
-
|
|
366
|
+
def create_mut_id(row):
|
|
367
|
+
return f"{row.Gene_name}:{row['Chromosome']}:{row['Start_Position']}:{row['Reference_Allele']}:{row['Tumor_Seq_Allele2']}"
|
|
368
368
|
#
|
|
369
369
|
#
|
|
370
370
|
# def is_in_exon(mut_id, tid):
|
|
@@ -9,15 +9,15 @@ geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
|
9
9
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
10
10
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
11
11
|
geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
|
|
12
|
-
geney/oncosplice.py,sha256=
|
|
12
|
+
geney/oncosplice.py,sha256=9oZs9W_bI6O5h3284WvatkerhSCaxMZWfs1xVc1lJO0,71524
|
|
13
13
|
geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
|
|
14
14
|
geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
|
|
15
15
|
geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
|
|
16
|
-
geney/power_utils.py,sha256=
|
|
16
|
+
geney/power_utils.py,sha256=nppfT1-bOC1dnvfRs55LipjoWDlRrOqWiuCMH0v1auU,7303
|
|
17
17
|
geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
|
|
18
18
|
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
19
19
|
geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
|
|
20
|
-
geney/tcga_utils.py,sha256=
|
|
20
|
+
geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
|
|
21
21
|
geney/utils.py,sha256=xJi7fk3g7DkR2rKOb8WePLQNM1ib83rcHecwRdwd5lA,2036
|
|
22
22
|
geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
|
|
@@ -45,7 +45,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
|
|
|
45
45
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
46
46
|
geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
geney-1.2.
|
|
49
|
-
geney-1.2.
|
|
50
|
-
geney-1.2.
|
|
51
|
-
geney-1.2.
|
|
48
|
+
geney-1.2.5.dist-info/METADATA,sha256=9UptuZVJWZvVN6Y9KgPUxrC4gnijFVW4CtkkESxrY9E,1198
|
|
49
|
+
geney-1.2.5.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
|
|
50
|
+
geney-1.2.5.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
51
|
+
geney-1.2.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|