geney 1.2.1__py2.py3-none-any.whl → 1.2.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
geney/oncosplice.py
CHANGED
|
@@ -210,7 +210,7 @@ def generate_mut_variant(seq: str, indices: list, mut: Mutation):
|
|
|
210
210
|
|
|
211
211
|
|
|
212
212
|
class Gene:
|
|
213
|
-
def __init__(self, gene_name, variation=None):
|
|
213
|
+
def __init__(self, gene_name, variation=None, organism='hg38'):
|
|
214
214
|
self.gene_name = gene_name
|
|
215
215
|
self.gene_id = ''
|
|
216
216
|
self.rev = None
|
|
@@ -218,9 +218,10 @@ class Gene:
|
|
|
218
218
|
self.gene_start = 0
|
|
219
219
|
self.gene_end = 0
|
|
220
220
|
self.transcripts = {}
|
|
221
|
-
self.load_from_file(find_files_by_gene_name(gene_name))
|
|
221
|
+
self.load_from_file(find_files_by_gene_name(gene_name, organism=organism))
|
|
222
222
|
self.variations = variation
|
|
223
223
|
self.primary_tid = None
|
|
224
|
+
self.organism = organism
|
|
224
225
|
tids = [k for k, v in self.transcripts.items() if v['primary_transcript'] and v['transcript_biotype'] == 'protein_coding']
|
|
225
226
|
if tids:
|
|
226
227
|
self.primary_tid = tids[0]
|
|
@@ -279,11 +280,11 @@ class Gene:
|
|
|
279
280
|
if protein_coding and annotations['transcript_biotype'] != 'protein_coding':
|
|
280
281
|
continue
|
|
281
282
|
|
|
282
|
-
yield Transcript(self.transcripts[tid], variations=self.variations)
|
|
283
|
+
yield Transcript(self.transcripts[tid], variations=self.variations, organism=self.organism)
|
|
283
284
|
|
|
284
285
|
|
|
285
286
|
class Transcript:
|
|
286
|
-
def __init__(self, d=None, variations=None):
|
|
287
|
+
def __init__(self, d=None, variations=None, organism='hg38'):
|
|
287
288
|
self.transcript_id = None
|
|
288
289
|
self.transcript_start = None # transcription
|
|
289
290
|
self.transcript_end = None # transcription
|
|
@@ -304,6 +305,7 @@ class Transcript:
|
|
|
304
305
|
self.cons_seq = ''
|
|
305
306
|
self.cons_vector = ''
|
|
306
307
|
self.variations = None
|
|
308
|
+
self.organism = organism
|
|
307
309
|
if variations:
|
|
308
310
|
self.variations = Variations(variations)
|
|
309
311
|
|
|
@@ -508,7 +510,7 @@ class Transcript:
|
|
|
508
510
|
# Related to transcript seq generation
|
|
509
511
|
def pull_pre_mrna_pos(self):
|
|
510
512
|
fasta_obj = Fasta_segment()
|
|
511
|
-
return fasta_obj.read_segment_endpoints(config_setup['CHROM_SOURCE'] / f'chr{self.chrm}.fasta',
|
|
513
|
+
return fasta_obj.read_segment_endpoints(config_setup[self.organism]['CHROM_SOURCE'] / f'chr{self.chrm}.fasta',
|
|
512
514
|
self.transcript_lower,
|
|
513
515
|
self.transcript_upper)
|
|
514
516
|
|
|
@@ -762,7 +764,7 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
|
|
|
762
764
|
|
|
763
765
|
fasta_obj = Fasta_segment()
|
|
764
766
|
ref_seq, ref_indices = fasta_obj.read_segment_endpoints(
|
|
765
|
-
config_setup['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
|
|
767
|
+
config_setup[transcript_data.organism]['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
|
|
766
768
|
seq_start_pos,
|
|
767
769
|
seq_end_pos)
|
|
768
770
|
|
|
@@ -916,7 +918,7 @@ class PredictSpliceAI:
|
|
|
916
918
|
self.modification = mutation
|
|
917
919
|
self.threshold = threshold
|
|
918
920
|
self.transcript_id = gene_data.transcript_id
|
|
919
|
-
self.spliceai_db = config_setup['MISSPLICING_PATH'] / f'spliceai_epistatic'
|
|
921
|
+
self.spliceai_db = config_setup[gene_data.organism]['MISSPLICING_PATH'] / f'spliceai_epistatic'
|
|
920
922
|
self.missplicing = {}
|
|
921
923
|
|
|
922
924
|
if self.prediction_file_exists() and not force: # need to do a check for the filename length
|
geney/utils.py
CHANGED
|
@@ -16,9 +16,9 @@ def is_monotonic(A):
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def available_genes():
|
|
19
|
+
def available_genes(organism='hg38'):
|
|
20
20
|
from geney import config_setup
|
|
21
|
-
annotation_path = config_setup['MRNA_PATH'] / 'protein_coding'
|
|
21
|
+
annotation_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
|
|
22
22
|
return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
|
|
23
23
|
|
|
24
24
|
def contains(a, x):
|
|
@@ -54,7 +54,7 @@ def dump_pickle(file_path, payload):
|
|
|
54
54
|
|
|
55
55
|
def find_files_by_gene_name(gene_name, organism='hg38'):
|
|
56
56
|
from geney import config_setup
|
|
57
|
-
mrna_path = config_setup['MRNA_PATH'] /
|
|
57
|
+
mrna_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
|
|
58
58
|
matching_files = [f for f in mrna_path.glob(f'*_{gene_name}.pkl')]
|
|
59
59
|
if len(matching_files) > 1:
|
|
60
60
|
print(f"Multiple files available ({[f.name for f in matching_files]}).")
|
|
@@ -9,7 +9,7 @@ geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
|
9
9
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
10
10
|
geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
|
|
11
11
|
geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
|
|
12
|
-
geney/oncosplice.py,sha256=
|
|
12
|
+
geney/oncosplice.py,sha256=_uQ13nuOFNNVsl_7EzCmJtC9WLbzX9zEcqoA7PvkQWY,71786
|
|
13
13
|
geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
|
|
14
14
|
geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
|
|
15
15
|
geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
|
|
@@ -18,7 +18,7 @@ geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
|
|
|
18
18
|
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
19
19
|
geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
|
|
20
20
|
geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
|
|
21
|
-
geney/utils.py,sha256=
|
|
21
|
+
geney/utils.py,sha256=xJi7fk3g7DkR2rKOb8WePLQNM1ib83rcHecwRdwd5lA,2036
|
|
22
22
|
geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
|
|
24
24
|
geney/analyzers/characterize_epistasis.py,sha256=MvcYQMRwZ-qqlX9mn41vmr0Uxb5dIrrcaE3oiZMTYm8,648
|
|
@@ -45,7 +45,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
|
|
|
45
45
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
46
46
|
geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
geney-1.2.
|
|
49
|
-
geney-1.2.
|
|
50
|
-
geney-1.2.
|
|
51
|
-
geney-1.2.
|
|
48
|
+
geney-1.2.2.dist-info/METADATA,sha256=M-hFK8XG5ZbWtH0gsZvGgDBH18l6iGuDKT8xOCGYDSs,1198
|
|
49
|
+
geney-1.2.2.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
|
|
50
|
+
geney-1.2.2.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
51
|
+
geney-1.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|