geney 1.2.1__py2.py3-none-any.whl → 1.2.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/oncosplice.py CHANGED
@@ -210,7 +210,7 @@ def generate_mut_variant(seq: str, indices: list, mut: Mutation):
210
210
 
211
211
 
212
212
  class Gene:
213
- def __init__(self, gene_name, variation=None):
213
+ def __init__(self, gene_name, variation=None, organism='hg38'):
214
214
  self.gene_name = gene_name
215
215
  self.gene_id = ''
216
216
  self.rev = None
@@ -218,9 +218,10 @@ class Gene:
218
218
  self.gene_start = 0
219
219
  self.gene_end = 0
220
220
  self.transcripts = {}
221
- self.load_from_file(find_files_by_gene_name(gene_name))
221
+ self.load_from_file(find_files_by_gene_name(gene_name, organism=organism))
222
222
  self.variations = variation
223
223
  self.primary_tid = None
224
+ self.organism = organism
224
225
  tids = [k for k, v in self.transcripts.items() if v['primary_transcript'] and v['transcript_biotype'] == 'protein_coding']
225
226
  if tids:
226
227
  self.primary_tid = tids[0]
@@ -279,11 +280,11 @@ class Gene:
279
280
  if protein_coding and annotations['transcript_biotype'] != 'protein_coding':
280
281
  continue
281
282
 
282
- yield Transcript(self.transcripts[tid], variations=self.variations)
283
+ yield Transcript(self.transcripts[tid], variations=self.variations, organism=self.organism)
283
284
 
284
285
 
285
286
  class Transcript:
286
- def __init__(self, d=None, variations=None):
287
+ def __init__(self, d=None, variations=None, organism='hg38'):
287
288
  self.transcript_id = None
288
289
  self.transcript_start = None # transcription
289
290
  self.transcript_end = None # transcription
@@ -304,6 +305,7 @@ class Transcript:
304
305
  self.cons_seq = ''
305
306
  self.cons_vector = ''
306
307
  self.variations = None
308
+ self.organism = organism
307
309
  if variations:
308
310
  self.variations = Variations(variations)
309
311
 
@@ -508,7 +510,7 @@ class Transcript:
508
510
  # Related to transcript seq generation
509
511
  def pull_pre_mrna_pos(self):
510
512
  fasta_obj = Fasta_segment()
511
- return fasta_obj.read_segment_endpoints(config_setup['CHROM_SOURCE'] / f'chr{self.chrm}.fasta',
513
+ return fasta_obj.read_segment_endpoints(config_setup[self.organism]['CHROM_SOURCE'] / f'chr{self.chrm}.fasta',
512
514
  self.transcript_lower,
513
515
  self.transcript_upper)
514
516
 
@@ -762,7 +764,7 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
762
764
 
763
765
  fasta_obj = Fasta_segment()
764
766
  ref_seq, ref_indices = fasta_obj.read_segment_endpoints(
765
- config_setup['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
767
+ config_setup[transcript_data.organism]['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
766
768
  seq_start_pos,
767
769
  seq_end_pos)
768
770
 
@@ -916,7 +918,7 @@ class PredictSpliceAI:
916
918
  self.modification = mutation
917
919
  self.threshold = threshold
918
920
  self.transcript_id = gene_data.transcript_id
919
- self.spliceai_db = config_setup['MISSPLICING_PATH'] / f'spliceai_epistatic'
921
+ self.spliceai_db = config_setup[gene_data.organism]['MISSPLICING_PATH'] / f'spliceai_epistatic'
920
922
  self.missplicing = {}
921
923
 
922
924
  if self.prediction_file_exists() and not force: # need to do a check for the filename length
geney/utils.py CHANGED
@@ -16,9 +16,9 @@ def is_monotonic(A):
16
16
 
17
17
 
18
18
 
19
- def available_genes():
19
+ def available_genes(organism='hg38'):
20
20
  from geney import config_setup
21
- annotation_path = config_setup['MRNA_PATH'] / 'protein_coding'
21
+ annotation_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
22
22
  return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
23
23
 
24
24
  def contains(a, x):
@@ -54,7 +54,7 @@ def dump_pickle(file_path, payload):
54
54
 
55
55
  def find_files_by_gene_name(gene_name, organism='hg38'):
56
56
  from geney import config_setup
57
- mrna_path = config_setup['MRNA_PATH'] / organism / 'protein_coding'
57
+ mrna_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
58
58
  matching_files = [f for f in mrna_path.glob(f'*_{gene_name}.pkl')]
59
59
  if len(matching_files) > 1:
60
60
  print(f"Multiple files available ({[f.name for f in matching_files]}).")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -9,7 +9,7 @@ geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
9
9
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
10
10
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
11
11
  geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
12
- geney/oncosplice.py,sha256=PzeQFy8k2xCSIl07kY19rGZ6U5ljyrJ0REC_Qgf-IN0,71582
12
+ geney/oncosplice.py,sha256=_uQ13nuOFNNVsl_7EzCmJtC9WLbzX9zEcqoA7PvkQWY,71786
13
13
  geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
14
14
  geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
15
15
  geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
@@ -18,7 +18,7 @@ geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
18
18
  geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
19
19
  geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
20
20
  geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
21
- geney/utils.py,sha256=CgQQ8sy5g7g75cy-NEgYprink8a6pUreBgs-BhpyJt8,2012
21
+ geney/utils.py,sha256=xJi7fk3g7DkR2rKOb8WePLQNM1ib83rcHecwRdwd5lA,2036
22
22
  geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
24
24
  geney/analyzers/characterize_epistasis.py,sha256=MvcYQMRwZ-qqlX9mn41vmr0Uxb5dIrrcaE3oiZMTYm8,648
@@ -45,7 +45,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
45
45
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
46
46
  geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- geney-1.2.1.dist-info/METADATA,sha256=HgySFYXNAhqjuBEJU8_I5oSKq2Rf6v-0hNIQmaqKbEE,1198
49
- geney-1.2.1.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
50
- geney-1.2.1.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
- geney-1.2.1.dist-info/RECORD,,
48
+ geney-1.2.2.dist-info/METADATA,sha256=M-hFK8XG5ZbWtH0gsZvGgDBH18l6iGuDKT8xOCGYDSs,1198
49
+ geney-1.2.2.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
50
+ geney-1.2.2.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
51
+ geney-1.2.2.dist-info/RECORD,,
File without changes