geney 1.2.30__py2.py3-none-any.whl → 1.2.31__py2.py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/oncosplice.py CHANGED
@@ -415,20 +415,29 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
415
415
  return report
416
416
 
417
417
 
418
+ import asyncio
419
+ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai'):
420
+ import sys, os
421
+ from pathlib import Path
422
+ needed_path = Path('/tamir2/yoramzar/Projects/Cancer_mut/Utils')
423
+ needed_file1 = needed_path / 'rest_api_utils.py'
424
+ needed_file2 = needed_path / 'uniprot_utils.py'
425
+
426
+ if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.X_OK)) and (needed_file2.is_file() and os.access(needed_file2, os.X_OK)):
427
+ sys.path.append(str(needed_path))
428
+ import uniprot_utils as uput
418
429
 
430
+ else:
431
+ raise SystemError("Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
419
432
 
420
- def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
421
- import requests
422
- import threading
433
+ # Define async functions
434
+ async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
435
+ return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
423
436
 
424
- def background_request(url, result):
425
- return {'data': 'success'}
426
437
 
427
438
  gene = Gene(mut_id.split(':')[0], organism=organism)
428
-
429
- domains = {}
430
- request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
431
- request_thread.start()
439
+ # request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
440
+ # request_thread.start()
432
441
 
433
442
  mutation = get_mutation(mut_id, rev=gene.rev)
434
443
 
@@ -441,6 +450,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
441
450
  results.append({'transcript_id': transcript.transcript_id})
442
451
  continue
443
452
 
453
+ task1 = asyncio.create_task(background_request(transcript.transcript_id))
454
+
444
455
  transcript.generate_pre_mrna()
445
456
  transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
446
457
  transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
@@ -451,7 +462,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
451
462
 
452
463
  missplicing = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine), threshold=splicing_threshold)
453
464
  transcript.pre_mrna += mutation
454
-
465
+ result1 = await task1
466
+ print(result1)
455
467
  for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
456
468
  transcript.acceptors = new_boundaries['acceptors']
457
469
  transcript.donors = new_boundaries['donors']
geney/pangolin_utils.py CHANGED
@@ -73,7 +73,8 @@ def pangolin_predict_probs(true_seq, models):
73
73
 
74
74
  scores.append(np.mean(score, axis=0))
75
75
 
76
- splicing_pred = np.array(scores).max(axis=0)
76
+ # splicing_pred = np.array(scores).max(axis=0)
77
+ splicing_pred = np.array(scores).mean(axis=0)
77
78
  donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
78
79
  acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
79
80
  # print(acceptor_probs)
geney/seqmat_utils.py CHANGED
@@ -140,6 +140,19 @@ class SeqMat:
140
140
  end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
141
141
  return self.seqmat[:, start_pos:end_pos]
142
142
 
143
+ def asymmetric_subseq(self, center, left_context, right_context, padding='$'):
144
+ center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
145
+ start_idx = center_idx - left_context
146
+ end_idx = center_idx + right_context + 1 # +1 because end index in slicing is exclusive
147
+ left_padding = max(0, -start_idx)
148
+ right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
149
+ valid_start_idx = max(0, start_idx)
150
+ valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
151
+ valid_subseq = self.seq[valid_start_idx:valid_end_idx]
152
+ padded_subseq = (padding * left_padding) + valid_subseq + (padding * right_padding)
153
+ return padded_subseq
154
+
155
+
143
156
  def subseq_suffix(self, start):
144
157
  start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
145
158
  return self.seqmat[:, start_pos:]
@@ -166,7 +179,7 @@ class SeqMat:
166
179
  # if seq_length % 3 != 0:
167
180
  # temp.seqmat = temp.seqmat[:, :-(seq_length % 3)] # Trim the extra nucleotides
168
181
 
169
- if temp.seq[:3] == 'ATG':
182
+ if temp.seq[1:3] == 'TG':
170
183
  for i in range(3, len(temp.seq), 3):
171
184
  codon = temp.seq[i:i + 3]
172
185
  if codon in ['TAA', 'TAG', 'TGA']:
@@ -179,6 +192,11 @@ class SeqMat:
179
192
  else:
180
193
  return SeqMat('ATG')
181
194
 
195
+ def translate(self, tis_index):
196
+ from Bio import Seq
197
+ return Seq(self.orf_seqmat(tis_index).seq).translate()
198
+
199
+
182
200
  class Gene:
183
201
  def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
184
202
  gene_files = list((config[organism]['MRNA_PATH'] / 'protein_coding').glob(f'*_{gene_name}.pkl'))
@@ -419,7 +437,11 @@ class Transcript:
419
437
 
420
438
  def generate_protein(self, inplace=True, domains=None):
421
439
  protein = str(Seq(self.orf.seq).translate()).replace('*', '')
422
- cons_vector = self.cons_vector
440
+ if hasattr(self, 'cons_vector'):
441
+ cons_vector = self.cons_vector
442
+ else:
443
+ cons_vector = np.ones(len(protein))
444
+
423
445
  if domains is not None and np.all(np.isin(domains, np.arange(0, len(protein)))):
424
446
  all_indices = np.arange(cons_vector.size)
425
447
  mask = ~np.isin(all_indices, domains)
geney/utils.py CHANGED
@@ -16,10 +16,11 @@ def is_monotonic(A):
16
16
 
17
17
 
18
18
  def available_genes(organism='hg38'):
19
- from geney import config_setup
20
- annotation_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
19
+ from geney import config
20
+ annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
21
21
  return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
22
22
 
23
+
23
24
  def contains(a, x):
24
25
  """returns true if sorted sequence `a` contains `x`"""
25
26
  i = bisect_left(a, x)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.2.30
3
+ Version: 1.2.31
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -6,20 +6,20 @@ geney/graphic_utils.py,sha256=tjm6IDQ1BdfSeuPYzjlqAUHFQoDYH9jXTzJjKFS4Hh4,11078
6
6
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
7
7
  geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
8
8
  geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
9
- geney/oncosplice.py,sha256=zOOLdY_9tHpwwMQxTS358MCUlhQ6x-XmwlwlIrqwIwc,20902
10
- geney/pangolin_utils.py,sha256=MP4wGgiw36NAPfwpaXJ5mD4Q-DTbkL3xHcSlYtuZODw,2939
9
+ geney/oncosplice.py,sha256=7wf0_-Gkc_G9HhUXjORHk3buZ66JzVzSFVQ4EZOtUAE,21787
10
+ geney/pangolin_utils.py,sha256=ETTGpuaQgdZ1v8H0NP8sbTEfGWu0VXUFUS7wsURsTc4,2991
11
11
  geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
12
- geney/seqmat_utils.py,sha256=fawiPa4PPhmbx6wPynt8SG6eowZKUZ2yN32r6B8Ba-g,16802
12
+ geney/seqmat_utils.py,sha256=jTW2LZEAh5ftgWp0sblq_6AfI21uC4_bPu6zxvLhPWA,17755
13
13
  geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
14
14
  geney/splicing_utils.py,sha256=q47EdcsHrp4aLIPVWvkGBJSzS3l3DKiD9DNDsPpZdHk,16075
15
15
  geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
16
16
  geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
17
- geney/utils.py,sha256=WbV1DBllQyvzoDiYkidRiTX5MBpQGr99M4hTUQ0BKo8,2185
17
+ geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
18
18
  geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
20
20
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
21
21
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
22
- geney-1.2.30.dist-info/METADATA,sha256=T8wma9mdUQjDbYAvMviVcdGPFJId-piDzxGpIVdcXMo,948
23
- geney-1.2.30.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
- geney-1.2.30.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
- geney-1.2.30.dist-info/RECORD,,
22
+ geney-1.2.31.dist-info/METADATA,sha256=vRYAE-ITb5427-B42evqLtx6LOrfhZ7wlGONrlje9vo,948
23
+ geney-1.2.31.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
24
+ geney-1.2.31.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
25
+ geney-1.2.31.dist-info/RECORD,,
File without changes