PyPI - geney - Versions diffs - 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl - Mend

geney 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

geney/oncosplice.py +22 -10
geney/pangolin_utils.py +2 -1
geney/seqmat_utils.py +34 -2
geney/utils.py +3 -2
{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/METADATA +1 -1
{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/RECORD +8 -8
{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/WHEEL +0 -0
{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/top_level.txt +0 -0

geney/oncosplice.py CHANGED Viewed

@@ -415,20 +415,29 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
     return report
+import asyncio
+async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai'):
+    import sys, os
+    from pathlib import Path
+    needed_path = Path('/tamir2/yoramzar/Projects/Cancer_mut/Utils')
+    needed_file1 = needed_path / 'rest_api_utils.py'
+    needed_file2 = needed_path / 'uniprot_utils.py'
+    if sys.platform == 'linux' and (needed_file1.is_file() and os.access(needed_file1, os.X_OK)) and (needed_file2.is_file() and os.access(needed_file2, os.X_OK)):
+        sys.path.append(str(needed_path))
+        import uniprot_utils as uput
+    else:
+        raise SystemError("Oncosplice Prototype can only be run on Power with access to the /tamir2/yoramzar/Projects/Cancer_mut/Utils folder.")
-def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
-    import requests
-    import threading
+    # Define async functions
+    async def background_request(ensb_id, Uniprot_features=["Topological domain", "Transmembrane", "Domain"]):
+        return uput.retrieve_protein_data_features_subset(uput.ensembl_id2uniprot_id(ensb_id), Uniprot_features)
-    def background_request(url, result):
-        return {'data': 'success'}
     gene = Gene(mut_id.split(':')[0], organism=organism)
-    domains = {}
-    request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
-    request_thread.start()
+    # request_thread = threading.Thread(target=background_request, args=(gene.transcript_ids, domains))
+    # request_thread.start()
     mutation = get_mutation(mut_id, rev=gene.rev)
@@ -441,6 +450,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
             results.append({'transcript_id': transcript.transcript_id})
             continue
+        task1 = asyncio.create_task(background_request(transcript.transcript_id))
         transcript.generate_pre_mrna()
         transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
         transcript.generate_mature_mrna().generate_protein(inplace=True, domains=domains)
@@ -451,7 +462,8 @@ def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, pr
         missplicing = Missplicing(find_transcript_missplicing(transcript, mutation, engine=engine), threshold=splicing_threshold)
         transcript.pre_mrna += mutation
+        result1 = await task1
+        print(result1)
         for i, new_boundaries in enumerate(develop_aberrant_splicing(transcript, missplicing.aberrant_splicing)):
             transcript.acceptors = new_boundaries['acceptors']
             transcript.donors = new_boundaries['donors']

geney/pangolin_utils.py CHANGED Viewed

@@ -73,7 +73,8 @@ def pangolin_predict_probs(true_seq, models):
         scores.append(np.mean(score, axis=0))
-    splicing_pred = np.array(scores).max(axis=0)
+    # splicing_pred = np.array(scores).max(axis=0)
+    splicing_pred = np.array(scores).mean(axis=0)
     donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
     acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
     # print(acceptor_probs)

geney/seqmat_utils.py CHANGED Viewed

@@ -140,6 +140,29 @@ class SeqMat:
         end_pos = np.where(self.seqmat[self.ROW_INDS] == end)[0][0] + 1
         return self.seqmat[:, start_pos:end_pos]
+    def asymmetric_subseq(self, center, left_context, right_context, padding='$'):
+        center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
+        start_idx = center_idx - left_context
+        end_idx = center_idx + right_context + 1  # +1 because end index in slicing is exclusive
+        left_padding = max(0, -start_idx)
+        right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
+        valid_start_idx = max(0, start_idx)
+        valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
+        valid_subseq = self.seq[valid_start_idx:valid_end_idx]
+        padded_subseq = (padding * left_padding) + valid_subseq + (padding * right_padding)
+        return padded_subseq
+    def asymmetric_indices(self, center, left_context, right_context):
+        center_idx = np.where(self.seqmat[self.ROW_INDS] == center)[0][0]
+        start_idx = center_idx - left_context
+        end_idx = center_idx + right_context + 1  # +1 because end index in slicing is exclusive
+        left_padding = max(0, -start_idx)
+        right_padding = max(0, end_idx - len(self.seqmat[self.ROW_INDS]))
+        valid_start_idx = max(0, start_idx)
+        valid_end_idx = min(len(self.seqmat[self.ROW_INDS]), end_idx)
+        valid_subseq = self.indices[valid_start_idx:valid_end_idx]
+        return valid_subseq
     def subseq_suffix(self, start):
         start_pos = np.where(self.seqmat[self.ROW_INDS] == start)[0][0]
         return self.seqmat[:, start_pos:]
@@ -166,7 +189,7 @@ class SeqMat:
         # if seq_length % 3 != 0:
         #     temp.seqmat = temp.seqmat[:, :-(seq_length % 3)]  # Trim the extra nucleotides
-        if temp.seq[:3] == 'ATG':
+        if temp.seq[1:3] == 'TG':
             for i in range(3, len(temp.seq), 3):
                 codon = temp.seq[i:i + 3]
                 if codon in ['TAA', 'TAG', 'TGA']:
@@ -179,6 +202,11 @@ class SeqMat:
         else:
             return SeqMat('ATG')
+    def translate(self, tis_index):
+        from Bio import Seq
+        return Seq(self.orf_seqmat(tis_index).seq).translate()
 class Gene:
     def __init__(self, gene_name='KRAS', variation=None, organism='hg38'):
         gene_files = list((config[organism]['MRNA_PATH'] / 'protein_coding').glob(f'*_{gene_name}.pkl'))
@@ -419,7 +447,11 @@ class Transcript:
     def generate_protein(self, inplace=True, domains=None):
         protein = str(Seq(self.orf.seq).translate()).replace('*', '')
-        cons_vector = self.cons_vector
+        if hasattr(self, 'cons_vector'):
+            cons_vector = self.cons_vector
+        else:
+            cons_vector = np.ones(len(protein))
         if domains is not None and np.all(np.isin(domains, np.arange(0, len(protein)))):
             all_indices = np.arange(cons_vector.size)
             mask = ~np.isin(all_indices, domains)

geney/utils.py CHANGED Viewed

@@ -16,10 +16,11 @@ def is_monotonic(A):
 def available_genes(organism='hg38'):
-    from geney import config_setup
-    annotation_path = config_setup[organism]['MRNA_PATH'] / 'protein_coding'
+    from geney import config
+    annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
     return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
 def contains(a, x):
     """returns true if sorted sequence `a` contains `x`"""
     i = bisect_left(a, x)

{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geney
-Version: 1.2.30
+Version: 1.2.32
 Summary: A Python package for gene expression modeling.
 Home-page: https://github.com/nicolaslynn/geney
 Author: Nicolas Lynn

{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/RECORD RENAMED Viewed

@@ -6,20 +6,20 @@ geney/graphic_utils.py,sha256=tjm6IDQ1BdfSeuPYzjlqAUHFQoDYH9jXTzJjKFS4Hh4,11078
 geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
 geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
-geney/oncosplice.py,sha256=zOOLdY_9tHpwwMQxTS358MCUlhQ6x-XmwlwlIrqwIwc,20902
-geney/pangolin_utils.py,sha256=MP4wGgiw36NAPfwpaXJ5mD4Q-DTbkL3xHcSlYtuZODw,2939
+geney/oncosplice.py,sha256=7wf0_-Gkc_G9HhUXjORHk3buZ66JzVzSFVQ4EZOtUAE,21787
+geney/pangolin_utils.py,sha256=ETTGpuaQgdZ1v8H0NP8sbTEfGWu0VXUFUS7wsURsTc4,2991
 geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
-geney/seqmat_utils.py,sha256=fawiPa4PPhmbx6wPynt8SG6eowZKUZ2yN32r6B8Ba-g,16802
+geney/seqmat_utils.py,sha256=TDWhE5oVTGJceaO6YmE7I_BEWRxWLT74_3rkmY1M0Fs,18368
 geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
 geney/splicing_utils.py,sha256=q47EdcsHrp4aLIPVWvkGBJSzS3l3DKiD9DNDsPpZdHk,16075
 geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
 geney/tcga_utils.py,sha256=vXSMf1OxoF_AdE_rMguy_BoYaart_E1t4FFMx2DS1Ak,15585
-geney/utils.py,sha256=WbV1DBllQyvzoDiYkidRiTX5MBpQGr99M4hTUQ0BKo8,2185
+geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
 geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/translation_initiation/tis_utils.py,sha256=iXrWVijyPe-f8I9rEVGdxNnXBrOGPoKFjmvaOEnQYNE,4446
 geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
-geney-1.2.30.dist-info/METADATA,sha256=T8wma9mdUQjDbYAvMviVcdGPFJId-piDzxGpIVdcXMo,948
-geney-1.2.30.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
-geney-1.2.30.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
-geney-1.2.30.dist-info/RECORD,,
+geney-1.2.32.dist-info/METADATA,sha256=aHeSBHWq3b1li4G_CI2ClUEHJc5SfWHowqKrkZbQPGk,948
+geney-1.2.32.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
+geney-1.2.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.2.32.dist-info/RECORD,,

{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/WHEEL RENAMED Viewed

File without changes

{geney-1.2.30.dist-info → geney-1.2.32.dist-info}/top_level.txt RENAMED Viewed

File without changes

geney 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl

geney 1.2.30__py2.py3-none-any.whl → 1.2.32__py2.py3-none-any.whl