PyPI - geney - Versions diffs - 1.2.5__py2.py3-none-any.whl → 1.2.6__py2.py3-none-any.whl - Mend

geney 1.2.5__py2.py3-none-any.whl → 1.2.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

geney/oncosplice.py CHANGED Viewed

@@ -545,7 +545,7 @@ class Transcript:
     def generate_mature_mrna_pos(self, reset=True):
         if reset:
             pre_seq_pos, pre_indices_pos = self.generate_pre_mrna_pos()
-            self.pre_mrna, _ = self.__pos2sense(pre_seq_pos, pre_indices_pos)
+            self.pre_mrna, self.pre_indices = self.__pos2sense(pre_seq_pos, pre_indices_pos)
         else:
             pre_seq_pos, pre_indices_pos = self.__sense2pos(self.pre_mrna, self.pre_indices)
@@ -725,7 +725,17 @@ def pang_one_hot_encode(seq):
     seq = np.asarray(list(map(int, list(seq))))
     return IN_MAP[seq.astype('int8')]
+def get_pos_seq_indices(t):
+    seq, indices = t.pre_mrna, t.pre_indices
+    if t.rev:
+        return reverse_complement(seq), indices[::-1]
+    else:
+        return seq, indices
 def pangolin_predict_probs(true_seq, models):
+    # print(f"Running pangolin on: {true_seq}")
     model_nums = [0, 2, 4, 6]
     INDEX_MAP = {0: 1, 1: 2, 2: 4, 3: 5, 4: 7, 5: 8, 6: 10, 7: 11}
@@ -754,42 +764,36 @@ def pangolin_predict_probs(true_seq, models):
     acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
     return donor_probs[5000:-5000], acceptor_probs[5000:-5000]
-def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, min_coverage=2500, sai_threshold=0.5, engine='spliceai'):
+def find_transcript_missplicing(mutations, ref_transcript, var_transcript, context=7500, threshold=0.5,
+                                engine='spliceai'):
     positions = mutations.positions
     end_positions = [m.start + len(m.ref) for m in mutations.variants]
     positions.extend(end_positions)
+    center = int(np.mean(positions) // 1)
+    seq_start_pos, seq_end_pos = center - context, center + context
+    transcript_start, transcript_end, rev = ref_transcript.transcript_lower, ref_transcript.transcript_upper, ref_transcript.rev
+    # Generate reference sequence data
+    ref_seq, ref_indices = get_pos_seq_indices(ref_transcript)
+    center_index = ref_indices.index(center)
+    start_cutoff = ref_indices.index(seq_start_pos) if seq_start_pos in ref_indices else 0
+    end_cutoff = ref_indices.index(seq_end_pos) if seq_end_pos in ref_indices else len(ref_indices)
+    start_pad, end_pad = max(0, context - (center_index - start_cutoff)), max(0, context - (end_cutoff - center_index))
+    ref_seq = 'N' * start_pad + ref_seq[start_cutoff:end_cutoff] + 'N' * end_pad
+    ref_indices = [-1] * start_pad + ref_indices[start_cutoff:end_cutoff] + [-1] * end_pad
+    # Generate mutation sequence data
+    mut_seq, mut_indices = get_pos_seq_indices(var_transcript)
+    start_cutoff = mut_indices.index(seq_start_pos) if seq_start_pos in mut_indices else 0
+    end_cutoff = mut_indices.index(seq_end_pos) if seq_end_pos in mut_indices else len(mut_indices)
+    start_pad, end_pad = max(0, context - (center_index - start_cutoff)), max(0, context - (end_cutoff - center_index))
+    mut_seq = 'N' * start_pad + mut_seq[start_cutoff:end_cutoff] + 'N' * end_pad
+    mut_indices = [-1] * start_pad + mut_indices[start_cutoff:end_cutoff] + [-1] * end_pad
-    seq_start_pos = min(positions) - sai_mrg_context - min_coverage
-    seq_end_pos = max(positions) + sai_mrg_context + min_coverage
-    fasta_obj = Fasta_segment()
-    ref_seq, ref_indices = fasta_obj.read_segment_endpoints(
-        config_setup[transcript_data.organism]['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
-        seq_start_pos,
-        seq_end_pos)
-    transcript_start, transcript_end, rev = transcript_data.transcript_lower, transcript_data.transcript_upper, transcript_data.rev
-    # visible_donors = np.intersect1d(transcript_data.donors, ref_indices)
-    # visible_acceptors = np.intersect1d(transcript_data.acceptors, ref_indices)
-    start_pad = ref_indices.index(transcript_start) if transcript_start in ref_indices else 0
-    end_cutoff = ref_indices.index(transcript_end) if transcript_end in ref_indices else len(ref_indices)
-    end_pad = len(ref_indices) - end_cutoff
-    ref_seq = 'N' * start_pad + ref_seq[start_pad:end_cutoff] + 'N' * end_pad
-    ref_indices = [-1] * start_pad + ref_indices[start_pad:end_cutoff] + [-1] * end_pad
-    mut_seq, mut_indices = ref_seq, ref_indices
-    for mut in mutations:
-        mut_seq, mut_indices = generate_mut_variant(seq=mut_seq, indices=mut_indices, mut=mut)
-    ref_indices = ref_indices[sai_mrg_context:-sai_mrg_context]
-    mut_indices = mut_indices[sai_mrg_context:-sai_mrg_context]
     copy_mut_indices = mut_indices.copy()
-    visible_donors = np.intersect1d(transcript_data.donors, ref_indices)
-    visible_acceptors = np.intersect1d(transcript_data.acceptors, ref_indices)
     if rev:
         ref_seq = reverse_complement(ref_seq)
         mut_seq = reverse_complement(mut_seq)
@@ -801,21 +805,28 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
         mut_seq_probs_temp = sai_predict_probs(mut_seq, sai_models)
         ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
         mut_seq_acceptor_probs, mut_seq_donor_probs = mut_seq_probs_temp[0, :], mut_seq_probs_temp[1, :]
+        ref_indices, mut_indices = ref_indices[5000:-5000], mut_indices[5000:-5000]
     elif engine == 'pangolin':
-        ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models=pang_models)
-        mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models=pang_models)
+        ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, models=pang_models)
+        mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, models=pang_models)
+        ref_indices, mut_indices = ref_indices[5000:-5000], mut_indices[5000:-5000]
     else:
         raise ValueError(f"{engine} not implemented")
+    visible_donors = np.intersect1d(ref_transcript.donors, ref_indices)
+    visible_acceptors = np.intersect1d(ref_transcript.acceptors, ref_indices)
+    # print(ref_indices.index(visible_donors[0]), ref_seq_donor_probs[ref_indices.index(visible_donors[0])], mut_seq_donor_probs[mut_indices.index(visible_donors[0])])
     assert len(ref_indices) == len(ref_seq_acceptor_probs), 'Reference pos not the same'
     assert len(mut_indices) == len(mut_seq_acceptor_probs), 'Mut pos not the same'
     iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
                                {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
                                visible_acceptors,
-                               threshold=sai_threshold)
+                               threshold=threshold)
     assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
     assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
@@ -823,13 +834,15 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
     idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
                                {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
                                visible_donors,
-                               threshold=sai_threshold)
+                               threshold=threshold)
     ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
     ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
-    lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in visible_acceptors if p not in mut_indices and p not in dap}
-    lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors if p not in mut_indices and p not in ddp}
+    lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in
+                      visible_acceptors if p not in mut_indices and p not in dap}
+    lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors
+                   if p not in mut_indices and p not in ddp}
     dap.update(lost_acceptors)
     ddp.update(lost_donors)
@@ -838,6 +851,90 @@ def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, mi
     return {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
+# def run_spliceai_transcript(mutations, transcript_data, sai_mrg_context=5000, min_coverage=2500, sai_threshold=0.5, engine='spliceai'):
+#     positions = mutations.positions
+#     end_positions = [m.start + len(m.ref) for m in mutations.variants]
+#     positions.extend(end_positions)
+#
+#     seq_start_pos = min(positions) - sai_mrg_context - min_coverage
+#     seq_end_pos = max(positions) + sai_mrg_context + min_coverage
+#
+#     fasta_obj = Fasta_segment()
+#     ref_seq, ref_indices = fasta_obj.read_segment_endpoints(
+#         config_setup[transcript_data.organism]['CHROM_SOURCE'] / f'chr{mutations.chrom}.fasta',
+#         seq_start_pos,
+#         seq_end_pos)
+#
+#     transcript_start, transcript_end, rev = transcript_data.transcript_lower, transcript_data.transcript_upper, transcript_data.rev
+#
+#     # visible_donors = np.intersect1d(transcript_data.donors, ref_indices)
+#     # visible_acceptors = np.intersect1d(transcript_data.acceptors, ref_indices)
+#
+#     start_pad = ref_indices.index(transcript_start) if transcript_start in ref_indices else 0
+#     end_cutoff = ref_indices.index(transcript_end) if transcript_end in ref_indices else len(ref_indices)
+#     end_pad = len(ref_indices) - end_cutoff
+#     ref_seq = 'N' * start_pad + ref_seq[start_pad:end_cutoff] + 'N' * end_pad
+#     ref_indices = [-1] * start_pad + ref_indices[start_pad:end_cutoff] + [-1] * end_pad
+#     mut_seq, mut_indices = ref_seq, ref_indices
+#
+#     for mut in mutations:
+#         mut_seq, mut_indices = generate_mut_variant(seq=mut_seq, indices=mut_indices, mut=mut)
+#
+#     ref_indices = ref_indices[sai_mrg_context:-sai_mrg_context]
+#     mut_indices = mut_indices[sai_mrg_context:-sai_mrg_context]
+#     copy_mut_indices = mut_indices.copy()
+#
+#     visible_donors = np.intersect1d(transcript_data.donors, ref_indices)
+#     visible_acceptors = np.intersect1d(transcript_data.acceptors, ref_indices)
+#
+#     if rev:
+#         ref_seq = reverse_complement(ref_seq)
+#         mut_seq = reverse_complement(mut_seq)
+#         ref_indices = ref_indices[::-1]
+#         mut_indices = mut_indices[::-1]
+#
+#     if engine == 'spliceai':
+#         ref_seq_probs_temp = sai_predict_probs(ref_seq, sai_models)
+#         mut_seq_probs_temp = sai_predict_probs(mut_seq, sai_models)
+#         ref_seq_acceptor_probs, ref_seq_donor_probs = ref_seq_probs_temp[0, :], ref_seq_probs_temp[1, :]
+#         mut_seq_acceptor_probs, mut_seq_donor_probs = mut_seq_probs_temp[0, :], mut_seq_probs_temp[1, :]
+#
+#     elif engine == 'pangolin':
+#         ref_seq_donor_probs, ref_seq_acceptor_probs = pangolin_predict_probs(ref_seq, pangolin_models=pang_models)
+#         mut_seq_donor_probs, mut_seq_acceptor_probs = pangolin_predict_probs(mut_seq, pangolin_models=pang_models)
+#
+#     else:
+#         raise ValueError(f"{engine} not implemented")
+#
+#     assert len(ref_indices) == len(ref_seq_acceptor_probs), 'Reference pos not the same'
+#     assert len(mut_indices) == len(mut_seq_acceptor_probs), 'Mut pos not the same'
+#
+#     iap, dap = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_acceptor_probs))},
+#                                {p: v for p, v in list(zip(mut_indices, mut_seq_acceptor_probs))},
+#                                visible_acceptors,
+#                                threshold=sai_threshold)
+#
+#     assert len(ref_indices) == len(ref_seq_donor_probs), 'Reference pos not the same'
+#     assert len(mut_indices) == len(mut_seq_donor_probs), 'Mut pos not the same'
+#
+#     idp, ddp = find_ss_changes({p: v for p, v in list(zip(ref_indices, ref_seq_donor_probs))},
+#                                {p: v for p, v in list(zip(mut_indices, mut_seq_donor_probs))},
+#                                visible_donors,
+#                                threshold=sai_threshold)
+#
+#     ref_acceptors = {a: b for a, b in list(zip(ref_indices, ref_seq_acceptor_probs))}
+#     ref_donors = {a: b for a, b in list(zip(ref_indices, ref_seq_donor_probs))}
+#
+#     lost_acceptors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_acceptors[p]), 3)} for p in visible_acceptors if p not in mut_indices and p not in dap}
+#     lost_donors = {int(p): {'absolute': np.float64(0), 'delta': round(float(-ref_donors[p]), 3)} for p in visible_donors if p not in mut_indices and p not in ddp}
+#     dap.update(lost_acceptors)
+#     ddp.update(lost_donors)
+#
+#     missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
+#     missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
+#     return {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
 # def run_spliceai(mutations, gene_data, sai_mrg_context=5000, min_coverage=2500, sai_threshold=0.5):
 #     positions = mutations.positions
 #     seq_start_pos = min(positions) - sai_mrg_context - min_coverage

{geney-1.2.5.dist-info → geney-1.2.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geney
-Version: 1.2.5
+Version: 1.2.6
 Summary: A Python package for gene expression modeling.
 Home-page: https://github.com/nicolaslynn/geney
 Author: Nicolas Lynn

{geney-1.2.5.dist-info → geney-1.2.6.dist-info}/RECORD RENAMED Viewed

@@ -9,7 +9,7 @@ geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
 geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
-geney/oncosplice.py,sha256=9oZs9W_bI6O5h3284WvatkerhSCaxMZWfs1xVc1lJO0,71524
+geney/oncosplice.py,sha256=aaOxri0bXLPfB3Mu8tkzk4KEVlzzocImzj3MI-0uU_0,76949
 geney/oncosplice_mouse.py,sha256=LYLOukI9qI1IBkyl1qVRFR5d1NAw7Orlj8Zth-4xCW8,12962
 geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
 geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
@@ -45,7 +45,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
 geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-geney-1.2.5.dist-info/METADATA,sha256=9UptuZVJWZvVN6Y9KgPUxrC4gnijFVW4CtkkESxrY9E,1198
-geney-1.2.5.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
-geney-1.2.5.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
-geney-1.2.5.dist-info/RECORD,,
+geney-1.2.6.dist-info/METADATA,sha256=hcHNz2oNxRLzNpimwrHeM2yKSnd1z5KHJ_Rl86LD3QE,1198
+geney-1.2.6.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
+geney-1.2.6.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.2.6.dist-info/RECORD,,

{geney-1.2.5.dist-info → geney-1.2.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{geney-1.2.5.dist-info → geney-1.2.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

geney 1.2.5__py2.py3-none-any.whl → 1.2.6__py2.py3-none-any.whl

geney 1.2.5__py2.py3-none-any.whl → 1.2.6__py2.py3-none-any.whl