PyPI - geney - Versions diffs - 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl - Mend

geney 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of geney might be problematic. Click here for more details.

Files changed (6)

geney/allele_linkage.py ADDED Viewed

File without changes

geney/splicing_utils.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from .Gene import Gene
 from .SeqMats import MutSeqMat
 from collections import defaultdict
@@ -300,7 +301,7 @@ def find_transcript_missplicing(mut_id, transcript=None, threshold=0.5, engine='
     # missplicing = {'missed_acceptors': dap, 'missed_donors': ddp, 'discovered_acceptors': iap, 'discovered_donors': idp}
     # missplicing = {outk: {float(k): v for k, v in outv.items()} for outk, outv in missplicing.items()}
     # temp =  {outk: {int(k) if k.is_integer() else k: v for k, v in outv.items()} for outk, outv in missplicing.items()}
-    return temp
+    # return temp
 def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, threshold=0.5, engine='spliceai'):
@@ -351,6 +352,91 @@ def find_transcript_missplicing_seqs(ref_seq, var_seq, donors, acceptors, thresh
     return Missplicing(temp, threshold=threshold)
+def process_pairwise_epistasis(mids, engine='pangolin', fprint=False):
+    results = []
+    for mid in mids:
+        m1, m2 = mid.split('|')
+        missplicing1 = find_transcript_missplicing(m1, threshold=0.25, engine=engine)
+        missplicing2 = find_transcript_missplicing(m2, threshold=0.25, engine=engine)
+        missplicing_both = find_transcript_missplicing(mid, threshold=0.25, engine=engine)
+        if fprint:
+            print(missplicing1)
+            print(missplicing2)
+            print(missplicing_both)
+        max_delta = 0
+        flag = {'mut_id': mid, 'corrective': 0, 'cummulative': 0}
+        cummulative_deltas, corrective_deltas = [], []
+        ms1, ms2, msb = [], [], []
+        increased_canonical_splicing = False
+        events = ['missed_acceptors', 'missed_donors', 'discovered_acceptors', 'discovered_donors']
+        for event in events:
+            m1, m2, mb = missplicing1[event], missplicing2[event], missplicing_both[event]
+            # Get all keys from the three dicts
+            keys = set(m1.keys()) | set(m2.keys()) | set(mb.keys())
+            for k in keys:
+                # Determine reference value
+                ref_val = (
+                        m1.get(k, {}).get('reference') or
+                        m2.get(k, {}).get('reference') or
+                        mb.get(k, {}).get('reference')
+                )
+                # Compute deltas
+                delta1 = m1.get(k, {'delta': 0})['delta'] # max(-1, min(10, m1.get(k, {'delta': 0})['delta'] / ref_val))
+                delta2 = m2.get(k, {'delta': 0})['delta'] # max(-1, min(10, m2.get(k, {'delta': 0})['delta'] / ref_val))
+                deltab = mb.get(k, {'delta': 0})['delta'] # max(-1, min(10, mb.get(k, {'delta': 0})['delta'] / ref_val))
+                ms1.append(delta1)
+                ms2.append(delta2)
+                msb.append(deltab)
+                # Corrective condition
+                # if ((abs(delta1) * 0.5 > abs(deltab) or abs(delta2) * 0.5 > abs(deltab)) and abs(deltab) == 0):
+                if (abs(deltab) < 0.25) and (
+                        (abs(delta1) > 0.25 and (abs(delta1 - deltab) > 0.25)) or
+                        (abs(delta2) > 0.25 and (abs(delta2 - deltab) > 0.25))
+                ):
+                    flag['corrective'] += 1
+                    corrective_deltas.append(max((deltab - delta1, deltab - delta2), key=abs))
+                if (abs(deltab) > 0.25) and (
+                        (abs(delta1) < 0.25 and (abs(delta1 - deltab) > 0.25)) or
+                        (abs(delta2) < 0.25 and (abs(delta2 - deltab) > 0.25))
+                ):
+                    flag['cummulative'] += 1
+                    cummulative_deltas.append(max((deltab - delta1, deltab - delta2), key=abs))
+                if ((0.25 <= ref_val) <= 0.75) and (
+                        ((delta1 > 0.25 or delta2 > 0.25) and deltab < 0.25) or
+                        (delta1 < 0.25 and delta2 < 0.25 and deltab > 0.25)
+                ) and (
+                        abs(delta1 - deltab) > 0.25 or
+                        abs(delta2 - deltab) > 0.25
+                ):
+                    increased_canonical_splicing = True
+        vals = cummulative_deltas + corrective_deltas
+        flag['max_delta'] = max(vals, key=abs) if vals else 0
+        flag['ms1'] = max(ms1, key=abs)
+        flag['ms2'] = max(ms2, key=abs)
+        flag['msb'] = max(msb, key=abs)
+        flag['increased_canonical_splicing'] = increased_canonical_splicing
+        result_df = pd.Series(flag).to_frame().T
+        results.append(result_df)
+    if len(results) == 0:
+        return None
+    return pd.concat(results)
 class Missplicing:
     def __init__(self, splicing_dict, threshold=0.5):
         """
@@ -390,6 +476,9 @@ class Missplicing:
             for d in details.values():
                 yield d['delta']
+    def __getitem__(self, key):
+        return self.missplicing[key]
     @property
     def aberrant_splicing(self):
         """
@@ -502,45 +591,83 @@ cursor = conn.cursor()
 # Create table once at startup, not in the function
 cursor.execute('''
 CREATE TABLE IF NOT EXISTS mutations (
-    tool TEXT,
+    engine TEXT,
     gene TEXT,
-    mutation_id TEXT,
+    mut_id TEXT,
     transcript_id TEXT,
     data TEXT,
-    PRIMARY KEY (tool, gene, mutation_id, transcript_id)
+    PRIMARY KEY (engine, gene, mut_id, transcript_id)
 )''')
-def get_or_compute_splicing(tool, gene, mutation_id, transcript_id, force_recompute=False):
+def get_splicing(engine, gene, mut_id, transcript_id, force_recompute=False):
     """
     Retrieve computed splicing data for a given mutation from a database,
-    or compute and store it if not found or if force_recompute is True.
     Args:
-        tool (str): Name of the tool used for computation.
+        engine (str): Name of the tool used for computation.
         gene (str): Gene name or identifier.
-        mutation_id (str): A unique identifier for the mutation.
+        mut_id (str): A unique identifier for the mutation.
         transcript_id (str): ID for the transcript.
         force_recompute (bool): If True, ignore cached value and recompute.
+    Returns:
+        dict: The splicing data.
+    """
+    # Lookup in the database
+    cursor.execute('SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
+                   (engine, gene, mut_id, transcript_id))
+    row = cursor.fetchone()
+    # If found and no force recompute, return cached data
+    if row:
+        return json.loads(row[0])
+    return None
+def save_splicing(engine, gene, mut_id, transcript_id, splicing):
+    data_json = json.dumps(convert_numpy_to_native(splicing))
+    cursor.execute('REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
+                   (engine, gene, mut_id, transcript_id, data_json))
+    return None
+def get_or_compute_splicing(mut_id, transcript_id=None, engine='spliceai', force_recompute=False):
+    """
+    Retrieve computed splicing data for a given mutation from a database,
+    or compute and store it if not found or if force_recompute is True.
+    Args:
+        engine (str): Name of the tool used for computation.
+        mut_id (str): A unique identifier for the mutation.
+        transcript_id (str): ID for the transcript.
+        force_recompute (bool): If True, ignore cached value and recompute.
     Returns:
         dict: The computed splicing data.
     """
+    gene = mut_id.split(':')[0]
+    if transcript_id is None:
+        transcript_id = Gene.from_file(gene).transcript().transcript_id
     # Lookup in the database
-    cursor.execute('SELECT data FROM mutations WHERE tool=? AND gene=? AND mutation_id=? AND transcript_id=?',
-                   (tool, gene, mutation_id, transcript_id))
+    cursor.execute('SELECT data FROM mutations WHERE engine=? AND gene=? AND mut_id=? AND transcript_id=?',
+                   (engine, gene, mut_id, transcript_id))
     row = cursor.fetchone()
     # If found and no force recompute, return cached data
     if row and not force_recompute:
         return json.loads(row[0])
     # Otherwise, compute the data
-    computed_data = find_transcript_missplicing(mutation_id, transcript_id=transcript_id, engine=tool)  # Replace with your actual function
+    computed_data = convert_numpy_to_native(find_transcript_missplicing(mut_id, transcript=transcript_id, engine=engine).missplicing) # Replace with your actual function
     # Store computed data in DB
     data_json = json.dumps(computed_data)
-    cursor.execute('REPLACE INTO mutations (tool, gene, mutation_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
-                   (tool, gene, mutation_id, transcript_id, data_json))
+    cursor.execute('REPLACE INTO mutations (engine, gene, mut_id, transcript_id, data) VALUES (?, ?, ?, ?, ?)',
+                   (engine, gene, mut_id, transcript_id, data_json))
+    return computed_data
-    return computed_data
+def convert_numpy_to_native(obj):
+    """
+    Recursively convert NumPy data types to native Python types.
+    """
+    if isinstance(obj, dict):
+        return {key: convert_numpy_to_native(value) for key, value in obj.items()}
+    elif isinstance(obj, list):
+        return [convert_numpy_to_native(item) for item in obj]
+    elif isinstance(obj, np.generic):  # Check for NumPy scalar types
+        return round(obj.item(), 3)
+    else:
+        return round(obj, 3)

{geney-1.3.9.dist-info → geney-1.3.11.dist-info}/METADATA RENAMED Viewed

@@ -1,12 +1,11 @@
 Metadata-Version: 2.1
 Name: geney
-Version: 1.3.9
+Version: 1.3.11
 Summary: A Python package for gene expression modeling.
 Home-page: https://github.com/nicolaslynn/geney
 Author: Nicolas Lynn
 Author-email: nicolasalynn@gmail.com
 License: Free for non-commercial use
-Platform: UNKNOWN
 Classifier: Development Status :: 1 - Planning
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: Free for non-commercial use
@@ -20,9 +19,9 @@ Requires-Dist: networkx
 Requires-Dist: viennarna
 Requires-Dist: tqdm
 Requires-Dist: spliceai
-Requires-Dist: biopython==1.81
-Requires-Dist: gtfparse==1.3.0
-Requires-Dist: sh==2.0.6
+Requires-Dist: biopython ==1.81
+Requires-Dist: gtfparse ==1.3.0
+Requires-Dist: sh ==2.0.6
 Requires-Dist: torch
 Requires-Dist: lifelines
 Requires-Dist: notebook
@@ -32,5 +31,3 @@ Requires-Dist: pyfastx
 Requires-Dist: tensorflow
 Requires-Dist: keras
-UNKNOWN

{geney-1.3.9.dist-info → geney-1.3.11.dist-info}/RECORD RENAMED Viewed

@@ -4,6 +4,7 @@ geney/SeqMats.py,sha256=jkXmXAs0OpnFeyCfiJcKKpHHSi9JpKgiOIwsu63e1CQ,18557
 geney/Transcript.py,sha256=eRZXVVxDVBbv0l385bnAOBFRBSzBwppXcbBq8KXkwlo,14443
 geney/__init__.py,sha256=eBdDl42N6UhcYeZDjOnv199Z88fI5_8Y6xW8447OKXM,755
 geney/_mutation_utils.py,sha256=dHssUsnii_mf-wuRoMmF13UlD7k3ml_VwQMItTYnXpU,1132
+geney/allele_linkage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/config_setup.py,sha256=nblcGU3HIt8YjdrAoGfbEVKRxwJKv0PikJ5-7AL6axQ,723
 geney/data_setup.py,sha256=2RHmuvcGUQbEglXQEZr0C2QPDTQYRZOEm0EcmyfQJgU,12229
 geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
@@ -15,7 +16,7 @@ geney/pangolin_utils.py,sha256=i5j5vEMCWOTIa1mRP2377BAhlUFZjHBzTQBips4lA_4,2934
 geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
 geney/seqmat_utils.py,sha256=wzb3PX5it5bpIFQvcxyzlxfhoJTbHHbsjg0rzh05iVs,19753
 geney/spliceai_utils.py,sha256=PFIhTK8Ihrj-cv5tgRN0UFPYEmC4uxtqXSP9bBLnZRM,3077
-geney/splicing_utils.py,sha256=YDhYtns6pVHmOfk4Z1AC-acA1kKAqKZmJ-P2EZS946k,26378
+geney/splicing_utils.py,sha256=Bj5YV-LHs684afjriep7N2QaRAAKdidFS-adihfDzfI,31887
 geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
 geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
 geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
@@ -24,7 +25,7 @@ geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
 geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
-geney-1.3.9.dist-info/METADATA,sha256=tkuruBdfjmOTkAz6lM3xZ_nAEXm5Vh4wpaP5FihlTvk,994
-geney-1.3.9.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
-geney-1.3.9.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
-geney-1.3.9.dist-info/RECORD,,
+geney-1.3.11.dist-info/METADATA,sha256=Por9VSaGxOGXy61knApS-3BefXSrh8hhQQQ0ULGQn2I,971
+geney-1.3.11.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
+geney-1.3.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.3.11.dist-info/RECORD,,

{geney-1.3.9.dist-info → geney-1.3.11.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py2-none-any
 Tag: py3-none-any

{geney-1.3.9.dist-info → geney-1.3.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

geney 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl

Potentially problematic release.

geney 1.3.9__py2.py3-none-any.whl → 1.3.11__py2.py3-none-any.whl