PyPI - geney - Versions diffs - 1.1.10__py2.py3-none-any.whl → 1.1.11__py2.py3-none-any.whl - Mend

geney 1.1.10py2.py3-none-any.whl → 1.1.11py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of geney might be problematic. Click here for more details.

Files changed (8) hide show

geney/immune_utils.py +185 -0
geney/power_utils.py +1 -0
geney/survival_utils.py +5 -3
geney/tcga_utils.py +0 -2
{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/METADATA +1 -1
{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/RECORD +8 -7
{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/WHEEL +0 -0
{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/top_level.txt +0 -0

geney/immune_utils.py ADDED Viewed

@@ -0,0 +1,185 @@
+import subprocess
+import logging
+import tempfile
+from geney import config_setup
+import pandas as pd
+class NetChop(object):
+    """
+    Wrapper around netChop tool. Assumes netChop is in your PATH.
+    """
+
+    def predict_epitopes(self, sequences, threshold=0.5, min_len=8):
+        """
+        Return netChop predictions for each position in each sequence.
+
+        Parameters
+        -----------
+        sequences : list of string
+            Amino acid sequences to predict cleavage for
+        threshold : float
+            Currently unused; kept so existing callers keep working.
+        min_len : int
+            Currently unused; kept so existing callers keep working.
+
+        Returns
+        -----------
+        list of list of float
+            The i'th list corresponds to the i'th sequence. Each list gives
+            the cleavage probability for each position in the sequence.
+
+        Raises
+        -----------
+        subprocess.CalledProcessError
+            If the ``netchop`` command exits with a non-zero status (the
+            error is logged before being re-raised).
+        ValueError
+            If the netChop output cannot be parsed.
+        """
+        # The FASTA input is created inside the directory configured under
+        # config_setup['NETCHOP'] and is deleted when the ``with`` block exits.
+        with tempfile.NamedTemporaryFile(
+                dir=config_setup['NETCHOP'], suffix=".fsa", mode="w") as input_fd:
+            for i, sequence in enumerate(sequences):
+                input_fd.write("> %d\n" % i)
+                input_fd.write(sequence)
+                input_fd.write("\n")
+            # Flush so the external process reads the complete file.
+            input_fd.flush()
+            try:
+                output = subprocess.check_output(["netchop", str(input_fd.name)])
+            except subprocess.CalledProcessError as e:
+                logging.error("Error calling netChop: %s:\n%s" % (e, e.output))
+                raise
+        return self.parse_netchop(output)
+
+    @staticmethod
+    def parse_netchop(netchop_output):
+        """
+        Parse netChop stdout.
+
+        Parameters
+        -----------
+        netchop_output : bytes
+            Raw stdout captured from netChop.
+
+        Returns
+        -----------
+        list of list of float
+            One list per score table found in the output; each entry is the
+            fourth whitespace-separated field of a table row.
+
+        Raises
+        -----------
+        ValueError
+            If a table header is not followed by a dashed line, if a row is
+            too short or its score is not a number, or if the output ends
+            before the closing dashed line of a table.
+        """
+        line_iterator = iter(netchop_output.decode().split("\n"))
+        scores = []
+        for line in line_iterator:
+            # A table starts at the header line naming the columns.
+            if not ("pos" in line and 'AA' in line and 'score' in line):
+                continue
+            table = []
+            scores.append(table)
+            try:
+                if "----" not in next(line_iterator):
+                    raise ValueError("Dashes expected")
+                line = next(line_iterator)
+                # Rows run until the closing dashed line.
+                while '-------' not in line:
+                    table.append(float(line.split()[3]))
+                    line = next(line_iterator)
+            except StopIteration:
+                # Truncated output: report it as a parse error instead of
+                # leaking a bare StopIteration to the caller.
+                raise ValueError(
+                    "Unexpected end of netChop output inside a score table") from None
+            except IndexError:
+                raise ValueError(
+                    "Malformed netChop score row: %r" % line) from None
+        return scores
+
+    def chop_protein(self, seq, pos):
+        """
+        Split ``seq`` after every position whose marker equals 1.
+
+        Parameters
+        -----------
+        seq : string
+            Sequence to split.
+        pos : iterable
+            One marker per position; a fragment ends (inclusive) at each
+            index whose marker compares equal to 1.
+
+        Returns
+        -----------
+        list of string
+            The consecutive fragments, including any remainder after the
+            last marked position.
+        """
+        subsequences = []
+        start = 0
+        for i, marker in enumerate(pos):
+            if marker == 1:
+                subsequences.append(seq[start:i + 1])
+                start = i + 1
+        # Keep whatever follows the last cut.
+        if start < len(seq):
+            subsequences.append(seq[start:])
+        return subsequences
+
+    def generate_cut_sequences(self, char_sequence, cut_probabilities):
+        """
+        Enumerate candidate fragments of a sequence with an abundance value.
+
+        Every slice ``char_sequence[i:j]`` is considered. A slice is kept
+        when the summed cut probability of its interior positions
+        (``i + 1`` up to ``j - 2``) is below 1. Its abundance is
+        ``cut_probabilities[i] * cut_probabilities[j - 1]`` minus that
+        interior sum.
+
+        :param char_sequence: A string representing the sequence of characters.
+        :param cut_probabilities: A list of probabilities for each position in the sequence.
+        :return: A pandas DataFrame with one row per kept fragment and the
+            columns ``seq`` and ``abundance``.
+        :raises ValueError: If the two inputs differ in length.
+        """
+        if len(char_sequence) != len(cut_probabilities):
+            raise ValueError("Character sequence and cut probabilities must have the same length.")
+        cut_sequences = []
+        length = len(char_sequence)
+        for i in range(length):
+            for j in range(i + 1, length + 1):
+                # Sum the interior once and reuse it for both the filter and
+                # the abundance value.
+                interior = sum(cut_probabilities[i + 1:j - 1])
+                if interior < 1:
+                    cut_sequences.append({
+                        'seq': char_sequence[i:j],
+                        'abundance': cut_probabilities[i] * cut_probabilities[j - 1] - interior,
+                    })
+        return pd.DataFrame(cut_sequences)
+from .base_commandline_predictor import BaseCommandlinePredictor
+from .parsing import parse_netmhc41_stdout
+from functools import partial
+class NetMHCpan41(BaseCommandlinePredictor):
+    """
+    Command-line predictor configured for the ``netMHCpan`` 4.1 program.
+    """
+
+    def __init__(
+            self,
+            alleles,
+            default_peptide_lengths=None,
+            program_name="netMHCpan",
+            process_limit=-1,
+            mode="binding_affinity",
+            extra_flags=None):
+        """
+        Wrapper for NetMHCpan4.1.
+        The mode argument should be one of "binding_affinity" (default) or
+        "elution_score".
+
+        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
+        to no extra flags. Both use ``None`` as the sentinel so that no
+        list object is shared between calls.
+
+        Raises ValueError if ``mode`` is not one of the two supported values.
+        """
+        if default_peptide_lengths is None:
+            default_peptide_lengths = [9]
+        if extra_flags is None:
+            extra_flags = []
+        # The -BA flag is required to predict binding affinity
+        if mode == "binding_affinity":
+            flags = ["-BA"]
+        elif mode == "elution_score":
+            flags = []
+        else:
+            raise ValueError("Unsupported mode", mode)
+        super().__init__(
+            program_name=program_name,
+            alleles=alleles,
+            default_peptide_lengths=default_peptide_lengths,
+            # Bind the mode so the parser is called with it.
+            parse_output_fn=partial(parse_netmhc41_stdout, mode=mode),
+            supported_alleles_flag="-listMHC",
+            input_file_flag="-f",
+            length_flag="-l",
+            allele_flag="-a",
+            # Mode-specific flags first, then the caller's extras.
+            extra_flags=flags + extra_flags,
+            process_limit=process_limit)
+class NetMHCpan41_EL(NetMHCpan41):
+    """
+    Wrapper for NetMHCpan4.1 when the preferred mode is elution score
+    """
+
+    def __init__(
+            self,
+            alleles,
+            default_peptide_lengths=None,
+            program_name="netMHCpan",
+            process_limit=-1,
+            extra_flags=None):
+        """
+        Same arguments as NetMHCpan41 except that ``mode`` is fixed to
+        "elution_score".
+
+        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
+        to no extra flags. ``None`` is the sentinel so that no list object
+        is shared between calls.
+        """
+        # Resolve the sentinels here so the parent always receives lists.
+        if default_peptide_lengths is None:
+            default_peptide_lengths = [9]
+        if extra_flags is None:
+            extra_flags = []
+        super().__init__(
+            alleles=alleles,
+            default_peptide_lengths=default_peptide_lengths,
+            program_name=program_name,
+            process_limit=process_limit,
+            mode="elution_score",
+            extra_flags=extra_flags)
+class NetMHCpan41_BA(NetMHCpan41):
+    """
+    Wrapper for NetMHCpan4.1 when the preferred mode is binding affinity
+    """
+
+    def __init__(
+            self,
+            alleles,
+            default_peptide_lengths=None,
+            program_name="netMHCpan",
+            process_limit=-1,
+            extra_flags=None):
+        """
+        Same arguments as NetMHCpan41 except that ``mode`` is fixed to
+        "binding_affinity".
+
+        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
+        to no extra flags. ``None`` is the sentinel so that no list object
+        is shared between calls.
+        """
+        # Resolve the sentinels here so the parent always receives lists.
+        if default_peptide_lengths is None:
+            default_peptide_lengths = [9]
+        if extra_flags is None:
+            extra_flags = []
+        super().__init__(
+            alleles=alleles,
+            default_peptide_lengths=default_peptide_lengths,
+            program_name=program_name,
+            process_limit=process_limit,
+            mode="binding_affinity",
+            extra_flags=extra_flags)

geney/power_utils.py CHANGED Viewed

@@ -65,6 +65,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
                 log_directory=log_directory,
                 job_script_prologue=[f"cd {config_setup['BASE']}"]
             )
         else:
             dask_cluster = PBSCluster(
                 cores=1,

geney/survival_utils.py CHANGED Viewed

@@ -82,13 +82,15 @@ class SurvivalAnalysis:
             kmf.fit(g['duration'], g['event'], label=label)
             surv_func = kmf.survival_function_
-            auc = trapz(surv_func[label], surv_func.index)
+            filtered_surv_func = surv_func[surv_func.index <= cap_time]
+            auc = np.trapz(filtered_surv_func[label], filtered_surv_func.index)
+            # auc = trapz(surv_func[label], surv_func.index)
             auc_vals.append(auc)
             if plot:
                 if count == 0:
-                    ax = kmf.plot()
+                    ax = kmf.plot_survival_function()
                 else:
-                    kmf.plot(ax=ax)
+                    kmf.plot_survival_function(ax=ax)
                 count += 1
         p_value = self.log_rank(df[df[feature] == 1], df[df[feature] == 0])

geney/tcga_utils.py CHANGED Viewed

@@ -157,12 +157,10 @@ class TCGAGene:
         # returns two lists: all patients affected by a mutation and all patients with none of the mutations (or the mutations but not together)
         pass
     def arrange_patients_by_project(self, mut_id):
         # returns all the patients affected by a mutation grouped by cancer project
         pass
     def total_prevalence(self, mut_id):
         pass

{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geney
-Version: 1.1.10
+Version: 1.1.11
 Summary: A Python package for gene expression modeling.
 Home-page: https://github.com/nicolaslynn/geney
 Author: Nicolas Lynn

{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/RECORD RENAMED Viewed

@@ -7,15 +7,16 @@ geney/config_setup.py,sha256=SePeooA4RWAtR_KAT1-W1hkD3MT5tH6YMyp80t_RNPQ,385
 geney/data_setup.py,sha256=DZeksRPr2ZT7bszMo33W0r3OwmqHokVXtZ4gx5Lu_Mo,10725
 geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
+geney/immune_utils.py,sha256=0udmTxqF9jCYeUOgP7bGLWMEBH3KBikKu8pPQnE9Rfo,6881
 geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
 geney/oncosplice.py,sha256=Fyc_UtAhV3Pv0vk8V55rO_jnb2Dwj5sW98KVwP3PHwU,68964
 geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
 geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
-geney/power_utils.py,sha256=WRpqMnqUv1xrAeTduAUhx6YpSEJQci7bC2od12JcVtE,7267
+geney/power_utils.py,sha256=6InuDm1jSrsgR-F_LmdMTbuQwty2OdYjwfGGaAPhaRI,7268
 geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
-geney/survival_utils.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
+geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
 geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
-geney/tcga_utils.py,sha256=XrLI8RzmXhyabvL24sMrqQM3KNusmU1_kyKYdkv6lpo,15591
+geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
 geney/utils.py,sha256=YOe22gA0Oew9_QEym7ivM9sb7t3wNeHTeiSDBmvOPso,1984
 geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
@@ -43,7 +44,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
 geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-geney-1.1.10.dist-info/METADATA,sha256=Et_H-jo1c9eYsqEnjk1_gVLRtJYzS9-RXs8RE7Z-u6c,1131
-geney-1.1.10.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
-geney-1.1.10.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
-geney-1.1.10.dist-info/RECORD,,
+geney-1.1.11.dist-info/METADATA,sha256=eKUG3cuHIC37_E6QJg5TyDjBC6NXoine75FZWLxCK6A,1131
+geney-1.1.11.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
+geney-1.1.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.1.11.dist-info/RECORD,,

{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{geney-1.1.10.dist-info → geney-1.1.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

geney 1.1.10__py2.py3-none-any.whl → 1.1.11__py2.py3-none-any.whl

Potentially problematic release.

geney 1.1.10py2.py3-none-any.whl → 1.1.11py2.py3-none-any.whl