geney 1.1.10__py2.py3-none-any.whl → 1.1.11__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/immune_utils.py ADDED
@@ -0,0 +1,185 @@
1
+ import subprocess
2
+ import logging
3
+ import tempfile
4
+ from geney import config_setup
5
+ import pandas as pd
6
+
7
+
8
+ class NetChop(object):
9
+ """
10
+ Wrapper around netChop tool. Assumes netChop is in your PATH.
11
+ """
12
+ def predict_epitopes(self, sequences, threshold=0.5, min_len=8):
13
+ """
14
+ Return netChop predictions for each position in each sequence.
15
+
16
+ Parameters
17
+ -----------
18
+ sequences : list of string
19
+ Amino acid sequences to predict cleavage for
20
+
21
+ Returns
22
+ -----------
23
+ list of list of float
24
+
25
+ The i'th list corresponds to the i'th sequence. Each list gives
26
+ the cleavage probability for each position in the sequence.
27
+ """
28
+ with tempfile.NamedTemporaryFile(dir=config_setup['NETCHOP'], suffix=".fsa", mode="w") as input_fd:
29
+ for (i, sequence) in enumerate(sequences):
30
+ _ = input_fd.write("> %d\n" % i)
31
+ _ = input_fd.write(sequence)
32
+ _ = input_fd.write("\n")
33
+ input_fd.flush()
34
+ try:
35
+ output = subprocess.check_output(["netchop", str(input_fd.name)])
36
+ except subprocess.CalledProcessError as e:
37
+ logging.error("Error calling netChop: %s:\n%s" % (e, e.output))
38
+ raise
39
+ parsed = self.parse_netchop(output)
40
+ return parsed
41
+ #
42
+ # assert len(parsed) == len(sequences), \
43
+ # "Expected %d results but got %d" % (
44
+ # len(sequences), len(parsed))
45
+ # assert [len(x) for x in parsed] == [len(x) for x in sequences]
46
+ # filtered_proteosomes = []
47
+ # for scores, seq in list(zip(parsed, sequences)):
48
+ # proteosome = self.chop_protein(seq, [s > threshold for s in scores])
49
+ # filtered_proteosomes.append([e for e in proteosome if len(e) > min_len])
50
+ # return filtered_proteosomes
51
+ @staticmethod
52
+ def parse_netchop(netchop_output):
53
+ """
54
+ Parse netChop stdout.
55
+ """
56
+ line_iterator = iter(netchop_output.decode().split("\n"))
57
+ scores = []
58
+ for line in line_iterator:
59
+ if "pos" in line and 'AA' in line and 'score' in line:
60
+ scores.append([])
61
+ if "----" not in next(line_iterator):
62
+ raise ValueError("Dashes expected")
63
+ line = next(line_iterator)
64
+ while '-------' not in line:
65
+ score = float(line.split()[3])
66
+ scores[-1].append(score)
67
+ line = next(line_iterator)
68
+ return scores
69
+ def chop_protein(self, seq, pos):
70
+ # Generate subsequences using list comprehension and slicing
71
+ start = 0
72
+ subsequences = [seq[start:(start := i+1)] for i, marker in enumerate(pos) if marker == 1]
73
+ # Check if the last part needs to be added
74
+ if start < len(seq):
75
+ subsequences.append(seq[start:])
76
+ return subsequences
77
+ def generate_cut_sequences(self, char_sequence, cut_probabilities):
78
+ """
79
+ Generate all possible cut sequences and their abundance values,
80
+ considering only those sequences where the probabilities of all cut sites
81
+ between the two ends are zero.
82
+
83
+ :param char_sequence: A string representing the sequence of characters.
84
+ :param cut_probabilities: A list of probabilities for each position in the sequence.
85
+ :return: A list of tuples, where each tuple contains a cut sequence and its abundance value.
86
+ """
87
+ if len(char_sequence) != len(cut_probabilities):
88
+ raise ValueError("Character sequence and cut probabilities must have the same length.")
89
+ cut_sequences = []
90
+ # Generate all possible cuts
91
+ for i in range(len(char_sequence)):
92
+ for j in range(i + 1, len(char_sequence) + 1):
93
+ # Keep the fragment only if the cut probabilities of its interior sites sum to less than 1 (not strictly zero)
94
+ if sum(cut_probabilities[i + 1:j - 1]) < 1:
95
+ cut_sequence = char_sequence[i:j]
96
+ abundance_value = cut_probabilities[i] * cut_probabilities[j - 1] - sum(
97
+ cut_probabilities[i + 1:j - 1])
98
+ cut_sequences.append({'seq': cut_sequence, 'abundance': abundance_value})
99
+ return pd.DataFrame(cut_sequences)
100
+
101
+
102
+
103
+
104
+ from .base_commandline_predictor import BaseCommandlinePredictor
105
+ from .parsing import parse_netmhc41_stdout
106
+ from functools import partial
107
+
108
+
109
+ class NetMHCpan41(BaseCommandlinePredictor):
110
+ def __init__(
111
+ self,
112
+ alleles,
113
+ default_peptide_lengths=[9],
114
+ program_name="netMHCpan",
115
+ process_limit=-1,
116
+ mode="binding_affinity",
117
+ extra_flags=[]):
118
+ """
119
+ Wrapper for NetMHCpan4.1.
120
+
121
+ The mode argument should be one of "binding_affinity" (default) or
122
+ "elution_score".
123
+ """
124
+
125
+ # The -BA flag is required to predict binding affinity
126
+ if mode == "binding_affinity":
127
+ flags = ["-BA"]
128
+ elif mode == "elution_score":
129
+ flags = []
130
+ else:
131
+ raise ValueError("Unsupported mode", mode)
132
+
133
+ BaseCommandlinePredictor.__init__(
134
+ self,
135
+ program_name=program_name,
136
+ alleles=alleles,
137
+ default_peptide_lengths=default_peptide_lengths,
138
+ parse_output_fn=partial(parse_netmhc41_stdout, mode=mode),
139
+ supported_alleles_flag="-listMHC",
140
+ input_file_flag="-f",
141
+ length_flag="-l",
142
+ allele_flag="-a",
143
+ extra_flags=flags + extra_flags,
144
+ process_limit=process_limit)
145
+
146
+ class NetMHCpan41_EL(NetMHCpan41):
147
+ """
148
+ Wrapper for NetMHCpan4 when the preferred mode is elution score
149
+ """
150
+ def __init__(
151
+ self,
152
+ alleles,
153
+ default_peptide_lengths=[9],
154
+ program_name="netMHCpan",
155
+ process_limit=-1,
156
+ extra_flags=[]):
157
+ NetMHCpan41.__init__(
158
+ self,
159
+ alleles=alleles,
160
+ default_peptide_lengths=default_peptide_lengths,
161
+ program_name=program_name,
162
+ process_limit=process_limit,
163
+ mode="elution_score",
164
+ extra_flags=extra_flags)
165
+
166
+
167
+ class NetMHCpan41_BA(NetMHCpan41):
168
+ """
169
+ Wrapper for NetMHCpan4 when the preferred mode is binding affinity
170
+ """
171
+ def __init__(
172
+ self,
173
+ alleles,
174
+ default_peptide_lengths=[9],
175
+ program_name="netMHCpan",
176
+ process_limit=-1,
177
+ extra_flags=[]):
178
+ NetMHCpan41.__init__(
179
+ self,
180
+ alleles=alleles,
181
+ default_peptide_lengths=default_peptide_lengths,
182
+ program_name=program_name,
183
+ process_limit=process_limit,
184
+ mode="binding_affinity",
185
+ extra_flags=extra_flags)
geney/power_utils.py CHANGED
@@ -65,6 +65,7 @@ def launch_dask_cluster(memory_size="3GB", num_workers=10, queue="tamirQ",
65
65
  log_directory=log_directory,
66
66
  job_script_prologue=[f"cd {config_setup['BASE']}"]
67
67
  )
68
+
68
69
  else:
69
70
  dask_cluster = PBSCluster(
70
71
  cores=1,
geney/survival_utils.py CHANGED
@@ -82,13 +82,15 @@ class SurvivalAnalysis:
82
82
 
83
83
  kmf.fit(g['duration'], g['event'], label=label)
84
84
  surv_func = kmf.survival_function_
85
- auc = trapz(surv_func[label], surv_func.index)
85
+ filtered_surv_func = surv_func[surv_func.index <= cap_time]
86
+ auc = np.trapz(filtered_surv_func[label], filtered_surv_func.index)
87
+ # auc = trapz(surv_func[label], surv_func.index)
86
88
  auc_vals.append(auc)
87
89
  if plot:
88
90
  if count == 0:
89
- ax = kmf.plot()
91
+ ax = kmf.plot_survival_function()
90
92
  else:
91
- kmf.plot(ax=ax)
93
+ kmf.plot_survival_function(ax=ax)
92
94
  count += 1
93
95
  p_value = self.log_rank(df[df[feature] == 1], df[df[feature] == 0])
94
96
 
geney/tcga_utils.py CHANGED
@@ -157,12 +157,10 @@ class TCGAGene:
157
157
  # returns two lists: all patients affected by a mutation and all patients with none of the mutations (or the mutations but not together)
158
158
  pass
159
159
 
160
-
161
160
  def arrange_patients_by_project(self, mut_id):
162
161
  # returns all the patients affected by a mutation grouped by cancer project
163
162
  pass
164
163
 
165
-
166
164
  def total_prevalence(self, mut_id):
167
165
  pass
168
166
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.1.10
3
+ Version: 1.1.11
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -7,15 +7,16 @@ geney/config_setup.py,sha256=SePeooA4RWAtR_KAT1-W1hkD3MT5tH6YMyp80t_RNPQ,385
7
7
  geney/data_setup.py,sha256=DZeksRPr2ZT7bszMo33W0r3OwmqHokVXtZ4gx5Lu_Mo,10725
8
8
  geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
9
9
  geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
10
+ geney/immune_utils.py,sha256=0udmTxqF9jCYeUOgP7bGLWMEBH3KBikKu8pPQnE9Rfo,6881
10
11
  geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
11
12
  geney/oncosplice.py,sha256=Fyc_UtAhV3Pv0vk8V55rO_jnb2Dwj5sW98KVwP3PHwU,68964
12
13
  geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
13
14
  geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
14
- geney/power_utils.py,sha256=WRpqMnqUv1xrAeTduAUhx6YpSEJQci7bC2od12JcVtE,7267
15
+ geney/power_utils.py,sha256=6InuDm1jSrsgR-F_LmdMTbuQwty2OdYjwfGGaAPhaRI,7268
15
16
  geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
16
- geney/survival_utils.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
17
+ geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
17
18
  geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
18
- geney/tcga_utils.py,sha256=XrLI8RzmXhyabvL24sMrqQM3KNusmU1_kyKYdkv6lpo,15591
19
+ geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
19
20
  geney/utils.py,sha256=YOe22gA0Oew9_QEym7ivM9sb7t3wNeHTeiSDBmvOPso,1984
20
21
  geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
22
  geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
@@ -43,7 +44,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
43
44
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
44
45
  geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
46
  geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- geney-1.1.10.dist-info/METADATA,sha256=Et_H-jo1c9eYsqEnjk1_gVLRtJYzS9-RXs8RE7Z-u6c,1131
47
- geney-1.1.10.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
48
- geney-1.1.10.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
49
- geney-1.1.10.dist-info/RECORD,,
47
+ geney-1.1.11.dist-info/METADATA,sha256=eKUG3cuHIC37_E6QJg5TyDjBC6NXoine75FZWLxCK6A,1131
48
+ geney-1.1.11.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
49
+ geney-1.1.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
50
+ geney-1.1.11.dist-info/RECORD,,
File without changes