geney 1.1.10__py2.py3-none-any.whl → 1.1.11__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/immune_utils.py +185 -0
- geney/power_utils.py +1 -0
- geney/survival_utils.py +5 -3
- geney/tcga_utils.py +0 -2
- {geney-1.1.10.dist-info → geney-1.1.11.dist-info}/METADATA +1 -1
- {geney-1.1.10.dist-info → geney-1.1.11.dist-info}/RECORD +8 -7
- {geney-1.1.10.dist-info → geney-1.1.11.dist-info}/WHEEL +0 -0
- {geney-1.1.10.dist-info → geney-1.1.11.dist-info}/top_level.txt +0 -0
geney/immune_utils.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import logging
|
|
3
|
+
import tempfile
|
|
4
|
+
from geney import config_setup
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NetChop(object):
    """
    Wrapper around netChop tool. Assumes netChop is in your PATH.
    """

    def predict_epitopes(self, sequences, threshold=0.5, min_len=8):
        """
        Return netChop predictions for each position in each sequence.

        Parameters
        -----------
        sequences : list of string
            Amino acid sequences to predict cleavage for
        threshold : float
            Accepted for interface compatibility; not used by the current
            implementation (no thresholding of the scores is performed here).
        min_len : int
            Accepted for interface compatibility; not used by the current
            implementation (no length filtering is performed here).

        Returns
        -----------
        list of list of float

            The i'th list corresponds to the i'th sequence. Each list gives
            the cleavage probability for each position in the sequence.

        Raises
        -----------
        subprocess.CalledProcessError
            If the ``netchop`` executable exits with a non-zero status. The
            error is logged and then re-raised unchanged.
        """
        # The sequences are handed to netchop as a FASTA file. The temporary
        # file is deleted when the ``with`` block exits, so netchop has to be
        # invoked while the file is still open.
        with tempfile.NamedTemporaryFile(
                dir=config_setup['NETCHOP'], suffix=".fsa", mode="w") as input_fd:
            for i, sequence in enumerate(sequences):
                input_fd.write("> %d\n" % i)
                input_fd.write(sequence)
                input_fd.write("\n")
            # Make sure everything is on disk before the subprocess reads it.
            input_fd.flush()
            try:
                output = subprocess.check_output(["netchop", str(input_fd.name)])
            except subprocess.CalledProcessError as e:
                logging.error("Error calling netChop: %s:\n%s", e, e.output)
                raise
        # NOTE: chopping each sequence at positions scoring above `threshold`
        # and dropping fragments shorter than `min_len` is not implemented;
        # the raw per-position scores are returned as-is.
        return self.parse_netchop(output)

    @staticmethod
    def parse_netchop(netchop_output):
        """
        Parse netChop stdout.

        Parameters
        -----------
        netchop_output : bytes
            Raw stdout captured from netChop.

        Returns
        -----------
        list of list of float
            One list per score table found in the output, holding the value
            of the fourth whitespace-separated column of every table row.

        Raises
        -----------
        ValueError
            If a table header is not followed by a dashed separator line, or
            if the output ends before the dashed line that closes a table.
        """
        line_iterator = iter(netchop_output.decode().split("\n"))
        scores = []
        for line in line_iterator:
            # A table starts with a header line naming its columns.
            if not ("pos" in line and 'AA' in line and 'score' in line):
                continue
            # The header must be followed by a dashed separator; a missing
            # line is treated the same as a line without dashes.
            if "----" not in next(line_iterator, ""):
                raise ValueError("Dashes expected")
            table = []
            scores.append(table)
            # Consume rows from the shared iterator up to the closing dashes.
            for row in line_iterator:
                if '-------' in row:
                    break
                table.append(float(row.split()[3]))
            else:
                # Exhausted the output without seeing the closing separator.
                # Raise a descriptive error rather than letting a bare
                # StopIteration escape from this function.
                raise ValueError(
                    "Unexpected end of netChop output inside a score table")
        return scores

    def chop_protein(self, seq, pos):
        """
        Split ``seq`` after every position whose marker equals 1.

        Parameters
        -----------
        seq : string
            Sequence to split.
        pos : list
            One marker per position; a marker equal to 1 (``True`` also
            compares equal to 1) ends a fragment at that position, inclusive.

        Returns
        -----------
        list of string
            The fragments in order. Any residues remaining after the last
            marked position form a final fragment.
        """
        subsequences = []
        start = 0
        for i, marker in enumerate(pos):
            if marker == 1:
                subsequences.append(seq[start:i + 1])
                start = i + 1
        # Check if the last part needs to be added
        if start < len(seq):
            subsequences.append(seq[start:])
        return subsequences

    def generate_cut_sequences(self, char_sequence, cut_probabilities):
        """
        Enumerate substrings of ``char_sequence`` together with an abundance
        value derived from the cut probabilities at and between their ends.

        A substring ``char_sequence[i:j]`` is kept when the summed
        probabilities strictly between its first and last position
        (``cut_probabilities[i + 1:j - 1]``) are below 1. Its abundance is
        ``cut_probabilities[i] * cut_probabilities[j - 1]`` minus that sum.

        :param char_sequence: A string representing the sequence of characters.
        :param cut_probabilities: A list of probabilities for each position in the sequence.
        :return: A pandas DataFrame with one row per kept substring and the
            columns ``seq`` and ``abundance``. The columns are present even
            when the input is empty.
        :raises ValueError: If the two inputs differ in length.
        """
        n = len(char_sequence)
        if n != len(cut_probabilities):
            raise ValueError("Character sequence and cut probabilities must have the same length.")

        cut_sequences = []
        # Generate all possible cuts
        for i in range(n):
            for j in range(i + 1, n + 1):
                # Summed probability of the interior positions; computed once
                # and reused for both the filter and the abundance.
                inner = sum(cut_probabilities[i + 1:j - 1])
                if inner < 1:
                    abundance_value = (
                        cut_probabilities[i] * cut_probabilities[j - 1] - inner)
                    cut_sequences.append(
                        {'seq': char_sequence[i:j], 'abundance': abundance_value})

        # Naming the columns explicitly keeps the frame's schema stable for
        # empty input, where there are no records to infer columns from.
        return pd.DataFrame(cut_sequences, columns=['seq', 'abundance'])
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
from .base_commandline_predictor import BaseCommandlinePredictor
|
|
105
|
+
from .parsing import parse_netmhc41_stdout
|
|
106
|
+
from functools import partial
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class NetMHCpan41(BaseCommandlinePredictor):
    def __init__(
            self,
            alleles,
            default_peptide_lengths=None,
            program_name="netMHCpan",
            process_limit=-1,
            mode="binding_affinity",
            extra_flags=None):
        """
        Wrapper for NetMHCpan4.1.

        The mode argument should be one of "binding_affinity" (default) or
        "elution_score".

        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
        to an empty list. Both use ``None`` as the signature default so that
        every instance gets its own fresh list rather than sharing one
        mutable default object.

        Raises ValueError if ``mode`` is not one of the two supported values.
        """
        if default_peptide_lengths is None:
            default_peptide_lengths = [9]
        if extra_flags is None:
            extra_flags = []

        # The -BA flag is required to predict binding affinity
        if mode == "binding_affinity":
            flags = ["-BA"]
        elif mode == "elution_score":
            flags = []
        else:
            raise ValueError("Unsupported mode", mode)

        BaseCommandlinePredictor.__init__(
            self,
            program_name=program_name,
            alleles=alleles,
            default_peptide_lengths=default_peptide_lengths,
            # The parser needs to know the mode, so bind it up front.
            parse_output_fn=partial(parse_netmhc41_stdout, mode=mode),
            supported_alleles_flag="-listMHC",
            input_file_flag="-f",
            length_flag="-l",
            allele_flag="-a",
            # Mode-specific flags come first, caller-supplied flags after;
            # the concatenation builds a new list and mutates neither input.
            extra_flags=flags + extra_flags,
            process_limit=process_limit)
|
|
145
|
+
|
|
146
|
+
class NetMHCpan41_EL(NetMHCpan41):
    """
    Wrapper for NetMHCpan4 when the preferred mode is elution score
    """
    def __init__(
            self,
            alleles,
            default_peptide_lengths=None,
            program_name="netMHCpan",
            process_limit=-1,
            extra_flags=None):
        """
        Initialise the parent wrapper with ``mode="elution_score"``.

        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
        to an empty list; ``None`` is used in the signature so that no
        mutable default object is shared between instances.
        """
        if default_peptide_lengths is None:
            default_peptide_lengths = [9]
        if extra_flags is None:
            extra_flags = []
        NetMHCpan41.__init__(
            self,
            alleles=alleles,
            default_peptide_lengths=default_peptide_lengths,
            program_name=program_name,
            process_limit=process_limit,
            mode="elution_score",
            extra_flags=extra_flags)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class NetMHCpan41_BA(NetMHCpan41):
    """
    Wrapper for NetMHCpan4 when the preferred mode is binding affinity
    """
    def __init__(
            self,
            alleles,
            default_peptide_lengths=None,
            program_name="netMHCpan",
            process_limit=-1,
            extra_flags=None):
        """
        Initialise the parent wrapper with ``mode="binding_affinity"``.

        ``default_peptide_lengths`` defaults to ``[9]`` and ``extra_flags``
        to an empty list; ``None`` is used in the signature so that no
        mutable default object is shared between instances.
        """
        if default_peptide_lengths is None:
            default_peptide_lengths = [9]
        if extra_flags is None:
            extra_flags = []
        NetMHCpan41.__init__(
            self,
            alleles=alleles,
            default_peptide_lengths=default_peptide_lengths,
            program_name=program_name,
            process_limit=process_limit,
            mode="binding_affinity",
            extra_flags=extra_flags)
|
geney/power_utils.py
CHANGED
geney/survival_utils.py
CHANGED
|
@@ -82,13 +82,15 @@ class SurvivalAnalysis:
|
|
|
82
82
|
|
|
83
83
|
kmf.fit(g['duration'], g['event'], label=label)
|
|
84
84
|
surv_func = kmf.survival_function_
|
|
85
|
-
|
|
85
|
+
filtered_surv_func = surv_func[surv_func.index <= cap_time]
|
|
86
|
+
auc = np.trapz(filtered_surv_func[label], filtered_surv_func.index)
|
|
87
|
+
# auc = trapz(surv_func[label], surv_func.index)
|
|
86
88
|
auc_vals.append(auc)
|
|
87
89
|
if plot:
|
|
88
90
|
if count == 0:
|
|
89
|
-
ax = kmf.
|
|
91
|
+
ax = kmf.plot_survival_function()
|
|
90
92
|
else:
|
|
91
|
-
kmf.
|
|
93
|
+
kmf.plot_survival_function(ax=ax)
|
|
92
94
|
count += 1
|
|
93
95
|
p_value = self.log_rank(df[df[feature] == 1], df[df[feature] == 0])
|
|
94
96
|
|
geney/tcga_utils.py
CHANGED
|
@@ -157,12 +157,10 @@ class TCGAGene:
|
|
|
157
157
|
# returns two lists: all patients affected by a mutation and all patients with none of the mutations (or the mutations but not together)
|
|
158
158
|
pass
|
|
159
159
|
|
|
160
|
-
|
|
161
160
|
def arrange_patients_by_project(self, mut_id):
|
|
162
161
|
# returns all the patients affected by a mutation grouped by cancer project
|
|
163
162
|
pass
|
|
164
163
|
|
|
165
|
-
|
|
166
164
|
def total_prevalence(self, mut_id):
|
|
167
165
|
pass
|
|
168
166
|
|
|
@@ -7,15 +7,16 @@ geney/config_setup.py,sha256=SePeooA4RWAtR_KAT1-W1hkD3MT5tH6YMyp80t_RNPQ,385
|
|
|
7
7
|
geney/data_setup.py,sha256=DZeksRPr2ZT7bszMo33W0r3OwmqHokVXtZ4gx5Lu_Mo,10725
|
|
8
8
|
geney/gtex.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
9
9
|
geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
|
|
10
|
+
geney/immune_utils.py,sha256=0udmTxqF9jCYeUOgP7bGLWMEBH3KBikKu8pPQnE9Rfo,6881
|
|
10
11
|
geney/netchop.py,sha256=AMiy9YsdTmX4B3k3Y5Yh7EmoGAojM1O3AzhPKOiB--g,3050
|
|
11
12
|
geney/oncosplice.py,sha256=Fyc_UtAhV3Pv0vk8V55rO_jnb2Dwj5sW98KVwP3PHwU,68964
|
|
12
13
|
geney/oncosplice_pipeline.py,sha256=hpGqFHOdn8i8tvvs1-t3-G9Ko18zInwoDXBJbbrfbC4,68036
|
|
13
14
|
geney/performance_utils.py,sha256=FQt7rA4r-Wuq3kceCxsSuMfj3wU1tMG8QnbL59aBohs,4700
|
|
14
|
-
geney/power_utils.py,sha256=
|
|
15
|
+
geney/power_utils.py,sha256=6InuDm1jSrsgR-F_LmdMTbuQwty2OdYjwfGGaAPhaRI,7268
|
|
15
16
|
geney/survival.py,sha256=gNKZGcwxDZ00ixVBHf3ZdjbY_AHQOCU9kKpBC_dokbM,5572
|
|
16
|
-
geney/survival_utils.py,sha256=
|
|
17
|
+
geney/survival_utils.py,sha256=2CAkC2LsspicHIdrqsiPnjgvpr5KHDUfLFFqnRbPJqs,5762
|
|
17
18
|
geney/tcga_annotations.py,sha256=DjRl6Pk5VAOL1yhbt8SXD6FZhYbcYNu3FtXYMeveGB0,15016
|
|
18
|
-
geney/tcga_utils.py,sha256=
|
|
19
|
+
geney/tcga_utils.py,sha256=uAjejr7F-XqcXS5uANGlsHLOlzMmGo4CTbWhMO0E318,15589
|
|
19
20
|
geney/utils.py,sha256=YOe22gA0Oew9_QEym7ivM9sb7t3wNeHTeiSDBmvOPso,1984
|
|
20
21
|
geney/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
22
|
geney/analyzers/benchmark_clinvar.py,sha256=ZAxvZ-Ue5T6au5mGbk8clfvbAYl13NIY7U92KzL0lXI,5531
|
|
@@ -43,7 +44,7 @@ geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFW
|
|
|
43
44
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
44
45
|
geney/translation_termination/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
geney/translation_termination/tts_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
geney-1.1.
|
|
47
|
-
geney-1.1.
|
|
48
|
-
geney-1.1.
|
|
49
|
-
geney-1.1.
|
|
47
|
+
geney-1.1.11.dist-info/METADATA,sha256=eKUG3cuHIC37_E6QJg5TyDjBC6NXoine75FZWLxCK6A,1131
|
|
48
|
+
geney-1.1.11.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
|
|
49
|
+
geney-1.1.11.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
50
|
+
geney-1.1.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|