aldepyde 0.0.0a1__py3-none-any.whl → 0.0.0a32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aldepyde might be problematic. Click here for more details.
- aldepyde/Parsers/_mmcif_parser.py +0 -0
- aldepyde/Parsers/_pdb_parser.py +0 -0
- aldepyde/__init__.py +50 -0
- aldepyde/_config.py +113 -0
- aldepyde/biomolecule/Residue.py +9 -0
- aldepyde/biomolecule/_Atom.py +95 -0
- aldepyde/biomolecule/_AtomFactory.py +71 -0
- aldepyde/biomolecule/__init__.py +15 -0
- aldepyde/biomolecule/_amino_acid.py +6 -0
- aldepyde/biomolecule/_dna.py +6 -0
- aldepyde/biomolecule/_pdb.py +455 -0
- aldepyde/biomolecule/_rna.py +6 -0
- aldepyde/biomolecule/utils.py +60 -0
- aldepyde/cache/__init__.py +2 -0
- aldepyde/cache/_cache.py +257 -0
- aldepyde/cache/cachemanager.py +212 -0
- aldepyde/cache/downloader.py +13 -0
- aldepyde/cache/utils.py +32 -0
- aldepyde/configurable.py +7 -0
- aldepyde/data/RemoteFileHandler.py +32 -0
- aldepyde/data/__init__.py +1 -0
- aldepyde/data.py +148 -0
- aldepyde/databases/PDB.py +0 -0
- aldepyde/databases/RemoteFileHandler.py +43 -0
- aldepyde/databases/UniRef.py +75 -0
- aldepyde/databases/__init__.py +0 -0
- aldepyde/databases/_database.py +38 -0
- aldepyde/env.py +43 -0
- aldepyde/fetcher/__init__.py +0 -0
- aldepyde/fetcher/test.py +2 -0
- aldepyde/json/CHG.json +25 -0
- aldepyde/json/Swiss_Prot.json +25 -0
- aldepyde/json/chemistry.json +4622 -0
- aldepyde/rand/RandomProtein.py +402 -0
- aldepyde/rand/__init__.py +3 -0
- aldepyde/stats/ProteinStats.py +89 -0
- aldepyde/stats/__init__.py +0 -0
- aldepyde/utils.py +275 -0
- {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info}/METADATA +4 -3
- aldepyde-0.0.0a32.dist-info/RECORD +43 -0
- {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info}/WHEEL +1 -1
- aldepyde-0.0.0a32.dist-info/top_level.txt +1 -0
- aldepyde-0.0.0a1.dist-info/RECORD +0 -5
- aldepyde-0.0.0a1.dist-info/top_level.txt +0 -1
- {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import math
|
|
3
|
+
import requests
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
from aldepyde.stats.ProteinStats import *
|
|
7
|
+
|
|
8
|
+
class InvalidDistribution(Exception):
    """Raised when a requested distribution preset is not one of the accepted presets."""
|
|
10
|
+
|
|
11
|
+
class ImpossibleSetting(Exception):
    """Raised when generation or residue settings contradict each other or are out of range."""
|
|
13
|
+
|
|
14
|
+
class RandomProtein:
|
|
15
|
+
# Hardcode data for now
|
|
16
|
+
def __init__(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True, Distribution="Swiss"):
    """Set up the analyzer, the preset distribution and the residue groups.

    Args:
        His_Is_Charged: When true, "H" joins the charged and positive groups;
            otherwise it joins the polar group.
        Cys_Is_Polar: When true, "C" joins the polar group; otherwise the
            nonpolar group.
        Charged_Is_Polar: When true, every positive and negative residue is
            also appended to the polar group.
        Distribution: Preset name handed to ``LoadPresetDistribution``.
    """
    self.Analyzer = ProteinStats(His_Is_Charged, Cys_Is_Polar, Charged_Is_Polar)

    self._ACCEPTED_METHODS = ["Random", "Custom", "Builder", "Grouper"]
    self._ACCEPTED_PRESETS = ["Swiss", "CHG"]
    self.His_Is_Charged = His_Is_Charged
    self.Cys_Is_Polar = Cys_Is_Polar
    self.Charged_Is_Polar = Charged_Is_Polar

    # Loaded before the residue groups are assigned, so a bad preset raises
    # before any group attribute exists.
    self.LoadPresetDistribution(Distribution)

    # Build every group locally, then publish them in one go.
    histidine = "H" if His_Is_Charged else ""
    positive = "RK" + histidine
    negative = "ED"
    charged = "RKED" + histidine
    polar = "STNQ" if His_Is_Charged else "STNQ" + "H"
    nonpolar = "AVILMFYWGP"

    if Cys_Is_Polar:
        polar = polar + "C"
    else:
        nonpolar = nonpolar + "C"

    if Charged_Is_Polar:
        polar = polar + positive + negative

    self._AAs = "ARNDCEQGHILKMFPSTWYV"
    self._POSITIVE = positive
    self._NEGATIVE = negative
    self._POLAR = polar
    self._NONPOLAR = nonpolar
    self._CHARGED = charged
|
|
48
|
+
|
|
49
|
+
def GetNP(self):
    """Return the string of residues currently classified as nonpolar."""
    nonpolar = self._NONPOLAR
    return nonpolar
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def LoadPresetDistribution(self, preset="Swiss"):
    """Load one of the bundled amino-acid distributions into ``self.distribution``.

    Args:
        preset: Preset name, matched case-insensitively against "Swiss" and
            "CHG".

    Raises:
        InvalidDistribution: If ``preset`` is not one of the bundled presets.
        FileNotFoundError: If the bundled JSON resource cannot be read.
    """
    # Local import: this module never imported ``pkg_resources``, so the
    # previous implementation failed with NameError. ``pkgutil`` is in the
    # standard library and returns the bytes directly, leaving no stream open.
    import pkgutil

    preset_files = {
        "SWISS": "json/Swiss_Prot.json",
        "CHG": "json/CHG.json",
    }
    resource = preset_files.get(preset.upper())
    if resource is None:
        raise InvalidDistribution(
            f"Preset must be one of the following: {self._ACCEPTED_PRESETS}\n\t"
            f"Chosen: {preset}")

    raw = pkgutil.get_data("aldepyde", resource)
    if raw is None:
        # get_data returns None when the package loader cannot supply data.
        raise FileNotFoundError(f"Could not read bundled resource: {resource}")
    self.distribution = json.loads(raw)['Amino Acid Distribution']
|
|
65
|
+
|
|
66
|
+
def GetAminoAcids(self):
    """Return the string of residues this generator may draw from."""
    alphabet = self._AAs
    return alphabet
|
|
68
|
+
|
|
69
|
+
def SetAminoAcids(self, AAs):
    """Replace the residue alphabet.

    Args:
        AAs: Either a string of residue letters, or a list/tuple of strings
            that is joined into one string. Any other type is ignored and the
            current alphabet is left unchanged.
    """
    if isinstance(AAs, str):
        # The previous code stored the builtin ``str`` type here instead of
        # the argument.
        self._AAs = AAs
    elif isinstance(AAs, (list, tuple)):
        self._AAs = "".join(AAs)
|
|
74
|
+
|
|
75
|
+
# TODO add setters for polar, nonpolar, etc.
|
|
76
|
+
|
|
77
|
+
def AddAminoAcid(self, AA):
    """Append ``AA`` to the residue alphabet unless it is already present."""
    already_known = AA in self._AAs
    if already_known:
        return
    self._AAs += AA
|
|
80
|
+
|
|
81
|
+
def RemoveAminoAcid(self, AA):
    """Remove ``AA`` from the alphabet and from every residue group.

    Strings are immutable, so ``str.replace`` returns a new string; the
    previous code discarded that result and therefore removed nothing. The
    nonpolar group is now cleaned as well, so a removed residue cannot be
    drawn from it later.

    Args:
        AA: Residue letter (or substring) to remove wherever it occurs.
    """
    groups = ("_AAs", "_POLAR", "_NONPOLAR", "_CHARGED", "_POSITIVE", "_NEGATIVE")
    for group in groups:
        current = getattr(self, group)
        if AA in current:
            setattr(self, group, current.replace(AA, ""))
|
|
92
|
+
|
|
93
|
+
def ConfigureAA(self, AA, is_polar=False, is_charged=False, is_positive=False, is_negative=False):
    """Register ``AA`` in the residue groups that match the given flags.

    Two defects of the previous version are fixed: a polar residue was
    appended to the positive group instead of the polar group, and a residue
    that had just been made polar (via ``is_charged`` with
    ``Charged_Is_Polar``) was additionally appended to the nonpolar group.

    Args:
        AA: Residue letter to configure; it is added to the alphabet if new.
        is_polar: Add the residue to the polar group.
        is_charged: Add the residue to the charged group (and to the polar
            group when ``Charged_Is_Polar`` is set).
        is_positive: Add the residue to the positive group.
        is_negative: Add the residue to the negative group.

    Raises:
        ImpossibleSetting: If both ``is_positive`` and ``is_negative`` are set.
    """
    # if is_charged and not (is_positive or is_negative):
    #     raise ImpossibleSettingsException("The residue must be either positive or negative if it carries a charge")
    if is_negative and is_positive:
        raise ImpossibleSetting("The residue cannot be both positive and negative")

    def register(group):
        # Append AA to the named group string unless it is already in it.
        members = getattr(self, group)
        if AA not in members:
            setattr(self, group, members + AA)

    if is_charged:
        if self.Charged_Is_Polar:
            register("_POLAR")
        register("_CHARGED")
    if is_negative:
        register("_NEGATIVE")
    if is_positive:
        register("_POSITIVE")

    if is_polar:
        register("_POLAR")
    elif AA not in self._POLAR:
        # Only residues that are not polar by any route become nonpolar.
        # NOTE(review): a charged residue still lands here when
        # Charged_Is_Polar is false (unchanged from before) - confirm intent.
        register("_NONPOLAR")

    register("_AAs")
|
|
113
|
+
|
|
114
|
+
def configure(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True):
    """Re-run ``__init__`` with the three classification flags.

    The distribution argument is not forwarded, so ``__init__`` falls back to
    its own default preset.
    """
    flags = (His_Is_Charged, Cys_Is_Polar, Charged_Is_Polar)
    self.__init__(*flags)
|
|
116
|
+
|
|
117
|
+
def _load(self, json_str):
    """Parse ``json_str`` and store its normalized "Amino Acid Distribution" table."""
    document = json.loads(json_str)
    raw_table = document["Amino Acid Distribution"]
    self.distribution = self._NormalizeValues(raw_table)
|
|
119
|
+
|
|
120
|
+
def _NormalizeValues(self, dic):
    """Rescale the values of ``dic`` in place so they sum to 1 and return it.

    Values are converted with ``float``, so numeric strings are accepted.
    """
    # Plain running total (not ``sum``) so floating-point rounding matches
    # a straightforward left-to-right accumulation.
    running = 0
    for weight in dic.values():
        running += float(weight)

    for residue, weight in dic.items():
        dic[residue] = float(weight) / running
    return dic
|
|
127
|
+
|
|
128
|
+
def LoadDistributionFromFile(self, dist_json_path):
    """Read the JSON file at ``dist_json_path`` and load its distribution."""
    with open(dist_json_path, "r") as handle:
        contents = handle.read()
        self._load(contents)
|
|
131
|
+
|
|
132
|
+
def LoadDistributionFromURL(self, URL):
    """Fetch a JSON distribution from ``URL`` and load it.

    Raises:
        ConnectionError: If the response status code is not 200.
    """
    reply = requests.get(URL)
    if reply.status_code != 200:
        raise ConnectionError(f"Invalid url: {URL}")
    payload = reply.json()
    # _load expects JSON text, so the decoded payload is serialised again.
    self._load(json.dumps(payload))
|
|
138
|
+
|
|
139
|
+
def GenerateProtein(self, length, batch_size=1, method="Random", percent_polar=None,
                    percent_charged=None, final_charge=None, charge_range=None,
                    timeout=30, max_attempts=None, verbose=False, cleanup=False, crash_at_bad_settings=False):
    """Generate ``batch_size`` sequences of ``length`` residues.

    The "Random" and "Grouper" methods redraw candidates until
    ``_ValidateSequence`` accepts one; "Custom" and "Builder" return their
    first result without validation.

    Args:
        length: Number of residues per sequence.
        batch_size: Number of sequences to return.
        method: Generation method name, upper-cased before dispatch.
        percent_polar, percent_charged, final_charge, charge_range:
            Constraints forwarded to the generators and the validator.
        timeout: Accepted but not used by this method.
        max_attempts: Only checked by ``VerifySettings``; not enforced here.
        verbose: Print attempt counters and a summary of each result.
        cleanup: Pass each result through ``_CleanPolar``.
        crash_at_bad_settings: Forwarded to ``VerifySettings`` as ``crash``.

    Returns:
        A list of sequences, or ``None`` when ``VerifySettings`` is falsy.
    """
    self.verbose = verbose
    # TODO make it so max_attempts is actually used
    settings_ok = self.VerifySettings(length, batch_size=batch_size, method=method,
                                      percent_polar=percent_polar,
                                      percent_charged=percent_charged,
                                      final_charge=final_charge,
                                      charge_range=charge_range,
                                      max_attempts=max_attempts,
                                      crash=crash_at_bad_settings)
    if not settings_ok:
        return None

    mode = method.upper()
    # Methods that redraw candidates until one passes validation.
    redraw = {
        "Random".upper(): lambda: self._Random(length),
        "Grouper".upper(): lambda: self._Grouper(length, percent_polar, percent_charged, final_charge),
    }

    proteins = []
    for _ in range(batch_size):
        # "$" placeholders never validate, which forces at least one draw.
        candidate = "$" * length
        if mode in redraw:
            draw = redraw[mode]
            # sequence = self._Random(length)
            tries = 0
            while not self._ValidateSequence(candidate, percent_polar,
                                             percent_charged, final_charge, charge_range):
                candidate = draw()
                if verbose:
                    tries += 1
                    print(f"\rAttempt: {tries}", end="")
        elif mode == "Custom".upper():
            candidate = self._Custom(length)
        elif mode == "Builder".upper():
            candidate = self._Builder(length, percent_charged, percent_polar, final_charge)

        if cleanup:
            if verbose:
                print("\nCleaning result sequence")
            candidate = self._CleanPolar(candidate, percent_polar)
        proteins.append(candidate)

    if verbose:
        for result in proteins:
            print(
                f"Result: {result}\nPercent Polar: {self.Analyzer.PercentPolar(result)}\n"
                f"Percent Charged: {self.Analyzer.PercentCharged(result)}"
                f"\nPercent NonPolar: {self.Analyzer.PercentNonPolar(result)}"
                f"\nTotal Charge: {self.Analyzer.GetCharge(result)}\n")

    return proteins
|
|
192
|
+
|
|
193
|
+
def _Random(self, length):
    """Return ``length`` residues drawn uniformly from the alphabet."""
    alphabet = self._AAs
    picks = [alphabet[random.randrange(0, len(self._AAs))] for _ in range(length)]
    return "".join(picks)
|
|
198
|
+
|
|
199
|
+
def _Custom(self, length):
    """Return ``length`` residues drawn with the loaded distribution as weights."""
    table = self.distribution
    residues = [*table.keys()]
    weights = [*table.values()]
    drawn = random.choices(residues, weights=weights, k=length)
    return "".join(drawn)
|
|
203
|
+
|
|
204
|
+
def _Builder(self, length, percent_charged, percent_polar, final_charge):
    """Assemble a sequence group by group, then shuffle it.

    Polar residues are drawn first, then charged residues, then (optionally)
    residues are flipped or appended to approach ``final_charge``; any
    remaining positions are filled from the nonpolar group. The list is
    shuffled and joined into a string.

    Args:
        length: Target number of residues.
        percent_charged: Fraction of charged residues, or None (treated as 0).
        percent_polar: Fraction of polar residues, or None (treated as 0).
        final_charge: Desired net charge, or None to skip charge adjustment.

    Returns:
        The generated sequence as a string.
    """
    sequence = []
    # Keep the caller's original (possibly None) values; the parameters are
    # overwritten with 0 below.
    pc = percent_charged
    pp = percent_polar

    percent_unclear = 0

    # Residue groups whose percentage was left unspecified.
    # NOTE(review): after de-duplication below, ``unclear`` is not read again
    # in this method.
    unclear = ""

    if percent_charged is None:
        percent_charged = 0
        unclear += self._CHARGED
    if percent_polar is None:
        percent_polar = 0
        unclear += self._POLAR
        unclear += self._NONPOLAR

    # De-duplicate the collected residue letters via a set.
    u = set()
    for c in unclear:
        u.add(c)
    unclear = ""
    for c in u:
        unclear += c

    if pc is None or pp is None:
        percent_unclear = 1 - (percent_polar + percent_charged)

    # sequence = ["X"] * length
    for i in range(math.ceil(length * percent_polar)):
        sequence.append(random.choice(self._POLAR))
    # The range start and the first term of the stop cancel out, so this
    # draws ceil(length * percent_charged) charged residues minus the number
    # of charged residues the analyzer already counts in the sequence.
    for i in range(math.ceil(length * percent_polar),
                   math.ceil(length * percent_polar) + math.ceil(
                       length * percent_charged) - self.Analyzer.NumCharged(sequence)):
        sequence.append(random.choice(self._CHARGED))

    # NOTE(review): this branch only runs for a negative percent_unclear, for
    # which range() is empty (int product) or raises TypeError (float
    # product). Looks like ``> 0`` with an integer count was intended - confirm.
    if percent_unclear < 0:
        for _ in range(length * percent_unclear):
            sequence.append(random.choice(self._NONPOLAR))

    if final_charge is not None:
        # Positive disparity: the sequence needs more positive charge.
        disparity = final_charge - self.Analyzer.GetCharge(sequence)
        ind = 0
        # Pass 1: flip residues of the wrong sign in place. Each flip moves
        # the net charge by 2, so stop once the disparity is within +/-1.
        while not (disparity == 0 or disparity == -1 or disparity == 1) and ind < len(sequence):
            if disparity > 0 and sequence[ind] in self._NEGATIVE:
                sequence[ind] = random.choice(self._POSITIVE)
                disparity -= 2
            if disparity < 0 and sequence[ind] in self._POSITIVE:
                sequence[ind] = random.choice(self._NEGATIVE)
                disparity += 2
            ind += 1

        # if not ind < len(sequence):
        # Pass 2: append single charged residues while there is still room.
        while not (disparity == 0):
            if disparity == 0 or len(sequence) >= length:
                break
            elif disparity > 0:
                # ``+=`` with a one-character string extends the list by it.
                sequence += random.choice(self._POSITIVE)
                disparity -= 1
            else:
                sequence += random.choice(self._NEGATIVE)
                disparity += 1

    # Pad the remaining positions with nonpolar residues.
    while len(sequence) < length:
        sequence.append(random.choice(self._NONPOLAR))
    random.shuffle(sequence)
    return "".join(sequence)
|
|
270
|
+
|
|
271
|
+
def _Grouper(self, length, percent_polar, percent_charged, final_charge):
    """Draw a sequence residue by residue from the polar/charged/nonpolar groups.

    For each position a uniform random number selects the group; inside the
    charged group a second draw selects positive or negative with
    probability ``p_pos``.

    Fix: ``p_pos`` used to default to 0, so a neutral (or unspecified) target
    charge produced only negative charged residues. It now defaults to 0.5,
    which is also what both charge formulas below give at ``final_charge = 0``.

    Args:
        length: Number of residues to draw.
        percent_polar: Probability of a polar draw, or None (treated as 0).
        percent_charged: Probability of a charged draw, or None (treated as 0).
        final_charge: Target net charge, or None (treated as 0).

    Returns:
        The drawn sequence as a string.

    Raises:
        ImpossibleSetting: If ``final_charge`` is nonzero while
            ``percent_charged`` is 0.
    """
    if percent_polar is None:
        percent_polar = 0
    if percent_charged is None:
        percent_charged = 0
    if final_charge is None:
        final_charge = 0

    if final_charge != 0 and percent_charged == 0:
        raise ImpossibleSetting("If final_charge is nonzero, percent_charged must also be nonzero")

    # Probability that a charged draw is positive; balanced unless a net
    # charge is requested.
    p_pos = 0.5
    if final_charge != 0:
        # Expected number of charged residues is percent_charged * length;
        # the minority sign gets (1 - |charge| / expected) / 2 of them.
        minority = (1 - abs(final_charge) / (percent_charged * length)) / 2
        p_pos = 1 - minority if final_charge > 0 else minority

    if self.Charged_Is_Polar:
        # NOTE(review): reweighting kept exactly as before. The polar share is
        # reduced by the fraction of non-charged polar residues, while polar
        # draws below still sample the full polar group - confirm intent.
        percent_charged = percent_charged + percent_polar * (len(self._CHARGED) / (len(self._POLAR)))
        unique = "".join([c for c in self._POLAR if c not in self._CHARGED])
        percent_polar = percent_polar - percent_polar * (len(unique) / (len(self._POLAR)))

    residues = []
    for _ in range(length):
        r = random.random()
        if r < percent_polar:
            residues.append(random.choice(self._POLAR))
        elif percent_polar < r < percent_polar + percent_charged:
            if p_pos > random.random():
                residues.append(random.choice(self._POSITIVE))
            else:
                residues.append(random.choice(self._NEGATIVE))
        else:
            residues.append(random.choice(self._NONPOLAR))
    return "".join(residues)
|
|
308
|
+
|
|
309
|
+
def _ValidateSequence(self, sequence, percent_polar, percent_charged, total_charge, charge_range):
    """Check a candidate sequence against the requested constraints.

    A sequence containing the "$" placeholder is always rejected. A
    percentage that is None or negative is not checked; otherwise the
    analyzer's fraction must be at least the requested value. When
    ``total_charge`` is given, the net charge must lie within
    ``charge_range`` (None counts as 0) of it.

    Returns:
        True when every active constraint holds, otherwise False.
    """
    if '$' in sequence:
        return False

    tolerance = 0 if charge_range is None else charge_range

    # Polar condition
    polar_ok = (
        percent_polar is None
        or percent_polar < 0
        or self.Analyzer.PercentPolar(sequence) >= percent_polar
    )

    # Charge condition
    charged_ok = (
        percent_charged is None
        or percent_charged < 0
        or self.Analyzer.PercentCharged(sequence) >= percent_charged
    )

    # Charge total condition
    if total_charge is None:
        net_ok = True
    else:
        upper_ok = self.Analyzer.GetCharge(sequence) <= (total_charge + tolerance)
        net_ok = upper_ok and self.Analyzer.GetCharge(sequence) >= (total_charge - tolerance)

    return (polar_ok and charged_ok and net_ok)
|
|
341
|
+
|
|
342
|
+
def VerifySettings(self, length, batch_size, method, percent_polar,
                   percent_charged, final_charge, charge_range, max_attempts, crash=True):
    """Check generation settings before any sequence is drawn.

    Changes from the previous version:
      * ``method`` is matched case-insensitively, because ``GenerateProtein``
        upper-cases it before dispatching.
      * ``max_attempts`` is type-checked before it is compared, so a
        non-numeric value is rejected instead of raising TypeError.
      * The length error message now states the actual requirement.
      * The check that followed ``return True`` was unreachable and has been
        dropped. NOTE(review): enabling it for every method would reject
        settings that the "Random" method can satisfy, because
        ``percent_charged`` is only a lower bound in ``_ValidateSequence``.

    Args:
        length: Requested sequence length.
        batch_size: Number of sequences; must be larger than 0.
        method: Generation method name.
        percent_polar, percent_charged: Fractions, or None when unspecified.
        final_charge: Target net charge, or None to ignore charge.
        charge_range: Accepted for signature compatibility; not checked here.
        max_attempts: None or a non-negative int.
        crash: When true, raise on a bad setting instead of returning False.

    Returns:
        True when the settings are usable, otherwise False (``crash`` false).

    Raises:
        ImpossibleSetting: On a bad setting when ``crash`` is true, and
            always when ``batch_size`` is not larger than 0.
    """
    def reject(message):
        # Raise or report failure, depending on ``crash``.
        if crash:
            raise ImpossibleSetting(message)
        return False

    # Unchanged behaviour: a bad batch size raises regardless of ``crash``.
    if batch_size <= 0:
        raise ImpossibleSetting("Batch size must be larger than 0")

    if max_attempts is not None and (not isinstance(max_attempts, int) or max_attempts < 0):
        return reject("The number of specified max attempts must be a positive integer or None")

    accepted = {name.upper() for name in self._ACCEPTED_METHODS}
    if not isinstance(method, str) or method.upper() not in accepted:
        return reject(
            f"Selected method must be one of the following: {self._ACCEPTED_METHODS}.\n\t Chosen: {method}")

    charged_ok = percent_charged is None or percent_charged >= 0
    polar_ok = percent_polar is None or percent_polar >= 0
    if not (charged_ok and polar_ok):
        return reject("All provided probabilities must be positive or 0")

    if not self.Charged_Is_Polar:
        # The groups are disjoint here, so their shares cannot exceed 1.
        polar_share = 0 if percent_polar is None else percent_polar
        charged_share = 0 if percent_charged is None else percent_charged
        if polar_share + charged_share > 1:
            return reject(
                "The sum of provided probabilities must be less than or equal to 1 if charged residues are not considered polar")

    if final_charge is not None and length < abs(final_charge):
        return reject(
            f"The length of the output protein must be greater than or equal to the magnitude of the charge ({length} < {abs(final_charge)})")

    return True
|
|
390
|
+
|
|
391
|
+
def _CleanPolar(self, sequence, percent_polar):
    """Replace surplus polar, non-charged residues with nonpolar ones.

    The surplus is the analyzer's polar count minus
    ``floor(len(sequence) * percent_polar)``. Residues are replaced from the
    start of the sequence until the surplus is used up or the end is reached.
    With ``percent_polar`` None the sequence is returned unchanged.
    """
    if percent_polar is None:
        return sequence

    surplus = self.Analyzer.NumPolar(sequence) - math.floor(len(sequence) * percent_polar)
    residues = list(sequence)
    for position, residue in enumerate(residues):
        if surplus <= 0:
            break
        replaceable = (residue in self._POLAR) and (residue not in self._CHARGED)
        if replaceable:
            residues[position] = random.choice(self._NONPOLAR)
            surplus -= 1
    return "".join(residues)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# import pkg_resources
|
|
2
|
+
# from aldepyde.data import data as data
|
|
3
|
+
|
|
4
|
+
class ProteinStats:
    """Composition statistics for sequences of one-letter residue codes.

    The residue groups are plain strings built in ``__init__`` from three
    classification flags. Every method accepts any iterable of residue
    letters (a string or a list of single characters).
    """
    #TODO Clean this class a lot... also add some things
    #TODO Isoelectric point
    def __init__(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True):
        """Build the residue groups.

        Args:
            His_Is_Charged: When true, "H" joins the charged and positive
                groups; otherwise it joins the polar group.
            Cys_Is_Polar: When true, "C" joins the polar group; otherwise the
                nonpolar group.
            Charged_Is_Polar: When true, positive and negative residues are
                also appended to the polar group.
        """
        self.AAs = "ARNDCEQGHILKMFPSTWYV"
        self.POSITIVE = "RK"
        self.NEGATIVE = "ED"
        self.POLAR = "STNQ"
        self.NONPOLAR = "AVILMFYWGP"
        self.CHARGED = "RKED"

        if His_Is_Charged:
            self.CHARGED += "H"
            self.POSITIVE += "H"
        else:
            self.POLAR += "H"

        if Cys_Is_Polar:
            self.POLAR += "C"
        else:
            self.NONPOLAR += "C"

        if Charged_Is_Polar:
            self.POLAR += self.POSITIVE
            self.POLAR += self.NEGATIVE

        # NOTE(review): PercentPolarC reads POLAR_CHECK, which was never
        # assigned anywhere in this class, so the method always failed with
        # AttributeError. It now defaults to the polar group; confirm the
        # intended residue set. Callers can still override the attribute.
        self.POLAR_CHECK = self.POLAR

    def _count(self, sequence, residues):
        """Return how many elements of ``sequence`` occur in ``residues``."""
        return sum(1 for c in sequence if c in residues)

    def _fraction(self, sequence, residues):
        """Return the share of ``sequence`` found in ``residues`` (0.0 if empty)."""
        size = len(sequence)
        if size == 0:
            # An empty sequence used to raise ZeroDivisionError.
            return 0.0
        return self._count(sequence, residues) / size

    def GetNumResidues(self, sequence):
        """Return a dict mapping every known residue to its count in ``sequence``."""
        return {c: sequence.count(c) for c in self.AAs}

    def GetCharge(self, sequence):
        """Return the net charge: +1 per positive residue, -1 per negative one."""
        total_charge = 0
        for c in sequence:
            if c in self.POSITIVE:
                total_charge += 1
            elif c in self.NEGATIVE:
                total_charge -= 1
        return total_charge

    def NumCharged(self, sequence):
        """Return the number of residues that belong to the charged group."""
        return self._count(sequence, self.CHARGED)

    def NumPolar(self, sequence):
        """Return the number of residues that belong to the polar group."""
        return self._count(sequence, self.POLAR)

    def PercentCharged(self, sequence):
        """Return the fraction of residues that are positive or negative."""
        size = len(sequence)
        if size == 0:
            return 0.0
        num_charged = sum(1 for c in sequence if c in self.POSITIVE or c in self.NEGATIVE)
        return num_charged / size

    def PercentPolarC(self, sequence):
        """Return the fraction of residues found in ``POLAR_CHECK``."""
        return self._fraction(sequence, self.POLAR_CHECK)

    def PercentPolar(self, sequence):
        """Return the fraction of residues that belong to the polar group."""
        return self._fraction(sequence, self.POLAR)

    def PercentNonPolar(self, sequence):
        """Return the fraction of residues that belong to the nonpolar group."""
        return self._fraction(sequence, self.NONPOLAR)
|
|
87
|
+
|
|
88
|
+
# def GetMass(self, sequence, from_abundances=False):
|
|
89
|
+
# return data.ProteinMass(sequence, from_abundances=from_abundances)
|
|
File without changes
|