aldepyde 0.0.0a1__py3-none-any.whl → 0.0.0a32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aldepyde might be problematic. Click here for more details.

Files changed (45) hide show
  1. aldepyde/Parsers/_mmcif_parser.py +0 -0
  2. aldepyde/Parsers/_pdb_parser.py +0 -0
  3. aldepyde/__init__.py +50 -0
  4. aldepyde/_config.py +113 -0
  5. aldepyde/biomolecule/Residue.py +9 -0
  6. aldepyde/biomolecule/_Atom.py +95 -0
  7. aldepyde/biomolecule/_AtomFactory.py +71 -0
  8. aldepyde/biomolecule/__init__.py +15 -0
  9. aldepyde/biomolecule/_amino_acid.py +6 -0
  10. aldepyde/biomolecule/_dna.py +6 -0
  11. aldepyde/biomolecule/_pdb.py +455 -0
  12. aldepyde/biomolecule/_rna.py +6 -0
  13. aldepyde/biomolecule/utils.py +60 -0
  14. aldepyde/cache/__init__.py +2 -0
  15. aldepyde/cache/_cache.py +257 -0
  16. aldepyde/cache/cachemanager.py +212 -0
  17. aldepyde/cache/downloader.py +13 -0
  18. aldepyde/cache/utils.py +32 -0
  19. aldepyde/configurable.py +7 -0
  20. aldepyde/data/RemoteFileHandler.py +32 -0
  21. aldepyde/data/__init__.py +1 -0
  22. aldepyde/data.py +148 -0
  23. aldepyde/databases/PDB.py +0 -0
  24. aldepyde/databases/RemoteFileHandler.py +43 -0
  25. aldepyde/databases/UniRef.py +75 -0
  26. aldepyde/databases/__init__.py +0 -0
  27. aldepyde/databases/_database.py +38 -0
  28. aldepyde/env.py +43 -0
  29. aldepyde/fetcher/__init__.py +0 -0
  30. aldepyde/fetcher/test.py +2 -0
  31. aldepyde/json/CHG.json +25 -0
  32. aldepyde/json/Swiss_Prot.json +25 -0
  33. aldepyde/json/chemistry.json +4622 -0
  34. aldepyde/rand/RandomProtein.py +402 -0
  35. aldepyde/rand/__init__.py +3 -0
  36. aldepyde/stats/ProteinStats.py +89 -0
  37. aldepyde/stats/__init__.py +0 -0
  38. aldepyde/utils.py +275 -0
  39. {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info}/METADATA +4 -3
  40. aldepyde-0.0.0a32.dist-info/RECORD +43 -0
  41. {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info}/WHEEL +1 -1
  42. aldepyde-0.0.0a32.dist-info/top_level.txt +1 -0
  43. aldepyde-0.0.0a1.dist-info/RECORD +0 -5
  44. aldepyde-0.0.0a1.dist-info/top_level.txt +0 -1
  45. {aldepyde-0.0.0a1.dist-info → aldepyde-0.0.0a32.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,402 @@
1
+ import random
2
+ import math
3
+ import requests
4
+ import json
5
+
6
+ from aldepyde.stats.ProteinStats import *
7
+
8
class InvalidDistribution(Exception):
    """Raised when a requested amino-acid distribution preset is not recognised."""
10
+
11
class ImpossibleSetting(Exception):
    """Raised when generation settings contradict each other or cannot be met."""
13
+
14
class RandomProtein:
    """Generate random protein sequences under polarity and charge constraints.

    Residue classes are stored as strings of one-letter codes (``_AAs``,
    ``_POSITIVE``, ``_NEGATIVE``, ``_POLAR``, ``_NONPOLAR``, ``_CHARGED``).
    Sequence statistics used for validation are computed by ``self.Analyzer``.

    NOTE(review): ``AddAminoAcid`` / ``RemoveAminoAcid`` / ``ConfigureAA`` edit
    only this object's residue classes; ``self.Analyzer`` keeps the classes it
    was constructed with. Confirm whether the two should be kept in sync.
    """

    # Hardcode data for now
    def __init__(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True, Distribution="Swiss"):
        """Build the residue classes and load a preset distribution.

        Args:
            His_Is_Charged: if true, H is positive/charged; otherwise polar.
            Cys_Is_Polar: if true, C is polar; otherwise nonpolar.
            Charged_Is_Polar: if true, charged residues are also polar.
            Distribution: preset name passed to ``LoadPresetDistribution``.
        """
        self.Analyzer = ProteinStats(His_Is_Charged, Cys_Is_Polar, Charged_Is_Polar)

        self._ACCEPTED_METHODS = ["Random", "Custom", "Builder", "Grouper"]
        self._ACCEPTED_PRESETS = ["Swiss", "CHG"]
        self.His_Is_Charged = His_Is_Charged
        self.Cys_Is_Polar = Cys_Is_Polar
        self.Charged_Is_Polar = Charged_Is_Polar

        self.LoadPresetDistribution(Distribution)

        self._AAs = "ARNDCEQGHILKMFPSTWYV"
        self._POSITIVE = "RK"
        self._NEGATIVE = "ED"
        self._POLAR = "STNQ"
        self._NONPOLAR = "AVILMFYWGP"
        self._CHARGED = "RKED"

        if His_Is_Charged:
            self._CHARGED += "H"
            self._POSITIVE += "H"
        else:
            self._POLAR += "H"

        if Cys_Is_Polar:
            self._POLAR += "C"
        else:
            self._NONPOLAR += "C"

        if Charged_Is_Polar:
            self._POLAR += self._POSITIVE
            self._POLAR += self._NEGATIVE

    def GetNP(self):
        """Return the string of nonpolar residue codes."""
        return self._NONPOLAR

    def LoadPresetDistribution(self, preset="Swiss"):
        """Load a bundled distribution ("Swiss" or "CHG", case-insensitive).

        Raises:
            InvalidDistribution: if ``preset`` is not an accepted preset.
            FileNotFoundError: if the bundled JSON resource cannot be read.
        """
        # Local import: the original referenced ``pkg_resources`` without ever
        # importing it, so every construction failed with NameError.
        import pkgutil

        resources = {"SWISS": "json/Swiss_Prot.json", "CHG": "json/CHG.json"}
        resource = resources.get(preset.upper())
        if resource is None:
            raise InvalidDistribution(
                f"Preset must be one of the following: {self._ACCEPTED_PRESETS}\n\t"
                f"Chosen: {preset}")

        # get_data returns the file contents as bytes, so no stream is left open.
        raw = pkgutil.get_data("aldepyde", resource)
        if raw is None:
            raise FileNotFoundError(f"Bundled distribution not found: {resource}")
        self.distribution = json.loads(raw)['Amino Acid Distribution']

    def GetAminoAcids(self):
        """Return the string of residue codes used by the "Random" method."""
        return self._AAs

    def SetAminoAcids(self, AAs):
        """Replace the residue alphabet with a string or a list/tuple of codes.

        Other argument types are ignored, as before.
        """
        if isinstance(AAs, str):
            # The original assigned the builtin ``str`` type here.
            self._AAs = AAs
        elif isinstance(AAs, (list, tuple)):
            self._AAs = "".join(AAs)

    # TODO add setters for polar, nonpolar, etc.

    def AddAminoAcid(self, AA):
        """Append ``AA`` to the residue alphabet if it is not already present."""
        if AA not in self._AAs:
            self._AAs += AA

    def RemoveAminoAcid(self, AA):
        """Remove ``AA`` from the alphabet and from every residue class."""
        # str.replace returns a new string; the result must be stored back.
        for attr in ("_AAs", "_POLAR", "_NONPOLAR", "_CHARGED", "_POSITIVE", "_NEGATIVE"):
            setattr(self, attr, getattr(self, attr).replace(AA, ""))

    def ConfigureAA(self, AA, is_polar=False, is_charged=False, is_positive=False, is_negative=False):
        """Register ``AA`` in the residue classes selected by the flags.

        A residue is placed in ``_POLAR`` when ``is_polar`` is set, or when it
        is charged and charged residues count as polar; otherwise it is placed
        in ``_NONPOLAR``. It is always added to the alphabet.

        Raises:
            ImpossibleSetting: if both ``is_positive`` and ``is_negative`` are set.
        """
        if is_negative and is_positive:
            raise ImpossibleSetting("The residue cannot be both positive and negative")

        ends_up_polar = is_polar or (is_charged and self.Charged_Is_Polar)
        if ends_up_polar:
            # The original appended to _POSITIVE here and could also add the
            # same residue to _NONPOLAR.
            if AA not in self._POLAR:
                self._POLAR += AA
        elif AA not in self._NONPOLAR:
            self._NONPOLAR += AA

        if is_charged and AA not in self._CHARGED:
            self._CHARGED += AA
        if is_negative and AA not in self._NEGATIVE:
            self._NEGATIVE += AA
        if is_positive and AA not in self._POSITIVE:
            self._POSITIVE += AA
        if AA not in self._AAs:
            self._AAs += AA

    def configure(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True):
        """Re-run ``__init__`` with new flags.

        This resets the residue classes and reloads the default preset
        distribution.
        """
        self.__init__(His_Is_Charged, Cys_Is_Polar, Charged_Is_Polar)

    def _load(self, json_str):
        """Parse a JSON document and store its normalized distribution."""
        self.distribution = self._NormalizeValues(json.loads(json_str)["Amino Acid Distribution"])

    def _NormalizeValues(self, dic):
        """Scale the values of ``dic`` in place so they sum to 1 and return it.

        Raises:
            InvalidDistribution: if the values sum to zero (including an empty
                mapping), which cannot be normalized.
        """
        total = sum(float(value) for value in dic.values())
        if total == 0:
            raise InvalidDistribution("Distribution weights must not sum to zero")
        for key in dic:
            dic[key] = float(dic[key]) / total
        return dic

    def LoadDistributionFromFile(self, dist_json_path):
        """Load and normalize a distribution from a JSON file on disk."""
        with open(dist_json_path, "r") as fp:
            self._load(fp.read())

    def LoadDistributionFromURL(self, URL):
        """Load and normalize a distribution from a JSON document at ``URL``.

        Raises:
            ConnectionError: if the response status is not 200.
        """
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(URL, timeout=30)
        if response.status_code == 200:
            self.distribution = self._NormalizeValues(response.json()["Amino Acid Distribution"])
        else:
            raise ConnectionError(f"Invalid url: {URL}")

    def GenerateProtein(self, length, batch_size=1, method="Random", percent_polar=None,
                        percent_charged=None, final_charge=None, charge_range=None,
                        timeout=30, max_attempts=None, verbose=False, cleanup=False, crash_at_bad_settings=False):
        """Generate ``batch_size`` sequences of ``length`` residues.

        Args:
            length: number of residues per sequence.
            batch_size: number of sequences to return.
            method: "Random", "Custom", "Builder" or "Grouper" (any case).
                "Random" and "Grouper" are retried until the sequence passes
                ``_ValidateSequence``; "Custom" and "Builder" are returned
                without validation.
            percent_polar: minimum polar fraction, or None for no constraint.
            percent_charged: minimum charged fraction, or None.
            final_charge: target net charge, or None.
            charge_range: allowed deviation from ``final_charge`` (None = 0).
            timeout: seconds allowed per sequence for the retried methods;
                None disables the limit.
            max_attempts: attempts allowed per sequence for the retried
                methods; None disables the limit.
            verbose: print attempt counters and result statistics.
            cleanup: post-process each sequence with ``_CleanPolar``.
            crash_at_bad_settings: raise instead of returning None when
                ``VerifySettings`` rejects the arguments.

        Returns:
            A list of sequences, or None if the settings were rejected.

        Raises:
            TimeoutError: if ``timeout`` elapses before a valid sequence is found.
            ImpossibleSetting: if ``max_attempts`` is exhausted, or the
                settings are invalid and ``crash_at_bad_settings`` is true.
        """
        self.verbose = verbose
        if not self.VerifySettings(length, batch_size=batch_size, method=method, percent_polar=percent_polar,
                                   percent_charged=percent_charged, final_charge=final_charge,
                                   charge_range=charge_range,
                                   max_attempts=max_attempts, crash=crash_at_bad_settings):
            return None

        method = method.upper()
        constraints = (percent_polar, percent_charged, final_charge, charge_range)
        ret_list = []
        for _ in range(batch_size):
            if method == "RANDOM":
                sequence = self._GenerateValid(lambda: self._Random(length),
                                               constraints, timeout, max_attempts, verbose)
            elif method == "CUSTOM":
                sequence = self._Custom(length)
            elif method == "BUILDER":
                sequence = self._Builder(length, percent_charged, percent_polar, final_charge)
            else:  # "GROUPER": VerifySettings has already rejected anything else
                sequence = self._GenerateValid(
                    lambda: self._Grouper(length, percent_polar, percent_charged, final_charge),
                    constraints, timeout, max_attempts, verbose)

            if cleanup:
                if verbose:
                    print("\nCleaning result sequence")
                sequence = self._CleanPolar(sequence, percent_polar)
            ret_list.append(sequence)

        if verbose:
            for result in ret_list:
                print(
                    f"Result: {result}\nPercent Polar: {self.Analyzer.PercentPolar(result)}\n"
                    f"Percent Charged: {self.Analyzer.PercentCharged(result)}"
                    f"\nPercent NonPolar: {self.Analyzer.PercentNonPolar(result)}"
                    f"\nTotal Charge: {self.Analyzer.GetCharge(result)}\n")

        return ret_list

    def _GenerateValid(self, generate, constraints, timeout, max_attempts, verbose):
        """Call ``generate()`` until its result passes ``_ValidateSequence``.

        At least one attempt is always made. ``constraints`` is the tuple
        (percent_polar, percent_charged, final_charge, charge_range).
        """
        import time  # local import keeps this class free of new file-level imports

        deadline = None if timeout is None else time.monotonic() + timeout
        attempt = 0
        while True:
            if max_attempts is not None and attempt >= max_attempts:
                raise ImpossibleSetting(
                    f"No sequence satisfying the constraints was found in {max_attempts} attempts")
            if deadline is not None and attempt > 0 and time.monotonic() > deadline:
                raise TimeoutError(
                    f"No sequence satisfying the constraints was found within {timeout} seconds")
            sequence = generate()
            attempt += 1
            if verbose:
                print(f"\rAttempt: {attempt}", end="")
            if self._ValidateSequence(sequence, *constraints):
                return sequence

    def _Random(self, length):
        """Return ``length`` residues drawn uniformly from the alphabet."""
        return "".join(random.choice(self._AAs) for _ in range(length))

    def _Custom(self, length):
        """Return ``length`` residues drawn with the loaded distribution's weights."""
        items = list(self.distribution.keys())
        probabilities = list(self.distribution.values())
        return "".join(random.choices(items, weights=probabilities, k=length))

    def _Builder(self, length, percent_charged, percent_polar, final_charge):
        """Assemble a sequence from per-class residue counts, then shuffle it.

        Polar residues are placed first, then charged residues are added up to
        the charged target, then charges are swapped or added to approach
        ``final_charge``. Remaining positions are filled with nonpolar
        residues. The result never exceeds ``length``.
        """
        if percent_charged is None:
            percent_charged = 0
        if percent_polar is None:
            percent_polar = 0

        # Clamp to ``length``: oversized percentages used to yield sequences
        # longer than requested.
        polar_count = min(length, math.ceil(length * percent_polar))
        sequence = [random.choice(self._POLAR) for _ in range(polar_count)]

        # Polar picks may already be charged, so only top up the difference.
        charged_missing = math.ceil(length * percent_charged) - self.Analyzer.NumCharged(sequence)
        charged_count = max(0, min(charged_missing, length - len(sequence)))
        sequence.extend(random.choice(self._CHARGED) for _ in range(charged_count))

        if final_charge is not None:
            disparity = final_charge - self.Analyzer.GetCharge(sequence)
            ind = 0
            # Flipping the sign of one charged residue moves the net charge by 2.
            while not (disparity == 0 or disparity == -1 or disparity == 1) and ind < len(sequence):
                if disparity > 0 and sequence[ind] in self._NEGATIVE:
                    sequence[ind] = random.choice(self._POSITIVE)
                    disparity -= 2
                if disparity < 0 and sequence[ind] in self._POSITIVE:
                    sequence[ind] = random.choice(self._NEGATIVE)
                    disparity += 2
                ind += 1

            # Close any remaining gap by appending charged residues while there is room.
            while disparity != 0 and len(sequence) < length:
                if disparity > 0:
                    sequence.append(random.choice(self._POSITIVE))
                    disparity -= 1
                else:
                    sequence.append(random.choice(self._NEGATIVE))
                    disparity += 1

        while len(sequence) < length:
            sequence.append(random.choice(self._NONPOLAR))
        random.shuffle(sequence)
        return "".join(sequence)

    def _Grouper(self, length, percent_polar, percent_charged, final_charge):
        """Draw each residue by first picking a class, then a member of it.

        Raises:
            ImpossibleSetting: if a nonzero ``final_charge`` is requested with
                ``percent_charged`` equal to 0.
        """
        if percent_polar is None:
            percent_polar = 0
        if percent_charged is None:
            percent_charged = 0
        if final_charge is None:
            final_charge = 0

        if final_charge != 0 and percent_charged == 0:
            raise ImpossibleSetting("If final_charge is nonzero, percent_charged must also be nonzero")

        # Probability that a charged pick is positive. A neutral target splits
        # evenly; the original left this at 0, making every charged residue negative.
        p_pos = 0.5
        if final_charge > 0:
            p_neg = (1 - (final_charge / (percent_charged * length))) / 2
            p_pos = 1 - p_neg
        elif final_charge < 0:
            p_pos = (1 - abs(final_charge) / (percent_charged * length)) / 2

        if self.Charged_Is_Polar:
            percent_charged = percent_charged + percent_polar * (len(self._CHARGED) / (len(self._POLAR)))
            unique = "".join([c for c in self._POLAR if c not in self._CHARGED])
            # NOTE(review): this subtracts the non-charged share of the polar
            # request from percent_polar -- confirm that is the intended split.
            percent_polar = percent_polar - percent_polar * (len(unique) / (len(self._POLAR)))

        picks = []
        for _ in range(length):
            r = random.random()
            if r < percent_polar:
                picks.append(random.choice(self._POLAR))
            elif percent_polar < r and r < percent_polar + percent_charged:
                r2 = random.random()
                if p_pos > r2:
                    picks.append(random.choice(self._POSITIVE))
                else:
                    picks.append(random.choice(self._NEGATIVE))
            else:
                picks.append(random.choice(self._NONPOLAR))
        return "".join(picks)

    def _ValidateSequence(self, sequence, percent_polar, percent_charged, total_charge, charge_range):
        """Return True if ``sequence`` meets every constraint that is not None.

        Percentages are lower bounds; the net charge must lie within
        ``charge_range`` of ``total_charge``. A sequence containing the
        placeholder '$' is never valid.
        """
        if '$' in sequence:
            return False

        if charge_range is None:
            charge_range = 0

        # Polar condition (None or negative means "no constraint")
        if percent_polar is None or percent_polar < 0:
            polar = True
        else:
            polar = self.Analyzer.PercentPolar(sequence) >= percent_polar

        # Charge condition
        if percent_charged is None or percent_charged < 0:
            charge = True
        else:
            charge = self.Analyzer.PercentCharged(sequence) >= percent_charged

        # Charge total condition
        if total_charge is None:
            charge_t = True
        else:
            net = self.Analyzer.GetCharge(sequence)
            charge_t = (total_charge - charge_range) <= net <= (total_charge + charge_range)

        return (polar and charge and charge_t)

    def VerifySettings(self, length, batch_size, method, percent_polar,
                       percent_charged, final_charge, charge_range, max_attempts, crash=True):
        """Check generation settings for contradictions.

        Returns:
            True if the settings are acceptable; False if they are not and
            ``crash`` is false.

        Raises:
            ImpossibleSetting: if ``batch_size`` is not positive (always), or
                for any other rejected setting when ``crash`` is true.
        """
        if batch_size <= 0:
            raise ImpossibleSetting("Batch size must be larger than 0")

        # Type check first so a non-integer never reaches the ``<`` comparison.
        if (max_attempts is not None) and (not isinstance(max_attempts, int) or max_attempts < 0):
            if crash:
                raise ImpossibleSetting("The number of specified max attempts must be a positive integer or None")
            return False

        # GenerateProtein upper-cases the method, so accept any casing here too.
        accepted = [name.upper() for name in self._ACCEPTED_METHODS]
        if not isinstance(method, str) or method.upper() not in accepted:
            if crash:
                raise ImpossibleSetting(
                    f"Selected method must be one of the following: {self._ACCEPTED_METHODS}.\n\t Chosen: {method}")
            return False

        if not ((percent_charged is None or percent_charged >= 0) and (percent_polar is None or percent_polar >= 0)):
            if crash:
                raise ImpossibleSetting("All provided probabilities must be positive or 0")
            return False

        if not self.Charged_Is_Polar:
            a = 0 if percent_polar is None else percent_polar
            b = 0 if percent_charged is None else percent_charged
            if a + b > 1:
                if crash:
                    raise ImpossibleSetting(
                        "The sum of provided probabilities must be less than or equal to 1 if charged residues are not considered polar")
                return False

        if final_charge is not None and length < abs(final_charge):
            if crash:
                raise ImpossibleSetting(
                    f"The length of the output protein must be greater than or equal to the magnitude of the charge ({length} < {abs(final_charge)})")
            return False

        # NOTE(review): the original had an unreachable check after the final
        # return that rejected percent_charged == 0 with a nonzero final_charge.
        # It is dropped rather than enabled: percent_charged is a lower bound
        # in _ValidateSequence, so such settings are satisfiable by "Random",
        # and _Grouper already raises for this case itself.
        return True

    def _CleanPolar(self, sequence, percent_polar):
        """Replace surplus uncharged polar residues with nonpolar ones.

        Scans left to right until the polar count drops to
        ``floor(len(sequence) * percent_polar)`` or the sequence ends.
        Returns the sequence unchanged when ``percent_polar`` is None.
        """
        if percent_polar is None:
            return sequence
        disparity = self.Analyzer.NumPolar(sequence) - math.floor(len(sequence) * percent_polar)
        ind = 0
        s = list(sequence)
        while disparity > 0 and ind < len(s):
            if (s[ind] in self._POLAR) and (s[ind] not in self._CHARGED):
                s[ind] = random.choice(self._NONPOLAR)
                disparity -= 1
            ind += 1
        return "".join(s)
@@ -0,0 +1,3 @@
1
+ from aldepyde.rand.RandomProtein import *
2
+
3
+ __all__ = ['RandomProtein']
@@ -0,0 +1,89 @@
1
+ # import pkg_resources
2
+ # from aldepyde.data import data as data
3
+
4
class ProteinStats:
    """Composition and charge statistics for one-letter amino-acid sequences.

    Residue classes are stored as strings of one-letter codes. Every method
    accepts any iterable of single-character codes (a string or a list).
    """
    #TODO Clean this class a lot... also add some things
    #TODO Isoelectric point
    def __init__(self, His_Is_Charged=True, Cys_Is_Polar=True, Charged_Is_Polar=True):
        """Build the residue classes.

        Args:
            His_Is_Charged: if true, H is positive/charged; otherwise polar.
            Cys_Is_Polar: if true, C is polar; otherwise nonpolar.
            Charged_Is_Polar: if true, charged residues are also polar.
        """
        self.AAs = "ARNDCEQGHILKMFPSTWYV"
        self.POSITIVE = "RK"
        self.NEGATIVE = "ED"
        self.POLAR = "STNQ"
        self.NONPOLAR = "AVILMFYWGP"
        self.CHARGED = "RKED"

        if His_Is_Charged:
            self.CHARGED += "H"
            self.POSITIVE += "H"
        else:
            self.POLAR += "H"

        if Cys_Is_Polar:
            self.POLAR += "C"
        else:
            self.NONPOLAR += "C"

        if Charged_Is_Polar:
            self.POLAR += self.POSITIVE
            self.POLAR += self.NEGATIVE

    @staticmethod
    def _Count(sequence, residues):
        """Return how many items of ``sequence`` appear in ``residues``."""
        return sum(1 for c in sequence if c in residues)

    @staticmethod
    def _Fraction(count, sequence):
        """Return ``count / len(sequence)``, or 0.0 for an empty sequence."""
        size = len(sequence)
        return count / size if size else 0.0

    def GetNumResidues(self, sequence):
        """Return a dict mapping every code in ``AAs`` to its count in ``sequence``."""
        return {c: sequence.count(c) for c in self.AAs}

    def GetCharge(self, sequence):
        """Return the net charge: +1 per POSITIVE residue, -1 per NEGATIVE residue."""
        return self._Count(sequence, self.POSITIVE) - self._Count(sequence, self.NEGATIVE)

    def NumCharged(self, sequence):
        """Return the number of residues that are in CHARGED."""
        return self._Count(sequence, self.CHARGED)

    def NumPolar(self, sequence):
        """Return the number of residues that are in POLAR."""
        return self._Count(sequence, self.POLAR)

    def PercentCharged(self, sequence):
        """Return the fraction of residues that are in POSITIVE or NEGATIVE."""
        return self._Fraction(self._Count(sequence, self.POSITIVE + self.NEGATIVE), sequence)

    def PercentPolarC(self, sequence):
        """Return the fraction of residues that are in ``POLAR_CHECK``.

        NOTE(review): ``POLAR_CHECK`` is never assigned by this class, so the
        original always raised AttributeError. Until its intended contents are
        confirmed, fall back to ``POLAR`` when the attribute is absent.
        """
        residues = getattr(self, "POLAR_CHECK", self.POLAR)
        return self._Fraction(self._Count(sequence, residues), sequence)

    def PercentPolar(self, sequence):
        """Return the fraction of residues that are in POLAR."""
        return self._Fraction(self._Count(sequence, self.POLAR), sequence)

    def PercentNonPolar(self, sequence):
        """Return the fraction of residues that are in NONPOLAR."""
        return self._Fraction(self._Count(sequence, self.NONPOLAR), sequence)

    # def GetMass(self, sequence, from_abundances=False):
    #     return data.ProteinMass(sequence, from_abundances=from_abundances)
File without changes