aldepyde 0.0.0a2__py3-none-any.whl → 0.0.0a33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. aldepyde/Parsers/_mmcif_parser.py +0 -0
  2. aldepyde/Parsers/_pdb_parser.py +0 -0
  3. aldepyde/__init__.py +27 -2
  4. aldepyde/_config.py +98 -36
  5. aldepyde/biomolecule/Residue.py +9 -0
  6. aldepyde/biomolecule/_Atom.py +95 -0
  7. aldepyde/biomolecule/_AtomFactory.py +71 -0
  8. aldepyde/biomolecule/__init__.py +18 -0
  9. aldepyde/biomolecule/_amino_acid.py +6 -0
  10. aldepyde/biomolecule/_dna.py +6 -0
  11. aldepyde/biomolecule/_pdb.py +455 -0
  12. aldepyde/biomolecule/_rna.py +6 -0
  13. aldepyde/biomolecule/utils.py +60 -0
  14. aldepyde/cache/__init__.py +2 -0
  15. aldepyde/cache/_cache.py +257 -0
  16. aldepyde/cache/cachemanager.py +212 -0
  17. aldepyde/cache/downloader.py +13 -0
  18. aldepyde/cache/utils.py +32 -0
  19. aldepyde/configurable.py +7 -0
  20. aldepyde/data/RemoteFileHandler.py +32 -0
  21. aldepyde/data/__init__.py +1 -0
  22. aldepyde/data.py +148 -0
  23. aldepyde/databases/PDB.py +0 -0
  24. aldepyde/databases/RemoteFileHandler.py +43 -0
  25. aldepyde/databases/UniRef.py +113 -0
  26. aldepyde/databases/__init__.py +0 -0
  27. aldepyde/databases/_database.py +41 -0
  28. aldepyde/env.py +43 -0
  29. aldepyde/fetcher/__init__.py +0 -0
  30. aldepyde/fetcher/test.py +2 -0
  31. aldepyde/json/CHG.json +25 -0
  32. aldepyde/json/Swiss_Prot.json +25 -0
  33. aldepyde/json/chemistry.json +4622 -0
  34. aldepyde/rand/RandomProtein.py +404 -0
  35. aldepyde/rand/__init__.py +6 -0
  36. aldepyde/stats/ProteinStats.py +89 -0
  37. aldepyde/stats/__init__.py +0 -0
  38. aldepyde/utils.py +275 -0
  39. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/METADATA +4 -3
  40. aldepyde-0.0.0a33.dist-info/RECORD +43 -0
  41. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/WHEEL +1 -1
  42. aldepyde-0.0.0a2.dist-info/RECORD +0 -7
  43. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info/licenses}/LICENSE +0 -0
  44. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/top_level.txt +0 -0
aldepyde/data.py ADDED
@@ -0,0 +1,148 @@
+ from functools import reduce
+ from importlib.resources import files
+ import json
+ import sys
+
+ # from aldepyde.biomolecule import dna, rna, amino_acid, Residue
+
+ class _DataSingleton(type):
+     _instance = {}
+     def __call__(cls, *args, singleton=True, **kwargs):
+         # singleton=False opts out of caching and returns a fresh instance
+         if not singleton:
+             return super(_DataSingleton, cls).__call__(*args, **kwargs)
+         if cls not in cls._instance:
+             cls._instance[cls] = super(_DataSingleton, cls).__call__(*args, **kwargs)
+         return cls._instance[cls]
+
+
+ class Data(metaclass=_DataSingleton):
+     # Map paths: (attribute name, JSON key path)
+     _map = ("map", ("map",))
+     _dna_map = ("dna_map", ("map", "dna"))
+     _rna_map = ("rna_map", ("map", "rna"))
+     _amino_map = ("amino_map", ("map", "amino_acid"))
+
+     def __init__(self, json_location=None):
+         if json_location is None:
+             ###### If something breaks, you can use this as a backup way to access the data ######
+             # base = os.path.dirname(os.path.abspath(__file__))
+             # json_location = os.path.join(base, "json", "chemistry.json")
+             json_location = files("aldepyde.json").joinpath("chemistry.json")
+         self.json_location = json_location
+         self._loaded = {}
+
+     # Technically, this is the only function we need.
+     # You get the rest because I care <3
+     def load_values(self, *args, store_as: str = None):
+         with open(self.json_location) as js:
+             if args in self._loaded:
+                 return self.__dict__[self._loaded[args]]
+             j_data = reduce(lambda d, key: d[key], args, json.load(js))
+             if store_as is not None and args not in self._loaded:
+                 self._loaded[args] = store_as
+                 setattr(self, store_as, j_data)
+                 self.__dict__[store_as]['_key'] = args
+             return j_data
+
+     def unload(self, attr_name: str) -> bool:
+         if attr_name not in self.__dict__.keys():
+             return False
+         try:
+             item = self.__dict__.pop(attr_name)['_key']
+             self._loaded.pop(item)
+         except KeyError:
+             # This really shouldn't occur unless you're trying to unload something that was never loaded
+             raise KeyError(f'An error occurred while attempting to remove {attr_name} from the data object.'
+                            f' Are you sure you are attempting to unload a loaded value?')
+         return True
+
+     # TODO check if something is already loaded
+     def GrabParent(self, *args):
+         pass
+
+     # Cute lil' recursive method that shows the structure of a loaded JSON. Maybe not so practical
+     # at runtime, but helpful for debugging and planning your loads
+     def reveal(self, *args, indent=" ") -> str:
+         j_data = self.load_values(*args, store_as=None)
+         return self._reveal_helper(j_data, indent, indent)
+
+     def _reveal_helper(self, js: dict, indent, adder, ret_str="") -> str:
+         for key in js:
+             if not isinstance(js[key], dict):
+                 continue
+             ret_str += indent + key + "\n"
+             ret_str = self._reveal_helper(js[key], indent + adder, adder, ret_str)
+         return ret_str
+
+     def Map(self, residue: str | None, *args, store_as: str | None = _map[0], residue_type: str = 'amino_acid') -> None | str:
+         if args == ():
+             args = self._map[1]
+         if store_as is None:
+             store_as = self._map[0]
+         residue_type = residue_type.lower()
+         if residue_type not in ["dna", "rna", "amino_acid", "element"]:
+             print("Allowed residue_type mappings are 'dna', 'rna', 'amino_acid', and 'element'", file=sys.stderr)
+         map = self.load_values(*args, store_as=store_as)
+         if residue is None:  # Just initialize self.map
+             return None
+         return map[residue_type][residue.lower()]
+
+     # # These three could probably be condensed
+     # def CheckDNA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._dna_map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['dna'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckRNA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['rna'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckAA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['amino_acid'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckResidue(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     if self.CheckAA(value, *args):
+     #         return True
+     #     if self.CheckDNA(value, *args):
+     #         return True
+     #     if self.CheckRNA(value, *args):
+     #         return True
+     #     return False
+     #
+     # # This method determines if something is DNA, RNA, or an amino acid.
+     # # Don't be cheeky with this. If you aren't following the IUPAC naming schemes,
+     # # you're gonna have a bad time.
+     # #
+     # # RNA has exclusively 1-letter codes: A, C, U, G, etc.
+     # # DNA has exclusively 2-letter codes: DA, DC, DT, DG, etc.
+     # # Amino acids have exclusively 3-letter codes
+     # # def ExtrapolateResidueType(self, value: str) -> object:
+     # #     if self.CheckRNA(value):
+     # #         return rna
+     # #     if self.CheckDNA(value):
+     # #         return dna
+     # #     if self.CheckAA(value):
+     # #         return amino_acid
+     # #     return Residue
+
+ data = Data()
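A minimal usage sketch for the Data singleton above. The key path ("map", "amino_acid") is an assumption read off the _amino_map constant, not a documented API:

from aldepyde.data import data

# Print the nested structure of the bundled chemistry.json
# (reveal() only descends into dict-valued keys).
print(data.reveal())

# Load the amino-acid map and cache it on the singleton as `amino_map`;
# repeated calls with the same key path return the cached dict.
amino = data.load_values("map", "amino_acid", store_as="amino_map")

# Drop the cached attribute when finished.
data.unload("amino_map")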
aldepyde/databases/RemoteFileHandler.py ADDED
@@ -0,0 +1,43 @@
+ import zlib
+ from io import BytesIO
+ import urllib.request
+ import gzip
+
+ GZIP = b"\x1f\x8b"
+ ZIP = b"\x50\x4B\x03\x04"
+
+ class RemoteFileHandler():
+     @staticmethod
+     def stream_url(url, chunk_size=8192):
+         response = urllib.request.urlopen(url)
+         head = response.read(4)
+         mode = RemoteFileHandler.determine_ftype(head)
+         if mode == 'gzip':
+             decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
+             yield decompressor.decompress(head)
+             # The walrus assignment already terminates the loop on an empty read
+             while stream := response.read(chunk_size):
+                 yield decompressor.decompress(stream)
+             yield decompressor.flush()
+
+     @staticmethod
+     def determine_ftype(head: bytes) -> str:
+         if head.startswith(GZIP):
+             return "gzip"
+         elif head.startswith(ZIP):
+             return "zip"
+
+     @staticmethod
+     def is_gzip(stream: BytesIO) -> bool:
+         magic_start = stream.read(2)
+         stream.seek(0)
+         return magic_start == b'\x1f\x8b'
+
+     @staticmethod
+     def unpack_tar_gz_bio(stream: BytesIO) -> BytesIO:
+         with gzip.open(stream, "r") as gz:
+             return BytesIO(gz.read())
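Note that, as written, stream_url only yields chunks for gzip payloads; other content types fall through and the generator produces nothing. A minimal consumption sketch, with a placeholder URL (not one the package ships):

from aldepyde.databases.RemoteFileHandler import RemoteFileHandler

# Placeholder: any gzip-compressed resource reachable over HTTP.
URL = "https://example.org/data.txt.gz"

with open("data.txt", "wb") as out:
    for chunk in RemoteFileHandler.stream_url(URL):
        out.write(chunk)  # chunks arrive already decompressed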
aldepyde/databases/UniRef.py ADDED
@@ -0,0 +1,113 @@
+ import zlib
+
+ from aldepyde.databases.RemoteFileHandler import RemoteFileHandler
+ from aldepyde.databases._database import _database
+ from aldepyde.utils import ProgressBar
+ import os
+ import gzip
+
+ class uniref_parser(_database):
+     def __init__(self):
+         super().__init__()
+
+     # TODO single entry parsing
+     # TODO store metadata upon request
+
+     @staticmethod
+     def stream_uniref_gz(filepath, chunk_size=8192, use_progress_bar=False):
+         raw_stream, size = _database.open_stream(filepath)
+         pbar = ProgressBar(size // chunk_size) if use_progress_bar else None
+         decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
+         try:
+             while True:
+                 comp_chunk = raw_stream.read(chunk_size)
+                 if not comp_chunk:
+                     break
+                 if pbar is not None:
+                     pbar.update()
+                 decomp_chunk = decompressor.decompress(comp_chunk)
+                 if decomp_chunk:
+                     yield decomp_chunk
+             final = decompressor.flush()
+             if final:
+                 yield final
+         finally:
+             raw_stream.close()
+
+     @staticmethod
+     def download_file(url, destination, chunk_size=8192, use_progress_bar=False):
+         raw_stream, size = _database.open_stream(url)
+         pbar = ProgressBar(size // chunk_size) if use_progress_bar else None
+         with open(destination, 'wb') as fp:
+             while True:
+                 chunk = raw_stream.read(chunk_size)
+                 if not chunk:
+                     break
+                 if pbar is not None:
+                     pbar.update()
+                 fp.write(chunk)
+
+     @staticmethod
+     def stitch_streamed_sequences(stream, as_str=True):
+         buffer = b''
+         for chunk in stream:
+             buffer += chunk
+             while buffer.count(b'>') >= 2:
+                 sequences = [b">" + seq for seq in buffer.split(b">") if seq != b""]
+                 buffer = buffer[buffer.rfind(b">"):]
+                 ret_l = [b"".join(sequence.split(b'\n')[1:]).replace(b"\n", b"") for sequence in sequences[:-1]]
+                 for s in ret_l:
+                     yield s if not as_str else s.decode()
+         yield uniref_parser._final_sequence(buffer) if not as_str else uniref_parser._final_sequence(buffer).decode()
+
+     @staticmethod
+     def _final_sequence(buffer):
+         lines = buffer.split(b'\n')
+         return b"".join(lines[1:])
+
+     @staticmethod
+     def stream_uniref50(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def stream_uniref90(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def stream_uniref100(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def download_uniref50(destination='uniref50.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+
+     @staticmethod
+     def download_uniref90(destination='uniref90.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+
+     @staticmethod
+     def download_uniref100(destination='uniref100.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
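A quick sketch of how the streaming API above might be exercised without downloading the full UniRef50 archive; abandoning the generator early simply stops reading the HTTP stream:

from itertools import islice
from aldepyde.databases.UniRef import uniref_parser

# stitch=True yields one complete sequence string per FASTA record.
for seq in islice(uniref_parser.stream_uniref50(stitch=True), 5):
    print(len(seq), seq[:40])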
aldepyde/databases/_database.py ADDED
@@ -0,0 +1,41 @@
+ from abc import ABC, abstractmethod
+ import gzip
+ import requests
+ import os
+ from typing import Tuple, BinaryIO
+ from io import TextIOWrapper
+
+ class _database(ABC):
+
+     def __init__(self):
+         pass
+
+     @abstractmethod
+     def fetch(self, url):
+         pass
+
+     @abstractmethod
+     def fetch_code(self, codes):
+         pass
+
+     @abstractmethod
+     def parse(self, text):
+         pass
+
+     @staticmethod
+     def open_stream(source: str) -> Tuple[BinaryIO, int] | None:
+         # Yes, I know the first conditionals do the same thing
+         if source.startswith('http://') or source.startswith('https://'):
+             resp = requests.get(source, stream=True)
+             resp.raise_for_status()
+             length = resp.headers.get("Content-Length")
+             return resp.raw, int(length) if length else None
+         else:
+             size = os.path.getsize(source)
+             return open(source, 'rb'), size
+
+     def __call__(self):
+         pass
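Because _database is an ABC with three abstract methods, a concrete subclass must implement fetch, fetch_code, and parse. A skeletal, hypothetical example (ToyDatabase is not a class the package ships):

from aldepyde.databases._database import _database

class ToyDatabase(_database):
    def fetch(self, url):
        # open_stream returns (binary stream, size); read it all and close.
        stream, size = _database.open_stream(url)
        try:
            return stream.read()
        finally:
            stream.close()

    def fetch_code(self, codes):
        # Hypothetical URL scheme, purely for illustration.
        return [self.fetch(f"https://example.org/{code}") for code in codes]

    def parse(self, text):
        return text.decode(errors="replace")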
aldepyde/env.py ADDED
@@ -0,0 +1,43 @@
+ import os
+ import sys
+
+ class ENV():
+     CACHE_PATH = "ALDEPYDE_CACHE_DIRECTORY"
+     CACHE_REPAIR = "ALDEPYDE_REPAIR_POLICY"
+     VERBOSE = "ALDEPYDE_VERBOSE_POLICY"
+     APP = "aldepyde"
+
+     @staticmethod
+     def set_default_env_vars():
+         # set_env expects the ENV attribute *name*, not its value
+         ENV.set_env("CACHE_PATH", ENV.get_default_path())
+         ENV.set_env("CACHE_REPAIR", "fail")
+         ENV.set_env("VERBOSE", "false")
+
+     @staticmethod
+     def set_env(var, val, force=True):
+         if not hasattr(ENV, var):
+             raise ValueError(f"{var} is not a valid aldepyde.ENV key")
+         env_var = getattr(ENV, var)
+
+         if not force and env_var in os.environ:
+             print(f"Aldepyde variable {env_var} is already set. Use force=True to override")
+             return
+
+         os.environ[env_var] = str(val)
+         print(f"Set {env_var} = {val}")
+
+     # TODO Test all this somehow
+     @staticmethod
+     def get_default_path():
+         platform = sys.platform
+         xdg = os.getenv('XDG_CACHE_HOME')
+         if xdg:
+             return os.path.join(os.path.expanduser(xdg), ENV.APP)
+         if platform == "win32":  # Windows
+             base = os.getenv("LOCALAPPDATA", os.path.expanduser("~\\AppData\\Local"))
+             return os.path.join(base, ENV.APP, "Cache")
+         elif platform == "darwin":  # macOS
+             return os.path.join(os.path.expanduser("~/Library/Caches"), ENV.APP)
+         else:  # Linux without XDG set
+             return os.path.join(os.path.expanduser("~/.cache"), ENV.APP)
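A short sketch of the intended call pattern, assuming set_env is keyed by the attribute name as in set_default_env_vars above:

import os
from aldepyde.env import ENV

# Populate the three aldepyde variables with platform defaults.
ENV.set_default_env_vars()

# force=False leaves an already-set variable untouched.
ENV.set_env("CACHE_PATH", "/tmp/aldepyde-cache", force=False)

print(os.environ[ENV.CACHE_PATH])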
aldepyde/fetcher/test.py ADDED
@@ -0,0 +1,2 @@
+ def test_m():
+     pass
aldepyde/json/CHG.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "Amino Acid Distribution": {
+     "A": 0.0704,
+     "C": 0.0231,
+     "D": 0.0484,
+     "E": 0.0692,
+     "F": 0.0378,
+     "G": 0.0675,
+     "H": 0.0256,
+     "I": 0.0450,
+     "K": 0.0565,
+     "L": 0.0984,
+     "M": 0.0237,
+     "N": 0.0368,
+     "P": 0.0610,
+     "Q": 0.0465,
+     "R": 0.0552,
+     "S": 0.0799,
+     "T": 0.0534,
+     "V": 0.0613,
+     "W": 0.0121,
+     "Y": 0.0282
+   },
+   "Reference": "Shen, Shiyi et al. ''Probabilistic analysis of the frequencies of amino acid pairs within characterized protein sequences.'' Physica A vol. 370,2 (2006): 651-662. doi:10.1016/j.physa.2006.03.004"
+ }
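Since the frequencies above form a probability distribution over the 20 standard residues, they can drive weighted sampling directly. A hypothetical consumer using only the standard library (the resource path mirrors Data.__init__):

import json
import random
from importlib.resources import files

with files("aldepyde.json").joinpath("CHG.json").open() as fp:
    dist = json.load(fp)["Amino Acid Distribution"]

# Draw a random 20-residue sequence weighted by the CHG frequencies.
residues, weights = zip(*dist.items())
print("".join(random.choices(residues, weights=weights, k=20)))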
aldepyde/json/Swiss_Prot.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "Amino Acid Distribution": {
+     "A": 0.0777,
+     "C": 0.0157,
+     "D": 0.0530,
+     "E": 0.0656,
+     "F": 0.0405,
+     "G": 0.0691,
+     "H": 0.0227,
+     "I": 0.0591,
+     "K": 0.0595,
+     "L": 0.096,
+     "M": 0.0238,
+     "N": 0.0427,
+     "P": 0.0469,
+     "Q": 0.0393,
+     "R": 0.0526,
+     "S": 0.0694,
+     "T": 0.055,
+     "V": 0.0667,
+     "W": 0.0118,
+     "Y": 0.0311
+   },
+   "Reference": "Shen, Shiyi et al. ''Probabilistic analysis of the frequencies of amino acid pairs within characterized protein sequences.'' Physica A vol. 370,2 (2006): 651-662. doi:10.1016/j.physa.2006.03.004"
+ }
+ }