aldepyde 0.0.0a2__py3-none-any.whl → 0.0.0a32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aldepyde might be problematic. Click here for more details.
- aldepyde/Parsers/_mmcif_parser.py +0 -0
- aldepyde/Parsers/_pdb_parser.py +0 -0
- aldepyde/__init__.py +46 -2
- aldepyde/_config.py +98 -36
- aldepyde/biomolecule/Residue.py +9 -0
- aldepyde/biomolecule/_Atom.py +95 -0
- aldepyde/biomolecule/_AtomFactory.py +71 -0
- aldepyde/biomolecule/__init__.py +15 -0
- aldepyde/biomolecule/_amino_acid.py +6 -0
- aldepyde/biomolecule/_dna.py +6 -0
- aldepyde/biomolecule/_pdb.py +455 -0
- aldepyde/biomolecule/_rna.py +6 -0
- aldepyde/biomolecule/utils.py +60 -0
- aldepyde/cache/__init__.py +2 -0
- aldepyde/cache/_cache.py +257 -0
- aldepyde/cache/cachemanager.py +212 -0
- aldepyde/cache/downloader.py +13 -0
- aldepyde/cache/utils.py +32 -0
- aldepyde/configurable.py +7 -0
- aldepyde/data/RemoteFileHandler.py +32 -0
- aldepyde/data/__init__.py +1 -0
- aldepyde/data.py +148 -0
- aldepyde/databases/PDB.py +0 -0
- aldepyde/databases/RemoteFileHandler.py +43 -0
- aldepyde/databases/UniRef.py +75 -0
- aldepyde/databases/__init__.py +0 -0
- aldepyde/databases/_database.py +38 -0
- aldepyde/env.py +43 -0
- aldepyde/fetcher/__init__.py +0 -0
- aldepyde/fetcher/test.py +2 -0
- aldepyde/json/CHG.json +25 -0
- aldepyde/json/Swiss_Prot.json +25 -0
- aldepyde/json/chemistry.json +4622 -0
- aldepyde/rand/RandomProtein.py +402 -0
- aldepyde/rand/__init__.py +3 -0
- aldepyde/stats/ProteinStats.py +89 -0
- aldepyde/stats/__init__.py +0 -0
- aldepyde/utils.py +275 -0
- {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a32.dist-info}/METADATA +4 -3
- aldepyde-0.0.0a32.dist-info/RECORD +43 -0
- {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a32.dist-info}/WHEEL +1 -1
- aldepyde-0.0.0a2.dist-info/RECORD +0 -7
- {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a32.dist-info/licenses}/LICENSE +0 -0
- {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a32.dist-info}/top_level.txt +0 -0
aldepyde/cache/_cache.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# # Data cache handler
|
|
2
|
+
# import shutil
|
|
3
|
+
# import os
|
|
4
|
+
# import requests
|
|
5
|
+
# import re # We're getting spicy with this
|
|
6
|
+
# import gzip
|
|
7
|
+
# from io import BytesIO, TextIOWrapper
|
|
8
|
+
# import json
|
|
9
|
+
# from dataclasses import dataclass
|
|
10
|
+
# from aldepyde.env import ENV
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
# class _cache_handler():
|
|
14
|
+
# class CachePointerException(Exception):
|
|
15
|
+
# pass
|
|
16
|
+
#
|
|
17
|
+
# @dataclass
|
|
18
|
+
# class EvictionResult():
|
|
19
|
+
# success: bool
|
|
20
|
+
# deleted: list[str]
|
|
21
|
+
# error: Exception | None = None
|
|
22
|
+
#
|
|
23
|
+
# @property
|
|
24
|
+
# def error_message(self) -> str | None:
|
|
25
|
+
# return str(self.error) if self.error is not None else None
|
|
26
|
+
#
|
|
27
|
+
# def __init__(self, enabled: bool=True, path: str=None, max_memory: str="2gib", null: bool=False):
|
|
28
|
+
# self.null = null
|
|
29
|
+
# if null:
|
|
30
|
+
# self._enabled = False
|
|
31
|
+
# return
|
|
32
|
+
# self.cache_marker = ".aldepyde_cache"
|
|
33
|
+
# self.version = "1.0.0"
|
|
34
|
+
# if path is None:
|
|
35
|
+
# self._path = ENV.get_default_path()
|
|
36
|
+
# else:
|
|
37
|
+
# self._path = path
|
|
38
|
+
# if os.path.exists(os.path.join(self._path, self.cache_marker)):
|
|
39
|
+
# marker_path = os.path.join(self._path, self.cache_marker)
|
|
40
|
+
# try:
|
|
41
|
+
# self.load(marker_path)
|
|
42
|
+
# except Exception as e:
|
|
43
|
+
# self._rectify_marker(marker_path)
|
|
44
|
+
# else:
|
|
45
|
+
# self._enabled = enabled
|
|
46
|
+
# self._max_memory = self._parse_memory(max_memory) # Memory in MiB
|
|
47
|
+
# self.designate_cache(self._path)
|
|
48
|
+
#
|
|
49
|
+
# def _rectify_marker(self, marker_path): # Since settings have not been loaded, this MUST come from an environment variable
|
|
50
|
+
# env = os.getenv(ENV.CACHE_REPAIR)
|
|
51
|
+
# print(os.environ)
|
|
52
|
+
# if env is None:
|
|
53
|
+
# raise RuntimeError("No repair policy set. Please set environment variable 'ALDEPYDE_REPAIR_POLICY'"
|
|
54
|
+
# " to one of the following: ['fail', 'replace', 'backup']")
|
|
55
|
+
# elif env.lower() == "replace": # Just overwrite the old file with defaults
|
|
56
|
+
# self._set_defaults()
|
|
57
|
+
# elif env.lower() == "backup": # Rename the old file to filename.bak
|
|
58
|
+
# marker_path = os.path.join(self._path, self.cache_marker)
|
|
59
|
+
# os.rename(marker_path, marker_path + ".bak")
|
|
60
|
+
# self._set_defaults()
|
|
61
|
+
# elif env.lower() == "fail": # Raise an exception
|
|
62
|
+
# raise RuntimeError(f"The aldepyde module cache has been corrupted.\n"
|
|
63
|
+
# f"Rerun your program after setting {ENV.CACHE_REPAIR} to either 'replace' or 'backup'"
|
|
64
|
+
# f"to repair the cache.")
|
|
65
|
+
# else: # Incorrect or unknown command
|
|
66
|
+
# raise ValueError(f"Unkown value set to environment variable {ENV.CACHE_REPAIR}. Valid inputs are ['replace', 'backup', 'fail']\n"
|
|
67
|
+
# f"\t{ENV.CACHE_REPAIR}={env}")
|
|
68
|
+
#
|
|
69
|
+
# def _set_defaults(self):
|
|
70
|
+
# self._enabled = True
|
|
71
|
+
# self._max_memory = self._parse_memory("2gib")
|
|
72
|
+
#
|
|
73
|
+
# def load(self, path):
|
|
74
|
+
# with open(path, "r") as fp:
|
|
75
|
+
# settings = json.load(fp)
|
|
76
|
+
# self._enabled = settings['enabled']
|
|
77
|
+
# self._max_memory = settings['max_memory']
|
|
78
|
+
#
|
|
79
|
+
# def _save_cache(self):
|
|
80
|
+
# marker = os.path.join(self._path, self.cache_marker)
|
|
81
|
+
# # with open(marker, "r") as fp:
|
|
82
|
+
# # settings = json.load(fp)
|
|
83
|
+
# settings = {}
|
|
84
|
+
# settings["version"] = self.version
|
|
85
|
+
# settings["enabled"] = self._enabled
|
|
86
|
+
# settings["path"] = self._path
|
|
87
|
+
# settings["max_memory"] = self._max_memory
|
|
88
|
+
# with open(marker, "w") as fp:
|
|
89
|
+
# fp.write(json.dumps(settings, indent=2))
|
|
90
|
+
#
|
|
91
|
+
# def designate_cache(self, path) -> None:
|
|
92
|
+
# os.makedirs(self._path, exist_ok=True)
|
|
93
|
+
# self._save_cache()
|
|
94
|
+
# print(os.path.join(path, self.cache_marker))
|
|
95
|
+
# with open(os.path.join(path, self.cache_marker), "w") as fp:
|
|
96
|
+
# fp.write("{}")
|
|
97
|
+
#
|
|
98
|
+
# def _verify_cache_directory(self, path: str) -> bool:
|
|
99
|
+
# return os.path.exists(os.path.join(path))
|
|
100
|
+
#
|
|
101
|
+
# def _is_safe_to_delete(self, path: str) -> bool:
|
|
102
|
+
# marker_path = os.path.join(path, self.cache_marker)
|
|
103
|
+
# return (
|
|
104
|
+
# os.path.exists(marker_path) and
|
|
105
|
+
# os.path.isdir(path) and
|
|
106
|
+
# os.path.abspath(path) != "/" and
|
|
107
|
+
# os.path.basename(path).startswith("aldepyde_cache")
|
|
108
|
+
# )
|
|
109
|
+
#
|
|
110
|
+
# def _get_default_cache_path(self) -> str:
|
|
111
|
+
# return os.path.join(
|
|
112
|
+
# os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "aldepyde")
|
|
113
|
+
#
|
|
114
|
+
#
|
|
115
|
+
#
|
|
116
|
+
# def set_enabled(self, enabled:bool) -> None:
|
|
117
|
+
# self._enabled = enabled
|
|
118
|
+
# self._save_cache()
|
|
119
|
+
#
|
|
120
|
+
# def set_path(self, path:str, cache_policy:str=None) -> None:
|
|
121
|
+
# if cache_policy.lower() == "move":
|
|
122
|
+
# shutil.move(self._path, path)
|
|
123
|
+
# elif cache_policy.lower() == "copy":
|
|
124
|
+
# shutil.copy(self._path, path)
|
|
125
|
+
# self._path = path
|
|
126
|
+
# self._save_cache()
|
|
127
|
+
#
|
|
128
|
+
#
|
|
129
|
+
# def set_max_memory(self, memory: str) -> None:
|
|
130
|
+
# self._max_memory = self._parse_memory(memory)
|
|
131
|
+
# self._save_cache()
|
|
132
|
+
#
|
|
133
|
+
# def _parse_memory(self, memory: str) -> int:
|
|
134
|
+
# ALLOWED_PREFIX = "bkmgt" # No, you don't get to use petabytes
|
|
135
|
+
# full_re = f"[0-9]+[{ALLOWED_PREFIX}]?i?b?"
|
|
136
|
+
# numeric_re = "[0-9]+"
|
|
137
|
+
#
|
|
138
|
+
# if memory.isnumeric():
|
|
139
|
+
# memory += "mib"
|
|
140
|
+
# if re.fullmatch(full_re, memory, flags=re.IGNORECASE) is None:
|
|
141
|
+
# raise ValueError(f"Requested memory must be of the following form: {full_re}")
|
|
142
|
+
#
|
|
143
|
+
# match = re.match(numeric_re, memory)
|
|
144
|
+
# numeric = int(memory[:match.span()[1]])
|
|
145
|
+
# unit = memory[match.span()[1]:]
|
|
146
|
+
# base = 1024 if "i" in unit else 1000
|
|
147
|
+
# multiple = base**(ALLOWED_PREFIX.index(unit[0].lower()))
|
|
148
|
+
# return numeric * multiple
|
|
149
|
+
#
|
|
150
|
+
# def grab_url(self, url: str, filename: str) -> BytesIO:
|
|
151
|
+
# # Return a requested file as a BytesIO stream from a URL or the cache
|
|
152
|
+
# if not self.in_cache(filename):
|
|
153
|
+
# response = requests.get(url)
|
|
154
|
+
# response.raise_for_status()
|
|
155
|
+
# stream_io = BytesIO(response.content)
|
|
156
|
+
# self.save_to_cache(stream_io, filename)
|
|
157
|
+
# with gzip.open(stream_io, "r") as gz:
|
|
158
|
+
# return BytesIO(gz.read())
|
|
159
|
+
# else:
|
|
160
|
+
# return self.extract_from_cache(filename)
|
|
161
|
+
#
|
|
162
|
+
# def clear_cache(self) -> None:
|
|
163
|
+
# if self._is_safe_to_delete(self._path):
|
|
164
|
+
# for p in os.listdir(self._path):
|
|
165
|
+
# path = os.path.join(self._path, p)
|
|
166
|
+
# if os.path.isdir(path):
|
|
167
|
+
# shutil.rmtree(path)
|
|
168
|
+
# else:
|
|
169
|
+
# os.remove(path)
|
|
170
|
+
#
|
|
171
|
+
# # def SaveCache(self, destination, compress=False):
|
|
172
|
+
# # if not compress:
|
|
173
|
+
#
|
|
174
|
+
#
|
|
175
|
+
# def delete_cache(self) -> None:
|
|
176
|
+
# if self._is_safe_to_delete(self._path):
|
|
177
|
+
# shutil.rmtree(self._path)
|
|
178
|
+
#
|
|
179
|
+
# def cache_replace(self, nbytes) -> EvictionResult:
|
|
180
|
+
# cache_list = self.list_cache()
|
|
181
|
+
# cache_list.sort(key=os.path.getatime)
|
|
182
|
+
# deleted = []
|
|
183
|
+
# while nbytes + self.cache_usage() > self._max_memory:
|
|
184
|
+
# if not cache_list:
|
|
185
|
+
# return self.EvictionResult(False, deleted)
|
|
186
|
+
# file = cache_list.pop(0)
|
|
187
|
+
# try:
|
|
188
|
+
# os.remove(file)
|
|
189
|
+
# deleted.append(file)
|
|
190
|
+
# except FileNotFoundError as error:
|
|
191
|
+
# return self.EvictionResult(False, deleted, error)
|
|
192
|
+
# return self.EvictionResult(True, deleted)
|
|
193
|
+
#
|
|
194
|
+
#
|
|
195
|
+
# def _make_cache(self) -> None:
|
|
196
|
+
# if not os.path.isdir(self._path):
|
|
197
|
+
# os.mkdir(self._path)
|
|
198
|
+
#
|
|
199
|
+
#
|
|
200
|
+
#
|
|
201
|
+
# # TODO This could maybe be expanded to look for compressed/decompressed versions of a file, but that may cause issues later
|
|
202
|
+
# def in_cache(self, filename: str) -> bool:
|
|
203
|
+
# if not self._enabled: # If the cache is disabled, we behave as if the file doesn't exist
|
|
204
|
+
# return False
|
|
205
|
+
# if os.path.exists(os.path.join(self._path, filename)):
|
|
206
|
+
# return True
|
|
207
|
+
# return False
|
|
208
|
+
#
|
|
209
|
+
# def extract_from_cache(self, filename:str) -> BytesIO | None:
|
|
210
|
+
# if self.in_cache(filename):
|
|
211
|
+
# _, file_extension = os.path.splitext(filename)
|
|
212
|
+
# if file_extension == ".gz":
|
|
213
|
+
# with gzip.open(os.path.join(self._path, filename), "rb") as gz:
|
|
214
|
+
# return BytesIO(gz.read())
|
|
215
|
+
# else:
|
|
216
|
+
# with open(filename, "rb") as fp:
|
|
217
|
+
# stream = BytesIO()
|
|
218
|
+
# stream.write(fp.read())
|
|
219
|
+
# return stream
|
|
220
|
+
# return None
|
|
221
|
+
#
|
|
222
|
+
# def list_cache(self) -> list:
|
|
223
|
+
# all_files = []
|
|
224
|
+
# for base, _, paths in os.walk(self._path):
|
|
225
|
+
# for path in paths:
|
|
226
|
+
# all_files.append(os.path.join(base, path))
|
|
227
|
+
# return all_files
|
|
228
|
+
#
|
|
229
|
+
# def cache_usage(self, percentage: bool=False) -> float | int:
|
|
230
|
+
# total_size = 0
|
|
231
|
+
# for filepath in self.list_cache(): # Yes this jumps from O(n^2) to O(n^3), but It's probably fine. Just don't use a petabyte-sized cache
|
|
232
|
+
# total_size += os.path.getsize(filepath)
|
|
233
|
+
# if percentage:
|
|
234
|
+
# return total_size / self._max_memory
|
|
235
|
+
# return total_size
|
|
236
|
+
#
|
|
237
|
+
#
|
|
238
|
+
# def save_to_cache(self, stream, filename) -> bool:
|
|
239
|
+
# filename = os.path.join(self._path, filename)
|
|
240
|
+
# self._make_cache()
|
|
241
|
+
# print(stream.getbuffer().nbytes)
|
|
242
|
+
# if not self._enabled:
|
|
243
|
+
# return False
|
|
244
|
+
#
|
|
245
|
+
# # Clear entries in cache by age
|
|
246
|
+
# cache_list = self.list_cache()
|
|
247
|
+
# cache_list.sort(key=os.path.getctime)
|
|
248
|
+
# while stream.getbuffer().nbytes + self.cache_usage() > self._max_memory:
|
|
249
|
+
# # print(f"Removing {cache_list[0]}")
|
|
250
|
+
# os.remove(cache_list[0])
|
|
251
|
+
# cache_list.pop(0)
|
|
252
|
+
# if stream.getbuffer().nbytes + self.cache_usage() < self._max_memory:
|
|
253
|
+
# with open(filename, "wb") as fp:
|
|
254
|
+
# fp.write(stream.read())
|
|
255
|
+
# stream.seek(0)
|
|
256
|
+
# return True
|
|
257
|
+
#
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from aldepyde.env import ENV
|
|
6
|
+
from .utils import _parse_memory, _convert_memory_bytes, _convert_memory_bits
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def requires_enabled(func):
    """Decorator: run *func* only when the manager is enabled and initialized.

    When the guard fails the call is skipped (returns None) and a notice is
    printed so the silent no-op is at least visible.
    """
    from functools import wraps

    @wraps(func)  # preserve the wrapped method's name/docstring for debugging
    def wrapper(cls, *args, **kwargs):
        if hasattr(cls, "enabled") and cls.enabled and cls._initialized:
            return func(cls, *args, **kwargs)
        else:
            print("Cache manager not initialized")

    return wrapper
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CacheManager():
    """Manage aldepyde's on-disk cache directory.

    A cache directory is recognised by a JSON marker file
    (``.aldepyde_cache``) that stores the manager's settings.  Files
    created by aldepyde carry the ``adpy.`` fingerprint prefix; only such
    files are ever deleted by eviction/cleanup.
    """

    def __init__(self, path=None, initialize=False):
        """Create a manager.

        path: cache directory; defaults to ENV.get_default_path().
        initialize: when False (default) the manager is inert -- every
            @requires_enabled method becomes a no-op until initialized.
        """
        if not initialize:
            self._initialized = False
            return
        self._initialized = True
        self._cache_marker = ".aldepyde_cache"
        self.fingerprint = "adpy."
        self._path = ENV.get_default_path() if path is None else path
        # marker_location/load_manager/load_defaults/save_settings must NOT
        # be guarded by @requires_enabled: they run here before the
        # 'enabled' attribute exists, and a guard would silently skip them
        # and leave the manager unconfigured.
        if os.path.exists(self.marker_location()):
            self.load_manager()
        else:
            self.load_defaults()
            self.save_settings()

    @requires_enabled
    def set_cache_location(self, directory):
        """Point the cache at *directory*, creating it and a marker if needed."""
        try:
            os.makedirs(directory, exist_ok=True)
            if not os.path.isfile(os.path.join(directory, self._cache_marker)):
                self.save_settings(os.path.join(directory, self._cache_marker))
        except OSError:
            print(f"Error establishing cache directory at : {directory}", file=sys.stderr)

    @requires_enabled
    def set_max_memory(self, memory: str) -> None:
        """Set the cache size cap from a human-readable string (e.g. '2gib')."""
        self.max_memory = _parse_memory(memory)
        self.save_settings()

    @requires_enabled
    def cache_location(self):
        """Return the cache directory path."""
        return self._path

    @requires_enabled
    def _dump_settings(self):
        # Debug helper: render all instance attributes as a string.
        return f"{self.__dict__}"

    def marker_location(self):
        """Return the full path of the settings marker file.

        Deliberately unguarded: __init__ needs it before 'enabled' exists.
        """
        return os.path.join(self._path, self._cache_marker)

    # Saves settings to a location. If a path is specified, the results are
    # saved to that location but the cache location is NOT changed. Use
    # self.set_cache_location() instead for this.  Unguarded so that a
    # disabled state can still be persisted.
    def save_settings(self, path=None):
        if path is None:
            path = self.marker_location()
        elif os.path.isdir(path):
            path = os.path.join(path, self._cache_marker)
        with open(path, 'w') as fp:
            fp.write(json.dumps(vars(self), indent=2))

    def load_manager(self, path=None):
        """Load settings from a marker file and bind the manager to its directory.

        Unguarded: it runs during __init__, before 'enabled' has been loaded.
        """
        if path is None:
            path = self.marker_location()
        elif os.path.isdir(path):
            path = os.path.join(path, self._cache_marker)
        with open(path, 'r') as fp:
            settings = json.load(fp)
            for key, value in settings.items():
                self.__dict__[key] = value
        # The marker's directory wins over whatever path was stored inside it.
        self._path = os.path.dirname(path)

    def enable(self):
        self.enabled = True

    def disable(self):
        self.enabled = False

    def _enabled_and_initialized(self):
        return self.enabled and self._initialized

    def load_defaults(self):
        """Reset settings to their defaults.

        Unguarded: this is what creates the 'enabled' attribute in the
        first place, so it can never be gated on it.
        """
        self._cache_marker = ".aldepyde_cache"
        self.version = "1.0.0"
        self.max_memory = _parse_memory('2gib')
        self.enabled = True

    @requires_enabled
    def _inside(self, f):
        # Resolve a cache-relative filename to a path inside the cache dir.
        return os.path.join(self._path, f)

    # Requires a filename, not a path. The path will be self._path
    @requires_enabled
    def _is_safe_to_delete(self, filename):
        # Every condition must hold.  The fingerprint check is parenthesized
        # so its 'or' cannot bypass the marker/existence checks (the original
        # 'a and b and c and d or e' made it do exactly that).
        return (os.path.exists(self.marker_location())
                and (filename in os.listdir(self._path) or filename.startswith(self._path))
                and self.exists(filename)
                and (filename.startswith(self.fingerprint)
                     or os.path.basename(filename).startswith(self.fingerprint)))

    @requires_enabled
    def delete_from_cache(self, filename) -> bool | None:
        """Delete one cached file.  True on success, False if refused,
        None when the manager is disabled/uninitialized (via the guard)."""
        if self._is_safe_to_delete(filename):
            os.remove(self._inside(filename))
            return True
        return False

    @requires_enabled
    def clear_cache(self) -> None:
        """Delete every aldepyde-fingerprinted file in the cache directory."""
        for entry in os.listdir(self._path):
            if self._is_safe_to_delete(entry):
                os.remove(self._inside(entry))
                print(f"Deleting {entry}")

    @requires_enabled
    def get(self, file) -> str | None:
        """Return the on-disk path of *file* if cached, else None."""
        return self._inside(file) if self.exists(file) else None

    # TODO Create a with cache_manager.open(...) setup here
    # def open(self, filename):
    #     pass

    @requires_enabled
    def exists(self, filename):
        """True when *filename* is present as a file in the cache directory."""
        return os.path.isfile(self._inside(filename))

    @requires_enabled
    def list_cache(self) -> list:
        """Return absolute paths of every file under the cache directory."""
        all_files = []
        for base, _, names in os.walk(self._path):
            for name in names:
                all_files.append(os.path.join(base, name))
        return all_files

    @requires_enabled
    def cache_usage(self, output=None):
        """Total size of cached files.

        output: None -> raw byte count (int); 'percentage' -> fraction of
        max_memory; 'bytes'/'bits' -> human-readable string.
        """
        total_size = sum(os.path.getsize(filepath) for filepath in self.list_cache())
        if output == 'percentage':
            return total_size / self.max_memory
        elif output == 'bytes':
            return _convert_memory_bytes(total_size)
        elif output == 'bits':
            return _convert_memory_bits(total_size)
        return total_size

    @dataclass
    class EvictionResult():
        # Outcome of an eviction pass: what was removed and (optionally) why
        # it stopped early.
        success: bool
        deleted: list[str]
        error: Exception | None = None

        @property
        def error_message(self) -> str | None:
            return str(self.error) if self.error is not None else None

    @requires_enabled
    def evict(self, nbytes) -> EvictionResult | None:
        """Evict least-recently-accessed files until *nbytes* more would fit."""
        cache_list = self.list_cache()
        cache_list.sort(key=os.path.getatime)  # oldest access first
        deleted = []
        while nbytes + self.cache_usage() > self.max_memory:
            if not cache_list:
                # Nothing left that we are allowed to delete.
                return self.EvictionResult(False, deleted)
            file = cache_list.pop(0)
            try:
                if self.delete_from_cache(file):
                    deleted.append(file)
            except FileNotFoundError as error:
                return self.EvictionResult(False, deleted, error)
        return self.EvictionResult(True, deleted)

    @requires_enabled
    def clean(self) -> EvictionResult:
        """Evict until usage is back under max_memory."""
        return self.evict(0)

    @requires_enabled
    def save_to_cache(self, stream, filename) -> bool | None:
        """Persist a BytesIO *stream* under *filename*; True on success.

        The write is skipped (returns False) when eviction cannot free
        enough room, so the configured max_memory cap is actually enforced
        (the original wrote unconditionally after a failed eviction).
        """
        target = os.path.join(self._path, filename)
        result = self.evict(stream.getbuffer().nbytes)
        if result is None or not result.success:
            return False
        with open(target, "wb") as fp:
            fp.write(stream.read())
        stream.seek(0)  # leave the stream rewound for the caller
        return True
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from io import BytesIO, TextIOWrapper
|
|
2
|
+
import requests
|
|
3
|
+
import gzip
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _fetch_file_from_pdb(url: str, filename: str) -> BytesIO:
    """Download *url* and return its raw body as an in-memory stream.

    *filename* is accepted for interface compatibility but not used here.
    Raises requests.HTTPError on a non-2xx response.
    """
    reply = requests.get(url)
    reply.raise_for_status()
    return BytesIO(reply.content)
|
|
12
|
+
|
|
13
|
+
|
aldepyde/cache/utils.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
def _verify_cache_directory(path: str) -> bool:
|
|
5
|
+
return os.path.exists(os.path.join(path))
|
|
6
|
+
|
|
7
|
+
def _parse_memory(memory: str) -> int:
|
|
8
|
+
ALLOWED_PREFIX = "bkmgt" # No, you don't get to use petabytes
|
|
9
|
+
full_re = f"[0-9]+[{ALLOWED_PREFIX}]?i?b?"
|
|
10
|
+
numeric_re = "[0-9]+"
|
|
11
|
+
|
|
12
|
+
if memory.isnumeric():
|
|
13
|
+
memory += "mib"
|
|
14
|
+
if re.fullmatch(full_re, memory, flags=re.IGNORECASE) is None:
|
|
15
|
+
raise ValueError(f"Requested memory must be of the following form: {full_re}")
|
|
16
|
+
|
|
17
|
+
match = re.match(numeric_re, memory)
|
|
18
|
+
numeric = int(memory[:match.span()[1]])
|
|
19
|
+
unit = memory[match.span()[1]:]
|
|
20
|
+
base = 1024 if "i" in unit else 1000
|
|
21
|
+
multiple = base**(ALLOWED_PREFIX.index(unit[0].lower()))
|
|
22
|
+
return numeric * multiple
|
|
23
|
+
|
|
24
|
+
def _convert_memory_bits(memory: int) -> str:
|
|
25
|
+
ALLOWED_PREFIX = "bkmgt"
|
|
26
|
+
digits = len(str(memory))
|
|
27
|
+
return f"{memory / (1000 ** (digits//3)):.3f} {ALLOWED_PREFIX[digits//3].upper()}b"
|
|
28
|
+
|
|
29
|
+
def _convert_memory_bytes(memory: int) -> str:
|
|
30
|
+
ALLOWED_PREFIX = "bkmgt"
|
|
31
|
+
digits = len(str(memory))
|
|
32
|
+
return f"{memory / (1024 ** (digits // 3)):.3f} {ALLOWED_PREFIX[digits // 3].upper()}b"
|
aldepyde/configurable.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
import requests
|
|
3
|
+
import gzip
|
|
4
|
+
|
|
5
|
+
import aldepyde
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RemoteFileHandler():
    """Static helpers for fetching and unpacking remote files with cache support."""

    @staticmethod
    def fetch_file_from_pdb(url: str, name) -> BytesIO:
        """Return file *name* from the cache when present, else download *url*.

        A freshly downloaded body is stored in the cache before returning.
        """
        cache = aldepyde.get_cache()
        if cache.in_cache(name):
            return cache.extract_from_cache(name)
        reply = requests.get(url)
        reply.raise_for_status()
        payload = BytesIO(reply.content)
        aldepyde.get_cache().save_to_cache(payload, name)
        return payload

    @staticmethod
    def is_gzip(stream: BytesIO) -> bool:
        """Check the two-byte gzip magic number, leaving the stream rewound."""
        header = stream.read(2)
        stream.seek(0)
        return header == b'\x1f\x8b'

    @staticmethod
    def unpack_tar_gz_bio(stream: BytesIO) -> BytesIO:
        """Decompress a gzip stream fully into a fresh BytesIO."""
        with gzip.open(stream, "r") as gz:
            return BytesIO(gz.read())
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .RemoteFileHandler import RemoteFileHandler
|