aldepyde 0.0.0a2__py3-none-any.whl → 0.0.0a33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. aldepyde/Parsers/_mmcif_parser.py +0 -0
  2. aldepyde/Parsers/_pdb_parser.py +0 -0
  3. aldepyde/__init__.py +27 -2
  4. aldepyde/_config.py +98 -36
  5. aldepyde/biomolecule/Residue.py +9 -0
  6. aldepyde/biomolecule/_Atom.py +95 -0
  7. aldepyde/biomolecule/_AtomFactory.py +71 -0
  8. aldepyde/biomolecule/__init__.py +18 -0
  9. aldepyde/biomolecule/_amino_acid.py +6 -0
  10. aldepyde/biomolecule/_dna.py +6 -0
  11. aldepyde/biomolecule/_pdb.py +455 -0
  12. aldepyde/biomolecule/_rna.py +6 -0
  13. aldepyde/biomolecule/utils.py +60 -0
  14. aldepyde/cache/__init__.py +2 -0
  15. aldepyde/cache/_cache.py +257 -0
  16. aldepyde/cache/cachemanager.py +212 -0
  17. aldepyde/cache/downloader.py +13 -0
  18. aldepyde/cache/utils.py +32 -0
  19. aldepyde/configurable.py +7 -0
  20. aldepyde/data/RemoteFileHandler.py +32 -0
  21. aldepyde/data/__init__.py +1 -0
  22. aldepyde/data.py +148 -0
  23. aldepyde/databases/PDB.py +0 -0
  24. aldepyde/databases/RemoteFileHandler.py +43 -0
  25. aldepyde/databases/UniRef.py +113 -0
  26. aldepyde/databases/__init__.py +0 -0
  27. aldepyde/databases/_database.py +41 -0
  28. aldepyde/env.py +43 -0
  29. aldepyde/fetcher/__init__.py +0 -0
  30. aldepyde/fetcher/test.py +2 -0
  31. aldepyde/json/CHG.json +25 -0
  32. aldepyde/json/Swiss_Prot.json +25 -0
  33. aldepyde/json/chemistry.json +4622 -0
  34. aldepyde/rand/RandomProtein.py +404 -0
  35. aldepyde/rand/__init__.py +6 -0
  36. aldepyde/stats/ProteinStats.py +89 -0
  37. aldepyde/stats/__init__.py +0 -0
  38. aldepyde/utils.py +275 -0
  39. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/METADATA +4 -3
  40. aldepyde-0.0.0a33.dist-info/RECORD +43 -0
  41. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/WHEEL +1 -1
  42. aldepyde-0.0.0a2.dist-info/RECORD +0 -7
  43. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info/licenses}/LICENSE +0 -0
  44. {aldepyde-0.0.0a2.dist-info → aldepyde-0.0.0a33.dist-info}/top_level.txt +0 -0
aldepyde/data.py ADDED
@@ -0,0 +1,148 @@
+ from functools import reduce
+ from importlib.resources import files
+ import json
+ import sys
+
+ # from aldepyde.biomolecule import dna, rna, amino_acid, Residue
+
+ class _DataSingleton(type):
+     _instance = {}
+     def __call__(cls, *args, singleton=True, **kwargs):
+         # singleton=False opts out of caching and returns a fresh instance
+         if not singleton:
+             return super(_DataSingleton, cls).__call__(*args, **kwargs)
+         if cls not in cls._instance:
+             cls._instance[cls] = super(_DataSingleton, cls).__call__(*args, **kwargs)
+         return cls._instance[cls]
+
+
+ class Data(metaclass=_DataSingleton):
+     # Map paths: (attribute name, JSON key path)
+     _map = ("map", ("map",))
+     _dna_map = ("dna_map", ("map", "dna"))
+     _rna_map = ("rna_map", ("map", "rna"))
+     _amino_map = ("amino_map", ("map", "amino_acid"))
+
+     def __init__(self, json_location=None):
+         if json_location is None:
+             ###### If something breaks, you can use this as a backup way to access the data ######
+             # base = os.path.dirname(os.path.abspath(__file__))
+             # json_location = os.path.join(base, "json", "chemistry.json")
+             json_location = files("aldepyde.json").joinpath("chemistry.json")
+         self.json_location = json_location
+         self._loaded = {}
+
+     # Technically, this is the only function we need.
+     # You get the rest because I care <3
+     def load_values(self, *args, store_as: str = None):
+         with open(self.json_location) as js:
+             if args in self._loaded:
+                 return self.__dict__[self._loaded[args]]
+             j_data = reduce(lambda d, key: d[key], args, json.load(js))
+             if store_as is not None and args not in self._loaded:
+                 self._loaded[args] = store_as
+                 setattr(self, store_as, j_data)
+                 self.__dict__[store_as]['_key'] = args
+             return j_data
+
+     def unload(self, attr_name: str) -> bool:
+         if attr_name not in self.__dict__.keys():
+             return False
+         try:
+             item = self.__dict__.pop(attr_name)['_key']
+             self._loaded.pop(item)
+         except KeyError:
+             # This really shouldn't occur unless you're trying to unload something that was never loaded
+             raise KeyError(f'An error occurred while attempting to remove {attr_name} from the data object.'
+                            f' Are you sure you are attempting to unload a loaded value?')
+         return True
+
+     # TODO check if something is already loaded
+     def GrabParent(self, *args):
+         pass
+
+     # Cute lil' recursive method that shows the structure of a loaded JSON. Maybe not so practical
+     # at runtime, but helpful for debugging and planning your loads
+     def reveal(self, *args, indent=" ") -> str:
+         j_data = self.load_values(*args, store_as=None)
+         return self._reveal_helper(j_data, indent, indent)
+
+     def _reveal_helper(self, js: dict, indent, adder, ret_str="") -> str:
+         for key in js:
+             if not isinstance(js[key], dict):
+                 continue
+             ret_str += indent + key + "\n"
+             ret_str = self._reveal_helper(js[key], indent + adder, adder, ret_str)
+         return ret_str
+
+     def Map(self, residue: str | None, *args, store_as: str | None = _map[0], residue_type: str = 'amino_acid') -> None | str:
+         if args == ():
+             args = self._map[1]
+         if store_as is None:
+             store_as = self._map[0]
+         residue_type = residue_type.lower()
+         if residue_type not in ["dna", "rna", "amino_acid", "element"]:
+             print("Allowed residue_type mappings are 'dna', 'rna', 'amino_acid', and 'element'", file=sys.stderr)
+         map = self.load_values(*args, store_as=store_as)
+         if residue is None:  # Just initialize self.map
+             return None
+         return map[residue_type][residue.lower()]
+
+     # # These three could probably be condensed
+     # def CheckDNA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._dna_map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['dna'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckRNA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['rna'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckAA(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     map = self.load_values(*args, store_as=None)
+     #     if value in map['amino_acid'].keys():
+     #         return True
+     #     return False
+     #
+     # def CheckResidue(self, value: str, *args) -> bool:
+     #     if args == ():
+     #         args = self._map[1]
+     #     if self.CheckAA(value, *args):
+     #         return True
+     #     if self.CheckDNA(value, *args):
+     #         return True
+     #     if self.CheckRNA(value, *args):
+     #         return True
+     #     return False
+     #
+     # # This method determines if something is DNA, RNA, or an amino acid.
+     # # Don't be cheeky with this. If you aren't following the IUPAC naming schemes,
+     # # you're gonna have a bad time.
+     # #
+     # # RNA has exclusively 1-letter codes: A, C, U, G, etc.
+     # # DNA has exclusively 2-letter codes: DA, DC, DT, DG, etc.
+     # # Amino acids have exclusively 3-letter codes
+     # # def ExtrapolateResidueType(self, value: str) -> object:
+     # #     if self.CheckRNA(value):
+     # #         return rna
+     # #     if self.CheckDNA(value):
+     # #         return dna
+     # #     if self.CheckAA(value):
+     # #         return amino_acid
+     # #     return Residue
+
+ data = Data()
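A minimal usage sketch for the Data singleton above. The key path ("map", "amino_acid") is an assumption read off the _amino_map constant, not a documented API:

from aldepyde.data import data

# Print the nested structure of the bundled chemistry.json
# (reveal() only descends into dict-valued keys).
print(data.reveal())

# Load the amino-acid map and cache it on the singleton as `amino_map`;
# repeated calls with the same key path return the cached dict.
amino = data.load_values("map", "amino_acid", store_as="amino_map")

# Drop the cached attribute when finished.
data.unload("amino_map")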
aldepyde/databases/RemoteFileHandler.py ADDED
@@ -0,0 +1,43 @@
+ import zlib
+ from io import BytesIO
+ import urllib.request
+ import gzip
+
+ GZIP = b"\x1f\x8b"
+ ZIP = b"\x50\x4B\x03\x04"
+
+ class RemoteFileHandler():
+     @staticmethod
+     def stream_url(url, chunk_size=8192):
+         response = urllib.request.urlopen(url)
+         head = response.read(4)
+         mode = RemoteFileHandler.determine_ftype(head)
+         if mode == 'gzip':
+             decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
+             yield decompressor.decompress(head)
+             # The walrus assignment already terminates the loop on an empty read
+             while stream := response.read(chunk_size):
+                 yield decompressor.decompress(stream)
+             yield decompressor.flush()
+
+     @staticmethod
+     def determine_ftype(head: bytes) -> str:
+         if head.startswith(GZIP):
+             return "gzip"
+         elif head.startswith(ZIP):
+             return "zip"
+
+     @staticmethod
+     def is_gzip(stream: BytesIO) -> bool:
+         magic_start = stream.read(2)
+         stream.seek(0)
+         return magic_start == b'\x1f\x8b'
+
+     @staticmethod
+     def unpack_tar_gz_bio(stream: BytesIO) -> BytesIO:
+         with gzip.open(stream, "r") as gz:
+             return BytesIO(gz.read())
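Note that, as written, stream_url only yields chunks for gzip payloads; other content types fall through and the generator produces nothing. A minimal consumption sketch, with a placeholder URL (not one the package ships):

from aldepyde.databases.RemoteFileHandler import RemoteFileHandler

# Placeholder: any gzip-compressed resource reachable over HTTP.
URL = "https://example.org/data.txt.gz"

with open("data.txt", "wb") as out:
    for chunk in RemoteFileHandler.stream_url(URL):
        out.write(chunk)  # chunks arrive already decompressed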
aldepyde/databases/UniRef.py ADDED
@@ -0,0 +1,113 @@
+ import zlib
+
+ from aldepyde.databases.RemoteFileHandler import RemoteFileHandler
+ from aldepyde.databases._database import _database
+ from aldepyde.utils import ProgressBar
+ import os
+ import gzip
+
+ class uniref_parser(_database):
+     def __init__(self):
+         super().__init__()
+
+     # TODO single entry parsing
+     # TODO store metadata upon request
+
+     @staticmethod
+     def stream_uniref_gz(filepath, chunk_size=8192, use_progress_bar=False):
+         raw_stream, size = _database.open_stream(filepath)
+         pbar = ProgressBar(size // chunk_size) if use_progress_bar else None
+         decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
+         try:
+             while True:
+                 comp_chunk = raw_stream.read(chunk_size)
+                 if not comp_chunk:
+                     break
+                 if pbar is not None:
+                     pbar.update()
+                 decomp_chunk = decompressor.decompress(comp_chunk)
+                 if decomp_chunk:
+                     yield decomp_chunk
+             final = decompressor.flush()
+             if final:
+                 yield final
+         finally:
+             raw_stream.close()
+
+     @staticmethod
+     def download_file(url, destination, chunk_size=8192, use_progress_bar=False):
+         raw_stream, size = _database.open_stream(url)
+         pbar = ProgressBar(size // chunk_size) if use_progress_bar else None
+         with open(destination, 'wb') as fp:
+             while True:
+                 chunk = raw_stream.read(chunk_size)
+                 if not chunk:
+                     break
+                 if pbar is not None:
+                     pbar.update()
+                 fp.write(chunk)
+
+     @staticmethod
+     def stitch_streamed_sequences(stream, as_str=True):
+         buffer = b''
+         for chunk in stream:
+             buffer += chunk
+             while buffer.count(b'>') >= 2:
+                 sequences = [b">" + seq for seq in buffer.split(b">") if seq != b""]
+                 buffer = buffer[buffer.rfind(b">"):]
+                 ret_l = [b"".join(sequence.split(b'\n')[1:]).replace(b"\n", b"") for sequence in sequences[:-1]]
+                 for s in ret_l:
+                     yield s if not as_str else s.decode()
+         yield uniref_parser._final_sequence(buffer) if not as_str else uniref_parser._final_sequence(buffer).decode()
+
+     @staticmethod
+     def _final_sequence(buffer):
+         lines = buffer.split(b'\n')
+         return b"".join(lines[1:])
+
+     @staticmethod
+     def stream_uniref50(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def stream_uniref90(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def stream_uniref100(chunk_size=8192, use_progress_bar=False, stitch=False):
+         if not stitch:
+             yield from uniref_parser.stream_uniref_gz('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz',
+                                                       chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+         else:
+             yield from uniref_parser.stitch_streamed_sequences(uniref_parser.stream_uniref_gz(
+                 'https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz',
+                 chunk_size=chunk_size, use_progress_bar=use_progress_bar))
+
+     @staticmethod
+     def download_uniref50(destination='uniref50.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+
+     @staticmethod
+     def download_uniref90(destination='uniref90.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
+
+     @staticmethod
+     def download_uniref100(destination='uniref100.fasta.gz', chunk_size=8192, use_progress_bar=False):
+         uniref_parser.download_file('https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz', destination=destination,
+                                     chunk_size=chunk_size, use_progress_bar=use_progress_bar)
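A quick sketch of how the streaming API above might be exercised without downloading the full UniRef50 archive; abandoning the generator early simply stops reading the HTTP stream:

from itertools import islice
from aldepyde.databases.UniRef import uniref_parser

# stitch=True yields one complete sequence string per FASTA record.
for seq in islice(uniref_parser.stream_uniref50(stitch=True), 5):
    print(len(seq), seq[:40])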
aldepyde/databases/_database.py ADDED
@@ -0,0 +1,41 @@
+ from abc import ABC, abstractmethod
+ import gzip
+ import requests
+ import os
+ from typing import Tuple, BinaryIO
+ from io import TextIOWrapper
+
+ class _database(ABC):
+
+     def __init__(self):
+         pass
+
+     @abstractmethod
+     def fetch(self, url):
+         pass
+
+     @abstractmethod
+     def fetch_code(self, codes):
+         pass
+
+     @abstractmethod
+     def parse(self, text):
+         pass
+
+     @staticmethod
+     def open_stream(source: str) -> Tuple[BinaryIO, int] | None:
+         # Yes, I know the first conditionals do the same thing
+         if source.startswith('http://') or source.startswith('https://'):
+             resp = requests.get(source, stream=True)
+             resp.raise_for_status()
+             length = resp.headers.get("Content-Length")
+             return resp.raw, int(length) if length else None
+         else:
+             size = os.path.getsize(source)
+             return open(source, 'rb'), size
+
+     def __call__(self):
+         pass
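Because _database is an ABC with three abstract methods, a concrete subclass must implement fetch, fetch_code, and parse. A skeletal, hypothetical example (ToyDatabase is not a class the package ships):

from aldepyde.databases._database import _database

class ToyDatabase(_database):
    def fetch(self, url):
        # open_stream returns (binary stream, size); read it all and close.
        stream, size = _database.open_stream(url)
        try:
            return stream.read()
        finally:
            stream.close()

    def fetch_code(self, codes):
        # Hypothetical URL scheme, purely for illustration.
        return [self.fetch(f"https://example.org/{code}") for code in codes]

    def parse(self, text):
        return text.decode(errors="replace")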
aldepyde/env.py ADDED
@@ -0,0 +1,43 @@
+ import os
+ import sys
+
+ class ENV():
+     CACHE_PATH = "ALDEPYDE_CACHE_DIRECTORY"
+     CACHE_REPAIR = "ALDEPYDE_REPAIR_POLICY"
+     VERBOSE = "ALDEPYDE_VERBOSE_POLICY"
+     APP = "aldepyde"
+
+     @staticmethod
+     def set_default_env_vars():
+         # set_env expects the ENV attribute *name*, not its value
+         ENV.set_env("CACHE_PATH", ENV.get_default_path())
+         ENV.set_env("CACHE_REPAIR", "fail")
+         ENV.set_env("VERBOSE", "false")
+
+     @staticmethod
+     def set_env(var, val, force=True):
+         if not hasattr(ENV, var):
+             raise ValueError(f"{var} is not a valid aldepyde.ENV key")
+         env_var = getattr(ENV, var)
+
+         if not force and env_var in os.environ:
+             print(f"Aldepyde variable {env_var} is already set. Use force=True to override")
+             return
+
+         os.environ[env_var] = str(val)
+         print(f"Set {env_var} = {val}")
+
+     # TODO Test all this somehow
+     @staticmethod
+     def get_default_path():
+         platform = sys.platform
+         xdg = os.getenv('XDG_CACHE_HOME')
+         if xdg:
+             return os.path.join(os.path.expanduser(xdg), ENV.APP)
+         if platform == "win32":  # Windows
+             base = os.getenv("LOCALAPPDATA", os.path.expanduser("~\\AppData\\Local"))
+             return os.path.join(base, ENV.APP, "Cache")
+         elif platform == "darwin":  # macOS
+             return os.path.join(os.path.expanduser("~/Library/Caches"), ENV.APP)
+         else:  # Linux without XDG set
+             return os.path.join(os.path.expanduser("~/.cache"), ENV.APP)
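A short sketch of the intended call pattern, assuming set_env is keyed by the attribute name as in set_default_env_vars above:

import os
from aldepyde.env import ENV

# Populate the three aldepyde variables with platform defaults.
ENV.set_default_env_vars()

# force=False leaves an already-set variable untouched.
ENV.set_env("CACHE_PATH", "/tmp/aldepyde-cache", force=False)

print(os.environ[ENV.CACHE_PATH])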
aldepyde/fetcher/test.py ADDED
@@ -0,0 +1,2 @@
+ def test_m():
+     pass
aldepyde/json/CHG.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "Amino Acid Distribution": {
+     "A": 0.0704,
+     "C": 0.0231,
+     "D": 0.0484,
+     "E": 0.0692,
+     "F": 0.0378,
+     "G": 0.0675,
+     "H": 0.0256,
+     "I": 0.0450,
+     "K": 0.0565,
+     "L": 0.0984,
+     "M": 0.0237,
+     "N": 0.0368,
+     "P": 0.0610,
+     "Q": 0.0465,
+     "R": 0.0552,
+     "S": 0.0799,
+     "T": 0.0534,
+     "V": 0.0613,
+     "W": 0.0121,
+     "Y": 0.0282
+   },
+   "Reference": "Shen, Shiyi et al. ''Probabilistic analysis of the frequencies of amino acid pairs within characterized protein sequences.'' Physica A vol. 370,2 (2006): 651-662. doi:10.1016/j.physa.2006.03.004"
+ }
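Since the frequencies above form a probability distribution over the 20 standard residues, they can drive weighted sampling directly. A hypothetical consumer using only the standard library (the resource path mirrors Data.__init__):

import json
import random
from importlib.resources import files

with files("aldepyde.json").joinpath("CHG.json").open() as fp:
    dist = json.load(fp)["Amino Acid Distribution"]

# Draw a random 20-residue sequence weighted by the CHG frequencies.
residues, weights = zip(*dist.items())
print("".join(random.choices(residues, weights=weights, k=20)))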
aldepyde/json/Swiss_Prot.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "Amino Acid Distribution": {
+     "A": 0.0777,
+     "C": 0.0157,
+     "D": 0.0530,
+     "E": 0.0656,
+     "F": 0.0405,
+     "G": 0.0691,
+     "H": 0.0227,
+     "I": 0.0591,
+     "K": 0.0595,
+     "L": 0.096,
+     "M": 0.0238,
+     "N": 0.0427,
+     "P": 0.0469,
+     "Q": 0.0393,
+     "R": 0.0526,
+     "S": 0.0694,
+     "T": 0.055,
+     "V": 0.0667,
+     "W": 0.0118,
+     "Y": 0.0311
+   },
+   "Reference": "Shen, Shiyi et al. ''Probabilistic analysis of the frequencies of amino acid pairs within characterized protein sequences.'' Physica A vol. 370,2 (2006): 651-662. doi:10.1016/j.physa.2006.03.004"
+ }
+ }