aldepyde 0.0.0a33__tar.gz → 0.0.0a37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aldepyde might be problematic. Click here for more details.

Files changed (48) hide show
  1. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/PKG-INFO +1 -1
  2. aldepyde-0.0.0a37/aldepyde/databases/SCOPe_Astral.py +73 -0
  3. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/databases/UniRef.py +24 -23
  4. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/databases/_database.py +33 -2
  5. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde.egg-info/PKG-INFO +1 -1
  6. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde.egg-info/SOURCES.txt +1 -0
  7. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/pyproject.toml +1 -1
  8. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/LICENSE +0 -0
  9. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/README.md +0 -0
  10. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/Parsers/_mmcif_parser.py +0 -0
  11. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/Parsers/_pdb_parser.py +0 -0
  12. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/__init__.py +0 -0
  13. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/_config.py +0 -0
  14. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/Residue.py +0 -0
  15. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_Atom.py +0 -0
  16. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_AtomFactory.py +0 -0
  17. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/__init__.py +0 -0
  18. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_amino_acid.py +0 -0
  19. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_dna.py +0 -0
  20. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_pdb.py +0 -0
  21. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/_rna.py +0 -0
  22. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/biomolecule/utils.py +0 -0
  23. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/cache/__init__.py +0 -0
  24. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/cache/_cache.py +0 -0
  25. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/cache/cachemanager.py +0 -0
  26. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/cache/downloader.py +0 -0
  27. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/cache/utils.py +0 -0
  28. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/configurable.py +0 -0
  29. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/data/RemoteFileHandler.py +0 -0
  30. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/data/__init__.py +0 -0
  31. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/data.py +0 -0
  32. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/databases/PDB.py +0 -0
  33. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/databases/RemoteFileHandler.py +0 -0
  34. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/databases/__init__.py +0 -0
  35. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/env.py +0 -0
  36. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/fetcher/__init__.py +0 -0
  37. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/fetcher/test.py +0 -0
  38. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/json/CHG.json +0 -0
  39. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/json/Swiss_Prot.json +0 -0
  40. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/json/chemistry.json +0 -0
  41. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/rand/RandomProtein.py +0 -0
  42. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/rand/__init__.py +0 -0
  43. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/stats/ProteinStats.py +0 -0
  44. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/stats/__init__.py +0 -0
  45. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde/utils.py +0 -0
  46. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde.egg-info/dependency_links.txt +0 -0
  47. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/aldepyde.egg-info/top_level.txt +0 -0
  48. {aldepyde-0.0.0a33 → aldepyde-0.0.0a37}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aldepyde
3
- Version: 0.0.0a33
3
+ Version: 0.0.0a37
4
4
  Summary: A package of chemistry and biochemical tools
5
5
  Author-email: Nate McMurray <nate.mcmurray13@gmail.com>
6
6
  License: MIT License
@@ -0,0 +1,73 @@
1
+ import io
2
+
3
+ from aldepyde.databases._database import local_database
4
+ import operator
5
+ from contextlib import nullcontext
6
+ import re
7
+
8
class scop_parser(local_database):
    """Parser for local SCOPe / ASTRAL sequence files.

    The ASTRAL helpers read from ``self.fp``, which must be an open binary
    file handle (it is populated when the instance is entered as a context
    manager). All record data is handled as ``bytes``.
    """

    # Kept for backward compatibility: three-way boolean combiners by mode name.
    op = {
        "and": lambda a, b, c: a and b and c,
        "or": lambda a, b, c: a or b or c
    }

    def fetch(self, url):
        """Not implemented yet."""
        pass

    def fetch_code(self, codes):
        """Not implemented yet."""
        pass

    def parse(self, text):
        """Not implemented yet."""
        pass

    def extract_all_scop(self):
        """Not implemented yet."""
        pass

    def partition_scope(self):
        """Not implemented yet."""
        pass

    def extract_all_astral(self):
        """Yield every FASTA-style record in ``self.fp`` as one bytes object.

        A record starts at a line beginning with ``>`` and runs up to, but not
        including, the next such line. Lines that precede the first ``>`` are
        yielded as a leading record of their own. An empty file yields nothing.
        """
        record = []
        # Iterate the handle directly so the whole file is never held in memory.
        for line in self.fp:
            if line.startswith(b">") and record:
                yield b"".join(record)
                record = []
            record.append(line)
        if record:
            yield b"".join(record)

    # TODO allow a list of search parameters. Big challenge to make efficient, but could be cute
    def partition_astral(self, destination: None | str = None, append=False,
                         class_name: str | bytes = b'', contains_id: str | bytes = b'',
                         contains_desc: str | bytes = b'', mode="and") -> dict:
        """Select ASTRAL records by class, identifier and/or description.

        Filters are case-insensitive substring tests. A filter left empty is
        ignored; when no filter is given every well-formed record matches.

        Args:
            destination: Path of a file that receives the raw matching
                records. When ``None`` the records are written to an
                in-memory buffer that is discarded.
            append: Open ``destination`` in append mode instead of truncating.
            class_name: Substring to look for in the classification code
                (the second header token).
            contains_id: Substring to look for in the identifier (the first
                header token, including the leading ``>``).
            contains_desc: Substring to look for in the rest of the header line.
            mode: ``"and"`` requires every supplied filter to match, ``"or"``
                requires at least one. Case-insensitive.

        Returns:
            A dict keyed by identifier (leading ``>`` included). Each value is
            a dict with the keys ``"class"``, ``"description"`` and
            ``"sequence"``, all ``bytes``.

        Raises:
            ValueError: If ``mode`` is neither ``"and"`` nor ``"or"``.
        """
        mode = mode.lower()
        if mode not in ("and", "or"):
            raise ValueError("mode must be \"and\" or \"or\".")
        combine = all if mode == "and" else any

        # Everything is a byte string in order to play nicely with future parent methods
        def _needle(value):
            if isinstance(value, str):
                value = value.encode('utf-8')
            return value.lower()

        class_name = _needle(class_name)
        contains_id = _needle(contains_id)
        contains_desc = _needle(contains_desc)

        # ">identifier", at least one space, then a class letter followed by up
        # to three ".<digits>" levels. The dots are escaped so they only match
        # a literal '.'.
        header = re.compile(rb"(>[a-zA-Z0-9_.]*) +([a-l](?:\.[0-9]+){0,3})")

        file_mode = 'ab' if append else 'wb'
        if destination is not None:
            file_context = open(destination, file_mode)
        else:
            file_context = nullcontext(io.BytesIO())

        with file_context as fp:
            ret_dict = dict()
            for entry in self.extract_all_astral():
                found = header.search(entry)
                if found is None:
                    # Not a recognisable ASTRAL header; skip instead of crashing.
                    continue
                entry_id, cls = found.groups()
                # Strip only the matched header tokens; the description is
                # whatever is left on the first line.
                remainder = header.sub(b'', entry, count=1)
                parts = remainder.splitlines() or [b'']
                desc = parts[0]
                sequence = b"".join(parts[1:])

                # Only filters the caller actually supplied take part in the
                # decision; an empty needle would be a substring of anything.
                checks = [
                    needle in haystack.lower()
                    for needle, haystack in (
                        (class_name, cls),
                        (contains_id, entry_id),
                        (contains_desc, desc),
                    )
                    if needle
                ]
                if not checks or combine(checks):
                    ret_dict[entry_id] = {  # Yes, I know '>' isn't part of the FASTA identifier. This keeps things more consistent
                        "class": cls,
                        "description": desc,
                        "sequence": sequence
                    }
                    fp.write(entry)
        return ret_dict
@@ -1,42 +1,45 @@
1
1
  import zlib
2
-
3
- from aldepyde.databases.RemoteFileHandler import RemoteFileHandler
4
- from aldepyde.databases._database import _database
2
+ from aldepyde.databases._database import streamable_database
5
3
  from aldepyde.utils import ProgressBar
6
- import os
7
- import gzip
8
4
 
9
- class uniref_parser(_database):
5
+ class uniref_parser(streamable_database):
10
6
  def __init__(self):
11
7
  super().__init__()
12
8
 
13
9
  # TODO single entry parsing
14
10
  # TODO store metadata upon request
11
+ # TODO implement abstract methods
15
12
 
16
13
  @staticmethod
17
- def stream_uniref_gz(filepath, chunk_size=8192, use_progress_bar=False):
18
- raw_stream, size = _database.open_stream(filepath)
14
+ def stream_uniref_gz(filepath, chunk_size=8192, use_progress_bar=False, stitch=False):
15
+ raw_stream, size = streamable_database.open_stream(filepath)
19
16
  pbar = ProgressBar(size//chunk_size) if use_progress_bar else None
20
17
  decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
21
18
  try:
22
- while True:
23
- comp_chunk = raw_stream.read(chunk_size)
24
- if not comp_chunk:
25
- break
26
- if pbar is not None:
27
- pbar.update()
28
- decomp_chunk = decompressor.decompress(comp_chunk)
29
- if decomp_chunk:
30
- yield decomp_chunk
31
- final = decompressor.flush()
32
- if final:
33
- yield final
19
+ if not stitch:
20
+ while True:
21
+ comp_chunk = raw_stream.read(chunk_size)
22
+ if not comp_chunk:
23
+ break
24
+ if pbar is not None:
25
+ pbar.update()
26
+ decomp_chunk = decompressor.decompress(comp_chunk)
27
+ if decomp_chunk:
28
+ yield decomp_chunk
29
+ final = decompressor.flush()
30
+ if final:
31
+ yield final
32
+ else:
33
+ # Really hacky solution for now
34
+ # TODO Clean this up
35
+ yield from uniref_parser.stitch_streamed_sequences(
36
+ uniref_parser.stream_uniref_gz(filepath=filepath, chunk_size=chunk_size, use_progress_bar=use_progress_bar, stitch=False))
34
37
  finally:
35
38
  raw_stream.close()
36
39
 
37
40
  @staticmethod
38
41
  def download_file(url, destination, chunk_size=8192, use_progress_bar=False):
39
- raw_stream, size = _database.open_stream(url)
42
+ raw_stream, size = streamable_database.open_stream(url)
40
43
  pbar = ProgressBar(size // chunk_size) if use_progress_bar else None
41
44
  with open(destination, 'wb') as fp:
42
45
  while True:
@@ -48,8 +51,6 @@ class uniref_parser(_database):
48
51
  fp.write(chunk)
49
52
 
50
53
 
51
-
52
-
53
54
  @staticmethod
54
55
  def stitch_streamed_sequences(stream, as_str=True):
55
56
  buffer = b''
@@ -5,7 +5,7 @@ import os
5
5
  from typing import Tuple, BinaryIO
6
6
  from io import TextIOWrapper
7
7
 
8
- class _database(ABC):
8
+ class streamable_database(ABC):
9
9
 
10
10
  def __init__(self):
11
11
  pass
@@ -38,4 +38,35 @@ class _database(ABC):
38
38
  # Yes, I know the first conditionals do the same thing
39
39
 
40
40
  def __call__(self):
41
- pass
41
+ pass
42
+
43
+ class local_database(ABC):
44
+
45
+ def __init__(self, filepath=None, as_fp=False):
46
+ self.fp = None
47
+ self.as_fp = as_fp
48
+ self.size = None
49
+ self.load_path(filepath)
50
+
51
+ def load_path(self, filepath):
52
+ self.filepath = filepath
53
+
54
+ def get_pointer(self):
55
+ return self.fp
56
+
57
+ def __enter__(self):
58
+ self.fp, self.size = local_database.open_stream(self.filepath)
59
+ if self.as_fp:
60
+ return self.fp
61
+ else:
62
+ return self
63
+
64
+ def __exit__(self, exc_type, exc_val, exc_tb):
65
+ if self.fp is not None:
66
+ self.fp.close()
67
+ self.fp = None
68
+
69
+ @staticmethod
70
+ def open_stream(source:str) -> Tuple[BinaryIO, int] | None:
71
+ size = os.path.getsize(source)
72
+ return open(source, 'rb'), size
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aldepyde
3
- Version: 0.0.0a33
3
+ Version: 0.0.0a37
4
4
  Summary: A package of chemistry and biochemical tools
5
5
  Author-email: Nate McMurray <nate.mcmurray13@gmail.com>
6
6
  License: MIT License
@@ -31,6 +31,7 @@ aldepyde/data/RemoteFileHandler.py
31
31
  aldepyde/data/__init__.py
32
32
  aldepyde/databases/PDB.py
33
33
  aldepyde/databases/RemoteFileHandler.py
34
+ aldepyde/databases/SCOPe_Astral.py
34
35
  aldepyde/databases/UniRef.py
35
36
  aldepyde/databases/__init__.py
36
37
  aldepyde/databases/_database.py
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  "aldepyde" = ["json/*.json"]
6
6
  [project]
7
7
  name = "aldepyde"
8
- version = "0.0.0a33"
8
+ version = "0.0.0a37"
9
9
  authors = [
10
10
  { name="Nate McMurray", email="nate.mcmurray13@gmail.com" },
11
11
  ]
File without changes
File without changes
File without changes
File without changes