nerdd-module 0.3.48__tar.gz → 0.3.50__tar.gz
This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/PKG-INFO +1 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/depth_first_explorer.py +6 -3
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/gzip_reader.py +11 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/list_reader.py +14 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/sdf_reader.py +1 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/stream_reader.py +16 -11
- nerdd_module-0.3.50/nerdd_module/input/tar_reader.py +50 -0
- nerdd_module-0.3.50/nerdd_module/input/zip_reader.py +52 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/sanitize.py +9 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module.egg-info/PKG-INFO +1 -1
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/pyproject.toml +1 -1
- nerdd_module-0.3.48/nerdd_module/input/tar_reader.py +0 -27
- nerdd_module-0.3.48/nerdd_module/input/zip_reader.py +0 -29
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/LICENSE +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/README.md +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/cli.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/models.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/basic_type_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/inchi_reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/smiles_reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/model.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/prediction_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/remove_small_fragments.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/files.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/models/MolWeightModel.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module.egg-info/SOURCES.txt +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module.egg-info/requires.txt +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/setup.cfg +0 -0
- {nerdd_module-0.3.48 → nerdd_module-0.3.50}/tests/test_features.py +0 -0
|
@@ -8,16 +8,17 @@ __all__ = ["DepthFirstExplorer"]
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class InvalidInputReader(Reader):
|
|
11
|
-
def __init__(self) -> None:
|
|
11
|
+
def __init__(self, message: str = "Invalid input") -> None:
|
|
12
12
|
super().__init__()
|
|
13
|
+
self.message = message
|
|
13
14
|
|
|
14
15
|
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
15
16
|
yield MoleculeEntry(
|
|
16
17
|
raw_input=input,
|
|
17
18
|
input_type="unknown",
|
|
18
|
-
source=("
|
|
19
|
+
source=("raw_input",),
|
|
19
20
|
mol=None,
|
|
20
|
-
errors=[Problem("invalid_input",
|
|
21
|
+
errors=[Problem("invalid_input", self.message)],
|
|
21
22
|
)
|
|
22
23
|
|
|
23
24
|
def __repr__(self) -> str:
|
|
@@ -120,6 +121,8 @@ class DepthFirstExplorer(Explorer):
|
|
|
120
121
|
if best_mode == "builtin":
|
|
121
122
|
parent["first_guess"].append(best_reader)
|
|
122
123
|
|
|
124
|
+
# In order to get more fine-grained error messages, we do not handle exceptions here and
|
|
125
|
+
# rely on the readers to do so.
|
|
123
126
|
yield from sample
|
|
124
127
|
yield from generator
|
|
125
128
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
from typing import Any, Iterator
|
|
3
3
|
|
|
4
|
+
from ..problem import Problem
|
|
4
5
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
5
6
|
|
|
6
7
|
__all__ = ["GzipReader"]
|
|
@@ -22,7 +23,16 @@ class GzipReader(Reader):
|
|
|
22
23
|
f.read(1)
|
|
23
24
|
f.seek(0)
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
try:
|
|
27
|
+
yield from explore(f)
|
|
28
|
+
except Exception as e:
|
|
29
|
+
yield MoleculeEntry(
|
|
30
|
+
raw_input="<gzip>",
|
|
31
|
+
input_type="gzip",
|
|
32
|
+
source=("raw_input",),
|
|
33
|
+
mol=None,
|
|
34
|
+
errors=[Problem("invalid_input", f"Invalid gzip file: {e}")],
|
|
35
|
+
)
|
|
26
36
|
|
|
27
37
|
def __repr__(self) -> str:
|
|
28
38
|
return "GzipReader()"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from io import IOBase
|
|
2
2
|
from typing import Any, Iterable, Iterator
|
|
3
3
|
|
|
4
|
+
from ..problem import Problem
|
|
4
5
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
5
6
|
|
|
6
7
|
__all__ = ["ListReader"]
|
|
@@ -16,7 +17,19 @@ class ListReader(Reader):
|
|
|
16
17
|
), f"input must be an iterable, but is {type(input_iterable)}"
|
|
17
18
|
|
|
18
19
|
for entry in input_iterable:
|
|
19
|
-
|
|
20
|
+
try:
|
|
21
|
+
yield from explore(entry)
|
|
22
|
+
except Exception as e:
|
|
23
|
+
raw_input = str(entry)
|
|
24
|
+
if len(raw_input) > 100:
|
|
25
|
+
raw_input = raw_input[:97] + "..."
|
|
26
|
+
yield MoleculeEntry(
|
|
27
|
+
raw_input=raw_input,
|
|
28
|
+
input_type="unknown",
|
|
29
|
+
source=(),
|
|
30
|
+
mol=None,
|
|
31
|
+
errors=[Problem("invalid_list_entry", f"Could not read list entry: {e}")],
|
|
32
|
+
)
|
|
20
33
|
|
|
21
34
|
def __repr__(self) -> str:
|
|
22
35
|
return "ListReader()"
|
|
@@ -11,7 +11,7 @@ __all__ = ["SdfReader"]
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class SdfReader(StreamReader):
|
|
14
|
-
def __init__(self, max_num_lines_mol_block: int =
|
|
14
|
+
def __init__(self, max_num_lines_mol_block: int = 100_000) -> None:
|
|
15
15
|
super().__init__()
|
|
16
16
|
self.max_num_lines_mol_block = max_num_lines_mol_block
|
|
17
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
2
|
from codecs import getreader
|
|
3
|
-
from typing import Any, Iterator
|
|
3
|
+
from typing import Any, Iterator, Optional
|
|
4
4
|
|
|
5
5
|
import chardet
|
|
6
6
|
|
|
@@ -10,8 +10,9 @@ __all__ = ["StreamReader"]
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class StreamReader(Reader):
|
|
13
|
-
def __init__(self) -> None:
|
|
13
|
+
def __init__(self, encoding: Optional[str] = "utf-8-sig") -> None:
|
|
14
14
|
super().__init__()
|
|
15
|
+
self.encoding = encoding
|
|
15
16
|
|
|
16
17
|
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
17
18
|
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
@@ -20,24 +21,28 @@ class StreamReader(Reader):
|
|
|
20
21
|
input_stream.seek(0)
|
|
21
22
|
|
|
22
23
|
#
|
|
23
|
-
# detect file encoding
|
|
24
|
+
# detect file encoding (if not provided)
|
|
24
25
|
#
|
|
25
26
|
|
|
26
27
|
# read a portion of the file's content
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
if self.encoding is None:
|
|
29
|
+
sample = input_stream.read(1_000_000)
|
|
30
|
+
result = chardet.detect(sample)
|
|
31
|
+
if result["confidence"] > 0.5 and result["encoding"] is not None:
|
|
32
|
+
encoding = result["encoding"]
|
|
33
|
+
else:
|
|
34
|
+
encoding = "utf-8-sig"
|
|
35
|
+
|
|
36
|
+
input_stream.seek(0)
|
|
31
37
|
else:
|
|
32
|
-
encoding =
|
|
33
|
-
|
|
34
|
-
input_stream.seek(0)
|
|
38
|
+
encoding = self.encoding
|
|
35
39
|
|
|
36
40
|
#
|
|
37
41
|
# read file
|
|
38
42
|
#
|
|
39
43
|
StreamReader = getreader(encoding)
|
|
40
|
-
|
|
44
|
+
# errors="replace": replace invalid characters instead of failing
|
|
45
|
+
reader = StreamReader(input_stream, "replace")
|
|
41
46
|
return self._read_stream(reader, explore)
|
|
42
47
|
|
|
43
48
|
@abstractmethod
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import tarfile
|
|
2
|
+
from typing import Any, Iterator, Tuple
|
|
3
|
+
|
|
4
|
+
from ..problem import Problem
|
|
5
|
+
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
6
|
+
|
|
7
|
+
__all__ = ["TarReader"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TarReader(Reader):
|
|
11
|
+
def __init__(self) -> None:
|
|
12
|
+
super().__init__()
|
|
13
|
+
|
|
14
|
+
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
15
|
+
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
16
|
+
raise TypeError("input must be a stream-like object")
|
|
17
|
+
|
|
18
|
+
input_stream.seek(0)
|
|
19
|
+
|
|
20
|
+
with tarfile.open(fileobj=input_stream, mode="r") as tar:
|
|
21
|
+
for member in tar.getmembers():
|
|
22
|
+
if not member.isfile():
|
|
23
|
+
continue
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
for entry in explore(tar.extractfile(member)):
|
|
27
|
+
# the underlying reader only sees the file content as a stream
|
|
28
|
+
# -> it might believe that the source is "raw_input"
|
|
29
|
+
# -> we need to correct that here
|
|
30
|
+
if len(entry.source) == 1 and entry.source[0] == "raw_input":
|
|
31
|
+
source: Tuple[str, ...] = tuple()
|
|
32
|
+
else:
|
|
33
|
+
source = entry.source
|
|
34
|
+
yield entry._replace(source=(member.name, *source))
|
|
35
|
+
except Exception as e:
|
|
36
|
+
yield MoleculeEntry(
|
|
37
|
+
raw_input="<tar>",
|
|
38
|
+
input_type="unknown",
|
|
39
|
+
source=(member.name,),
|
|
40
|
+
mol=None,
|
|
41
|
+
errors=[
|
|
42
|
+
Problem(
|
|
43
|
+
"invalid_tar_member",
|
|
44
|
+
f"Could not read tar member '{member.name}': {e}",
|
|
45
|
+
)
|
|
46
|
+
],
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
def __repr__(self) -> str:
|
|
50
|
+
return "TarReader()"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import zipfile
|
|
2
|
+
from typing import Any, Iterator, Tuple
|
|
3
|
+
|
|
4
|
+
from ..problem import Problem
|
|
5
|
+
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
6
|
+
|
|
7
|
+
__all__ = ["ZipReader"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ZipReader(Reader):
|
|
11
|
+
def __init__(self) -> None:
|
|
12
|
+
super().__init__()
|
|
13
|
+
|
|
14
|
+
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
15
|
+
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
16
|
+
raise TypeError("input must be a stream-like object")
|
|
17
|
+
|
|
18
|
+
input_stream.seek(0)
|
|
19
|
+
|
|
20
|
+
with zipfile.ZipFile(input_stream, "r") as zipf:
|
|
21
|
+
for member in zipf.namelist():
|
|
22
|
+
# check if the member is a file
|
|
23
|
+
if member.endswith("/"):
|
|
24
|
+
continue
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
with zipf.open(member, "r") as f:
|
|
28
|
+
for entry in explore(f):
|
|
29
|
+
# the underlying reader only sees the file content as a stream
|
|
30
|
+
# -> it might believe that the source is "raw_input"
|
|
31
|
+
# -> we need to correct that here
|
|
32
|
+
if len(entry.source) == 1 and entry.source[0] == "raw_input":
|
|
33
|
+
source: Tuple[str, ...] = tuple()
|
|
34
|
+
else:
|
|
35
|
+
source = entry.source
|
|
36
|
+
|
|
37
|
+
yield entry._replace(source=(member, *source))
|
|
38
|
+
except Exception as e:
|
|
39
|
+
yield MoleculeEntry(
|
|
40
|
+
raw_input="<zip>",
|
|
41
|
+
input_type="unknown",
|
|
42
|
+
source=(member,),
|
|
43
|
+
mol=None,
|
|
44
|
+
errors=[
|
|
45
|
+
Problem(
|
|
46
|
+
"invalid_zip_member", f"Could not read zip member '{member}': {e}"
|
|
47
|
+
)
|
|
48
|
+
],
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
def __repr__(self) -> str:
|
|
52
|
+
return "ZipReader()"
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import List, Optional, Tuple
|
|
3
3
|
|
|
4
|
-
from rdkit.Chem import
|
|
4
|
+
from rdkit.Chem import (
|
|
5
|
+
AtomKekulizeException,
|
|
6
|
+
AtomValenceException,
|
|
7
|
+
KekulizeException,
|
|
8
|
+
Mol,
|
|
9
|
+
SanitizeMol,
|
|
10
|
+
)
|
|
5
11
|
|
|
6
12
|
from ..problem import Problem
|
|
7
13
|
from .preprocessing_step import PreprocessingStep
|
|
@@ -26,6 +32,8 @@ class Sanitize(PreprocessingStep):
|
|
|
26
32
|
return None, [
|
|
27
33
|
Problem("atom_kekulization_error", "Failed kekulizing an atom in the molecule.")
|
|
28
34
|
]
|
|
35
|
+
except AtomValenceException as e:
|
|
36
|
+
return None, [Problem("valence_error", str(e))]
|
|
29
37
|
except Exception as e:
|
|
30
38
|
logger.exception(e)
|
|
31
39
|
return None, [Problem("sanitization_error", "Failed sanitizing the molecule.")]
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import tarfile
|
|
2
|
-
from typing import Any, Iterator
|
|
3
|
-
|
|
4
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
5
|
-
|
|
6
|
-
__all__ = ["TarReader"]
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TarReader(Reader):
|
|
10
|
-
def __init__(self) -> None:
|
|
11
|
-
super().__init__()
|
|
12
|
-
|
|
13
|
-
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
14
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
15
|
-
raise TypeError("input must be a stream-like object")
|
|
16
|
-
|
|
17
|
-
input_stream.seek(0)
|
|
18
|
-
|
|
19
|
-
with tarfile.open(fileobj=input_stream, mode="r") as tar:
|
|
20
|
-
for member in tar.getmembers():
|
|
21
|
-
if not member.isfile():
|
|
22
|
-
continue
|
|
23
|
-
for entry in explore(tar.extractfile(member)):
|
|
24
|
-
yield entry._replace(source=(member.name, *entry.source))
|
|
25
|
-
|
|
26
|
-
def __repr__(self) -> str:
|
|
27
|
-
return "TarReader()"
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import zipfile
|
|
2
|
-
from typing import Any, Iterator
|
|
3
|
-
|
|
4
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
5
|
-
|
|
6
|
-
__all__ = ["ZipReader"]
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class ZipReader(Reader):
|
|
10
|
-
def __init__(self) -> None:
|
|
11
|
-
super().__init__()
|
|
12
|
-
|
|
13
|
-
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
14
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
15
|
-
raise TypeError("input must be a stream-like object")
|
|
16
|
-
|
|
17
|
-
input_stream.seek(0)
|
|
18
|
-
|
|
19
|
-
with zipfile.ZipFile(input_stream, "r") as zipf:
|
|
20
|
-
for member in zipf.namelist():
|
|
21
|
-
# check if the member is a file
|
|
22
|
-
if member.endswith("/"):
|
|
23
|
-
continue
|
|
24
|
-
with zipf.open(member, "r") as f:
|
|
25
|
-
for entry in explore(f):
|
|
26
|
-
yield entry._replace(source=(member, *entry.source))
|
|
27
|
-
|
|
28
|
-
def __repr__(self) -> str:
|
|
29
|
-
return "ZipReader()"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/remove_small_fragments.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.48 → nerdd_module-0.3.50}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|