nerdd-module 0.3.43__tar.gz → 0.3.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/PKG-INFO +4 -2
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/__init__.py +1 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/inchi_reader.py +5 -14
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/reader.py +7 -4
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/sdf_reader.py +14 -15
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/smiles_reader.py +5 -14
- nerdd_module-0.3.45/nerdd_module/input/stream_reader.py +45 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/file_writer.py +23 -2
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/get_entry_points.py +1 -1
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module.egg-info/PKG-INFO +4 -2
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module.egg-info/SOURCES.txt +1 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module.egg-info/requires.txt +3 -1
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/pyproject.toml +4 -2
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/LICENSE +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/README.md +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/cli.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/models.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/basic_type_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/depth_first_explorer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/tar_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/input/zip_reader.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/model.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/prediction_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/remove_small_fragments.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/files.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/models/MolWeightModel.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/setup.cfg +0 -0
- {nerdd_module-0.3.43 → nerdd_module-0.3.45}/tests/test_features.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.43
|
|
3
|
+
Version: 0.3.45
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -31,6 +31,7 @@ Requires-Dist: rich-click>=1.7.1
|
|
|
31
31
|
Requires-Dist: stringcase>=1.2.0
|
|
32
32
|
Requires-Dist: decorator>=5.1.1
|
|
33
33
|
Requires-Dist: pydantic>=2
|
|
34
|
+
Requires-Dist: chardet>=5
|
|
34
35
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
35
36
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
37
|
Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
|
|
@@ -42,6 +43,7 @@ Requires-Dist: rdkit-stubs; extra == "dev"
|
|
|
42
43
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
43
44
|
Requires-Dist: types-decorator; extra == "dev"
|
|
44
45
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
46
|
+
Requires-Dist: types-chardet; extra == "dev"
|
|
45
47
|
Requires-Dist: pre-commit>=2; extra == "dev"
|
|
46
48
|
Provides-Extra: csp
|
|
47
49
|
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
@@ -58,7 +60,7 @@ Requires-Dist: hypothesis-rdkit; extra == "test"
|
|
|
58
60
|
Provides-Extra: docs
|
|
59
61
|
Requires-Dist: mkdocs; extra == "docs"
|
|
60
62
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
61
|
-
Requires-Dist: mkdocstrings; extra == "docs"
|
|
63
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
62
64
|
Dynamic: license-file
|
|
63
65
|
|
|
64
66
|
# NERDD Module
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromInchi
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
9
8
|
from .reader_config import ReaderConfig
|
|
9
|
+
from .stream_reader import StreamReader
|
|
10
10
|
|
|
11
11
|
__all__ = ["InchiReader"]
|
|
12
12
|
|
|
13
|
-
StreamReader = getreader("utf-8")
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
class InchiReader(Reader):
|
|
14
|
+
class InchiReader(StreamReader):
|
|
17
15
|
def __init__(self) -> None:
|
|
18
16
|
super().__init__()
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
|
-
for line in
|
|
21
|
+
for line in input_stream:
|
|
31
22
|
# skip empty lines
|
|
32
23
|
if line.strip() == "":
|
|
33
24
|
continue
|
|
@@ -30,6 +30,13 @@ class Reader(ABC):
|
|
|
30
30
|
def __init__(self) -> None:
|
|
31
31
|
super().__init__()
|
|
32
32
|
|
|
33
|
+
@abstractmethod
|
|
34
|
+
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
#
|
|
38
|
+
# Register and manage subclasses
|
|
39
|
+
#
|
|
33
40
|
@classmethod
|
|
34
41
|
def __init_subclass__(
|
|
35
42
|
cls,
|
|
@@ -39,10 +46,6 @@ class Reader(ABC):
|
|
|
39
46
|
if not inspect.isabstract(cls):
|
|
40
47
|
_factories.append(cls)
|
|
41
48
|
|
|
42
|
-
@abstractmethod
|
|
43
|
-
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
44
|
-
pass
|
|
45
|
-
|
|
46
49
|
@classmethod
|
|
47
50
|
def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
|
|
48
51
|
return _factories
|
|
@@ -1,30 +1,21 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromMolBlock
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
8
|
+
from .stream_reader import StreamReader
|
|
9
9
|
|
|
10
10
|
__all__ = ["SdfReader"]
|
|
11
11
|
|
|
12
|
-
StreamReader = getreader("utf-8")
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
class SdfReader(Reader):
|
|
13
|
+
class SdfReader(StreamReader):
|
|
16
14
|
def __init__(self, max_num_lines_mol_block: int = 10000) -> None:
|
|
17
15
|
super().__init__()
|
|
18
16
|
self.max_num_lines_mol_block = max_num_lines_mol_block
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
21
|
# We do not use SDMolSupplier, because it does not accept a stream-like
|
|
@@ -34,7 +25,12 @@ class SdfReader(Reader):
|
|
|
34
25
|
# collect lines to parse as a mol block
|
|
35
26
|
mol_block = ""
|
|
36
27
|
num_lines = 0
|
|
37
|
-
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
line = input_stream.readline()
|
|
31
|
+
except UnicodeDecodeError:
|
|
32
|
+
line = "<invalid_encoding>\n"
|
|
33
|
+
|
|
38
34
|
while line:
|
|
39
35
|
mol_block += line
|
|
40
36
|
if line.strip() == "$$$$":
|
|
@@ -45,7 +41,10 @@ class SdfReader(Reader):
|
|
|
45
41
|
break
|
|
46
42
|
|
|
47
43
|
# read next line
|
|
48
|
-
|
|
44
|
+
try:
|
|
45
|
+
line = input_stream.readline()
|
|
46
|
+
except UnicodeDecodeError:
|
|
47
|
+
line = "<invalid_encoding>\n"
|
|
49
48
|
|
|
50
49
|
if mol_block.strip() != "":
|
|
51
50
|
try:
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromSmiles
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
9
8
|
from .reader_config import ReaderConfig
|
|
9
|
+
from .stream_reader import StreamReader
|
|
10
10
|
|
|
11
11
|
__all__ = ["SmilesReader"]
|
|
12
12
|
|
|
13
|
-
StreamReader = getreader("utf-8")
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
class SmilesReader(Reader):
|
|
14
|
+
class SmilesReader(StreamReader):
|
|
17
15
|
def __init__(self) -> None:
|
|
18
16
|
super().__init__()
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
|
-
for line in
|
|
21
|
+
for line in input_stream:
|
|
31
22
|
# skip empty lines
|
|
32
23
|
if line.strip() == "":
|
|
33
24
|
continue
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from codecs import getreader
|
|
3
|
+
from typing import Any, Iterator
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
8
|
+
|
|
9
|
+
__all__ = ["StreamReader"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StreamReader(Reader):
|
|
13
|
+
def __init__(self) -> None:
|
|
14
|
+
super().__init__()
|
|
15
|
+
|
|
16
|
+
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
17
|
+
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
18
|
+
raise TypeError("input must be a stream-like object")
|
|
19
|
+
|
|
20
|
+
input_stream.seek(0)
|
|
21
|
+
|
|
22
|
+
#
|
|
23
|
+
# detect file encoding
|
|
24
|
+
#
|
|
25
|
+
|
|
26
|
+
# read a portion of the file's content
|
|
27
|
+
sample = input_stream.read(1_000_000)
|
|
28
|
+
result = chardet.detect(sample)
|
|
29
|
+
if result["confidence"] > 0.5 and result["encoding"] is not None:
|
|
30
|
+
encoding = result["encoding"]
|
|
31
|
+
else:
|
|
32
|
+
encoding = "utf-8"
|
|
33
|
+
|
|
34
|
+
input_stream.seek(0)
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# read file
|
|
38
|
+
#
|
|
39
|
+
StreamReader = getreader(encoding)
|
|
40
|
+
reader = StreamReader(input_stream)
|
|
41
|
+
return self._read_stream(reader, explore)
|
|
42
|
+
|
|
43
|
+
@abstractmethod
|
|
44
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
45
|
+
pass
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import codecs
|
|
2
2
|
from abc import abstractmethod
|
|
3
|
+
from io import BufferedIOBase, TextIOBase, TextIOWrapper
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import IO, Any, BinaryIO, Iterable, TextIO, Union
|
|
5
6
|
|
|
@@ -14,6 +15,13 @@ __all__ = ["FileWriter", "FileLike"]
|
|
|
14
15
|
FileLike = Union[str, Path, TextIO, BinaryIO]
|
|
15
16
|
|
|
16
17
|
|
|
18
|
+
def is_bytes_stream(stream: Union[TextIO, BinaryIO]) -> bool:
|
|
19
|
+
if hasattr(stream, "buffer"):
|
|
20
|
+
return False
|
|
21
|
+
else:
|
|
22
|
+
return True
|
|
23
|
+
|
|
24
|
+
|
|
17
25
|
class FileWriter(Writer):
|
|
18
26
|
"""Abstract class for writers."""
|
|
19
27
|
|
|
@@ -28,8 +36,21 @@ class FileWriter(Writer):
|
|
|
28
36
|
with open(self._output_file, mode) as f:
|
|
29
37
|
self._write(f, entries)
|
|
30
38
|
else:
|
|
31
|
-
self.
|
|
32
|
-
|
|
39
|
+
if self._writes_bytes == is_bytes_stream(self._output_file):
|
|
40
|
+
stream = self._output_file
|
|
41
|
+
elif self._writes_bytes:
|
|
42
|
+
# underlying writer expects str (but the writer wants to write bytes)
|
|
43
|
+
assert isinstance(self._output_file, TextIOBase) and hasattr(
|
|
44
|
+
self._output_file, "buffer"
|
|
45
|
+
)
|
|
46
|
+
stream = self._output_file.buffer
|
|
47
|
+
elif not self._writes_bytes:
|
|
48
|
+
# underlying writer expects bytes (but the writer wants to write str)
|
|
49
|
+
assert isinstance(self._output_file, BufferedIOBase)
|
|
50
|
+
stream = TextIOWrapper(self._output_file, encoding="utf-8")
|
|
51
|
+
|
|
52
|
+
self._write(stream, entries)
|
|
53
|
+
stream.flush()
|
|
33
54
|
|
|
34
55
|
@abstractmethod
|
|
35
56
|
def _write(self, output: IO[Any], entries: Iterable[dict]) -> None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.43
|
|
3
|
+
Version: 0.3.45
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -31,6 +31,7 @@ Requires-Dist: rich-click>=1.7.1
|
|
|
31
31
|
Requires-Dist: stringcase>=1.2.0
|
|
32
32
|
Requires-Dist: decorator>=5.1.1
|
|
33
33
|
Requires-Dist: pydantic>=2
|
|
34
|
+
Requires-Dist: chardet>=5
|
|
34
35
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
35
36
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
37
|
Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
|
|
@@ -42,6 +43,7 @@ Requires-Dist: rdkit-stubs; extra == "dev"
|
|
|
42
43
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
43
44
|
Requires-Dist: types-decorator; extra == "dev"
|
|
44
45
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
46
|
+
Requires-Dist: types-chardet; extra == "dev"
|
|
45
47
|
Requires-Dist: pre-commit>=2; extra == "dev"
|
|
46
48
|
Provides-Extra: csp
|
|
47
49
|
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
@@ -58,7 +60,7 @@ Requires-Dist: hypothesis-rdkit; extra == "test"
|
|
|
58
60
|
Provides-Extra: docs
|
|
59
61
|
Requires-Dist: mkdocs; extra == "docs"
|
|
60
62
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
61
|
-
Requires-Dist: mkdocstrings; extra == "docs"
|
|
63
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
62
64
|
Dynamic: license-file
|
|
63
65
|
|
|
64
66
|
# NERDD Module
|
|
@@ -41,6 +41,7 @@ nerdd_module/input/reader.py
|
|
|
41
41
|
nerdd_module/input/reader_config.py
|
|
42
42
|
nerdd_module/input/sdf_reader.py
|
|
43
43
|
nerdd_module/input/smiles_reader.py
|
|
44
|
+
nerdd_module/input/stream_reader.py
|
|
44
45
|
nerdd_module/input/string_reader.py
|
|
45
46
|
nerdd_module/input/tar_reader.py
|
|
46
47
|
nerdd_module/input/zip_reader.py
|
|
@@ -6,6 +6,7 @@ rich-click>=1.7.1
|
|
|
6
6
|
stringcase>=1.2.0
|
|
7
7
|
decorator>=5.1.1
|
|
8
8
|
pydantic>=2
|
|
9
|
+
chardet>=5
|
|
9
10
|
|
|
10
11
|
[:python_version < "3.10"]
|
|
11
12
|
importlib-metadata>=4.6
|
|
@@ -27,12 +28,13 @@ rdkit-stubs
|
|
|
27
28
|
types-PyYAML
|
|
28
29
|
types-decorator
|
|
29
30
|
types-setuptools
|
|
31
|
+
types-chardet
|
|
30
32
|
pre-commit>=2
|
|
31
33
|
|
|
32
34
|
[docs]
|
|
33
35
|
mkdocs
|
|
34
36
|
mkdocs-material
|
|
35
|
-
mkdocstrings
|
|
37
|
+
mkdocstrings[python]
|
|
36
38
|
|
|
37
39
|
[test]
|
|
38
40
|
pytest
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nerdd-module"
|
|
7
|
-
version = "0.3.43"
|
|
7
|
+
version = "0.3.45"
|
|
8
8
|
description = "Base package to create NERDD modules"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "BSD-3-Clause"
|
|
@@ -20,6 +20,7 @@ dependencies = [
|
|
|
20
20
|
"stringcase>=1.2.0",
|
|
21
21
|
"decorator>=5.1.1",
|
|
22
22
|
"pydantic>=2",
|
|
23
|
+
"chardet>=5",
|
|
23
24
|
# install importlib-resources and importlib-metadata for old Python versions
|
|
24
25
|
"importlib-resources>=5; python_version<'3.9'",
|
|
25
26
|
"importlib-metadata>=4.6; python_version<'3.10'",
|
|
@@ -57,6 +58,7 @@ dev = [
|
|
|
57
58
|
"types-PyYAML",
|
|
58
59
|
"types-decorator",
|
|
59
60
|
"types-setuptools",
|
|
61
|
+
"types-chardet",
|
|
60
62
|
"pre-commit>=2",
|
|
61
63
|
]
|
|
62
64
|
csp = [
|
|
@@ -78,7 +80,7 @@ test = [
|
|
|
78
80
|
"hypothesis",
|
|
79
81
|
"hypothesis-rdkit",
|
|
80
82
|
]
|
|
81
|
-
docs = ["mkdocs", "mkdocs-material", "mkdocstrings"]
|
|
83
|
+
docs = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"]
|
|
82
84
|
|
|
83
85
|
[project.urls]
|
|
84
86
|
Repository = "https://github.com/molinfo-vienna/nerdd-module"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/remove_small_fragments.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.43 → nerdd_module-0.3.45}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|