nerdd-module 0.3.42__tar.gz → 0.3.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/PKG-INFO +4 -2
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/basic_type_converter.py +6 -3
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/__init__.py +1 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/inchi_reader.py +5 -14
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/reader.py +7 -4
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/sdf_reader.py +14 -15
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/smiles_reader.py +5 -14
- nerdd_module-0.3.44/nerdd_module/input/stream_reader.py +45 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/get_entry_points.py +1 -1
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module.egg-info/PKG-INFO +4 -2
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module.egg-info/SOURCES.txt +1 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module.egg-info/requires.txt +3 -1
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/pyproject.toml +4 -2
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/LICENSE +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/README.md +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/cli.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/models.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/depth_first_explorer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/tar_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/input/zip_reader.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/model.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/prediction_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/remove_small_fragments.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/files.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/models/MolWeightModel.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/setup.cfg +0 -0
- {nerdd_module-0.3.42 → nerdd_module-0.3.44}/tests/test_features.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.42
|
|
3
|
+
Version: 0.3.44
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -31,6 +31,7 @@ Requires-Dist: rich-click>=1.7.1
|
|
|
31
31
|
Requires-Dist: stringcase>=1.2.0
|
|
32
32
|
Requires-Dist: decorator>=5.1.1
|
|
33
33
|
Requires-Dist: pydantic>=2
|
|
34
|
+
Requires-Dist: chardet>=5
|
|
34
35
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
35
36
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
37
|
Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
|
|
@@ -42,6 +43,7 @@ Requires-Dist: rdkit-stubs; extra == "dev"
|
|
|
42
43
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
43
44
|
Requires-Dist: types-decorator; extra == "dev"
|
|
44
45
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
46
|
+
Requires-Dist: types-chardet; extra == "dev"
|
|
45
47
|
Requires-Dist: pre-commit>=2; extra == "dev"
|
|
46
48
|
Provides-Extra: csp
|
|
47
49
|
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
@@ -58,7 +60,7 @@ Requires-Dist: hypothesis-rdkit; extra == "test"
|
|
|
58
60
|
Provides-Extra: docs
|
|
59
61
|
Requires-Dist: mkdocs; extra == "docs"
|
|
60
62
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
61
|
-
Requires-Dist: mkdocstrings; extra == "docs"
|
|
63
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
62
64
|
Dynamic: license-file
|
|
63
65
|
|
|
64
66
|
# NERDD Module
|
|
@@ -7,9 +7,12 @@ from .converter_config import ALL, ConverterConfig
|
|
|
7
7
|
__all__ = ["BasicTypeConverter", "basic_data_types"]
|
|
8
8
|
|
|
9
9
|
basic_data_types = [
|
|
10
|
+
"integer",
|
|
10
11
|
"int",
|
|
11
12
|
"float",
|
|
12
13
|
"string",
|
|
14
|
+
"str",
|
|
15
|
+
"boolean",
|
|
13
16
|
"bool",
|
|
14
17
|
]
|
|
15
18
|
|
|
@@ -26,13 +29,13 @@ class BasicTypeConverter(Converter):
|
|
|
26
29
|
self.type = self.result_property.type
|
|
27
30
|
|
|
28
31
|
self._f: Callable[[Any], Any]
|
|
29
|
-
if self.type == "int":
|
|
32
|
+
if self.type == "integer" or self.type == "int":
|
|
30
33
|
self._f = int
|
|
31
34
|
elif self.type == "float":
|
|
32
35
|
self._f = float
|
|
33
|
-
elif self.type == "string":
|
|
36
|
+
elif self.type == "string" or self.type == "str":
|
|
34
37
|
self._f = str
|
|
35
|
-
elif self.type == "bool":
|
|
38
|
+
elif self.type == "boolean" or self.type == "bool":
|
|
36
39
|
self._f = bool
|
|
37
40
|
else:
|
|
38
41
|
self._f = lambda v: v
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromInchi
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
9
8
|
from .reader_config import ReaderConfig
|
|
9
|
+
from .stream_reader import StreamReader
|
|
10
10
|
|
|
11
11
|
__all__ = ["InchiReader"]
|
|
12
12
|
|
|
13
|
-
StreamReader = getreader("utf-8")
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
class InchiReader(Reader):
|
|
14
|
+
class InchiReader(StreamReader):
|
|
17
15
|
def __init__(self) -> None:
|
|
18
16
|
super().__init__()
|
|
19
17
|
|
|
20
|
-
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
|
-
for line in reader:
|
|
21
|
+
for line in input_stream:
|
|
31
22
|
# skip empty lines
|
|
32
23
|
if line.strip() == "":
|
|
33
24
|
continue
|
|
@@ -30,6 +30,13 @@ class Reader(ABC):
|
|
|
30
30
|
def __init__(self) -> None:
|
|
31
31
|
super().__init__()
|
|
32
32
|
|
|
33
|
+
@abstractmethod
|
|
34
|
+
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
#
|
|
38
|
+
# Register and manage subclasses
|
|
39
|
+
#
|
|
33
40
|
@classmethod
|
|
34
41
|
def __init_subclass__(
|
|
35
42
|
cls,
|
|
@@ -39,10 +46,6 @@ class Reader(ABC):
|
|
|
39
46
|
if not inspect.isabstract(cls):
|
|
40
47
|
_factories.append(cls)
|
|
41
48
|
|
|
42
|
-
@abstractmethod
|
|
43
|
-
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
44
|
-
pass
|
|
45
|
-
|
|
46
49
|
@classmethod
|
|
47
50
|
def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
|
|
48
51
|
return _factories
|
|
@@ -1,30 +1,21 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromMolBlock
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
8
|
+
from .stream_reader import StreamReader
|
|
9
9
|
|
|
10
10
|
__all__ = ["SdfReader"]
|
|
11
11
|
|
|
12
|
-
StreamReader = getreader("utf-8")
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
class SdfReader(Reader):
|
|
13
|
+
class SdfReader(StreamReader):
|
|
16
14
|
def __init__(self, max_num_lines_mol_block: int = 10000) -> None:
|
|
17
15
|
super().__init__()
|
|
18
16
|
self.max_num_lines_mol_block = max_num_lines_mol_block
|
|
19
17
|
|
|
20
|
-
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
21
|
# We do not use SDMolSupplier, because it does not accept a stream-like
|
|
@@ -34,7 +25,12 @@ class SdfReader(Reader):
|
|
|
34
25
|
# collect lines to parse as a mol block
|
|
35
26
|
mol_block = ""
|
|
36
27
|
num_lines = 0
|
|
37
|
-
line = reader.readline()
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
line = input_stream.readline()
|
|
31
|
+
except UnicodeDecodeError:
|
|
32
|
+
line = "<invalid_encoding>\n"
|
|
33
|
+
|
|
38
34
|
while line:
|
|
39
35
|
mol_block += line
|
|
40
36
|
if line.strip() == "$$$$":
|
|
@@ -45,7 +41,10 @@ class SdfReader(Reader):
|
|
|
45
41
|
break
|
|
46
42
|
|
|
47
43
|
# read next line
|
|
48
|
-
line = reader.readline()
|
44
|
+
try:
|
|
45
|
+
line = input_stream.readline()
|
|
46
|
+
except UnicodeDecodeError:
|
|
47
|
+
line = "<invalid_encoding>\n"
|
|
49
48
|
|
|
50
49
|
if mol_block.strip() != "":
|
|
51
50
|
try:
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
-
from codecs import getreader
|
|
2
1
|
from typing import Any, Iterator
|
|
3
2
|
|
|
4
3
|
from rdkit.Chem import MolFromSmiles
|
|
5
4
|
|
|
6
5
|
from ..polyfills import BlockLogs
|
|
7
6
|
from ..problem import Problem
|
|
8
|
-
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry
|
|
9
8
|
from .reader_config import ReaderConfig
|
|
9
|
+
from .stream_reader import StreamReader
|
|
10
10
|
|
|
11
11
|
__all__ = ["SmilesReader"]
|
|
12
12
|
|
|
13
|
-
StreamReader = getreader("utf-8")
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
class SmilesReader(Reader):
|
|
14
|
+
class SmilesReader(StreamReader):
|
|
17
15
|
def __init__(self) -> None:
|
|
18
16
|
super().__init__()
|
|
19
17
|
|
|
20
|
-
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
21
|
-
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
22
|
-
raise TypeError("input must be a stream-like object")
|
|
23
|
-
|
|
24
|
-
input_stream.seek(0)
|
|
25
|
-
|
|
26
|
-
reader = StreamReader(input_stream)
|
|
27
|
-
|
|
18
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
28
19
|
# suppress RDKit warnings
|
|
29
20
|
with BlockLogs():
|
|
30
|
-
for line in reader:
|
|
21
|
+
for line in input_stream:
|
|
31
22
|
# skip empty lines
|
|
32
23
|
if line.strip() == "":
|
|
33
24
|
continue
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from codecs import getreader
|
|
3
|
+
from typing import Any, Iterator
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
8
|
+
|
|
9
|
+
__all__ = ["StreamReader"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StreamReader(Reader):
|
|
13
|
+
def __init__(self) -> None:
|
|
14
|
+
super().__init__()
|
|
15
|
+
|
|
16
|
+
def read(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
17
|
+
if not hasattr(input_stream, "read") or not hasattr(input_stream, "seek"):
|
|
18
|
+
raise TypeError("input must be a stream-like object")
|
|
19
|
+
|
|
20
|
+
input_stream.seek(0)
|
|
21
|
+
|
|
22
|
+
#
|
|
23
|
+
# detect file encoding
|
|
24
|
+
#
|
|
25
|
+
|
|
26
|
+
# read a portion of the file's content
|
|
27
|
+
sample = input_stream.read(1_000_000)
|
|
28
|
+
result = chardet.detect(sample)
|
|
29
|
+
if result["confidence"] > 0.5 and result["encoding"] is not None:
|
|
30
|
+
encoding = result["encoding"]
|
|
31
|
+
else:
|
|
32
|
+
encoding = "utf-8"
|
|
33
|
+
|
|
34
|
+
input_stream.seek(0)
|
|
35
|
+
|
|
36
|
+
#
|
|
37
|
+
# read file
|
|
38
|
+
#
|
|
39
|
+
StreamReader = getreader(encoding)
|
|
40
|
+
reader = StreamReader(input_stream)
|
|
41
|
+
return self._read_stream(reader, explore)
|
|
42
|
+
|
|
43
|
+
@abstractmethod
|
|
44
|
+
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
45
|
+
pass
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.42
|
|
3
|
+
Version: 0.3.44
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -31,6 +31,7 @@ Requires-Dist: rich-click>=1.7.1
|
|
|
31
31
|
Requires-Dist: stringcase>=1.2.0
|
|
32
32
|
Requires-Dist: decorator>=5.1.1
|
|
33
33
|
Requires-Dist: pydantic>=2
|
|
34
|
+
Requires-Dist: chardet>=5
|
|
34
35
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
35
36
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
37
|
Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
|
|
@@ -42,6 +43,7 @@ Requires-Dist: rdkit-stubs; extra == "dev"
|
|
|
42
43
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
43
44
|
Requires-Dist: types-decorator; extra == "dev"
|
|
44
45
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
46
|
+
Requires-Dist: types-chardet; extra == "dev"
|
|
45
47
|
Requires-Dist: pre-commit>=2; extra == "dev"
|
|
46
48
|
Provides-Extra: csp
|
|
47
49
|
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
@@ -58,7 +60,7 @@ Requires-Dist: hypothesis-rdkit; extra == "test"
|
|
|
58
60
|
Provides-Extra: docs
|
|
59
61
|
Requires-Dist: mkdocs; extra == "docs"
|
|
60
62
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
61
|
-
Requires-Dist: mkdocstrings; extra == "docs"
|
|
63
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
62
64
|
Dynamic: license-file
|
|
63
65
|
|
|
64
66
|
# NERDD Module
|
|
@@ -41,6 +41,7 @@ nerdd_module/input/reader.py
|
|
|
41
41
|
nerdd_module/input/reader_config.py
|
|
42
42
|
nerdd_module/input/sdf_reader.py
|
|
43
43
|
nerdd_module/input/smiles_reader.py
|
|
44
|
+
nerdd_module/input/stream_reader.py
|
|
44
45
|
nerdd_module/input/string_reader.py
|
|
45
46
|
nerdd_module/input/tar_reader.py
|
|
46
47
|
nerdd_module/input/zip_reader.py
|
|
@@ -6,6 +6,7 @@ rich-click>=1.7.1
|
|
|
6
6
|
stringcase>=1.2.0
|
|
7
7
|
decorator>=5.1.1
|
|
8
8
|
pydantic>=2
|
|
9
|
+
chardet>=5
|
|
9
10
|
|
|
10
11
|
[:python_version < "3.10"]
|
|
11
12
|
importlib-metadata>=4.6
|
|
@@ -27,12 +28,13 @@ rdkit-stubs
|
|
|
27
28
|
types-PyYAML
|
|
28
29
|
types-decorator
|
|
29
30
|
types-setuptools
|
|
31
|
+
types-chardet
|
|
30
32
|
pre-commit>=2
|
|
31
33
|
|
|
32
34
|
[docs]
|
|
33
35
|
mkdocs
|
|
34
36
|
mkdocs-material
|
|
35
|
-
mkdocstrings
|
|
37
|
+
mkdocstrings[python]
|
|
36
38
|
|
|
37
39
|
[test]
|
|
38
40
|
pytest
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nerdd-module"
|
|
7
|
-
version = "0.3.42"
|
|
7
|
+
version = "0.3.44"
|
|
8
8
|
description = "Base package to create NERDD modules"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "BSD-3-Clause"
|
|
@@ -20,6 +20,7 @@ dependencies = [
|
|
|
20
20
|
"stringcase>=1.2.0",
|
|
21
21
|
"decorator>=5.1.1",
|
|
22
22
|
"pydantic>=2",
|
|
23
|
+
"chardet>=5",
|
|
23
24
|
# install importlib-resources and importlib-metadata for old Python versions
|
|
24
25
|
"importlib-resources>=5; python_version<'3.9'",
|
|
25
26
|
"importlib-metadata>=4.6; python_version<'3.10'",
|
|
@@ -57,6 +58,7 @@ dev = [
|
|
|
57
58
|
"types-PyYAML",
|
|
58
59
|
"types-decorator",
|
|
59
60
|
"types-setuptools",
|
|
61
|
+
"types-chardet",
|
|
60
62
|
"pre-commit>=2",
|
|
61
63
|
]
|
|
62
64
|
csp = [
|
|
@@ -78,7 +80,7 @@ test = [
|
|
|
78
80
|
"hypothesis",
|
|
79
81
|
"hypothesis-rdkit",
|
|
80
82
|
]
|
|
81
|
-
docs = ["mkdocs", "mkdocs-material", "mkdocstrings"]
|
|
83
|
+
docs = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"]
|
|
82
84
|
|
|
83
85
|
[project.urls]
|
|
84
86
|
Repository = "https://github.com/molinfo-vienna/nerdd-module"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/remove_small_fragments.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.42 → nerdd_module-0.3.44}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|