nerdd-module 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/PKG-INFO +1 -1
  2. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/abstract_model.py +2 -2
  3. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/csv_writer.py +1 -1
  4. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/file_reader.py +17 -7
  5. nerdd-module-0.2.5/nerdd_module/io/reader_registry.py +59 -0
  6. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/tests/checks.py +44 -4
  7. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/tests/representations.py +1 -1
  8. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module.egg-info/PKG-INFO +1 -1
  9. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module.egg-info/SOURCES.txt +0 -1
  10. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/setup.py +1 -1
  11. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/conftest.py +1 -1
  12. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/steps/__init__.py +0 -1
  13. nerdd-module-0.2.4/nerdd_module/io/reader_registry.py +0 -30
  14. nerdd-module-0.2.4/tests/steps/molecules.py +0 -54
  15. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/LICENSE +0 -0
  16. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/README.md +0 -0
  17. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/__init__.py +0 -0
  18. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/cli.py +0 -0
  19. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/__init__.py +0 -0
  20. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/auto_configuration.py +0 -0
  21. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/configuration.py +0 -0
  22. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/default_configuration.py +0 -0
  23. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/dict_configuration.py +0 -0
  24. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/merged_configuration.py +0 -0
  25. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/package_configuration.py +0 -0
  26. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/config/yaml_configuration.py +0 -0
  27. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/__init__.py +0 -0
  28. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/depth_first_explorer.py +0 -0
  29. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/explorer.py +0 -0
  30. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/gzip_reader.py +0 -0
  31. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/inchi_reader.py +0 -0
  32. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/list_reader.py +0 -0
  33. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/mol_reader.py +0 -0
  34. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/reader.py +0 -0
  35. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/sdf_reader.py +0 -0
  36. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/sdf_writer.py +0 -0
  37. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/smiles_reader.py +0 -0
  38. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/string_reader.py +0 -0
  39. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/tar_reader.py +0 -0
  40. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/writer.py +0 -0
  41. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/writer_registry.py +0 -0
  42. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/io/zip_reader.py +0 -0
  43. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/polyfills/__init__.py +0 -0
  44. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/polyfills/files.py +0 -0
  45. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/polyfills/get_entry_points.py +0 -0
  46. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/__init__.py +0 -0
  47. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  48. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
  49. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/empty_pipeline.py +0 -0
  50. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  51. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  52. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/pipeline.py +0 -0
  53. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/registry.py +0 -0
  54. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  55. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/preprocessing/step.py +0 -0
  56. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/problem.py +0 -0
  57. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/tests/__init__.py +0 -0
  58. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/tests/predictions.py +0 -0
  59. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module/version.py +0 -0
  60. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module.egg-info/dependency_links.txt +0 -0
  61. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module.egg-info/requires.txt +0 -0
  62. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/nerdd_module.egg-info/top_level.txt +0 -0
  63. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/setup.cfg +0 -0
  64. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/__init__.py +0 -0
  65. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/models/AtomicMassModel.py +0 -0
  66. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/models/MolWeightModel.py +0 -0
  67. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/models/MolWeightModelWithExplicitMolIds.py +0 -0
  68. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/models/MolWeightModelWithExplicitMols.py +0 -0
  69. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/models/__init__.py +0 -0
  70. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/steps/checks.py +0 -0
  71. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/steps/predictors.py +0 -0
  72. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/steps/preprocessing.py +0 -0
  73. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/test_atom_property_prediction.py +0 -0
  74. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/test_molecule_property_prediction.py +0 -0
  75. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/test_preprocessing.py +0 -0
  76. {nerdd-module-0.2.4 → nerdd-module-0.2.5}/tests/test_reading_formats.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Base package to create NERDD modules
5
5
  Home-page: https://github.com/molinfo-vienna/nerdd-module.git
6
6
  Maintainer: Steffen Hirte
@@ -158,7 +158,7 @@ class AbstractModel(ABC):
158
158
  # check that mol_id contains only valid ids
159
159
  assert set(df_predictions.mol_id).issubset(
160
160
  set(df_valid_subset.mol_id)
161
- ), "The mol_id column must only contain valid ids!"
161
+ ), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
162
162
  # use mol_id as index
163
163
  df_predictions.set_index("mol_id", drop=True, inplace=True)
164
164
  elif "mol" in df_predictions.columns:
@@ -166,7 +166,7 @@ class AbstractModel(ABC):
166
166
  names = df_predictions.mol.apply(lambda mol: int(mol.GetProp("_Name")))
167
167
  assert set(names).issubset(
168
168
  set(df_preprocess.mol_id)
169
- ), "The molecule names must only contain valid ids!"
169
+ ), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
170
170
 
171
171
  # use mol_id as index
172
172
  df_predictions.set_index(
@@ -26,5 +26,5 @@ class CsvWriter(Writer):
26
26
  for entry in chain([first_entry], entry_iter):
27
27
  for key, value in entry.items():
28
28
  if isinstance(value, Mol):
29
- entry[key] = MolToSmiles(value)
29
+ entry[key] = MolToSmiles(value, canonical=False)
30
30
  writer.writerow(entry)
@@ -8,24 +8,34 @@ from .reader_registry import register_reader
8
8
  __all__ = ["FileReader"]
9
9
 
10
10
 
11
- @register_reader
11
+ @register_reader("data_dir")
12
12
  class FileReader(Reader):
13
- def __init__(self, data_dir="."):
13
+ def __init__(self, data_dir=None):
14
14
  super().__init__()
15
- self.data_dir = Path(data_dir)
15
+ self.data_dir = data_dir
16
+ if self.data_dir is not None:
17
+ self.data_dir = Path(self.data_dir)
16
18
 
17
19
  def read(self, filename, explore) -> Generator[MoleculeEntry, None, None]:
18
20
  assert isinstance(filename, str), "input must be a string"
19
21
 
22
+ # convert filename to path
20
23
  try:
21
24
  path = Path(filename)
22
-
23
- if not path.is_absolute():
24
- path = self.data_dir / path
25
25
  except:
26
26
  raise ValueError("input must be a valid path")
27
+
28
+ # convert to absolute path
29
+ if not path.is_absolute():
30
+ if self.data_dir is not None:
31
+ path = self.data_dir / path
32
+ else:
33
+ path = Path(".") / path
34
+
35
+ # check that the file is within the data_dir
36
+ assert self.data_dir is None or self.data_dir in path.parents, "input must be a relative path"
27
37
 
28
- assert self.data_dir in path.parents, "input must be a relative path"
38
+ # check that the file exists
29
39
  assert path.exists(), "input must be a valid file"
30
40
 
31
41
  with open(path, "rb") as f:
@@ -0,0 +1,59 @@
1
+ from functools import lru_cache
2
+ from typing import Dict, Generator, List, Tuple, Type
3
+
4
+ from .reader import Reader
5
+
6
+ __all__ = ["ReaderRegistry", "register_reader"]
7
+
8
+
9
+ # lru_cache makes the registry a singleton
10
+ @lru_cache(maxsize=1)
11
+ class ReaderRegistry:
12
+ def __init__(self):
13
+ self._factories : List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
14
+ self._config = {}
15
+
16
+ def _create_reader(self, ReaderClass: Type[Reader], *args, **kwargs) -> Reader:
17
+ # translate all args
18
+ args = [self._config.get(arg, None) for arg in args]
19
+ # translate all kwargs
20
+ kwargs = {k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config}
21
+
22
+ return ReaderClass(*args, **kwargs)
23
+
24
+ def register(self, ReaderClass: Type[Reader], *args :str , **kwargs:str):
25
+ assert issubclass(ReaderClass, Reader)
26
+ assert all([isinstance(arg, str) for arg in args])
27
+ assert all([isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()])
28
+ self._factories.append((ReaderClass, args, kwargs))
29
+
30
+ def readers(self) -> Generator[Reader, None, None]:
31
+ for reader, args, kwargs in self._factories:
32
+ yield self._create_reader(reader, *args, **kwargs)
33
+
34
+ def __iter__(self):
35
+ return iter(self.readers())
36
+
37
+
38
+ def register_reader(*args, **kwargs):
39
+ def wrapper(cls, *args, **kwargs):
40
+ ReaderRegistry().register(cls, *args, **kwargs)
41
+ return cls
42
+
43
+ # Case 1: first argument is a class
44
+ # --> decorator is used without arguments
45
+ # @register_reader
46
+ # class F:
47
+ # ...
48
+ if len(args) > 0 and isinstance(args[0], type):
49
+ return wrapper(args[0], *args[1:], **kwargs)
50
+
51
+ # Case 2: first argument is a not a class
52
+ # --> decorator is used with arguments
53
+ # @register_reader("blah")
54
+ # class F:
55
+ # ...
56
+ def inner(cls):
57
+ assert isinstance(cls, type), "Decorator must be used with a class"
58
+ return wrapper(cls, *args, **kwargs)
59
+ return inner
@@ -1,4 +1,5 @@
1
1
  import json
2
+ from ast import literal_eval
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
@@ -39,14 +40,17 @@ def check_column_range(subset, column_name, low, high):
39
40
 
40
41
 
41
42
  @then(parsers.parse("the value in column '{column_name}' should be '{expected_value}'"))
42
- def check_column_value(predictions, column_name, expected_value):
43
- value = predictions[column_name].iloc[0]
43
+ def check_column_value(subset, column_name, expected_value):
44
+ if len(subset) == 0:
45
+ return
46
+
47
+ value = subset[column_name].iloc[0]
44
48
 
45
49
  # expected value is always provided as string
46
50
  # try to convert to float if possible
47
51
  try:
48
- expected_value = float(expected_value)
49
- except ValueError:
52
+ expected_value = literal_eval(expected_value)
53
+ except:
50
54
  pass
51
55
 
52
56
  if expected_value == "(none)":
@@ -132,3 +136,39 @@ def check_column_length(subset, column_name, length):
132
136
  assert (
133
137
  subset[column_name].map(lambda x: len(x) > length)
134
138
  ).all(), f"Column {column_name} has unexpected length"
139
+
140
+
141
+ @then(parsers.parse("when '{condition_column_name}' is '{condition_value}' "
142
+ "the value in column '{column_name}' should be '{expected_value}'"))
143
+ def check_conditional_column_value(subset, condition_column_name, condition_value, column_name, expected_value):
144
+ # expected value is always provided as string
145
+ # try to convert to float if possible
146
+ try:
147
+ expected_value = literal_eval(expected_value)
148
+ except:
149
+ pass
150
+
151
+ # same for condition value
152
+ try:
153
+ condition_value = literal_eval(condition_value)
154
+ except:
155
+ pass
156
+
157
+ # condition value can be (none) to indicate None
158
+ if condition_value == "(none)":
159
+ subset = subset[pd.isnull(subset[condition_column_name])]
160
+ else:
161
+ subset = subset[subset[condition_column_name] == condition_value]
162
+
163
+ value = subset[column_name]
164
+ assert len(value) > 0, f"No rows found for condition {condition_column_name} == {condition_value}"
165
+
166
+ # expected value can be (none) to indicate None
167
+ if expected_value == "(none)":
168
+ # if expected_value is the magic string "(none)", we expect None
169
+ assert pd.isnull(value).all(), f"Column {column_name} is assigned to {value} != None"
170
+ else:
171
+ # otherwise, we expect the value to be equal to the expected value
172
+ assert (
173
+ (value == expected_value).all()
174
+ ), f"Column {column_name} is assigned to {value} != {expected_value}"
@@ -45,7 +45,7 @@ def representations_from_molecules(molecules, input_type):
45
45
  ),
46
46
  target_fixture="molecules",
47
47
  )
48
- def molecules(num, num_none, random_seed):
48
+ def molecules(num, num_none, random_seed=0):
49
49
  result = None
50
50
 
51
51
  # pytest-bdd and hypothesis don't play well together (yet)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Base package to create NERDD modules
5
5
  Home-page: https://github.com/molinfo-vienna/nerdd-module.git
6
6
  Maintainer: Steffen Hirte
@@ -68,6 +68,5 @@ tests/models/MolWeightModelWithExplicitMols.py
68
68
  tests/models/__init__.py
69
69
  tests/steps/__init__.py
70
70
  tests/steps/checks.py
71
- tests/steps/molecules.py
72
71
  tests/steps/predictors.py
73
72
  tests/steps/preprocessing.py
@@ -16,7 +16,7 @@ rdkit_requirement = ["rdkit>=2022.3.3"] if not rdkit_installed else []
16
16
 
17
17
  setup(
18
18
  name="nerdd-module",
19
- version="0.2.4",
19
+ version="0.2.5",
20
20
  maintainer="Steffen Hirte",
21
21
  maintainer_email="steffen.hirte@univie.ac.at",
22
22
  packages=find_packages(),
@@ -4,4 +4,4 @@
4
4
  # from .steps import *
5
5
  #
6
6
  # instead, we use pytest_plugins to make this work
7
- pytest_plugins = ["tests.steps"]
7
+ pytest_plugins = ["tests.steps", "nerdd_module.tests"]
@@ -1,4 +1,3 @@
1
1
  from .checks import *
2
- from .molecules import *
3
2
  from .predictors import *
4
3
  from .preprocessing import *
@@ -1,30 +0,0 @@
1
- from functools import lru_cache
2
- from typing import Generator, Type
3
-
4
- from .reader import Reader
5
-
6
- __all__ = ["ReaderRegistry", "register_reader"]
7
-
8
-
9
- # lru_cache makes the registry a singleton
10
- @lru_cache(maxsize=1)
11
- class ReaderRegistry:
12
- def __init__(self):
13
- self._factories = []
14
-
15
- def register(self, ReaderClass: Type[Reader], *args, **kwargs):
16
- assert issubclass(ReaderClass, Reader)
17
- self._factories.append(lambda: ReaderClass(*args, **kwargs))
18
-
19
- def readers(self) -> Generator[Reader, None, None]:
20
- for reader in self._factories:
21
- yield reader()
22
-
23
- def __iter__(self):
24
- return iter(map(lambda f: f(), self._factories))
25
-
26
-
27
- def register_reader(clazz, *args, **kwargs):
28
- # TODO: implement both decorator modes
29
- ReaderRegistry().register(clazz, *args, **kwargs)
30
- return clazz
@@ -1,54 +0,0 @@
1
- import numpy as np
2
- from hypothesis import given as hgiven
3
- from hypothesis import settings
4
- from hypothesis import strategies as st
5
- from hypothesis_rdkit import mols
6
- from pytest_bdd import given, parsers
7
- from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
8
-
9
-
10
- @given(
11
- parsers.parse(
12
- "a list of {num:d} random molecules, where {num_none:d} entries are None"
13
- ),
14
- target_fixture="molecules",
15
- )
16
- def molecules(num, num_none):
17
- result = None
18
-
19
- @hgiven(st.lists(mols(), min_size=num, max_size=num, unique_by=MolToSmiles))
20
- @settings(max_examples=1, deadline=None)
21
- def generate(mols):
22
- nonlocal result
23
- # ensure that all molecules are valid
24
- result = mols
25
-
26
- generate()
27
-
28
- # replace random entries with None
29
- indices = np.random.choice(num, num_none, replace=False)
30
- for i in indices:
31
- result[i] = None
32
-
33
- return result
34
-
35
-
36
- @given(
37
- parsers.parse("the representations of the molecules as {input_type}"),
38
- target_fixture="representations",
39
- )
40
- def representations(molecules, input_type):
41
- if input_type == "smiles":
42
- converter = MolToSmiles
43
- elif input_type == "mol_block":
44
- converter = MolToMolBlock
45
- elif input_type == "inchi":
46
- converter = MolToInchi
47
- elif input_type == "rdkit_mol":
48
- converter = lambda mol: mol
49
- else:
50
- raise ValueError(f"Unknown input_type: {input_type}")
51
-
52
- result = [converter(mol) if mol is not None else None for mol in molecules]
53
-
54
- return result
File without changes
File without changes
File without changes