nerdd-module 0.2.1__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/PKG-INFO +1 -1
  2. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/file_reader.py +6 -3
  3. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/polyfills/get_entry_points.py +4 -1
  4. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/chembl_structure_pipeline.py +8 -0
  5. nerdd-module-0.2.4/nerdd_module/tests/__init__.py +3 -0
  6. nerdd-module-0.2.4/nerdd_module/tests/checks.py +134 -0
  7. nerdd-module-0.2.4/nerdd_module/tests/predictions.py +30 -0
  8. nerdd-module-0.2.4/nerdd_module/tests/representations.py +72 -0
  9. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module.egg-info/PKG-INFO +1 -1
  10. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module.egg-info/SOURCES.txt +4 -0
  11. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/setup.py +1 -1
  12. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/LICENSE +0 -0
  13. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/README.md +0 -0
  14. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/__init__.py +0 -0
  15. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/abstract_model.py +0 -0
  16. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/cli.py +0 -0
  17. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/__init__.py +0 -0
  18. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/auto_configuration.py +0 -0
  19. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/configuration.py +0 -0
  20. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/default_configuration.py +0 -0
  21. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/dict_configuration.py +0 -0
  22. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/merged_configuration.py +0 -0
  23. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/package_configuration.py +0 -0
  24. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/config/yaml_configuration.py +0 -0
  25. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/__init__.py +0 -0
  26. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/csv_writer.py +0 -0
  27. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/depth_first_explorer.py +0 -0
  28. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/explorer.py +0 -0
  29. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/gzip_reader.py +0 -0
  30. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/inchi_reader.py +0 -0
  31. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/list_reader.py +0 -0
  32. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/mol_reader.py +0 -0
  33. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/reader.py +0 -0
  34. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/reader_registry.py +0 -0
  35. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/sdf_reader.py +0 -0
  36. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/sdf_writer.py +0 -0
  37. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/smiles_reader.py +0 -0
  38. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/string_reader.py +0 -0
  39. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/tar_reader.py +0 -0
  40. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/writer.py +0 -0
  41. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/writer_registry.py +0 -0
  42. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/io/zip_reader.py +0 -0
  43. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/polyfills/__init__.py +0 -0
  44. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/polyfills/files.py +0 -0
  45. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/__init__.py +0 -0
  46. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  47. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/empty_pipeline.py +0 -0
  48. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  49. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  50. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/pipeline.py +0 -0
  51. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/registry.py +0 -0
  52. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  53. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/preprocessing/step.py +0 -0
  54. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/problem.py +0 -0
  55. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module/version.py +0 -0
  56. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module.egg-info/dependency_links.txt +0 -0
  57. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module.egg-info/requires.txt +0 -0
  58. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/nerdd_module.egg-info/top_level.txt +0 -0
  59. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/setup.cfg +0 -0
  60. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/__init__.py +0 -0
  61. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/conftest.py +0 -0
  62. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/models/AtomicMassModel.py +0 -0
  63. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/models/MolWeightModel.py +0 -0
  64. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/models/MolWeightModelWithExplicitMolIds.py +0 -0
  65. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/models/MolWeightModelWithExplicitMols.py +0 -0
  66. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/models/__init__.py +0 -0
  67. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/steps/__init__.py +0 -0
  68. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/steps/checks.py +0 -0
  69. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/steps/molecules.py +0 -0
  70. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/steps/predictors.py +0 -0
  71. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/steps/preprocessing.py +0 -0
  72. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/test_atom_property_prediction.py +0 -0
  73. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/test_molecule_property_prediction.py +0 -0
  74. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/test_preprocessing.py +0 -0
  75. {nerdd-module-0.2.1 → nerdd-module-0.2.4}/tests/test_reading_formats.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.1
3
+ Version: 0.2.4
4
4
  Summary: Base package to create NERDD modules
5
5
  Home-page: https://github.com/molinfo-vienna/nerdd-module.git
6
6
  Maintainer: Steffen Hirte
@@ -18,14 +18,17 @@ class FileReader(Reader):
18
18
  assert isinstance(filename, str), "input must be a string"
19
19
 
20
20
  try:
21
- path = Path(filename).absolute()
21
+ path = Path(filename)
22
+
23
+ if not path.is_absolute():
24
+ path = self.data_dir / path
22
25
  except:
23
26
  raise ValueError("input must be a valid path")
24
27
 
25
- assert path.is_relative_to(self.data_dir), "input must be a relative path"
28
+ assert self.data_dir in path.parents, "input must be a relative path"
26
29
  assert path.exists(), "input must be a valid file"
27
30
 
28
- with open(filename, "rb") as f:
31
+ with open(path, "rb") as f:
29
32
  for entry in explore(f):
30
33
  if len(entry.source) == 1 and entry.source[0] == "raw_input":
31
34
  source = tuple()
@@ -6,7 +6,10 @@ try:
6
6
  from importlib.metadata import entry_points
7
7
 
8
8
  def get_entry_points(group):
9
- return entry_points(group=group)
9
+ try:
10
+ return entry_points(group=group)
11
+ except TypeError:
12
+ return entry_points().get(group, [])
10
13
 
11
14
  except ImportError:
12
15
  import pkg_resources
@@ -1,3 +1,4 @@
1
+ import warnings
1
2
  from typing import List, Tuple
2
3
 
3
4
  from rdkit.Chem import Mol
@@ -11,6 +12,13 @@ from .pipeline import Pipeline
11
12
  from .remove_stereochemistry import RemoveStereochemistry
12
13
  from .step import Step
13
14
 
15
+ # before importing chembl_structure_pipeline, we need to suppress RDKit warnings
16
+ warnings.filterwarnings(
17
+ "ignore",
18
+ category=DeprecationWarning,
19
+ module="rdkit.Chem.MolStandardize",
20
+ )
21
+
14
22
  try:
15
23
  # importing chembl_structure_pipeline already logs messages
16
24
  # --> suppress them temporarily
@@ -0,0 +1,3 @@
1
+ from .checks import *
2
+ from .predictions import *
3
+ from .representations import *
@@ -0,0 +1,134 @@
1
+ import json
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from pytest_bdd import parsers, then
6
+
7
+
8
+ @then(parsers.parse("The result should contain the columns:\n{column_names}"))
9
+ def check_result_columns(predictions, column_names):
10
+ column_names = column_names.strip()
11
+ for c in column_names.split("\n"):
12
+ assert (
13
+ c in predictions.columns
14
+ ), f"Column {c} not in predictions {predictions.columns.tolist()}"
15
+
16
+
17
+ @then(
18
+ parsers.parse(
19
+ "the value in column '{column_name}' should be between {low} and {high}"
20
+ )
21
+ )
22
+ def check_column_range(subset, column_name, low, high):
23
+ if low == "infinity":
24
+ low = np.inf
25
+ elif low == "-infinity":
26
+ low = -np.inf
27
+ else:
28
+ low = float(low)
29
+
30
+ if high == "infinity":
31
+ high = np.inf
32
+ elif high == "-infinity":
33
+ high = -np.inf
34
+ else:
35
+ high = float(high)
36
+
37
+ assert (low <= subset[column_name]).all()
38
+ assert (subset[column_name] <= high).all()
39
+
40
+
41
+ @then(parsers.parse("the value in column '{column_name}' should be '{expected_value}'"))
42
+ def check_column_value(predictions, column_name, expected_value):
43
+ value = predictions[column_name].iloc[0]
44
+
45
+ # expected value is always provided as string
46
+ # try to convert to float if possible
47
+ try:
48
+ expected_value = float(expected_value)
49
+ except ValueError:
50
+ pass
51
+
52
+ if expected_value == "(none)":
53
+ # if expected_value is the magic string "(none)", we expect None
54
+ assert pd.isnull(value), f"Column {column_name} is assigned to {value} != None"
55
+ else:
56
+ # otherwise, we expect the value to be equal to the expected value
57
+ assert (
58
+ value == expected_value
59
+ ), f"Column {column_name} is assigned to {value} != {expected_value}"
60
+
61
+
62
+ @then(
63
+ parsers.parse(
64
+ "the value in column '{column_name}' should be a subset of {superset}"
65
+ )
66
+ )
67
+ def check_column_subset(subset, column_name, superset):
68
+ superset = set(json.loads(superset))
69
+
70
+ assert all(
71
+ set(value).issubset(superset) for value in subset[column_name]
72
+ ), f"Column {column_name} contains value not in {superset}"
73
+
74
+
75
+ @then(parsers.parse("the value in column '{column_name}' should be one of {superset}"))
76
+ def check_column_membership(subset, column_name, superset):
77
+ superset = json.loads(superset)
78
+
79
+ assert isinstance(
80
+ superset, list
81
+ ), f"Expected a list for superset, got {type(superset)}"
82
+
83
+ assert (
84
+ subset[column_name].isin(superset).all()
85
+ ), f"Column {column_name} contains value not in {superset}"
86
+
87
+
88
+ @then(parsers.parse("the value in column '{column_name}' should be a png image"))
89
+ def check_png_image(subset, column_name):
90
+ if len(subset) == 0:
91
+ return
92
+
93
+ assert (
94
+ subset[column_name].str.startswith('<img src="data:image/png;base64,')
95
+ ).all(), f"Column {column_name} does not contain a PNG image"
96
+
97
+
98
+ @then(
99
+ parsers.parse("the value in column '{column_name}' should contain only '{value}'")
100
+ )
101
+ def check_column_membership_single(predictions, column_name, value):
102
+ if value == "(none)":
103
+ assert all(
104
+ pd.isnull(predictions[column_name])
105
+ ), f"Column {column_name} must be none"
106
+ else:
107
+ assert all(
108
+ value in values for values in predictions[column_name]
109
+ ), f"Column {column_name} contains value {value}"
110
+
111
+
112
+ @then(
113
+ parsers.parse(
114
+ "the value in column '{column_name}' should have type '{expected_type}'"
115
+ )
116
+ )
117
+ def check_column_type(subset, column_name, expected_type):
118
+ expected_type = eval(expected_type)
119
+
120
+ assert (
121
+ subset[column_name].map(lambda x: isinstance(x, expected_type)).all()
122
+ ), f"Column {column_name} has unexpected type"
123
+
124
+
125
+ @then(
126
+ parsers.parse(
127
+ "the value in column '{column_name}' should have length greater than {length}"
128
+ )
129
+ )
130
+ def check_column_length(subset, column_name, length):
131
+ length = int(length)
132
+ assert (
133
+ subset[column_name].map(lambda x: len(x) > length)
134
+ ).all(), f"Column {column_name} has unexpected length"
@@ -0,0 +1,30 @@
1
+ import pandas as pd
2
+ from pytest_bdd import parsers, then, when
3
+
4
+
5
+ @when(
6
+ parsers.parse("the model generates predictions for the molecule representations"),
7
+ target_fixture="predictions",
8
+ )
9
+ def predictions(
10
+ representations,
11
+ model,
12
+ input_type,
13
+ ):
14
+ return model.predict(
15
+ representations,
16
+ )
17
+
18
+
19
+ @when(
20
+ "The subset of the result where the input was not None is considered",
21
+ target_fixture="subset",
22
+ )
23
+ def subset_without_none(predictions):
24
+ # remove None entries
25
+ return predictions[predictions.preprocessed_mol.notnull()]
26
+
27
+
28
+ @then("the result should be a pandas DataFrame")
29
+ def check_result(predictions):
30
+ assert isinstance(predictions, pd.DataFrame)
@@ -0,0 +1,72 @@
1
+ import numpy as np
2
+ from hypothesis import given as hgiven
3
+ from hypothesis import seed, settings
4
+ from hypothesis import strategies as st
5
+ from hypothesis_rdkit import mols, smiles
6
+ from pytest_bdd import given, parsers
7
+ from rdkit.Chem import MolFromSmiles, MolToMolBlock, MolToSmiles
8
+
9
+
10
+ @given(parsers.parse("a random seed set to {seed:d}"), target_fixture="random_seed")
11
+ def random_seed(seed):
12
+ return seed
13
+
14
+
15
+ @given(
16
+ parsers.parse("an input molecule specified by '{input}'"),
17
+ target_fixture="representations",
18
+ )
19
+ def representations_from_input(input):
20
+ return [input]
21
+
22
+
23
+ @given(
24
+ parsers.parse("the representations of the molecules"),
25
+ target_fixture="representations",
26
+ )
27
+ def representations_from_molecules(molecules, input_type):
28
+ if input_type == "smiles":
29
+ converter = MolToSmiles
30
+ elif input_type == "mol_block":
31
+ converter = MolToMolBlock
32
+ elif input_type == "rdkit_mol":
33
+ converter = lambda mol: mol
34
+ else:
35
+ raise ValueError(f"Unknown input_type: {input_type}")
36
+
37
+ result = [converter(mol) if mol is not None else None for mol in molecules]
38
+
39
+ return result
40
+
41
+
42
+ @given(
43
+ parsers.parse(
44
+ "a list of {num:d} random molecules, where {num_none:d} entries are None"
45
+ ),
46
+ target_fixture="molecules",
47
+ )
48
+ def molecules(num, num_none, random_seed):
49
+ result = None
50
+
51
+ # pytest-bdd and hypothesis don't play well together (yet)
52
+ # --> use this workaround to generate random molecules
53
+ @hgiven(st.lists(mols(), min_size=num, max_size=num, unique_by=MolToSmiles))
54
+ @settings(max_examples=1, deadline=None)
55
+ @seed(random_seed)
56
+ def generate(ms):
57
+ nonlocal result
58
+ result = ms
59
+
60
+ generate()
61
+
62
+ # replace random entries with None
63
+ indices = np.random.choice(num, num_none, replace=False)
64
+ for i in indices:
65
+ result[i] = None
66
+
67
+ return result
68
+
69
+
70
+ @given(parsers.parse("the input type is '{input_type}'"), target_fixture="input_type")
71
+ def input_type(input_type):
72
+ return input_type
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.1
3
+ Version: 0.2.4
4
4
  Summary: Base package to create NERDD modules
5
5
  Home-page: https://github.com/molinfo-vienna/nerdd-module.git
6
6
  Maintainer: Steffen Hirte
@@ -51,6 +51,10 @@ nerdd_module/preprocessing/pipeline.py
51
51
  nerdd_module/preprocessing/registry.py
52
52
  nerdd_module/preprocessing/remove_stereochemistry.py
53
53
  nerdd_module/preprocessing/step.py
54
+ nerdd_module/tests/__init__.py
55
+ nerdd_module/tests/checks.py
56
+ nerdd_module/tests/predictions.py
57
+ nerdd_module/tests/representations.py
54
58
  tests/__init__.py
55
59
  tests/conftest.py
56
60
  tests/test_atom_property_prediction.py
@@ -16,7 +16,7 @@ rdkit_requirement = ["rdkit>=2022.3.3"] if not rdkit_installed else []
16
16
 
17
17
  setup(
18
18
  name="nerdd-module",
19
- version="0.2.1",
19
+ version="0.2.4",
20
20
  maintainer="Steffen Hirte",
21
21
  maintainer_email="steffen.hirte@univie.ac.at",
22
22
  packages=find_packages(),
File without changes
File without changes
File without changes