nerdd-module 0.3.9__tar.gz → 0.3.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/PKG-INFO +3 -2
  2. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/cli.py +40 -28
  3. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/__init__.py +1 -0
  4. nerdd_module-0.3.10/nerdd_module/config/configuration.py +32 -0
  5. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/merged_configuration.py +1 -1
  6. nerdd_module-0.3.10/nerdd_module/config/models.py +178 -0
  7. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/package_configuration.py +1 -1
  8. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/search_yaml_configuration.py +1 -1
  9. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/file_reader.py +3 -0
  10. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/inchi_reader.py +7 -0
  11. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/reader.py +17 -23
  12. nerdd_module-0.3.10/nerdd_module/input/reader_config.py +7 -0
  13. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/smiles_reader.py +3 -0
  14. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/convert_representations_step.py +5 -3
  15. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/enforce_schema_step.py +3 -1
  16. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/model.py +2 -1
  17. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/read_input_step.py +1 -1
  18. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/simple_model.py +12 -10
  19. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/writer.py +7 -0
  20. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module.egg-info/PKG-INFO +3 -2
  21. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module.egg-info/SOURCES.txt +2 -0
  22. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module.egg-info/requires.txt +2 -1
  23. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/pyproject.toml +6 -3
  24. nerdd_module-0.3.9/nerdd_module/config/configuration.py +0 -91
  25. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/LICENSE +0 -0
  26. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/README.md +0 -0
  27. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/__init__.py +0 -0
  28. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/default_configuration.py +0 -0
  29. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/dict_configuration.py +0 -0
  30. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/config/yaml_configuration.py +0 -0
  31. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/converters/__init__.py +0 -0
  32. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/converters/converter.py +0 -0
  33. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/converters/identity_converter.py +0 -0
  34. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/__init__.py +0 -0
  35. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/depth_first_explorer.py +0 -0
  36. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/explorer.py +0 -0
  37. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/gzip_reader.py +0 -0
  38. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/list_reader.py +0 -0
  39. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/mol_reader.py +0 -0
  40. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/sdf_reader.py +0 -0
  41. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/string_reader.py +0 -0
  42. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/tar_reader.py +0 -0
  43. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/input/zip_reader.py +0 -0
  44. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/__init__.py +0 -0
  45. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/assign_mol_id_step.py +0 -0
  46. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/assign_name_step.py +0 -0
  47. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/model/write_output_step.py +0 -0
  48. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/__init__.py +0 -0
  49. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/csv_writer.py +0 -0
  50. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/file_writer.py +0 -0
  51. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/iterator_writer.py +0 -0
  52. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/pandas_writer.py +0 -0
  53. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/record_list_writer.py +0 -0
  54. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/output/sdf_writer.py +0 -0
  55. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/polyfills/__init__.py +0 -0
  56. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/polyfills/files.py +0 -0
  57. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/polyfills/get_entry_points.py +0 -0
  58. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/polyfills/types.py +0 -0
  59. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/polyfills/version.py +0 -0
  60. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/__init__.py +0 -0
  61. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  62. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
  63. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  64. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  65. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
  66. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  67. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/preprocessing/sanitize.py +0 -0
  68. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/problem.py +0 -0
  69. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/py.typed +0 -0
  70. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/steps/__init__.py +0 -0
  71. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/steps/map_step.py +0 -0
  72. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/steps/output_step.py +0 -0
  73. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/steps/step.py +0 -0
  74. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/__init__.py +0 -0
  75. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/checks.py +0 -0
  76. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
  77. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/models/MolWeightModel.py +0 -0
  78. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/models/__init__.py +0 -0
  79. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/predictions.py +0 -0
  80. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
  81. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/__init__.py +0 -0
  82. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/tests/representations.py +0 -0
  83. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/util/__init__.py +0 -0
  84. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/util/call_with_mappings.py +0 -0
  85. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/util/package.py +0 -0
  86. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module/version.py +0 -0
  87. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module.egg-info/dependency_links.txt +0 -0
  88. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/nerdd_module.egg-info/top_level.txt +0 -0
  89. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/setup.cfg +0 -0
  90. {nerdd_module-0.3.9 → nerdd_module-0.3.10}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -57,6 +57,7 @@ Requires-Dist: filetype~=1.2.0
57
57
  Requires-Dist: rich-click>=1.7.1
58
58
  Requires-Dist: stringcase>=1.2.0
59
59
  Requires-Dist: decorator>=5.1.1
60
+ Requires-Dist: pydantic>=2
60
61
  Requires-Dist: importlib-resources>=5; python_version < "3.9"
61
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
62
63
  Provides-Extra: dev
@@ -77,7 +78,7 @@ Requires-Dist: pytest; extra == "test"
77
78
  Requires-Dist: pytest-sugar; extra == "test"
78
79
  Requires-Dist: pytest-cov; extra == "test"
79
80
  Requires-Dist: pytest-asyncio; extra == "test"
80
- Requires-Dist: pytest-bdd; extra == "test"
81
+ Requires-Dist: pytest-bdd==7.3.0; extra == "test"
81
82
  Requires-Dist: pytest-mock; extra == "test"
82
83
  Requires-Dist: pytest-watcher; extra == "test"
83
84
  Requires-Dist: hypothesis; extra == "test"
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  import sys
3
4
  from typing import Any, Callable
4
5
 
@@ -6,7 +7,10 @@ import rich_click as click
6
7
  from decorator import decorator
7
8
  from stringcase import spinalcase
8
9
 
10
+ from .config import JobParameter
11
+ from .input import Reader
9
12
  from .model import Model
13
+ from .output import FileWriter, Writer
10
14
 
11
15
  __all__ = ["auto_cli"]
12
16
 
@@ -21,25 +25,21 @@ Note that input formats shouldn't be mixed.
21
25
  """
22
26
 
23
27
 
24
- def infer_click_type(param: dict) -> click.ParamType:
25
- if "choices" in param:
26
- choices = [c["value"] for c in param["choices"]]
28
+ def infer_click_type(param: JobParameter) -> click.ParamType:
29
+ if param.choices is not None:
30
+ choices = [c.value for c in param.choices]
27
31
  return click.Choice(choices)
28
32
 
29
33
  type_map = {
30
34
  "float": click.FLOAT,
31
- "int": click.INT,
32
- "str": click.STRING,
35
+ "integer": click.INT,
36
+ "string": click.STRING,
33
37
  "bool": click.BOOL,
34
38
  }
35
39
 
36
- if "type" not in param:
37
- raise ValueError(f"Parameter {param['name']} does not have a type")
38
-
39
- t = param["type"]
40
-
40
+ t = param.type
41
41
  if t not in type_map:
42
- raise ValueError(f"Unknown type {t} for parameter {param['name']}")
42
+ raise ValueError(f"Unknown type {t} for parameter {param.name}")
43
43
 
44
44
  return type_map[t]
45
45
 
@@ -47,7 +47,7 @@ def infer_click_type(param: dict) -> click.ParamType:
47
47
  @decorator
48
48
  def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
49
49
  # infer the command name
50
- # command_name = os.path.basename(sys.argv[0])
50
+ command_name = os.path.basename(sys.argv[0])
51
51
 
52
52
  # get the model
53
53
  model = f()
@@ -59,21 +59,33 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
59
59
  description=model.description, input_format_list=input_format_list
60
60
  )
61
61
 
62
- output_format_list = ["sdf", "csv"]
62
+ output_format_list = [
63
+ output_format
64
+ for output_format, writer in Writer.get_writers(output_file=None).items()
65
+ if isinstance(writer, FileWriter)
66
+ ]
63
67
 
64
68
  # compose footer with examples
65
- # TODO: add examples
66
- # examples = []
67
- # if "example_smiles" in config:
68
- # examples.append(config["example_smiles"])
69
-
70
- # if len(examples) > 0:
71
- # footer = "Examples:\n"
72
- # for example in examples:
73
- # footer += f'* {command_name} "{example}"\n'
74
- # else:
75
- # footer = ""
76
- footer = ""
69
+ examples = []
70
+ if hasattr(model, "get_config"):
71
+ example_smiles = model.get_config().example_smiles
72
+ if example_smiles is not None:
73
+ examples.append(example_smiles)
74
+
75
+ for ReaderClass in Reader.get_reader_mapping():
76
+ if hasattr(ReaderClass, "config"):
77
+ reader_examples = ReaderClass.config.get("examples", [])
78
+ for example in reader_examples:
79
+ # check if example fits on one line
80
+ if len(example) < 120 and "\n" not in example:
81
+ examples.append(example)
82
+
83
+ if len(examples) > 0:
84
+ footer = "Examples:\n"
85
+ for example in examples:
86
+ footer += f'* {command_name} "{example}"\n'
87
+ else:
88
+ footer = ""
77
89
 
78
90
  #
79
91
  # Define the CLI entry point
@@ -107,12 +119,12 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
107
119
  #
108
120
  for param in model.job_parameters:
109
121
  # convert parameter name to spinal case (e.g. "max_confs" -> "max-confs")
110
- param_name = spinalcase(param["name"])
122
+ param_name = spinalcase(param.name)
111
123
  main = click.option(
112
124
  f"--{param_name}",
113
- default=param.get("default", None),
125
+ default=param.default,
114
126
  type=infer_click_type(param),
115
- help=param.get("help_text", None),
127
+ help=param.help_text,
116
128
  )(main)
117
129
 
118
130
  #
@@ -2,6 +2,7 @@ from .configuration import *
2
2
  from .default_configuration import *
3
3
  from .dict_configuration import *
4
4
  from .merged_configuration import *
5
+ from .models import *
5
6
  from .package_configuration import *
6
7
  from .search_yaml_configuration import *
7
8
  from .yaml_configuration import *
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from .models import Module
5
+
6
+ __all__ = ["Configuration"]
7
+
8
+
9
+ class Configuration(ABC):
10
+ def __init__(self) -> None:
11
+ self._cached_config: Optional[Module] = None
12
+
13
+ def get_dict(self) -> Module:
14
+ if self._cached_config is None:
15
+ config = self._get_dict()
16
+
17
+ # validate the config
18
+ module = Module(**config)
19
+
20
+ self._cached_config = module
21
+
22
+ return self._cached_config
23
+
24
+ @abstractmethod
25
+ def _get_dict(self) -> dict:
26
+ pass
27
+
28
+ def is_empty(self) -> bool:
29
+ return self.get_dict() == {}
30
+
31
+ def __repr__(self) -> str:
32
+ return f"{self.__class__.__name__}({self._get_dict()})"
@@ -41,4 +41,4 @@ def merge(*args: dict) -> dict:
41
41
 
42
42
  class MergedConfiguration(DictConfiguration):
43
43
  def __init__(self, *configs: Configuration):
44
- super().__init__(merge(*[c.get_dict() for c in configs]))
44
+ super().__init__(merge(*[c._get_dict() for c in configs]))
@@ -0,0 +1,178 @@
1
+ from typing import Any, List, Literal, Optional, Union
2
+
3
+ from pydantic import BaseModel, model_validator
4
+
5
+
6
+ class Partner(BaseModel):
7
+ name: str
8
+ logo: str
9
+ url: Optional[str] = None
10
+
11
+
12
+ class Author(BaseModel):
13
+ """
14
+ Author information
15
+
16
+ Attributes:
17
+ first_name : str
18
+ First name of the author.
19
+ last_name : str
20
+ Last name of the author.
21
+ email : Optional[str]
22
+ Email of the author. If provided, the author is a corresponding author.
23
+ """
24
+
25
+ first_name: str
26
+ last_name: str
27
+ email: Optional[str] = None
28
+
29
+
30
+ class Publication(BaseModel):
31
+ title: str
32
+ authors: List[Author] = []
33
+ journal: str
34
+ year: int
35
+ doi: Optional[str]
36
+
37
+
38
+ class JobParameterChoice(BaseModel):
39
+ value: str
40
+ label: Optional[str] = None
41
+
42
+
43
+ class JobParameter(BaseModel):
44
+ name: str
45
+ type: str
46
+ visible_name: Optional[str] = None
47
+ help_text: Optional[str] = None
48
+ default: Optional[str] = None
49
+ required: bool = False
50
+ choices: Optional[List[JobParameterChoice]] = None
51
+
52
+
53
+ Task = Literal[
54
+ "molecular_property_prediction",
55
+ "atom_property_prediction",
56
+ "derivative_property_prediction",
57
+ ]
58
+ Level = Literal["molecule", "atom", "derivative"]
59
+
60
+ FormatSpec = Union[List[str], str]
61
+
62
+
63
+ class IncludeExcludeFormatSpec(BaseModel):
64
+ include: Optional[FormatSpec]
65
+ exclude: Optional[FormatSpec]
66
+
67
+
68
+ class ResultProperty(BaseModel):
69
+ name: str
70
+ type: str
71
+ visible_name: Optional[str] = None
72
+ help_text: Optional[str] = None
73
+ sortable: bool = False
74
+ group: Optional[str] = None
75
+ level: Level = "molecule"
76
+ formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
77
+ representation: Optional[str] = None
78
+
79
+ def is_visible(self, output_format: str) -> bool:
80
+ formats = self.formats
81
+
82
+ if formats is None:
83
+ return True
84
+ elif isinstance(formats, list):
85
+ return output_format in formats
86
+ elif isinstance(formats, IncludeExcludeFormatSpec):
87
+ include = formats.include
88
+ exclude = formats.exclude or []
89
+ return (include is None or output_format in include) and output_format not in exclude
90
+ else:
91
+ raise ValueError(f"Invalid formats declaration {formats} in result property {self}")
92
+
93
+
94
+ class Module(BaseModel):
95
+ task: Optional[Task] = None
96
+ rank: Optional[int] = None
97
+ name: Optional[str] = None
98
+ batch_size: int = 100
99
+ version: Optional[str] = None
100
+ visible_name: Optional[str] = None
101
+ logo: Optional[str] = None
102
+ logo_title: Optional[str] = None
103
+ logo_caption: Optional[str] = None
104
+ example_smiles: Optional[str] = None
105
+ title: Optional[str] = None
106
+ description: Optional[str] = None
107
+ partners: List[Partner] = []
108
+ publications: List[Publication] = []
109
+ about: Optional[str] = None
110
+ job_parameters: List[JobParameter] = []
111
+ result_properties: List[ResultProperty] = []
112
+
113
+ def get_property_columns_of_type(self, t: Level) -> List[ResultProperty]:
114
+ return [c for c in self.result_properties if c.level == t]
115
+
116
+ def molecular_property_columns(self) -> List[ResultProperty]:
117
+ return self.get_property_columns_of_type("molecule")
118
+
119
+ def atom_property_columns(self) -> List[ResultProperty]:
120
+ return self.get_property_columns_of_type("atom")
121
+
122
+ def derivative_property_columns(self) -> List[ResultProperty]:
123
+ return self.get_property_columns_of_type("derivative")
124
+
125
+ def get_visible_properties(self, output_format: str) -> List[ResultProperty]:
126
+ return [p for p in self.result_properties if p.is_visible(output_format)]
127
+
128
+ @model_validator(mode="after")
129
+ @classmethod
130
+ def validate_model(cls, values: Any) -> Any:
131
+ assert isinstance(values, Module)
132
+
133
+ num_atom_properties = len(values.get_property_columns_of_type("atom"))
134
+ num_derivative_properties = len(values.get_property_columns_of_type("derivative"))
135
+ task = values.task
136
+ if task is None:
137
+ # if task is not specified, try to derive it from the result_properties
138
+ if num_atom_properties > 0:
139
+ task = "atom_property_prediction"
140
+ elif num_derivative_properties > 0:
141
+ task = "derivative_property_prediction"
142
+ else:
143
+ task = "molecular_property_prediction"
144
+
145
+ values.task = task
146
+ else:
147
+ # if task is specified, check if it is consistent with the result_properties
148
+ if num_atom_properties > 0:
149
+ assert (
150
+ task == "atom_property_prediction"
151
+ ), "Task should be atom_property_prediction if atom properties are present."
152
+ elif num_derivative_properties > 0:
153
+ assert task == "derivative_property_prediction", (
154
+ "Task should be derivative_property_prediction if derivative properties "
155
+ "are present."
156
+ )
157
+ else:
158
+ assert task == "molecular_property_prediction", (
159
+ "Task should be molecular_property_prediction if no atom or derivative "
160
+ "properties are present."
161
+ )
162
+
163
+ # check that a module can only predict atom or derivative properties, not both
164
+ assert (
165
+ num_atom_properties == 0 or num_derivative_properties == 0
166
+ ), "A module can only predict atom or derivative properties, not both."
167
+
168
+ # check that two properties with the same group appear next to each other
169
+ groups = [p.group for p in values.result_properties if p.group is not None]
170
+ for group in groups:
171
+ indices = [i for i, p in enumerate(values.result_properties) if p.group == group]
172
+ for i, j in zip(indices[:-1], indices[1:]):
173
+ assert i + 1 == j, (
174
+ f"Properties with the same group should appear next to each other, "
175
+ f"but group {group} appears at incides {i} and {j}."
176
+ )
177
+
178
+ return values
@@ -32,4 +32,4 @@ class PackageConfiguration(Configuration):
32
32
  self.config = DictConfiguration({})
33
33
 
34
34
  def _get_dict(self) -> dict:
35
- return self.config.get_dict()
35
+ return self.config._get_dict()
@@ -34,4 +34,4 @@ class SearchYamlConfiguration(DictConfiguration):
34
34
  logger.info(f"Found configuration file in project directory: {default_config_file}")
35
35
  config = YamlConfiguration(default_config_file, base_path)
36
36
 
37
- super().__init__(config.get_dict())
37
+ super().__init__(config._get_dict())
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
  from typing import Any, Iterator, Tuple, Union
4
4
 
5
5
  from .reader import ExploreCallable, MoleculeEntry, Reader
6
+ from .reader_config import ReaderConfig
6
7
 
7
8
  __all__ = ["FileReader"]
8
9
 
@@ -48,3 +49,5 @@ class FileReader(Reader):
48
49
 
49
50
  def __repr__(self) -> str:
50
51
  return f"FileReader(data_dir={self.data_dir})"
52
+
53
+ config = ReaderConfig(examples=["compounds.smiles"])
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["InchiReader"]
11
12
 
@@ -55,3 +56,9 @@ class InchiReader(Reader):
55
56
 
56
57
  def __repr__(self) -> str:
57
58
  return "InchiReader()"
59
+
60
+ config = ReaderConfig(
61
+ examples=[
62
+ "InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20/h3-11,13,19H,2H2,1H3"
63
+ ]
64
+ )
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from abc import ABC, ABCMeta, abstractmethod
5
- from functools import partial
4
+ from abc import ABC, abstractmethod
6
5
  from typing import Any, Callable, Iterator, List, NamedTuple, Optional, Tuple, Type
7
6
 
8
7
  from rdkit.Chem import Mol
9
- from typing_extensions import Protocol
10
8
 
11
9
  from ..problem import Problem
12
10
  from ..util import call_with_mappings
@@ -25,34 +23,30 @@ class MoleculeEntry(NamedTuple):
25
23
  ExploreCallable = Callable[[Any], Iterator[MoleculeEntry]]
26
24
 
27
25
 
28
- class ReaderFactory(Protocol):
29
- def __call__(self, config: dict, *args: Any, **kwargs: Any) -> Reader: ...
26
+ _factories: List[Type["Reader"]] = []
30
27
 
31
28
 
32
- _factories: List[ReaderFactory] = []
33
-
34
-
35
- class ReaderMeta(ABCMeta):
36
- def __init__(cls, name: str, bases: Tuple[type, ...], dct: dict) -> None:
37
- super().__init__(name, bases, dct)
38
-
39
- if not inspect.isabstract(cls):
40
- _factories.append(
41
- partial(
42
- call_with_mappings,
43
- cls,
44
- )
45
- )
46
-
47
-
48
- class Reader(ABC, metaclass=ReaderMeta):
29
+ class Reader(ABC):
49
30
  def __init__(self) -> None:
50
31
  super().__init__()
51
32
 
33
+ @classmethod
34
+ def __init_subclass__(
35
+ cls,
36
+ **kwargs: Any,
37
+ ) -> None:
38
+ super().__init_subclass__(**kwargs)
39
+ if not inspect.isabstract(cls):
40
+ _factories.append(cls)
41
+
52
42
  @abstractmethod
53
43
  def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
54
44
  pass
55
45
 
46
+ @classmethod
47
+ def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
48
+ return _factories
49
+
56
50
  @classmethod
57
51
  def get_readers(cls: Type[Reader], **kwargs: Any) -> List[Reader]:
58
- return [factory(kwargs) for factory in _factories]
52
+ return [call_with_mappings(factory, kwargs) for factory in _factories]
@@ -0,0 +1,7 @@
1
+ from typing import List, TypedDict
2
+
3
+ __all__ = ["ReaderConfig"]
4
+
5
+
6
+ class ReaderConfig(TypedDict):
7
+ examples: List[str]
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["SmilesReader"]
11
12
 
@@ -65,3 +66,5 @@ class SmilesReader(Reader):
65
66
 
66
67
  def __repr__(self) -> str:
67
68
  return "SmilesReader()"
69
+
70
+ config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])
@@ -9,10 +9,12 @@ __all__ = ["ConvertRepresentationsStep"]
9
9
  class ConvertRepresentationsStep(MapStep):
10
10
  def __init__(self, result_properties: list, output_format: str, **kwargs: Any) -> None:
11
11
  super().__init__()
12
- self._property_type_map = {
13
- p["name"]: Converter.get_converter(p.get("type"), output_format, **kwargs)
12
+ self._converter_map = {
13
+ p.name: Converter.get_converter(p.type, output_format, property=p, **kwargs)
14
14
  for p in result_properties
15
15
  }
16
16
 
17
17
  def _process(self, record: dict) -> dict:
18
- return {k: self._property_type_map[k].convert(v, record) for k, v in record.items()}
18
+ return {
19
+ k: self._converter_map[k].convert(input=v, context=record) for k, v in record.items()
20
+ }
@@ -12,7 +12,9 @@ logger = logging.getLogger(__name__)
12
12
  class EnforceSchemaStep(Step):
13
13
  def __init__(self, config: Configuration, output_format: str) -> None:
14
14
  super().__init__()
15
- self._property_names = [p["name"] for p in config.get_visible_properties(output_format)]
15
+ self._property_names = [
16
+ p.name for p in config.get_dict().get_visible_properties(output_format)
17
+ ]
16
18
 
17
19
  # check that properties are unique
18
20
  if len(self._property_names) != len(set(self._property_names)):
@@ -6,6 +6,7 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple
6
6
  from rdkit.Chem import Mol
7
7
  from stringcase import snakecase # type: ignore
8
8
 
9
+ from ..config import JobParameter
9
10
  from ..problem import Problem
10
11
  from ..steps import OutputStep, Step
11
12
  from ..util import call_with_mappings
@@ -96,7 +97,7 @@ class Model(ABC):
96
97
 
97
98
  description = property(fget=lambda self: self._get_description())
98
99
 
99
- def _get_job_parameters(self) -> List[dict]:
100
+ def _get_job_parameters(self) -> List[JobParameter]:
100
101
  return []
101
102
 
102
103
  job_parameters = property(fget=lambda self: self._get_job_parameters())
@@ -15,7 +15,7 @@ class ReadInputStep(Step):
15
15
  def _run(self, source: Iterator[dict]) -> Iterator[dict]:
16
16
  for entry in self._explorer.explore(self._input):
17
17
  record = dict(
18
- raw_input=entry.raw_input,
18
+ input_text=entry.raw_input,
19
19
  source=entry.source,
20
20
  input_type=entry.input_type,
21
21
  input_mol=entry.mol,
@@ -7,7 +7,9 @@ from ..config import (
7
7
  Configuration,
8
8
  DefaultConfiguration,
9
9
  DictConfiguration,
10
+ JobParameter,
10
11
  MergedConfiguration,
12
+ Module,
11
13
  PackageConfiguration,
12
14
  SearchYamlConfiguration,
13
15
  )
@@ -64,7 +66,7 @@ class SimpleModel(Model):
64
66
  return [
65
67
  EnforceSchemaStep(self._get_config(), output_format),
66
68
  ConvertRepresentationsStep(
67
- self.get_config().get("result_properties", []), output_format, **kwargs
69
+ self.get_config().result_properties, output_format, **kwargs
68
70
  ),
69
71
  WriteOutputStep(output_format, **kwargs),
70
72
  ]
@@ -105,7 +107,7 @@ class SimpleModel(Model):
105
107
  ]
106
108
 
107
109
  # add default properties mol_id, raw_input, etc.
108
- task = MergedConfiguration(*configs).get_task()
110
+ task = MergedConfiguration(*configs).get_dict().task
109
111
 
110
112
  # check whether we need to add to add a property "atom_id" or "derivative_id"
111
113
  task_based_property = []
@@ -121,9 +123,9 @@ class SimpleModel(Model):
121
123
  default_properties_start = [
122
124
  {"name": "mol_id", "type": "integer"},
123
125
  *task_based_property,
124
- {"name": "raw_input", "type": "string"},
126
+ {"name": "input_text", "type": "string"},
125
127
  {"name": "input_type", "type": "string"},
126
- {"name": "source"},
128
+ {"name": "source", "type": "string"},
127
129
  {"name": "name", "type": "string"},
128
130
  {"name": "input_mol", "type": "mol"},
129
131
  {"name": "preprocessed_mol", "type": "mol"},
@@ -141,23 +143,23 @@ class SimpleModel(Model):
141
143
 
142
144
  return MergedConfiguration(*configs)
143
145
 
144
- def get_config(self) -> dict:
146
+ def get_config(self) -> Module:
145
147
  return self._get_config().get_dict()
146
148
 
147
149
  def _get_batch_size(self) -> int:
148
150
  default = super()._get_batch_size()
149
- return self.get_config().get("batch_size", default)
151
+ return self.get_config().batch_size or default
150
152
 
151
153
  def _get_name(self) -> str:
152
154
  default = super()._get_name()
153
- return self.get_config().get("name", default)
155
+ return self.get_config().name or default
154
156
 
155
157
  def _get_description(self) -> str:
156
158
  default = super()._get_description()
157
- return self.get_config().get("description", default)
159
+ return self.get_config().description or default
158
160
 
159
- def _get_job_parameters(self) -> List[dict]:
160
- return super()._get_job_parameters() + self.get_config().get("job_parameters", [])
161
+ def _get_job_parameters(self) -> List[JobParameter]:
162
+ return super()._get_job_parameters() + self.get_config().job_parameters
161
163
 
162
164
 
163
165
  class CustomPreprocessingStep(PreprocessingStep):
@@ -46,6 +46,13 @@ class Writer(ABC):
46
46
  raise ValueError(f"Unknown output format: {output_format}")
47
47
  return _factories[output_format](kwargs)
48
48
 
49
+ @classmethod
50
+ def get_writers(cls, **kwargs: Any) -> Dict[str, Writer]:
51
+ return {
52
+ output_format: cls.get_writer(output_format, **kwargs)
53
+ for output_format in _factories.keys()
54
+ }
55
+
49
56
  @classmethod
50
57
  def get_output_formats(cls) -> List[str]:
51
58
  return list(_factories.keys())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -57,6 +57,7 @@ Requires-Dist: filetype~=1.2.0
57
57
  Requires-Dist: rich-click>=1.7.1
58
58
  Requires-Dist: stringcase>=1.2.0
59
59
  Requires-Dist: decorator>=5.1.1
60
+ Requires-Dist: pydantic>=2
60
61
  Requires-Dist: importlib-resources>=5; python_version < "3.9"
61
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
62
63
  Provides-Extra: dev
@@ -77,7 +78,7 @@ Requires-Dist: pytest; extra == "test"
77
78
  Requires-Dist: pytest-sugar; extra == "test"
78
79
  Requires-Dist: pytest-cov; extra == "test"
79
80
  Requires-Dist: pytest-asyncio; extra == "test"
80
- Requires-Dist: pytest-bdd; extra == "test"
81
+ Requires-Dist: pytest-bdd==7.3.0; extra == "test"
81
82
  Requires-Dist: pytest-mock; extra == "test"
82
83
  Requires-Dist: pytest-watcher; extra == "test"
83
84
  Requires-Dist: hypothesis; extra == "test"
@@ -16,6 +16,7 @@ nerdd_module/config/configuration.py
16
16
  nerdd_module/config/default_configuration.py
17
17
  nerdd_module/config/dict_configuration.py
18
18
  nerdd_module/config/merged_configuration.py
19
+ nerdd_module/config/models.py
19
20
  nerdd_module/config/package_configuration.py
20
21
  nerdd_module/config/search_yaml_configuration.py
21
22
  nerdd_module/config/yaml_configuration.py
@@ -31,6 +32,7 @@ nerdd_module/input/inchi_reader.py
31
32
  nerdd_module/input/list_reader.py
32
33
  nerdd_module/input/mol_reader.py
33
34
  nerdd_module/input/reader.py
35
+ nerdd_module/input/reader_config.py
34
36
  nerdd_module/input/sdf_reader.py
35
37
  nerdd_module/input/smiles_reader.py
36
38
  nerdd_module/input/string_reader.py
@@ -4,6 +4,7 @@ filetype~=1.2.0
4
4
  rich-click>=1.7.1
5
5
  stringcase>=1.2.0
6
6
  decorator>=5.1.1
7
+ pydantic>=2
7
8
 
8
9
  [:python_version < "3.10"]
9
10
  importlib-metadata>=4.6
@@ -37,7 +38,7 @@ pytest
37
38
  pytest-sugar
38
39
  pytest-cov
39
40
  pytest-asyncio
40
- pytest-bdd
41
+ pytest-bdd==7.3.0
41
42
  pytest-mock
42
43
  pytest-watcher
43
44
  hypothesis
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nerdd-module"
7
- version = "0.3.9"
7
+ version = "0.3.10"
8
8
  description = "Base package to create NERDD modules"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -17,6 +17,7 @@ dependencies = [
17
17
  "rich-click>=1.7.1",
18
18
  "stringcase>=1.2.0",
19
19
  "decorator>=5.1.1",
20
+ "pydantic>=2",
20
21
  # install importlib-resources and importlib-metadata for old Python versions
21
22
  "importlib-resources>=5; python_version<'3.9'",
22
23
  "importlib-metadata>=4.6; python_version<'3.10'",
@@ -76,7 +77,7 @@ test = [
76
77
  "pytest-sugar",
77
78
  "pytest-cov",
78
79
  "pytest-asyncio",
79
- "pytest-bdd",
80
+ "pytest-bdd==7.3.0",
80
81
  "pytest-mock",
81
82
  "pytest-watcher",
82
83
  "hypothesis",
@@ -125,10 +126,12 @@ ignore = [
125
126
  ]
126
127
 
127
128
  [tool.mypy]
129
+ # type stubs of packages lacking types are in the "typings" directory
128
130
  mypy_path="typings"
129
131
  # strict=true
130
132
  disallow_untyped_defs = true
131
- # no_implicit_optional = True
133
+ # disallow_any_unimported = true
134
+ no_implicit_optional = true
132
135
  # check_untyped_defs = True
133
136
  # show_error_codes = True
134
137
 
@@ -1,91 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import List, Optional
3
-
4
- __all__ = ["Configuration"]
5
-
6
-
7
- def get_property_columns_of_type(config: dict, t: str) -> List[dict]:
8
- return [c for c in config["result_properties"] if c.get("level", "molecule") == t]
9
-
10
-
11
- def is_visible(result_property: dict, output_format: str) -> bool:
12
- formats = result_property.get("formats", {})
13
-
14
- if isinstance(formats, list):
15
- return output_format in formats
16
- elif isinstance(formats, dict):
17
- include = formats.get("include", "*")
18
- exclude = formats.get("exclude", [])
19
- assert include == "*" or isinstance(
20
- include, list
21
- ), f"Expected include to be a list or '*', got {include}"
22
- assert isinstance(exclude, list), f"Expected exclude to be a list, got {exclude}"
23
- return (include == "*" or output_format in include) and output_format not in exclude
24
- else:
25
- raise ValueError(
26
- f"Invalid formats declaration {formats} in result property " f"{result_property}"
27
- )
28
-
29
-
30
- class Configuration(ABC):
31
- def __init__(self) -> None:
32
- self._cached_config: Optional[dict] = None
33
-
34
- def get_dict(self) -> dict:
35
- if self._cached_config is None:
36
- config = self._get_dict()
37
-
38
- if "result_properties" not in config:
39
- config["result_properties"] = []
40
-
41
- # check that a module can only predict atom or derivative properties, not both
42
- num_atom_properties = len(get_property_columns_of_type(config, "atom"))
43
- num_derivative_properties = len(get_property_columns_of_type(config, "derivative"))
44
- assert (
45
- num_atom_properties == 0 or num_derivative_properties == 0
46
- ), "A module can only predict atom or derivative properties, not both."
47
-
48
- self._cached_config = config
49
-
50
- return self._cached_config
51
-
52
- @abstractmethod
53
- def _get_dict(self) -> dict:
54
- pass
55
-
56
- def is_empty(self) -> bool:
57
- return self.get_dict() == {}
58
-
59
- def molecular_property_columns(self) -> List[dict]:
60
- return get_property_columns_of_type(self.get_dict(), "molecule")
61
-
62
- def atom_property_columns(self) -> List[dict]:
63
- return get_property_columns_of_type(self.get_dict(), "atom")
64
-
65
- def derivative_property_columns(self) -> List[dict]:
66
- return get_property_columns_of_type(self.get_dict(), "derivative")
67
-
68
- def get_task(self) -> str:
69
- # if task is specified in the config, use that
70
- config = self.get_dict()
71
- if "task" in config:
72
- return config["task"]
73
-
74
- # try to derive the task from the result_properties
75
- num_atom_properties = len(self.atom_property_columns())
76
- num_derivative_properties = len(self.derivative_property_columns())
77
-
78
- if num_atom_properties > 0:
79
- return "atom_property_prediction"
80
- elif num_derivative_properties > 0:
81
- return "derivative_property_prediction"
82
- else:
83
- return "molecular_property_prediction"
84
-
85
- def get_visible_properties(self, output_format: str) -> List[dict]:
86
- return [
87
- p for p in self.get_dict().get("result_properties", []) if is_visible(p, output_format)
88
- ]
89
-
90
- def __repr__(self) -> str:
91
- return f"{self.__class__.__name__}({self._get_dict()})"
File without changes
File without changes
File without changes