nerdd-module 0.3.9__tar.gz → 0.3.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/PKG-INFO +4 -2
  2. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/cli.py +40 -28
  3. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/__init__.py +1 -0
  4. nerdd_module-0.3.11/nerdd_module/config/configuration.py +32 -0
  5. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/merged_configuration.py +1 -1
  6. nerdd_module-0.3.11/nerdd_module/config/models.py +180 -0
  7. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/package_configuration.py +1 -1
  8. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/search_yaml_configuration.py +1 -1
  9. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/file_reader.py +3 -0
  10. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/inchi_reader.py +7 -0
  11. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/reader.py +17 -23
  12. nerdd_module-0.3.11/nerdd_module/input/reader_config.py +9 -0
  13. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/smiles_reader.py +3 -0
  14. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/convert_representations_step.py +5 -3
  15. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/enforce_schema_step.py +3 -1
  16. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/model.py +2 -1
  17. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/read_input_step.py +1 -1
  18. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/simple_model.py +12 -10
  19. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/writer.py +7 -0
  20. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/polyfills/__init__.py +2 -0
  21. nerdd_module-0.3.11/nerdd_module/polyfills/literal.py +8 -0
  22. nerdd_module-0.3.11/nerdd_module/polyfills/typed_dict.py +8 -0
  23. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module.egg-info/PKG-INFO +4 -2
  24. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module.egg-info/SOURCES.txt +4 -0
  25. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module.egg-info/requires.txt +5 -1
  26. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/pyproject.toml +15 -11
  27. nerdd_module-0.3.9/nerdd_module/config/configuration.py +0 -91
  28. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/LICENSE +0 -0
  29. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/README.md +0 -0
  30. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/__init__.py +0 -0
  31. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/default_configuration.py +0 -0
  32. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/dict_configuration.py +0 -0
  33. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/config/yaml_configuration.py +0 -0
  34. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/converters/__init__.py +0 -0
  35. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/converters/converter.py +0 -0
  36. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/converters/identity_converter.py +0 -0
  37. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/__init__.py +0 -0
  38. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/depth_first_explorer.py +0 -0
  39. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/explorer.py +0 -0
  40. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/gzip_reader.py +0 -0
  41. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/list_reader.py +0 -0
  42. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/mol_reader.py +0 -0
  43. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/sdf_reader.py +0 -0
  44. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/string_reader.py +0 -0
  45. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/tar_reader.py +0 -0
  46. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/input/zip_reader.py +0 -0
  47. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/__init__.py +0 -0
  48. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/assign_mol_id_step.py +0 -0
  49. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/assign_name_step.py +0 -0
  50. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/model/write_output_step.py +0 -0
  51. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/__init__.py +0 -0
  52. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/csv_writer.py +0 -0
  53. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/file_writer.py +0 -0
  54. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/iterator_writer.py +0 -0
  55. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/pandas_writer.py +0 -0
  56. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/record_list_writer.py +0 -0
  57. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/output/sdf_writer.py +0 -0
  58. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/polyfills/files.py +0 -0
  59. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/polyfills/get_entry_points.py +0 -0
  60. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/polyfills/types.py +0 -0
  61. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/polyfills/version.py +0 -0
  62. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/__init__.py +0 -0
  63. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  64. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
  65. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  66. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  67. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
  68. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  69. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/preprocessing/sanitize.py +0 -0
  70. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/problem.py +0 -0
  71. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/py.typed +0 -0
  72. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/steps/__init__.py +0 -0
  73. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/steps/map_step.py +0 -0
  74. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/steps/output_step.py +0 -0
  75. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/steps/step.py +0 -0
  76. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/__init__.py +0 -0
  77. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/checks.py +0 -0
  78. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
  79. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/models/MolWeightModel.py +0 -0
  80. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/models/__init__.py +0 -0
  81. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/predictions.py +0 -0
  82. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
  83. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/preprocessing/__init__.py +0 -0
  84. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/tests/representations.py +0 -0
  85. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/util/__init__.py +0 -0
  86. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/util/call_with_mappings.py +0 -0
  87. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/util/package.py +0 -0
  88. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module/version.py +0 -0
  89. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module.egg-info/dependency_links.txt +0 -0
  90. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/nerdd_module.egg-info/top_level.txt +0 -0
  91. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/setup.cfg +0 -0
  92. {nerdd_module-0.3.9 → nerdd_module-0.3.11}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -57,8 +57,10 @@ Requires-Dist: filetype~=1.2.0
57
57
  Requires-Dist: rich-click>=1.7.1
58
58
  Requires-Dist: stringcase>=1.2.0
59
59
  Requires-Dist: decorator>=5.1.1
60
+ Requires-Dist: pydantic>=2
60
61
  Requires-Dist: importlib-resources>=5; python_version < "3.9"
61
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
63
+ Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
62
64
  Provides-Extra: dev
63
65
  Requires-Dist: mypy==1.13.0; extra == "dev"
64
66
  Requires-Dist: ruff==0.7.1; extra == "dev"
@@ -77,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
77
79
  Requires-Dist: pytest-sugar; extra == "test"
78
80
  Requires-Dist: pytest-cov; extra == "test"
79
81
  Requires-Dist: pytest-asyncio; extra == "test"
80
- Requires-Dist: pytest-bdd; extra == "test"
82
+ Requires-Dist: pytest-bdd==7.3.0; extra == "test"
81
83
  Requires-Dist: pytest-mock; extra == "test"
82
84
  Requires-Dist: pytest-watcher; extra == "test"
83
85
  Requires-Dist: hypothesis; extra == "test"
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  import sys
3
4
  from typing import Any, Callable
4
5
 
@@ -6,7 +7,10 @@ import rich_click as click
6
7
  from decorator import decorator
7
8
  from stringcase import spinalcase
8
9
 
10
+ from .config import JobParameter
11
+ from .input import Reader
9
12
  from .model import Model
13
+ from .output import FileWriter, Writer
10
14
 
11
15
  __all__ = ["auto_cli"]
12
16
 
@@ -21,25 +25,21 @@ Note that input formats shouldn't be mixed.
21
25
  """
22
26
 
23
27
 
24
- def infer_click_type(param: dict) -> click.ParamType:
25
- if "choices" in param:
26
- choices = [c["value"] for c in param["choices"]]
28
+ def infer_click_type(param: JobParameter) -> click.ParamType:
29
+ if param.choices is not None:
30
+ choices = [c.value for c in param.choices]
27
31
  return click.Choice(choices)
28
32
 
29
33
  type_map = {
30
34
  "float": click.FLOAT,
31
- "int": click.INT,
32
- "str": click.STRING,
35
+ "integer": click.INT,
36
+ "string": click.STRING,
33
37
  "bool": click.BOOL,
34
38
  }
35
39
 
36
- if "type" not in param:
37
- raise ValueError(f"Parameter {param['name']} does not have a type")
38
-
39
- t = param["type"]
40
-
40
+ t = param.type
41
41
  if t not in type_map:
42
- raise ValueError(f"Unknown type {t} for parameter {param['name']}")
42
+ raise ValueError(f"Unknown type {t} for parameter {param.name}")
43
43
 
44
44
  return type_map[t]
45
45
 
@@ -47,7 +47,7 @@ def infer_click_type(param: dict) -> click.ParamType:
47
47
  @decorator
48
48
  def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
49
49
  # infer the command name
50
- # command_name = os.path.basename(sys.argv[0])
50
+ command_name = os.path.basename(sys.argv[0])
51
51
 
52
52
  # get the model
53
53
  model = f()
@@ -59,21 +59,33 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
59
59
  description=model.description, input_format_list=input_format_list
60
60
  )
61
61
 
62
- output_format_list = ["sdf", "csv"]
62
+ output_format_list = [
63
+ output_format
64
+ for output_format, writer in Writer.get_writers(output_file=None).items()
65
+ if isinstance(writer, FileWriter)
66
+ ]
63
67
 
64
68
  # compose footer with examples
65
- # TODO: add examples
66
- # examples = []
67
- # if "example_smiles" in config:
68
- # examples.append(config["example_smiles"])
69
-
70
- # if len(examples) > 0:
71
- # footer = "Examples:\n"
72
- # for example in examples:
73
- # footer += f'* {command_name} "{example}"\n'
74
- # else:
75
- # footer = ""
76
- footer = ""
69
+ examples = []
70
+ if hasattr(model, "get_config"):
71
+ example_smiles = model.get_config().example_smiles
72
+ if example_smiles is not None:
73
+ examples.append(example_smiles)
74
+
75
+ for ReaderClass in Reader.get_reader_mapping():
76
+ if hasattr(ReaderClass, "config"):
77
+ reader_examples = ReaderClass.config.get("examples", [])
78
+ for example in reader_examples:
79
+ # check if example fits on one line
80
+ if len(example) < 120 and "\n" not in example:
81
+ examples.append(example)
82
+
83
+ if len(examples) > 0:
84
+ footer = "Examples:\n"
85
+ for example in examples:
86
+ footer += f'* {command_name} "{example}"\n'
87
+ else:
88
+ footer = ""
77
89
 
78
90
  #
79
91
  # Define the CLI entry point
@@ -107,12 +119,12 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
107
119
  #
108
120
  for param in model.job_parameters:
109
121
  # convert parameter name to spinal case (e.g. "max_confs" -> "max-confs")
110
- param_name = spinalcase(param["name"])
122
+ param_name = spinalcase(param.name)
111
123
  main = click.option(
112
124
  f"--{param_name}",
113
- default=param.get("default", None),
125
+ default=param.default,
114
126
  type=infer_click_type(param),
115
- help=param.get("help_text", None),
127
+ help=param.help_text,
116
128
  )(main)
117
129
 
118
130
  #
@@ -2,6 +2,7 @@ from .configuration import *
2
2
  from .default_configuration import *
3
3
  from .dict_configuration import *
4
4
  from .merged_configuration import *
5
+ from .models import *
5
6
  from .package_configuration import *
6
7
  from .search_yaml_configuration import *
7
8
  from .yaml_configuration import *
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from .models import Module
5
+
6
+ __all__ = ["Configuration"]
7
+
8
+
9
+ class Configuration(ABC):
10
+ def __init__(self) -> None:
11
+ self._cached_config: Optional[Module] = None
12
+
13
+ def get_dict(self) -> Module:
14
+ if self._cached_config is None:
15
+ config = self._get_dict()
16
+
17
+ # validate the config
18
+ module = Module(**config)
19
+
20
+ self._cached_config = module
21
+
22
+ return self._cached_config
23
+
24
+ @abstractmethod
25
+ def _get_dict(self) -> dict:
26
+ pass
27
+
28
+ def is_empty(self) -> bool:
29
+ return self.get_dict() == {}
30
+
31
+ def __repr__(self) -> str:
32
+ return f"{self.__class__.__name__}({self._get_dict()})"
@@ -41,4 +41,4 @@ def merge(*args: dict) -> dict:
41
41
 
42
42
  class MergedConfiguration(DictConfiguration):
43
43
  def __init__(self, *configs: Configuration):
44
- super().__init__(merge(*[c.get_dict() for c in configs]))
44
+ super().__init__(merge(*[c._get_dict() for c in configs]))
@@ -0,0 +1,180 @@
1
+ from typing import Any, List, Optional, Union
2
+
3
+ from pydantic import BaseModel, model_validator
4
+
5
+ from ..polyfills import Literal
6
+
7
+
8
+ class Partner(BaseModel):
9
+ name: str
10
+ logo: str
11
+ url: Optional[str] = None
12
+
13
+
14
+ class Author(BaseModel):
15
+ """
16
+ Author information
17
+
18
+ Attributes:
19
+ first_name : str
20
+ First name of the author.
21
+ last_name : str
22
+ Last name of the author.
23
+ email : Optional[str]
24
+ Email of the author. If provided, the author is a corresponding author.
25
+ """
26
+
27
+ first_name: str
28
+ last_name: str
29
+ email: Optional[str] = None
30
+
31
+
32
+ class Publication(BaseModel):
33
+ title: str
34
+ authors: List[Author] = []
35
+ journal: str
36
+ year: int
37
+ doi: Optional[str]
38
+
39
+
40
+ class JobParameterChoice(BaseModel):
41
+ value: str
42
+ label: Optional[str] = None
43
+
44
+
45
+ class JobParameter(BaseModel):
46
+ name: str
47
+ type: str
48
+ visible_name: Optional[str] = None
49
+ help_text: Optional[str] = None
50
+ default: Optional[str] = None
51
+ required: bool = False
52
+ choices: Optional[List[JobParameterChoice]] = None
53
+
54
+
55
+ Task = Literal[
56
+ "molecular_property_prediction",
57
+ "atom_property_prediction",
58
+ "derivative_property_prediction",
59
+ ]
60
+ Level = Literal["molecule", "atom", "derivative"]
61
+
62
+ FormatSpec = Union[List[str], str]
63
+
64
+
65
+ class IncludeExcludeFormatSpec(BaseModel):
66
+ include: Optional[FormatSpec]
67
+ exclude: Optional[FormatSpec]
68
+
69
+
70
+ class ResultProperty(BaseModel):
71
+ name: str
72
+ type: str
73
+ visible_name: Optional[str] = None
74
+ help_text: Optional[str] = None
75
+ sortable: bool = False
76
+ group: Optional[str] = None
77
+ level: Level = "molecule"
78
+ formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
79
+ representation: Optional[str] = None
80
+
81
+ def is_visible(self, output_format: str) -> bool:
82
+ formats = self.formats
83
+
84
+ if formats is None:
85
+ return True
86
+ elif isinstance(formats, list):
87
+ return output_format in formats
88
+ elif isinstance(formats, IncludeExcludeFormatSpec):
89
+ include = formats.include
90
+ exclude = formats.exclude or []
91
+ return (include is None or output_format in include) and output_format not in exclude
92
+ else:
93
+ raise ValueError(f"Invalid formats declaration {formats} in result property {self}")
94
+
95
+
96
+ class Module(BaseModel):
97
+ task: Optional[Task] = None
98
+ rank: Optional[int] = None
99
+ name: Optional[str] = None
100
+ batch_size: int = 100
101
+ version: Optional[str] = None
102
+ visible_name: Optional[str] = None
103
+ logo: Optional[str] = None
104
+ logo_title: Optional[str] = None
105
+ logo_caption: Optional[str] = None
106
+ example_smiles: Optional[str] = None
107
+ title: Optional[str] = None
108
+ description: Optional[str] = None
109
+ partners: List[Partner] = []
110
+ publications: List[Publication] = []
111
+ about: Optional[str] = None
112
+ job_parameters: List[JobParameter] = []
113
+ result_properties: List[ResultProperty] = []
114
+
115
+ def get_property_columns_of_type(self, t: Level) -> List[ResultProperty]:
116
+ return [c for c in self.result_properties if c.level == t]
117
+
118
+ def molecular_property_columns(self) -> List[ResultProperty]:
119
+ return self.get_property_columns_of_type("molecule")
120
+
121
+ def atom_property_columns(self) -> List[ResultProperty]:
122
+ return self.get_property_columns_of_type("atom")
123
+
124
+ def derivative_property_columns(self) -> List[ResultProperty]:
125
+ return self.get_property_columns_of_type("derivative")
126
+
127
+ def get_visible_properties(self, output_format: str) -> List[ResultProperty]:
128
+ return [p for p in self.result_properties if p.is_visible(output_format)]
129
+
130
+ @model_validator(mode="after")
131
+ @classmethod
132
+ def validate_model(cls, values: Any) -> Any:
133
+ assert isinstance(values, Module)
134
+
135
+ num_atom_properties = len(values.get_property_columns_of_type("atom"))
136
+ num_derivative_properties = len(values.get_property_columns_of_type("derivative"))
137
+ task = values.task
138
+ if task is None:
139
+ # if task is not specified, try to derive it from the result_properties
140
+ if num_atom_properties > 0:
141
+ task = "atom_property_prediction"
142
+ elif num_derivative_properties > 0:
143
+ task = "derivative_property_prediction"
144
+ else:
145
+ task = "molecular_property_prediction"
146
+
147
+ values.task = task
148
+ else:
149
+ # if task is specified, check if it is consistent with the result_properties
150
+ if num_atom_properties > 0:
151
+ assert (
152
+ task == "atom_property_prediction"
153
+ ), "Task should be atom_property_prediction if atom properties are present."
154
+ elif num_derivative_properties > 0:
155
+ assert task == "derivative_property_prediction", (
156
+ "Task should be derivative_property_prediction if derivative properties "
157
+ "are present."
158
+ )
159
+ else:
160
+ assert task == "molecular_property_prediction", (
161
+ "Task should be molecular_property_prediction if no atom or derivative "
162
+ "properties are present."
163
+ )
164
+
165
+ # check that a module can only predict atom or derivative properties, not both
166
+ assert (
167
+ num_atom_properties == 0 or num_derivative_properties == 0
168
+ ), "A module can only predict atom or derivative properties, not both."
169
+
170
+ # check that two properties with the same group appear next to each other
171
+ groups = [p.group for p in values.result_properties if p.group is not None]
172
+ for group in groups:
173
+ indices = [i for i, p in enumerate(values.result_properties) if p.group == group]
174
+ for i, j in zip(indices[:-1], indices[1:]):
175
+ assert i + 1 == j, (
176
+ f"Properties with the same group should appear next to each other, "
177
+ f"but group {group} appears at incides {i} and {j}."
178
+ )
179
+
180
+ return values
@@ -32,4 +32,4 @@ class PackageConfiguration(Configuration):
32
32
  self.config = DictConfiguration({})
33
33
 
34
34
  def _get_dict(self) -> dict:
35
- return self.config.get_dict()
35
+ return self.config._get_dict()
@@ -34,4 +34,4 @@ class SearchYamlConfiguration(DictConfiguration):
34
34
  logger.info(f"Found configuration file in project directory: {default_config_file}")
35
35
  config = YamlConfiguration(default_config_file, base_path)
36
36
 
37
- super().__init__(config.get_dict())
37
+ super().__init__(config._get_dict())
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
  from typing import Any, Iterator, Tuple, Union
4
4
 
5
5
  from .reader import ExploreCallable, MoleculeEntry, Reader
6
+ from .reader_config import ReaderConfig
6
7
 
7
8
  __all__ = ["FileReader"]
8
9
 
@@ -48,3 +49,5 @@ class FileReader(Reader):
48
49
 
49
50
  def __repr__(self) -> str:
50
51
  return f"FileReader(data_dir={self.data_dir})"
52
+
53
+ config = ReaderConfig(examples=["compounds.smiles"])
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["InchiReader"]
11
12
 
@@ -55,3 +56,9 @@ class InchiReader(Reader):
55
56
 
56
57
  def __repr__(self) -> str:
57
58
  return "InchiReader()"
59
+
60
+ config = ReaderConfig(
61
+ examples=[
62
+ "InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20/h3-11,13,19H,2H2,1H3"
63
+ ]
64
+ )
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from abc import ABC, ABCMeta, abstractmethod
5
- from functools import partial
4
+ from abc import ABC, abstractmethod
6
5
  from typing import Any, Callable, Iterator, List, NamedTuple, Optional, Tuple, Type
7
6
 
8
7
  from rdkit.Chem import Mol
9
- from typing_extensions import Protocol
10
8
 
11
9
  from ..problem import Problem
12
10
  from ..util import call_with_mappings
@@ -25,34 +23,30 @@ class MoleculeEntry(NamedTuple):
25
23
  ExploreCallable = Callable[[Any], Iterator[MoleculeEntry]]
26
24
 
27
25
 
28
- class ReaderFactory(Protocol):
29
- def __call__(self, config: dict, *args: Any, **kwargs: Any) -> Reader: ...
26
+ _factories: List[Type["Reader"]] = []
30
27
 
31
28
 
32
- _factories: List[ReaderFactory] = []
33
-
34
-
35
- class ReaderMeta(ABCMeta):
36
- def __init__(cls, name: str, bases: Tuple[type, ...], dct: dict) -> None:
37
- super().__init__(name, bases, dct)
38
-
39
- if not inspect.isabstract(cls):
40
- _factories.append(
41
- partial(
42
- call_with_mappings,
43
- cls,
44
- )
45
- )
46
-
47
-
48
- class Reader(ABC, metaclass=ReaderMeta):
29
+ class Reader(ABC):
49
30
  def __init__(self) -> None:
50
31
  super().__init__()
51
32
 
33
+ @classmethod
34
+ def __init_subclass__(
35
+ cls,
36
+ **kwargs: Any,
37
+ ) -> None:
38
+ super().__init_subclass__(**kwargs)
39
+ if not inspect.isabstract(cls):
40
+ _factories.append(cls)
41
+
52
42
  @abstractmethod
53
43
  def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
54
44
  pass
55
45
 
46
+ @classmethod
47
+ def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
48
+ return _factories
49
+
56
50
  @classmethod
57
51
  def get_readers(cls: Type[Reader], **kwargs: Any) -> List[Reader]:
58
- return [factory(kwargs) for factory in _factories]
52
+ return [call_with_mappings(factory, kwargs) for factory in _factories]
@@ -0,0 +1,9 @@
1
+ from typing import List
2
+
3
+ from ..polyfills import TypedDict
4
+
5
+ __all__ = ["ReaderConfig"]
6
+
7
+
8
+ class ReaderConfig(TypedDict):
9
+ examples: List[str]
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["SmilesReader"]
11
12
 
@@ -65,3 +66,5 @@ class SmilesReader(Reader):
65
66
 
66
67
  def __repr__(self) -> str:
67
68
  return "SmilesReader()"
69
+
70
+ config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])
@@ -9,10 +9,12 @@ __all__ = ["ConvertRepresentationsStep"]
9
9
  class ConvertRepresentationsStep(MapStep):
10
10
  def __init__(self, result_properties: list, output_format: str, **kwargs: Any) -> None:
11
11
  super().__init__()
12
- self._property_type_map = {
13
- p["name"]: Converter.get_converter(p.get("type"), output_format, **kwargs)
12
+ self._converter_map = {
13
+ p.name: Converter.get_converter(p.type, output_format, property=p, **kwargs)
14
14
  for p in result_properties
15
15
  }
16
16
 
17
17
  def _process(self, record: dict) -> dict:
18
- return {k: self._property_type_map[k].convert(v, record) for k, v in record.items()}
18
+ return {
19
+ k: self._converter_map[k].convert(input=v, context=record) for k, v in record.items()
20
+ }
@@ -12,7 +12,9 @@ logger = logging.getLogger(__name__)
12
12
  class EnforceSchemaStep(Step):
13
13
  def __init__(self, config: Configuration, output_format: str) -> None:
14
14
  super().__init__()
15
- self._property_names = [p["name"] for p in config.get_visible_properties(output_format)]
15
+ self._property_names = [
16
+ p.name for p in config.get_dict().get_visible_properties(output_format)
17
+ ]
16
18
 
17
19
  # check that properties are unique
18
20
  if len(self._property_names) != len(set(self._property_names)):
@@ -6,6 +6,7 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple
6
6
  from rdkit.Chem import Mol
7
7
  from stringcase import snakecase # type: ignore
8
8
 
9
+ from ..config import JobParameter
9
10
  from ..problem import Problem
10
11
  from ..steps import OutputStep, Step
11
12
  from ..util import call_with_mappings
@@ -96,7 +97,7 @@ class Model(ABC):
96
97
 
97
98
  description = property(fget=lambda self: self._get_description())
98
99
 
99
- def _get_job_parameters(self) -> List[dict]:
100
+ def _get_job_parameters(self) -> List[JobParameter]:
100
101
  return []
101
102
 
102
103
  job_parameters = property(fget=lambda self: self._get_job_parameters())
@@ -15,7 +15,7 @@ class ReadInputStep(Step):
15
15
  def _run(self, source: Iterator[dict]) -> Iterator[dict]:
16
16
  for entry in self._explorer.explore(self._input):
17
17
  record = dict(
18
- raw_input=entry.raw_input,
18
+ input_text=entry.raw_input,
19
19
  source=entry.source,
20
20
  input_type=entry.input_type,
21
21
  input_mol=entry.mol,
@@ -7,7 +7,9 @@ from ..config import (
7
7
  Configuration,
8
8
  DefaultConfiguration,
9
9
  DictConfiguration,
10
+ JobParameter,
10
11
  MergedConfiguration,
12
+ Module,
11
13
  PackageConfiguration,
12
14
  SearchYamlConfiguration,
13
15
  )
@@ -64,7 +66,7 @@ class SimpleModel(Model):
64
66
  return [
65
67
  EnforceSchemaStep(self._get_config(), output_format),
66
68
  ConvertRepresentationsStep(
67
- self.get_config().get("result_properties", []), output_format, **kwargs
69
+ self.get_config().result_properties, output_format, **kwargs
68
70
  ),
69
71
  WriteOutputStep(output_format, **kwargs),
70
72
  ]
@@ -105,7 +107,7 @@ class SimpleModel(Model):
105
107
  ]
106
108
 
107
109
  # add default properties mol_id, raw_input, etc.
108
- task = MergedConfiguration(*configs).get_task()
110
+ task = MergedConfiguration(*configs).get_dict().task
109
111
 
110
112
  # check whether we need to add to add a property "atom_id" or "derivative_id"
111
113
  task_based_property = []
@@ -121,9 +123,9 @@ class SimpleModel(Model):
121
123
  default_properties_start = [
122
124
  {"name": "mol_id", "type": "integer"},
123
125
  *task_based_property,
124
- {"name": "raw_input", "type": "string"},
126
+ {"name": "input_text", "type": "string"},
125
127
  {"name": "input_type", "type": "string"},
126
- {"name": "source"},
128
+ {"name": "source", "type": "string"},
127
129
  {"name": "name", "type": "string"},
128
130
  {"name": "input_mol", "type": "mol"},
129
131
  {"name": "preprocessed_mol", "type": "mol"},
@@ -141,23 +143,23 @@ class SimpleModel(Model):
141
143
 
142
144
  return MergedConfiguration(*configs)
143
145
 
144
- def get_config(self) -> dict:
146
+ def get_config(self) -> Module:
145
147
  return self._get_config().get_dict()
146
148
 
147
149
  def _get_batch_size(self) -> int:
148
150
  default = super()._get_batch_size()
149
- return self.get_config().get("batch_size", default)
151
+ return self.get_config().batch_size or default
150
152
 
151
153
  def _get_name(self) -> str:
152
154
  default = super()._get_name()
153
- return self.get_config().get("name", default)
155
+ return self.get_config().name or default
154
156
 
155
157
  def _get_description(self) -> str:
156
158
  default = super()._get_description()
157
- return self.get_config().get("description", default)
159
+ return self.get_config().description or default
158
160
 
159
- def _get_job_parameters(self) -> List[dict]:
160
- return super()._get_job_parameters() + self.get_config().get("job_parameters", [])
161
+ def _get_job_parameters(self) -> List[JobParameter]:
162
+ return super()._get_job_parameters() + self.get_config().job_parameters
161
163
 
162
164
 
163
165
  class CustomPreprocessingStep(PreprocessingStep):
@@ -46,6 +46,13 @@ class Writer(ABC):
46
46
  raise ValueError(f"Unknown output format: {output_format}")
47
47
  return _factories[output_format](kwargs)
48
48
 
49
+ @classmethod
50
+ def get_writers(cls, **kwargs: Any) -> Dict[str, Writer]:
51
+ return {
52
+ output_format: cls.get_writer(output_format, **kwargs)
53
+ for output_format in _factories.keys()
54
+ }
55
+
49
56
  @classmethod
50
57
  def get_output_formats(cls) -> List[str]:
51
58
  return list(_factories.keys())
@@ -1,4 +1,6 @@
1
1
  from .files import *
2
2
  from .get_entry_points import *
3
+ from .literal import *
4
+ from .typed_dict import *
3
5
  from .types import *
4
6
  from .version import *
@@ -0,0 +1,8 @@
1
+ import sys
2
+
3
+ __all__ = ["Literal"]
4
+
5
+ if sys.version_info < (3, 8):
6
+ from typing_extensions import Literal
7
+ else:
8
+ from typing import Literal
@@ -0,0 +1,8 @@
1
+ import sys
2
+
3
+ __all__ = ["TypedDict"]
4
+
5
+ if sys.version_info < (3, 8):
6
+ from typing_extensions import TypedDict
7
+ else:
8
+ from typing import TypedDict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -57,8 +57,10 @@ Requires-Dist: filetype~=1.2.0
57
57
  Requires-Dist: rich-click>=1.7.1
58
58
  Requires-Dist: stringcase>=1.2.0
59
59
  Requires-Dist: decorator>=5.1.1
60
+ Requires-Dist: pydantic>=2
60
61
  Requires-Dist: importlib-resources>=5; python_version < "3.9"
61
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
63
+ Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
62
64
  Provides-Extra: dev
63
65
  Requires-Dist: mypy==1.13.0; extra == "dev"
64
66
  Requires-Dist: ruff==0.7.1; extra == "dev"
@@ -77,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
77
79
  Requires-Dist: pytest-sugar; extra == "test"
78
80
  Requires-Dist: pytest-cov; extra == "test"
79
81
  Requires-Dist: pytest-asyncio; extra == "test"
80
- Requires-Dist: pytest-bdd; extra == "test"
82
+ Requires-Dist: pytest-bdd==7.3.0; extra == "test"
81
83
  Requires-Dist: pytest-mock; extra == "test"
82
84
  Requires-Dist: pytest-watcher; extra == "test"
83
85
  Requires-Dist: hypothesis; extra == "test"
@@ -16,6 +16,7 @@ nerdd_module/config/configuration.py
16
16
  nerdd_module/config/default_configuration.py
17
17
  nerdd_module/config/dict_configuration.py
18
18
  nerdd_module/config/merged_configuration.py
19
+ nerdd_module/config/models.py
19
20
  nerdd_module/config/package_configuration.py
20
21
  nerdd_module/config/search_yaml_configuration.py
21
22
  nerdd_module/config/yaml_configuration.py
@@ -31,6 +32,7 @@ nerdd_module/input/inchi_reader.py
31
32
  nerdd_module/input/list_reader.py
32
33
  nerdd_module/input/mol_reader.py
33
34
  nerdd_module/input/reader.py
35
+ nerdd_module/input/reader_config.py
34
36
  nerdd_module/input/sdf_reader.py
35
37
  nerdd_module/input/smiles_reader.py
36
38
  nerdd_module/input/string_reader.py
@@ -56,6 +58,8 @@ nerdd_module/output/writer.py
56
58
  nerdd_module/polyfills/__init__.py
57
59
  nerdd_module/polyfills/files.py
58
60
  nerdd_module/polyfills/get_entry_points.py
61
+ nerdd_module/polyfills/literal.py
62
+ nerdd_module/polyfills/typed_dict.py
59
63
  nerdd_module/polyfills/types.py
60
64
  nerdd_module/polyfills/version.py
61
65
  nerdd_module/preprocessing/__init__.py
@@ -4,10 +4,14 @@ filetype~=1.2.0
4
4
  rich-click>=1.7.1
5
5
  stringcase>=1.2.0
6
6
  decorator>=5.1.1
7
+ pydantic>=2
7
8
 
8
9
  [:python_version < "3.10"]
9
10
  importlib-metadata>=4.6
10
11
 
12
+ [:python_version < "3.8"]
13
+ typing_extensions>=4.0.1
14
+
11
15
  [:python_version < "3.9"]
12
16
  importlib-resources>=5
13
17
 
@@ -37,7 +41,7 @@ pytest
37
41
  pytest-sugar
38
42
  pytest-cov
39
43
  pytest-asyncio
40
- pytest-bdd
44
+ pytest-bdd==7.3.0
41
45
  pytest-mock
42
46
  pytest-watcher
43
47
  hypothesis
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nerdd-module"
7
- version = "0.3.9"
7
+ version = "0.3.11"
8
8
  description = "Base package to create NERDD modules"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -17,9 +17,11 @@ dependencies = [
17
17
  "rich-click>=1.7.1",
18
18
  "stringcase>=1.2.0",
19
19
  "decorator>=5.1.1",
20
+ "pydantic>=2",
20
21
  # install importlib-resources and importlib-metadata for old Python versions
21
22
  "importlib-resources>=5; python_version<'3.9'",
22
23
  "importlib-metadata>=4.6; python_version<'3.10'",
24
+ "typing_extensions>=4.0.1; python_version<'3.8'",
23
25
  ]
24
26
  keywords = ["science", "research", "development", "nerdd"]
25
27
  classifiers = [
@@ -76,7 +78,7 @@ test = [
76
78
  "pytest-sugar",
77
79
  "pytest-cov",
78
80
  "pytest-asyncio",
79
- "pytest-bdd",
81
+ "pytest-bdd==7.3.0",
80
82
  "pytest-mock",
81
83
  "pytest-watcher",
82
84
  "hypothesis",
@@ -111,24 +113,26 @@ extend-exclude = ["tests", "nerdd_module/tests"]
111
113
 
112
114
  [tool.ruff.lint]
113
115
  select = [
114
- "E", # pycodestyle errors
115
- "W", # pycodestyle warnings
116
- "F", # pyflakes
117
- "I", # isort
118
- "B", # flake8-bugbear
116
+ "E", # pycodestyle errors
117
+ "W", # pycodestyle warnings
118
+ "F", # pyflakes
119
+ "I", # isort
120
+ "B", # flake8-bugbear
119
121
  "C4", # flake8-comprehensions
120
- "T20" # no print statements
122
+ "T20", # no print statements
121
123
  ]
122
124
  ignore = [
123
125
  "F403", # I often use 'from .submodule import *' in __init__.py files
124
- "C408" # I prefer dict(a=5) over {'a': 5}
126
+ "C408", # I prefer dict(a=5) over {'a': 5}
125
127
  ]
126
128
 
127
129
  [tool.mypy]
128
- mypy_path="typings"
130
+ # type stubs of packages lacking types are in the "typings" directory
131
+ mypy_path = "typings"
129
132
  # strict=true
130
133
  disallow_untyped_defs = true
131
- # no_implicit_optional = True
134
+ # disallow_any_unimported = true
135
+ no_implicit_optional = true
132
136
  # check_untyped_defs = True
133
137
  # show_error_codes = True
134
138
 
@@ -1,91 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import List, Optional
3
-
4
- __all__ = ["Configuration"]
5
-
6
-
7
- def get_property_columns_of_type(config: dict, t: str) -> List[dict]:
8
- return [c for c in config["result_properties"] if c.get("level", "molecule") == t]
9
-
10
-
11
- def is_visible(result_property: dict, output_format: str) -> bool:
12
- formats = result_property.get("formats", {})
13
-
14
- if isinstance(formats, list):
15
- return output_format in formats
16
- elif isinstance(formats, dict):
17
- include = formats.get("include", "*")
18
- exclude = formats.get("exclude", [])
19
- assert include == "*" or isinstance(
20
- include, list
21
- ), f"Expected include to be a list or '*', got {include}"
22
- assert isinstance(exclude, list), f"Expected exclude to be a list, got {exclude}"
23
- return (include == "*" or output_format in include) and output_format not in exclude
24
- else:
25
- raise ValueError(
26
- f"Invalid formats declaration {formats} in result property " f"{result_property}"
27
- )
28
-
29
-
30
- class Configuration(ABC):
31
- def __init__(self) -> None:
32
- self._cached_config: Optional[dict] = None
33
-
34
- def get_dict(self) -> dict:
35
- if self._cached_config is None:
36
- config = self._get_dict()
37
-
38
- if "result_properties" not in config:
39
- config["result_properties"] = []
40
-
41
- # check that a module can only predict atom or derivative properties, not both
42
- num_atom_properties = len(get_property_columns_of_type(config, "atom"))
43
- num_derivative_properties = len(get_property_columns_of_type(config, "derivative"))
44
- assert (
45
- num_atom_properties == 0 or num_derivative_properties == 0
46
- ), "A module can only predict atom or derivative properties, not both."
47
-
48
- self._cached_config = config
49
-
50
- return self._cached_config
51
-
52
- @abstractmethod
53
- def _get_dict(self) -> dict:
54
- pass
55
-
56
- def is_empty(self) -> bool:
57
- return self.get_dict() == {}
58
-
59
- def molecular_property_columns(self) -> List[dict]:
60
- return get_property_columns_of_type(self.get_dict(), "molecule")
61
-
62
- def atom_property_columns(self) -> List[dict]:
63
- return get_property_columns_of_type(self.get_dict(), "atom")
64
-
65
- def derivative_property_columns(self) -> List[dict]:
66
- return get_property_columns_of_type(self.get_dict(), "derivative")
67
-
68
- def get_task(self) -> str:
69
- # if task is specified in the config, use that
70
- config = self.get_dict()
71
- if "task" in config:
72
- return config["task"]
73
-
74
- # try to derive the task from the result_properties
75
- num_atom_properties = len(self.atom_property_columns())
76
- num_derivative_properties = len(self.derivative_property_columns())
77
-
78
- if num_atom_properties > 0:
79
- return "atom_property_prediction"
80
- elif num_derivative_properties > 0:
81
- return "derivative_property_prediction"
82
- else:
83
- return "molecular_property_prediction"
84
-
85
- def get_visible_properties(self, output_format: str) -> List[dict]:
86
- return [
87
- p for p in self.get_dict().get("result_properties", []) if is_visible(p, output_format)
88
- ]
89
-
90
- def __repr__(self) -> str:
91
- return f"{self.__class__.__name__}({self._get_dict()})"
File without changes
File without changes
File without changes