data-sitter 0.1.3__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data_sitter-0.1.6/PKG-INFO +220 -0
  2. data_sitter-0.1.6/README.md +202 -0
  3. data_sitter-0.1.6/data_sitter/Contract.py +129 -0
  4. data_sitter-0.1.6/data_sitter/FieldResolver.py +62 -0
  5. data_sitter-0.1.6/data_sitter/Validation.py +39 -0
  6. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/cli.py +1 -1
  7. data_sitter-0.1.6/data_sitter/field_types/BaseField.py +67 -0
  8. data_sitter-0.1.6/data_sitter/field_types/FieldTypes.py +9 -0
  9. data_sitter-0.1.6/data_sitter/field_types/FloatField.py +26 -0
  10. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/field_types/IntegerField.py +2 -0
  11. data_sitter-0.1.6/data_sitter/field_types/NumericField.py +81 -0
  12. data_sitter-0.1.6/data_sitter/field_types/StringField.py +124 -0
  13. data_sitter-0.1.6/data_sitter/rules/Enums.py +7 -0
  14. data_sitter-0.1.6/data_sitter/rules/LogicalRule.py +68 -0
  15. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/MatchedRule.py +17 -14
  16. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/Parser/alias_parameters_parser.py +0 -20
  17. data_sitter-0.1.6/data_sitter/rules/ProcessedRule.py +24 -0
  18. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/Rule.py +19 -2
  19. data_sitter-0.1.6/data_sitter/rules/RuleRegistry.py +86 -0
  20. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/__init__.py +7 -1
  21. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/utils/logger_config.py +1 -1
  22. data_sitter-0.1.6/data_sitter.egg-info/PKG-INFO +220 -0
  23. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter.egg-info/SOURCES.txt +9 -2
  24. data_sitter-0.1.6/data_sitter.egg-info/requires.txt +11 -0
  25. {data_sitter-0.1.3 → data_sitter-0.1.6}/pyproject.toml +13 -2
  26. data_sitter-0.1.6/tests/test_cli.py +177 -0
  27. data_sitter-0.1.6/tests/test_contract.py +165 -0
  28. data_sitter-0.1.6/tests/test_field_resolver.py +159 -0
  29. data_sitter-0.1.6/tests/test_validation.py +130 -0
  30. data_sitter-0.1.3/PKG-INFO +0 -8
  31. data_sitter-0.1.3/README.md +0 -133
  32. data_sitter-0.1.3/data_sitter/Contract.py +0 -106
  33. data_sitter-0.1.3/data_sitter/FieldResolver.py +0 -51
  34. data_sitter-0.1.3/data_sitter/Validation.py +0 -30
  35. data_sitter-0.1.3/data_sitter/field_types/BaseField.py +0 -57
  36. data_sitter-0.1.3/data_sitter/field_types/FloatField.py +0 -7
  37. data_sitter-0.1.3/data_sitter/field_types/NumericField.py +0 -75
  38. data_sitter-0.1.3/data_sitter/field_types/StringField.py +0 -89
  39. data_sitter-0.1.3/data_sitter/rules/RuleRegistry.py +0 -65
  40. data_sitter-0.1.3/data_sitter.egg-info/PKG-INFO +0 -8
  41. data_sitter-0.1.3/data_sitter.egg-info/requires.txt +0 -3
  42. data_sitter-0.1.3/setup.py +0 -19
  43. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/__init__.py +0 -0
  44. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/field_types/__init__.py +0 -0
  45. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/Parser/RuleParser.py +0 -0
  46. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/Parser/__init__.py +0 -0
  47. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/rules/Parser/parser_utils.py +0 -0
  48. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter/utils/__init__.py +0 -0
  49. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter.egg-info/dependency_links.txt +0 -0
  50. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter.egg-info/entry_points.txt +0 -0
  51. {data_sitter-0.1.3 → data_sitter-0.1.6}/data_sitter.egg-info/top_level.txt +0 -0
  52. {data_sitter-0.1.3 → data_sitter-0.1.6}/setup.cfg +0 -0
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-sitter
3
+ Version: 0.1.6
4
+ Summary: A Python library that reads data contracts and generates Pydantic models for seamless data validation.
5
+ Author-email: Lázaro Pereira Candea <lazaro@candea.es>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: python-dotenv==1.0.1
9
+ Requires-Dist: PyYAML==6.0.2
10
+ Requires-Dist: parse_type==0.6.4
11
+ Requires-Dist: pydantic==2.10.5
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest==8.3.5; extra == "dev"
14
+ Requires-Dist: pytest-cov==6.0.0; extra == "dev"
15
+ Requires-Dist: pytest-mock==3.14.0; extra == "dev"
16
+ Requires-Dist: twine==6.1.0; extra == "dev"
17
+ Requires-Dist: build==1.2.2.post1; extra == "dev"
18
+
19
+ # Data-Sitter
20
+
21
+ ![Coverage](./coverage.svg)
22
+
23
+ ## Overview
24
+
25
+ Data-Sitter is a Python library designed to simplify data validation by converting data contracts into Pydantic models. This allows for easy and efficient validation of structured data, ensuring compliance with predefined rules and constraints.
26
+
27
+ ## Features
28
+
29
+ - Define structured data contracts as Python dictionaries, JSON, or YAML.
30
+ - Generate Pydantic models automatically from contracts.
31
+ - Enforce validation rules at the field level.
32
+ - Support for rule references within the contract.
33
+
34
+ ## Installation
35
+
36
+ ```sh
37
+ pip install data-sitter
38
+ ```
39
+
40
+ ## Development and Deployment
41
+
42
+ ### CI/CD Pipeline
43
+
44
+ The project uses GitHub Actions for continuous integration and deployment:
45
+
46
+ 1. **Pull Request Checks**
47
+ - Automatically checks if the version has been bumped in `pyproject.toml`
48
+ - Fails if the version is the same as in the main branch
49
+ - Ensures every PR includes a version update
50
+
51
+ 2. **Automatic Releases**
52
+ - When code is merged to the main branch:
53
+ - Builds the package
54
+ - Publishes to PyPI automatically
55
+ - Uses PyPI API token for secure authentication
56
+
57
+ To set up the CI/CD pipeline:
58
+
59
+ 1. Create a PyPI API token:
60
+ - Go to [PyPI Account Settings](https://pypi.org/manage/account/)
61
+ - Create a new API token with "Upload" scope
62
+ - Copy the token
63
+
64
+ 2. Add the token to GitHub:
65
+ - Go to your repository's Settings > Secrets and variables > Actions
66
+ - Create a new secret named `PYPI_API_TOKEN`
67
+ - Paste your PyPI API token
68
+
69
+ ### Setting Up Development Environment
70
+
71
+ To set up a development environment with all the necessary tools, install the package with development dependencies:
72
+
73
+ ```sh
74
+ pip install -e ".[dev]"
75
+ ```
76
+
77
+ This will install:
78
+ - The package in editable mode
79
+ - Testing tools (pytest, pytest-cov, pytest-mock)
80
+ - Build tools (build, twine)
81
+
82
+ ### Building the Package
83
+
84
+ To build the package, run:
85
+
86
+ ```sh
87
+ python -m build
88
+ ```
89
+
90
+ This will create a `dist` directory containing both a source distribution (`.tar.gz`) and a wheel (`.whl`).
91
+
92
+ ### Deploying to PyPI
93
+
94
+ To upload to PyPI:
95
+
96
+ ```sh
97
+ twine upload dist/*
98
+ ```
99
+
100
+ You'll be prompted for credentials. PyPI uploads authenticate with an API token: enter `__token__` as the username and the token itself (including its `pypi-` prefix) as the password.
101
+
102
+ ## Usage
103
+
104
+ ### Creating a Pydantic Model from a Contract
105
+
106
+ To convert a data contract into a Pydantic model, follow these steps:
107
+
108
+ ```python
109
+ from data_sitter import Contract
110
+
111
+ contract_dict = {
112
+ "name": "test",
113
+ "fields": [
114
+ {
115
+ "name": "FID",
116
+ "type": "Integer",
117
+ "rules": ["Is positive"]
118
+ },
119
+ {
120
+ "name": "SECCLASS",
121
+ "type": "String",
122
+ "rules": [
123
+ "Is not null",
124
+ "Is one of ['UNCLASSIFIED', 'CLASSIFIED']",
125
+ ]
126
+ }
127
+ ],
128
+ }
129
+
130
+ contract = Contract.from_dict(contract_dict)
131
+ pydantic_contract = contract.pydantic_model
132
+ ```
133
+
134
+ ### Using Rule References
135
+
136
+ Data-Sitter allows you to define reusable values in the `values` key and reference them in field rules using `$values.[key]`. For example:
137
+
138
+ ```json
139
+ {
140
+ "name": "example_contract",
141
+ "fields": [
142
+ {
143
+ "name": "CATEGORY",
144
+ "type": "String",
145
+ "rules": ["Is one of $values.categories"]
146
+ },
147
+ {
148
+ "name": "NAME",
149
+ "type": "String",
150
+ "rules": [
151
+ "Has length between $values.min_length and $values.max_length"
152
+ ]
153
+ }
154
+
155
+ ],
156
+ "values": {"categories": ["A", "B", "C"], "min_length": 5,"max_length": 50}
157
+ }
158
+ ```
159
+
160
+ ## Available Rules
161
+
162
+ The available validation rules can be retrieved programmatically:
163
+
164
+ ```python
165
+ from data_sitter import RuleRegistry
166
+
167
+ rules = RuleRegistry.get_rules_definition()
168
+ print(rules)
169
+ ```
170
+
171
+ ### Rule Definitions
172
+
173
+ Below are the available rules grouped by field type:
174
+
175
+ #### Base
176
+
177
+ - Is not null
178
+
179
+ #### String - (Inherits from `Base`)
180
+
181
+ - Is not empty
182
+ - Starts with {prefix:String}
183
+ - Ends with {suffix:String}
184
+ - Is not one of {possible_values:Strings}
185
+ - Is one of {possible_values:Strings}
186
+ - Has length between {min_val:Integer} and {max_val:Integer}
187
+ - Has maximum length {max_len:Integer}
188
+ - Has minimum length {min_len:Integer}
189
+ - Is uppercase
190
+ - Is lowercase
191
+ - Matches regex {pattern:String}
192
+ - Is valid email
193
+ - Is valid URL
194
+ - Has no digits
195
+
196
+ #### Numeric - (Inherits from `Base`)
197
+
198
+ - Is not zero
199
+ - Is positive
200
+ - Is negative
201
+ - Is at least {min_val:Number}
202
+ - Is at most {max_val:Number}
203
+ - Is greater than {threshold:Number}
204
+ - Is less than {threshold:Number}
205
+ - Is not between {min_val:Number} and {max_val:Number}
206
+ - Is between {min_val:Number} and {max_val:Number}
207
+
208
+ #### Integer - (Inherits from `Numeric`)
209
+
210
+ #### Float - (Inherits from `Numeric`)
211
+
212
+ - Has at most {decimal_places:Integer} decimal places
213
+
214
+ ## Contributing
215
+
216
+ Contributions are welcome! Feel free to submit issues or pull requests in the [GitHub repository](https://github.com/lcandea/data-sitter).
217
+
218
+ ## License
219
+
220
+ Data-Sitter is licensed under the MIT License.
@@ -0,0 +1,202 @@
1
+ # Data-Sitter
2
+
3
+ ![Coverage](./coverage.svg)
4
+
5
+ ## Overview
6
+
7
+ Data-Sitter is a Python library designed to simplify data validation by converting data contracts into Pydantic models. This allows for easy and efficient validation of structured data, ensuring compliance with predefined rules and constraints.
8
+
9
+ ## Features
10
+
11
+ - Define structured data contracts as Python dictionaries, JSON, or YAML.
12
+ - Generate Pydantic models automatically from contracts.
13
+ - Enforce validation rules at the field level.
14
+ - Support for rule references within the contract.
15
+
16
+ ## Installation
17
+
18
+ ```sh
19
+ pip install data-sitter
20
+ ```
21
+
22
+ ## Development and Deployment
23
+
24
+ ### CI/CD Pipeline
25
+
26
+ The project uses GitHub Actions for continuous integration and deployment:
27
+
28
+ 1. **Pull Request Checks**
29
+ - Automatically checks if the version has been bumped in `pyproject.toml`
30
+ - Fails if the version is the same as in the main branch
31
+ - Ensures every PR includes a version update
32
+
33
+ 2. **Automatic Releases**
34
+ - When code is merged to the main branch:
35
+ - Builds the package
36
+ - Publishes to PyPI automatically
37
+ - Uses PyPI API token for secure authentication
38
+
39
+ To set up the CI/CD pipeline:
40
+
41
+ 1. Create a PyPI API token:
42
+ - Go to [PyPI Account Settings](https://pypi.org/manage/account/)
43
+ - Create a new API token with "Upload" scope
44
+ - Copy the token
45
+
46
+ 2. Add the token to GitHub:
47
+ - Go to your repository's Settings > Secrets and variables > Actions
48
+ - Create a new secret named `PYPI_API_TOKEN`
49
+ - Paste your PyPI API token
50
+
51
+ ### Setting Up Development Environment
52
+
53
+ To set up a development environment with all the necessary tools, install the package with development dependencies:
54
+
55
+ ```sh
56
+ pip install -e ".[dev]"
57
+ ```
58
+
59
+ This will install:
60
+ - The package in editable mode
61
+ - Testing tools (pytest, pytest-cov, pytest-mock)
62
+ - Build tools (build, twine)
63
+
64
+ ### Building the Package
65
+
66
+ To build the package, run:
67
+
68
+ ```sh
69
+ python -m build
70
+ ```
71
+
72
+ This will create a `dist` directory containing both a source distribution (`.tar.gz`) and a wheel (`.whl`).
73
+
74
+ ### Deploying to PyPI
75
+
76
+ To upload to PyPI:
77
+
78
+ ```sh
79
+ twine upload dist/*
80
+ ```
81
+
82
+ You'll be prompted for credentials. PyPI uploads authenticate with an API token: enter `__token__` as the username and the token itself (including its `pypi-` prefix) as the password.
83
+
84
+ ## Usage
85
+
86
+ ### Creating a Pydantic Model from a Contract
87
+
88
+ To convert a data contract into a Pydantic model, follow these steps:
89
+
90
+ ```python
91
+ from data_sitter import Contract
92
+
93
+ contract_dict = {
94
+ "name": "test",
95
+ "fields": [
96
+ {
97
+ "name": "FID",
98
+ "type": "Integer",
99
+ "rules": ["Is positive"]
100
+ },
101
+ {
102
+ "name": "SECCLASS",
103
+ "type": "String",
104
+ "rules": [
105
+ "Is not null",
106
+ "Is one of ['UNCLASSIFIED', 'CLASSIFIED']",
107
+ ]
108
+ }
109
+ ],
110
+ }
111
+
112
+ contract = Contract.from_dict(contract_dict)
113
+ pydantic_contract = contract.pydantic_model
114
+ ```
115
+
116
+ ### Using Rule References
117
+
118
+ Data-Sitter allows you to define reusable values in the `values` key and reference them in field rules using `$values.[key]`. For example:
119
+
120
+ ```json
121
+ {
122
+ "name": "example_contract",
123
+ "fields": [
124
+ {
125
+ "name": "CATEGORY",
126
+ "type": "String",
127
+ "rules": ["Is one of $values.categories"]
128
+ },
129
+ {
130
+ "name": "NAME",
131
+ "type": "String",
132
+ "rules": [
133
+ "Has length between $values.min_length and $values.max_length"
134
+ ]
135
+ }
136
+
137
+ ],
138
+ "values": {"categories": ["A", "B", "C"], "min_length": 5,"max_length": 50}
139
+ }
140
+ ```
141
+
142
+ ## Available Rules
143
+
144
+ The available validation rules can be retrieved programmatically:
145
+
146
+ ```python
147
+ from data_sitter import RuleRegistry
148
+
149
+ rules = RuleRegistry.get_rules_definition()
150
+ print(rules)
151
+ ```
152
+
153
+ ### Rule Definitions
154
+
155
+ Below are the available rules grouped by field type:
156
+
157
+ #### Base
158
+
159
+ - Is not null
160
+
161
+ #### String - (Inherits from `Base`)
162
+
163
+ - Is not empty
164
+ - Starts with {prefix:String}
165
+ - Ends with {suffix:String}
166
+ - Is not one of {possible_values:Strings}
167
+ - Is one of {possible_values:Strings}
168
+ - Has length between {min_val:Integer} and {max_val:Integer}
169
+ - Has maximum length {max_len:Integer}
170
+ - Has minimum length {min_len:Integer}
171
+ - Is uppercase
172
+ - Is lowercase
173
+ - Matches regex {pattern:String}
174
+ - Is valid email
175
+ - Is valid URL
176
+ - Has no digits
177
+
178
+ #### Numeric - (Inherits from `Base`)
179
+
180
+ - Is not zero
181
+ - Is positive
182
+ - Is negative
183
+ - Is at least {min_val:Number}
184
+ - Is at most {max_val:Number}
185
+ - Is greater than {threshold:Number}
186
+ - Is less than {threshold:Number}
187
+ - Is not between {min_val:Number} and {max_val:Number}
188
+ - Is between {min_val:Number} and {max_val:Number}
189
+
190
+ #### Integer - (Inherits from `Numeric`)
191
+
192
+ #### Float - (Inherits from `Numeric`)
193
+
194
+ - Has at most {decimal_places:Integer} decimal places
195
+
196
+ ## Contributing
197
+
198
+ Contributions are welcome! Feel free to submit issues or pull requests in the [GitHub repository](https://github.com/lcandea/data-sitter).
199
+
200
+ ## License
201
+
202
+ Data-Sitter is licensed under the MIT License.
@@ -0,0 +1,129 @@
1
+ import json
2
+ import yaml
3
+ from typing import Any, Dict, List, NamedTuple
4
+ from functools import cached_property
5
+
6
+ from pydantic import BaseModel
7
+
8
+ from .Validation import Validation
9
+ from .field_types import BaseField
10
+ from .FieldResolver import FieldResolver
11
+ from .rules import ProcessedRule, RuleRegistry, RuleParser
12
+
13
+
14
class ContractWithoutFields(Exception):
    """Raised when a contract definition has no "fields" key."""
16
+
17
+
18
class ContractWithoutName(Exception):
    """Raised when a contract definition has no "name" key."""
20
+
21
+
22
class Field(NamedTuple):
    """One field declaration of a contract: its name, type name and rules."""

    # Name of the field; becomes a key of the generated model's annotations.
    name: str
    # Type name as written in the contract; looked up via RuleRegistry.get_type.
    type: str
    # Rules attached to the field, in contract order.
    rules: List[str]
26
+
27
+
28
class Contract:
    """A named data contract: typed fields plus the rules each must satisfy.

    Attributes:
        name: Contract name; also used as the name of the generated model.
        fields: Field declarations (name, type, rules) in contract order.
        rule_parser: ``RuleParser`` built from the contract's ``values``.
        field_resolvers: One ``FieldResolver`` per distinct field type, keyed
            by the type name used in the contract.
    """

    name: str
    fields: List[Field]
    rule_parser: RuleParser
    field_resolvers: Dict[str, FieldResolver]

    def __init__(self, name: str, fields: List[Field], values: Dict[str, Any]) -> None:
        """Store the declaration and create one resolver per field type.

        Args:
            name: Contract name.
            fields: Field declarations.
            values: Reusable values handed to the ``RuleParser``.
        """
        self.name = name
        self.fields = fields
        self.rule_parser = RuleParser(values)
        # Fields of the same type share a single resolver.
        self.field_resolvers = {
            _type: FieldResolver(RuleRegistry.get_type(_type), self.rule_parser)
            for _type in {field.type for field in self.fields}
        }

    @classmethod
    def from_dict(cls, contract_dict: dict):
        """Build a contract from its dictionary form.

        Args:
            contract_dict: Mapping with "name" and "fields" keys and an
                optional "values" key (defaults to an empty dict).

        Raises:
            ContractWithoutName: "name" is missing.
            ContractWithoutFields: "fields" is missing.
        """
        if "name" not in contract_dict:
            raise ContractWithoutName()
        if "fields" not in contract_dict:
            raise ContractWithoutFields()

        return cls(
            name=contract_dict["name"],
            fields=[Field(**field) for field in contract_dict["fields"]],
            values=contract_dict.get("values", {}),
        )

    @classmethod
    def from_json(cls, contract_json: str):
        """Build a contract from a JSON document."""
        return cls.from_dict(json.loads(contract_json))

    @classmethod
    def from_yaml(cls, contract_yaml: str):
        """Build a contract from a YAML document.

        The document is parsed with ``yaml.safe_load``. The previously used
        ``yaml.Loader`` can construct arbitrary Python objects from tagged
        nodes, which is unsafe for contract files that come from outside the
        program; a contract only needs plain mappings, lists and scalars.
        """
        return cls.from_dict(yaml.safe_load(contract_yaml))

    @cached_property
    def field_validators(self) -> Dict[str, BaseField]:
        """Field validators keyed by field name, built once per contract."""
        return {
            field.name: self.field_resolvers[field.type].get_field_validator(
                field.name, field.rules
            )
            for field in self.fields
        }

    @cached_property
    def rules(self) -> Dict[str, List[ProcessedRule]]:
        """Processed rules keyed by field name, built once per contract."""
        return {
            field.name: self.field_resolvers[field.type].get_processed_rules(field.rules)
            for field in self.fields
        }

    def validate(self, item: dict) -> Validation:
        """Validate ``item`` against the generated Pydantic model."""
        return Validation.validate(self.pydantic_model, item)

    @cached_property
    def pydantic_model(self) -> BaseModel:
        """Pydantic model class generated from the field validators.

        Note that the value is a ``BaseModel`` subclass (created with
        ``type``), not an instance; it is named after the contract.
        """
        annotations = {
            name: field_validator.get_annotation()
            for name, field_validator in self.field_validators.items()
        }
        return type(self.name, (BaseModel,), {"__annotations__": annotations})

    @cached_property
    def contract(self) -> dict:
        """Dictionary form of the contract, with each rule as its parsed text."""
        return {
            "name": self.name,
            "fields": [
                {
                    "name": name,
                    "type": field_validator.type_name.value,
                    "rules": [rule.parsed_rule for rule in self.rules.get(name, [])],
                }
                for name, field_validator in self.field_validators.items()
            ],
            "values": self.rule_parser.values,
        }

    def get_json_contract(self, indent: int=2) -> str:
        """Serialize the contract dictionary as JSON."""
        return json.dumps(self.contract, indent=indent)

    def get_yaml_contract(self, indent: int=2) -> str:
        """Serialize the contract dictionary as YAML, keeping key order."""
        return yaml.dump(self.contract, Dumper=yaml.Dumper, indent=indent, sort_keys=False)

    def get_front_end_contract(self) -> dict:
        """Dictionary form of the contract using each rule's front-end representation."""
        return {
            "name": self.name,
            "fields": [
                {
                    "name": name,
                    "type": field_validator.type_name.value,
                    "rules": [
                        rule.get_front_end_repr()
                        for rule in self.rules.get(name, [])
                    ],
                }
                for name, field_validator in self.field_validators.items()
            ],
            "values": self.rule_parser.values,
        }
@@ -0,0 +1,62 @@
1
+ from typing import Dict, List, Type, Union
2
+
3
+ from .field_types import BaseField
4
+ from .rules import Rule, ProcessedRule, LogicalRule, MatchedRule, RuleRegistry, LogicalOperator
5
+ from .rules.Parser import RuleParser
6
+
7
+
8
+ class RuleNotFoundError(Exception):
9
+ """No matching rule found for the given parsed rule."""
10
+
11
+
12
+ class MalformedLogicalRuleError(Exception):
13
+ """Logical rule structure not recognised."""
14
+
15
+
16
+ class FieldResolver:
17
+ field_class: Type[BaseField]
18
+ rule_parser: RuleParser
19
+ rules: List[Rule]
20
+ _match_rule_cache: Dict[str, MatchedRule]
21
+
22
+ def __init__(self, field_class: Type[BaseField], rule_parser: RuleParser) -> None:
23
+ self.field_class = field_class
24
+ self.rule_parser = rule_parser
25
+ self.rules = RuleRegistry.get_rules_for(field_class)
26
+ self._match_rule_cache = {}
27
+
28
+ def get_field_validator(self, name: str, parsed_rules: List[Union[str, dict]]) -> BaseField:
29
+ field_validator = self.field_class(name)
30
+ processed_rules = self.get_processed_rules(parsed_rules)
31
+ validators = [pr.get_validator(field_validator) for pr in processed_rules]
32
+ field_validator.validators = validators
33
+ return field_validator
34
+
35
+ def get_processed_rules(self, parsed_rules: List[Union[str, dict]]) -> List[ProcessedRule]:
36
+ processed_rules = []
37
+ for parsed_rule in parsed_rules:
38
+ if isinstance(parsed_rule, dict):
39
+ if len(keys := tuple(parsed_rule)) != 1 or (operator := keys[0]) not in LogicalOperator:
40
+ raise MalformedLogicalRuleError()
41
+ if operator == LogicalOperator.NOT and not isinstance(parsed_rule[operator], list):
42
+ parsed_rule = {operator: [parsed_rule[operator]]} # NOT operator can be a single rule
43
+ processed_rule = LogicalRule(operator, self.get_processed_rules(parsed_rule[operator]))
44
+ elif isinstance(parsed_rule, str):
45
+ processed_rule = self._match_rule(parsed_rule)
46
+ if not processed_rule:
47
+ raise RuleNotFoundError(f"Rule not found for parsed rule: '{parsed_rule}'")
48
+ else:
49
+ raise TypeError(f'Parsed Rule type not recognised: {type(parsed_rule)}')
50
+ processed_rules.append(processed_rule)
51
+ return processed_rules
52
+
53
+ def _match_rule(self, parsed_rule: str) -> MatchedRule:
54
+ if parsed_rule in self._match_rule_cache:
55
+ return self._match_rule_cache[parsed_rule]
56
+
57
+ for rule in self.rules:
58
+ matched_rule = self.rule_parser.match(rule, parsed_rule)
59
+ if matched_rule:
60
+ self._match_rule_cache[parsed_rule] = matched_rule
61
+ return matched_rule
62
+ return None
@@ -0,0 +1,39 @@
1
+ from collections import defaultdict
2
+ from typing import Any, Dict, List, Type
3
+
4
+ from pydantic import BaseModel, ValidationError
5
+
6
+
7
+ class Validation():
8
+ item: Dict[str, Any]
9
+ errors: Dict[str, List[str]]
10
+ unknowns: Dict[str, Any]
11
+
12
+ def __init__(self, item: dict, errors: dict = None, unknowns: dict = None):
13
+ self.item = item
14
+ self.errors = errors if errors else None
15
+ self.unknowns = unknowns if unknowns else None
16
+
17
+ def to_dict(self) -> dict:
18
+ return {key: value for key in ["item", "errors", "unknowns"] if (value := getattr(self, key))}
19
+
20
+ @classmethod
21
+ def validate(cls, PydanticModel: Type[BaseModel], input_item: dict) -> "Validation":
22
+ model_keys = PydanticModel.model_json_schema()['properties'].keys()
23
+ item = {key: None for key in model_keys} # Filling not present values with Nones
24
+ errors = defaultdict(list)
25
+ unknowns = {}
26
+ for key, value in input_item.items():
27
+ if key in item:
28
+ item[key] = value
29
+ else:
30
+ unknowns[key] = value
31
+ try:
32
+ validated = PydanticModel(**item).model_dump()
33
+ except ValidationError as e:
34
+ validated = item
35
+ for error in e.errors():
36
+ field = error['loc'][0] # Extract the field name
37
+ msg = error['msg']
38
+ errors[field].append(msg)
39
+ return Validation(item=validated, errors=dict(errors), unknowns=unknowns)
@@ -44,5 +44,5 @@ def main():
44
44
  print(f"The file {args.file} pass the contract {args.contract}")
45
45
 
46
46
 
47
- if __name__ == '__main__':
47
+ if __name__ == '__main__': # pragma: no cover
48
48
  main()