data-sitter 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. data_sitter-0.1.0/PKG-INFO +8 -0
  2. data_sitter-0.1.0/README.md +133 -0
  3. data_sitter-0.1.0/data_sitter/Contract.py +101 -0
  4. data_sitter-0.1.0/data_sitter/FieldResolver.py +50 -0
  5. data_sitter-0.1.0/data_sitter/__init__.py +7 -0
  6. data_sitter-0.1.0/data_sitter/cli.py +48 -0
  7. data_sitter-0.1.0/data_sitter/field_types/BaseField.py +50 -0
  8. data_sitter-0.1.0/data_sitter/field_types/FloatField.py +7 -0
  9. data_sitter-0.1.0/data_sitter/field_types/IntegerField.py +7 -0
  10. data_sitter-0.1.0/data_sitter/field_types/NumericField.py +75 -0
  11. data_sitter-0.1.0/data_sitter/field_types/StringField.py +89 -0
  12. data_sitter-0.1.0/data_sitter/field_types/__init__.py +14 -0
  13. data_sitter-0.1.0/data_sitter/rules/MatchedRule.py +57 -0
  14. data_sitter-0.1.0/data_sitter/rules/Parser/RuleParser.py +54 -0
  15. data_sitter-0.1.0/data_sitter/rules/Parser/__init__.py +8 -0
  16. data_sitter-0.1.0/data_sitter/rules/Parser/alias_parameters_parser.py +78 -0
  17. data_sitter-0.1.0/data_sitter/rules/Parser/parser_utils.py +27 -0
  18. data_sitter-0.1.0/data_sitter/rules/Rule.py +44 -0
  19. data_sitter-0.1.0/data_sitter/rules/RuleRegistry.py +65 -0
  20. data_sitter-0.1.0/data_sitter/rules/__init__.py +14 -0
  21. data_sitter-0.1.0/data_sitter/utils/__init__.py +0 -0
  22. data_sitter-0.1.0/data_sitter/utils/logger_config.py +43 -0
  23. data_sitter-0.1.0/data_sitter.egg-info/PKG-INFO +8 -0
  24. data_sitter-0.1.0/data_sitter.egg-info/SOURCES.txt +29 -0
  25. data_sitter-0.1.0/data_sitter.egg-info/dependency_links.txt +1 -0
  26. data_sitter-0.1.0/data_sitter.egg-info/entry_points.txt +2 -0
  27. data_sitter-0.1.0/data_sitter.egg-info/requires.txt +3 -0
  28. data_sitter-0.1.0/data_sitter.egg-info/top_level.txt +1 -0
  29. data_sitter-0.1.0/pyproject.toml +20 -0
  30. data_sitter-0.1.0/setup.cfg +4 -0
  31. data_sitter-0.1.0/setup.py +19 -0
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.2
2
+ Name: data-sitter
3
+ Version: 0.1.0
4
+ Summary: A Python library that reads data contracts and generates Pydantic models for seamless data validation.
5
+ Author-email: Lázaro Pereira Candea <lazaro@candea.es>
6
+ Requires-Dist: python-dotenv==1.0.1
7
+ Requires-Dist: parse_type==0.6.4
8
+ Requires-Dist: pydantic==2.10.6
@@ -0,0 +1,133 @@
1
+ # Data-Sitter
2
+
3
+ ## Overview
4
+
5
+ Data-Sitter is a Python library designed to simplify data validation by converting data contracts into Pydantic models. This allows for easy and efficient validation of structured data, ensuring compliance with predefined rules and constraints.
6
+
7
+ ## Features
8
+
9
+ - Define structured data contracts in JSON format.
10
+ - Generate Pydantic models automatically from contracts.
11
+ - Enforce validation rules at the field level.
12
+ - Support for rule references within the contract.
13
+
14
+ ## Installation
15
+
16
+ You can install Data-Sitter directly from GitHub:
17
+
18
+ ```sh
19
+ pip install git+https://github.com/Kenr0t/data-sitter.git@main
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ### Creating a Pydantic Model from a Contract
25
+
26
+ To convert a data contract into a Pydantic model, follow these steps:
27
+
28
+ ```python
29
+ from data_sitter import Contract
30
+
31
+ contract_dict = {
32
+ "name": "test",
33
+ "fields": [
34
+ {
35
+ "field_name": "FID",
36
+ "field_type": "IntegerField",
37
+ "field_rules": ["Positive"]
38
+ },
39
+ {
40
+ "field_name": "SECCLASS",
41
+ "field_type": "StringField",
42
+ "field_rules": [
43
+ "Validate Not Null",
44
+ "Value In ['UNCLASSIFIED', 'CLASSIFIED']",
45
+ ]
46
+ }
47
+ ],
48
+ }
49
+
50
+ contract = Contract.from_dict(contract_dict)
51
+ pydantic_contract = contract.get_pydantic_model()
52
+ ```
53
+
54
+ ### Using Rule References
55
+
56
+ Data-Sitter allows you to define reusable values in the `values` key and reference them in field rules using `$values.[key]`. For example:
57
+
58
+ ```json
59
+ {
60
+ "name": "example_contract",
61
+ "fields": [
62
+ {
63
+ "field_name": "CATEGORY",
64
+ "field_type": "StringField",
65
+ "field_rules": ["Value In $values.categories"]
66
+ },
67
+ {
68
+ "field_name": "NAME",
69
+ "field_type": "StringField",
70
+ "field_rules": [
71
+ "Length Between $values.min_length and $values.max_length"
72
+ ]
73
+ }
74
+
75
+ ],
76
+ "values": {"categories": ["A", "B", "C"], "min_length": 5,"max_length": 50}
77
+ }
78
+ ```
79
+
80
+ ## Available Rules
81
+
82
+ The available validation rules can be retrieved programmatically:
83
+
84
+ ```python
85
+ from data_sitter import RuleRegistry
86
+
87
+ rules = RuleRegistry.get_rules_definition()
88
+ print(rules)
89
+ ```
90
+
91
+ ### Rule Definitions
92
+
93
+ Below are the available rules grouped by field type:
94
+
95
+ #### BaseField
96
+
97
+ - Validate Not Null
98
+
99
+ #### StringField - (Inherits from `BaseField`)
100
+
101
+ - Is not empty
102
+ - Starts with `{prefix:String}`
103
+ - Ends with `{sufix:String}`
104
+ - Value in `{possible_values:Strings}`
105
+ - Length between `{min_val:Integer}` and `{max_val:Integer}`
106
+ - Maximum length of `{max_len:Integer}`
107
+ - Length shorter than `{max_len:Integer}`
108
+ - Minimum length of `{min_len:Integer}`
109
+ - Length longer than `{min_len:Integer}`
110
+ - Is uppercase
111
+
112
+ #### NumericField - (Inherits from `BaseField`)
113
+
114
+ - Not Zero
115
+ - Positive
116
+ - Negative
117
+ - Minimum `{min_val:Number}`
118
+ - Maximum `{max_val:Number}`
119
+ - Greater than `{threshold:Number}`
120
+ - Less than `{threshold:Number}`
121
+ - Between `{min_val:Number}` and `{max_val:Number}`
122
+
123
+ #### IntegerField - (Inherits from `NumericField`)
124
+
125
+ #### FloatField - (Inherits from `NumericField`)
126
+
127
+ ## Contributing
128
+
129
+ Contributions are welcome! Feel free to submit issues or pull requests in the [GitHub repository](https://github.com/Kenr0t/data-sitter).
130
+
131
+ ## License
132
+
133
+ Data-Sitter is licensed under the MIT License.
@@ -0,0 +1,101 @@
1
+ from typing import Any, Dict, List, NamedTuple
2
+ from functools import cached_property
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from .field_types import BaseField
7
+ from .FieldResolver import FieldResolver
8
+ from .rules import MatchedRule, RuleRegistry, RuleParser
9
+
10
+
11
+ class ContractWithoutFields(Exception):
12
+ pass
13
+
14
+
15
+ class ContractWithoutName(Exception):
16
+ pass
17
+
18
+
19
+ class Field(NamedTuple):
20
+ field_name: str
21
+ field_type: str
22
+ field_rules: List[str]
23
+
24
+
25
class Contract:
    """A named data contract made of typed fields and their textual rules.

    One ``FieldResolver`` is created per distinct field type used by the
    contract. Field validators, matched rules and the generated Pydantic
    model are computed lazily and cached on first access.
    """

    name: str
    fields: List[Field]
    rule_parser: RuleParser
    field_resolvers: Dict[str, FieldResolver]

    def __init__(self, name: str, fields: List[Field], values: Dict[str, Any]) -> None:
        """Build the contract and one resolver per field type.

        Args:
            name: Contract name; also used as the generated model class name.
            fields: Field definitions (name, type name, list of rule strings).
            values: Reusable values handed to the ``RuleParser``.

        Raises:
            ValueError: If a field uses a type name that is not registered in
                ``RuleRegistry``.
        """
        self.name = name
        self.fields = fields
        self.rule_parser = RuleParser(values)
        self.field_resolvers = {}
        for field in self.fields:
            if field.field_type in self.field_resolvers:
                continue  # One resolver per distinct field type is enough.
            field_class = RuleRegistry.get_type(field.field_type)
            if field_class is None:
                # get_type() returns None for unregistered names; fail here
                # with a readable message instead of an AttributeError later.
                raise ValueError(
                    f"Unknown field type '{field.field_type}' for field '{field.field_name}'."
                )
            self.field_resolvers[field.field_type] = FieldResolver(field_class, self.rule_parser)

    @classmethod
    def from_dict(cls, contract_dict: dict):
        """Create a contract from its dictionary form.

        Args:
            contract_dict: Mapping with the required keys ``"name"`` and
                ``"fields"`` and the optional key ``"values"``.

        Raises:
            ContractWithoutName: If ``"name"`` is missing.
            ContractWithoutFields: If ``"fields"`` is missing.
        """
        if "name" not in contract_dict:
            raise ContractWithoutName()
        if "fields" not in contract_dict:
            raise ContractWithoutFields()

        return cls(
            name=contract_dict["name"],
            fields=[Field(**field) for field in contract_dict["fields"]],
            values=contract_dict.get("values", {}),
        )

    @cached_property
    def field_validators(self) -> Dict[str, BaseField]:
        """Map each field name to a validator with all its rules applied."""
        return {
            field.field_name: self.field_resolvers[field.field_type].get_field_validator(
                field.field_name, field.field_rules
            )
            for field in self.fields
        }

    @cached_property
    def rules(self) -> Dict[str, List[MatchedRule]]:
        """Map each field name to the rules matched from its rule strings."""
        return {
            field.field_name: self.field_resolvers[field.field_type].get_matched_rules(field.field_rules)
            for field in self.fields
        }

    @cached_property
    def _pydantic_model(self) -> "type[BaseModel]":
        """Build the model class once; validating many rows reuses it."""
        annotations = {
            field_name: field_validator.get_annotation()
            for field_name, field_validator in self.field_validators.items()
        }
        return type(self.name, (BaseModel,), {"__annotations__": annotations})

    def model_validate(self, item: dict):
        """Validate ``item`` against the contract and return the dumped model."""
        return self.get_pydantic_model().model_validate(item).model_dump()

    def get_pydantic_model(self) -> "type[BaseModel]":
        """Return the Pydantic model class (not an instance) for this contract."""
        return self._pydantic_model

    def get_front_end_contract(self):
        """Return a plain-dict description of the contract, fields and rules."""
        return {
            "name": self.name,
            "fields": [
                {
                    "field_name": field_name,
                    "field_type": field_validator.__class__.__name__,
                    "field_rules": [
                        {
                            "rule": rule.field_rule,
                            "parsed_rule": rule.parsed_rule,
                            "rule_params": rule.rule_params,
                            "parsed_values": rule.parsed_values,
                        }
                        for rule in self.rules.get(field_name, [])
                    ]
                }
                for field_name, field_validator in self.field_validators.items()
            ]
        }
@@ -0,0 +1,50 @@
1
+
2
+ from typing import Dict, List, Type
3
+
4
+ from .field_types import BaseField
5
+ from .rules import MatchedRule, Rule, RuleRegistry
6
+ from .rules.Parser import RuleParser
7
+
8
+
9
+ class RuleNotFoundError(Exception):
10
+ """No matching rule found for the given parsed rule."""
11
+
12
+
13
class FieldResolver:
    """Turns rule strings into matched rules and validators for one field class."""

    field_class: Type[BaseField]
    rule_parser: RuleParser
    rules: List[Rule]
    _match_rule_cache: Dict[str, MatchedRule]

    def __init__(self, field_class: Type[BaseField], rule_parser: RuleParser) -> None:
        """Store the field class and parser and load the rules for that class."""
        self.field_class = field_class
        self.rule_parser = rule_parser
        self.rules = RuleRegistry.get_rules_for(field_class)
        self._match_rule_cache = {}

    def get_matched_rules(self, parsed_rules: List[str]) -> List[MatchedRule]:
        """Match every rule string, in order.

        Raises:
            RuleNotFoundError: If any rule string matches no registered rule.
        """
        resolved = []
        for rule_text in parsed_rules:
            hit = self.match_rule(rule_text)
            if hit:
                resolved.append(hit)
                continue
            raise RuleNotFoundError(f"Rule not found for parsed rule: '{rule_text}'")
        return resolved

    def get_field_validator(self, field_name: str, parsed_rules: List[str]) -> BaseField:
        """Create a field instance and attach every matched rule to it."""
        validator = self.field_class(field_name)
        for hit in self.get_matched_rules(parsed_rules):
            hit.add_to_instance(validator)
        return validator

    def match_rule(self, parsed_rule: str) -> MatchedRule:
        """Return the first rule matching ``parsed_rule``, or None if none does.

        Successful matches are cached per rule string; misses are not.
        """
        try:
            return self._match_rule_cache[parsed_rule]
        except KeyError:
            pass

        for candidate in self.rules:
            hit = self.rule_parser.match(candidate, parsed_rule)
            if hit:
                self._match_rule_cache[parsed_rule] = hit
                return hit
        return None
@@ -0,0 +1,7 @@
1
+ from .Contract import Contract
2
+ from .rules import RuleRegistry
3
+
4
+ __all__ = [
5
+ "Contract",
6
+ "RuleRegistry"
7
+ ]
@@ -0,0 +1,48 @@
1
+ import csv
2
+ import json
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from .Contract import Contract
7
+
8
+
9
+ DEFAULT_ENCODING = "utf8"
10
+
11
def main():
    """Command-line entry point: validate a data file against a contract.

    Reads the JSON contract given with ``--contract``, builds its Pydantic
    model, loads the records from ``--file`` (``.csv`` or ``.json``) and
    validates every record. The first invalid record raises.

    Raises:
        NotImplementedError: If the data file suffix is not supported.
    """
    parser = argparse.ArgumentParser(description='Data Sitter CLI')
    parser.add_argument('-c', '--contract', required=True, help='Path to contract file')
    parser.add_argument('-f', '--file', required=True, help='Path to data file')
    parser.add_argument('-e', '--encoding', help='Files Encoding', default=DEFAULT_ENCODING)

    args = parser.parse_args()
    print(f"Processing {args.file} with contract {args.contract}")

    file_path = Path(args.file)
    encoding = args.encoding
    contract_path = Path(args.contract)
    contract_dict = json.loads(contract_path.read_text(encoding))
    contract = Contract.from_dict(contract_dict)
    pydantic_contract = contract.get_pydantic_model()

    suffix = file_path.suffix.lower()  # Accept ".CSV" / ".JSON" as well.
    if suffix == '.csv':
        # newline="" is what the csv module documents for files it reads.
        with open(file_path, encoding=encoding, newline="") as f:
            reader = csv.DictReader(f)
            # fieldnames is None for an empty file; treat that as no columns.
            reader.fieldnames = [name.strip() for name in reader.fieldnames or []]
            # Short rows yield None values and long rows a list of extras;
            # only strip real strings and leave the rest for validation.
            records = [
                {k: v.strip() if isinstance(v, str) else v for k, v in row.items()}
                for row in reader
            ]

    elif suffix == '.json':
        file_data = json.loads(file_path.read_text(encoding))
        if isinstance(file_data, dict):
            records = [file_data]
        else:
            records = file_data
    else:
        raise NotImplementedError(f"Type {file_path.suffix} not implemented.")

    for row in records:
        pydantic_contract.model_validate(row)
    print(f"The file {args.file} pass the contract {args.contract}")
+
46
+
47
+ if __name__ == '__main__':
48
+ main()
@@ -0,0 +1,50 @@
1
+ from abc import ABC
2
+ from typing import Annotated, List, Type
3
+
4
+ from pydantic import AfterValidator
5
+ from ..rules import register_rule, register_field
6
+
7
+
8
def aggregated_validator(validators: List[callable]):
    """Combine several validators into a single one.

    The returned callable runs each validator in list order on the value and
    then returns the value unchanged. Validator return values are ignored;
    a validator rejects a value by raising. The list is read at call time,
    so validators appended later are also run.
    """
    def _validator(value):
        for check in validators:
            check(value)
        return value

    return _validator
14
+
15
@register_field
class BaseField(ABC):
    """Base class for contract field types.

    An instance collects validator callables in ``validators``; rule methods
    (decorated with ``register_rule``) append to that list. Subclasses set
    ``field_type`` to the Python type used in the generated annotation.
    """

    name: str
    validators = None
    field_type = None

    def __init__(self, name) -> None:
        """Create a field called ``name`` with no validators yet."""
        self.name = name
        self.validators = []

    @register_rule("Validate Not Null")
    def validator_not_null(self):
        """Add a validator that rejects ``None``."""
        def _validator(value):
            if value is None:
                # A bare ValueError() would surface as an empty error message.
                raise ValueError("Value must not be null")
            return value

        self.validators.append(_validator)

    def validate(self, value):
        """Run every validator on ``value``; a failing validator raises."""
        for validator in self.validators:
            validator(value)

    def get_annotation(self):
        """Return ``Annotated[field_type, AfterValidator(...)]`` for this field."""
        return Annotated[self.field_type, AfterValidator(aggregated_validator(self.validators))]

    @classmethod
    def get_parents(cls: Type["BaseField"]) -> List[Type["BaseField"]]:
        """Return all ancestor field classes, nearest first.

        Only bases whose class name ends with ``"Field"`` are followed, and
        ``BaseField`` itself has no parents.
        """
        if cls.__name__ == "BaseField":
            return []
        ancestors = []
        for base in cls.__bases__:
            if base.__name__.endswith("Field"):
                ancestors.append(base)
                # Recursion stops at BaseField, which returns [].
                ancestors.extend(base.get_parents())
        return ancestors
@@ -0,0 +1,7 @@
1
+ from .NumericField import NumericField
2
+ from ..rules import register_field
3
+
4
+
5
+ @register_field
6
+ class FloatField(NumericField):
7
+ field_type = float
@@ -0,0 +1,7 @@
1
+ from .NumericField import NumericField
2
+ from ..rules import register_field
3
+
4
+
5
+ @register_field
6
+ class IntegerField(NumericField):
7
+ field_type = int
@@ -0,0 +1,75 @@
1
+ from typing import Union
2
+
3
+ from .BaseField import BaseField
4
+ from ..rules import register_rule, register_field
5
+
6
+ Numeric = Union[int, float]
7
+
8
+
9
@register_field
class NumericField(BaseField):
    """Field holding an ``int`` or ``float``, with numeric comparison rules.

    Every rule method appends one validator to ``self.validators``; the
    validator raises ``ValueError`` when the value breaks the rule and
    otherwise returns the value unchanged.
    """

    field_type = Numeric

    @register_rule("Not Zero")
    def validate_non_zero(self):
        """Reject a value equal to zero."""
        def validator(value: Numeric):
            if value == 0:
                raise ValueError("Value must not be zero")
            return value
        self.validators.append(validator)

    @register_rule("Positive")
    def validate_positive(self):
        """Reject zero and negative values (zero is not positive)."""
        def validator(value: Numeric):
            if value <= 0:
                raise ValueError(f"Value {value} is not positive")
            return value
        self.validators.append(validator)

    @register_rule("Negative")
    def validate_negative(self):
        """Reject zero and positive values."""
        def validator(value: Numeric):
            if value >= 0:
                raise ValueError(f"Value {value} is not negative")
            return value
        self.validators.append(validator)

    @register_rule("Minimum {min_val:Number}")
    def validate_min(self, min_val: Numeric):
        """Reject values below ``min_val`` (``min_val`` itself is allowed)."""
        def validator(value: Numeric):
            if value < min_val:
                raise ValueError(f"Value {value} is less than minimum {min_val}")
            return value
        self.validators.append(validator)

    @register_rule("Maximum {max_val:Number}")
    def validate_max(self, max_val: Numeric):
        """Reject values above ``max_val`` (``max_val`` itself is allowed)."""
        def validator(value: Numeric):
            if value > max_val:
                raise ValueError(f"Value {value} exceeds maximum {max_val}")
            return value
        self.validators.append(validator)

    # "Greate than" is the original misspelled pattern; it stays registered
    # so contracts written against it keep working.
    @register_rule("Greater than {threshold:Number}")
    @register_rule("Greate than {threshold:Number}")
    def validate_greater_than(self, threshold: Numeric):
        """Reject values that are not strictly greater than ``threshold``."""
        def validator(value: Numeric):
            if value <= threshold:
                raise ValueError(f"Value {value} is not greater than {threshold}")
            return value
        self.validators.append(validator)

    @register_rule("Less than {threshold:Number}")
    def validate_less_than(self, threshold: Numeric):
        """Reject values that are not strictly less than ``threshold``."""
        def validator(value: Numeric):
            if value >= threshold:
                raise ValueError(f"Value {value} is not less than {threshold}")
            return value
        self.validators.append(validator)

    @register_rule("Between {min_val:Number} and {max_val:Number}")
    def validate_between(self, min_val: Numeric, max_val: Numeric):
        """Reject values outside the open interval (``min_val``, ``max_val``).

        Both bounds are exclusive.
        """
        def validator(value: Numeric):
            if not (min_val < value < max_val):
                raise ValueError(f"Value {value} not in Between {min_val} and {max_val}.")
            return value
        self.validators.append(validator)
@@ -0,0 +1,89 @@
1
+ from typing import List
2
+
3
+ from .BaseField import BaseField
4
+ from ..rules import register_rule, register_field
5
+
6
+
7
@register_field
class StringField(BaseField):
    """Field holding a ``str``, with content and length rules.

    Every rule method appends one validator to ``self.validators``; the
    validator raises ``ValueError`` when the value breaks the rule and
    otherwise returns the value unchanged.
    """

    field_type = str

    @register_rule("Is not empty")
    def validate_not_empty(self):
        """Reject the empty string."""
        def validator(value: str):
            if value == "":
                raise ValueError("The value is empty")
            return value
        self.validators.append(validator)

    @register_rule("Starts with {prefix:String}")
    def validate_starts_with(self, prefix: str):
        """Reject values that do not start with ``prefix``."""
        def validator(value: str):
            if not value.startswith(prefix):
                raise ValueError(f"The value '{value}' does not start with '{prefix}'.")
            return value
        self.validators.append(validator)

    # The parameter keeps its original spelling: it is part of the rule
    # pattern and of the setter signature.
    @register_rule("Ends with {sufix:String}")
    def validate_ends_with(self, sufix: str):
        """Reject values that do not end with ``sufix``."""
        def validator(value: str):
            if not value.endswith(sufix):
                raise ValueError(f"The value '{value}' does not end with '{sufix}'.")
            return value
        self.validators.append(validator)

    @register_rule("Value in {possible_values:Strings}")
    def validate_in(self, possible_values: List[str]):
        """Reject values that are not one of ``possible_values``."""
        def validator(value: str):
            if value not in possible_values:
                raise ValueError(f"The value '{value}' is not in the list.")
            return value
        self.validators.append(validator)

    @register_rule("Length between {min_val:Integer} and {max_val:Integer}")
    def validate_length_between(self, min_val: int, max_val: int):
        """Reject values whose length is not strictly between the bounds.

        Both bounds are exclusive.
        """
        def validator(value: str):
            if not (min_val < len(value) < max_val):
                raise ValueError(f"Length {len(value)} is not in between {min_val} and {max_val}.")
            return value
        self.validators.append(validator)

    @register_rule("Maximum length of {max_len:Integer}")
    def validate_max_length(self, max_len: int):
        """Reject values longer than ``max_len`` characters."""
        def validator(value: str):
            if len(value) > max_len:
                raise ValueError(f"Length {len(value)} is longer than {max_len}.")
            return value
        self.validators.append(validator)

    @register_rule("Length shorter than {max_len:Integer}")
    def validate_shorter_than(self, max_len: int):
        """Reject values whose length is ``max_len`` or more."""
        def validator(value: str):
            if len(value) >= max_len:
                raise ValueError(f"Length {len(value)} is not shorter than {max_len}.")
            return value
        self.validators.append(validator)

    @register_rule("Minimum length of {min_len:Integer}")
    def validate_min_length(self, min_len: int):
        """Reject values shorter than ``min_len`` characters."""
        def validator(value: str):
            if len(value) < min_len:
                raise ValueError(f"Length {len(value)} is shorter than {min_len}.")
            return value
        self.validators.append(validator)

    @register_rule("Length longer than {min_len:Integer}")
    def validate_longer_than(self, min_len: int):
        """Reject values whose length is ``min_len`` or less."""
        def validator(value: str):
            if len(value) <= min_len:
                raise ValueError(f"Length {len(value)} is not longer than {min_len}.")
            return value
        self.validators.append(validator)

    @register_rule("Is uppercase")
    def validate_uppercase(self):
        """Reject values for which ``str.isupper()`` is false."""
        def validator(value: str):
            if not value.isupper():
                raise ValueError("Not Uppercase")
            return value
        self.validators.append(validator)
@@ -0,0 +1,14 @@
1
+ from .BaseField import BaseField
2
+ from .StringField import StringField
3
+ from .NumericField import NumericField
4
+ from .IntegerField import IntegerField
5
+ from .FloatField import FloatField
6
+
7
+
8
+ __all__ = [
9
+ "BaseField",
10
+ "StringField",
11
+ "NumericField",
12
+ "IntegerField",
13
+ "FloatField",
14
+ ]
@@ -0,0 +1,57 @@
1
+ from typing import TYPE_CHECKING, Any, Dict
2
+
3
+ from .Rule import Rule
4
+ from .RuleRegistry import RuleRegistry
5
+ from .Parser.parser_utils import get_value_from_reference
6
+
7
+ if TYPE_CHECKING:
8
+ from field_types import BaseField
9
+
10
+
11
+ class RuleParsedValuesMismatch(Exception):
12
+ pass
13
+
14
+
15
+ class InvalidFieldTypeError(TypeError):
16
+ """Raised when attempting to add a rule to an incompatible field type."""
17
+
18
+
19
class MatchedRule(Rule):
    """A ``Rule`` together with the concrete rule string that matched it.

    Attributes:
        parsed_rule: The rule string from the contract.
        parsed_values: Parameter values extracted from ``parsed_rule``; a
            value may still be a ``$values.<key>`` reference string.
        values: The contract's reusable values used to resolve references.
    """

    parsed_rule: str
    parsed_values: Dict[str, Any]
    values: Dict[str, Any]

    def __init__(self,
        rule: Rule,
        parsed_rule: str,
        parsed_values: Dict[str, Any],
        values: Dict[str, Any]
    ):
        """Wrap ``rule`` with the values parsed from ``parsed_rule``.

        Raises:
            RuleParsedValuesMismatch: If the parsed parameter names differ
                from the parameters declared in the rule pattern.
        """
        # Pass the three Rule fields explicitly: vars(rule) would also carry
        # the extra attributes of an already-matched rule and break __init__.
        super().__init__(rule.field_type, rule.field_rule, rule.rule_setter)
        self.parsed_rule = parsed_rule
        self.parsed_values = parsed_values
        self.values = values
        self.__validate_rule_parsed_values()

    @property
    def resolved_values(self) -> Dict[str, Any]:
        """Return the parsed values with references replaced by their values.

        Only strings that are a complete ``$values.<key>`` reference are
        looked up; any other string, including a literal that merely starts
        with ``$``, is passed through unchanged.
        """
        from .Parser.parser_utils import VALUE_REF_PATTERN

        resolved = {}
        for rule_param, param_value in self.parsed_values.items():
            if isinstance(param_value, str) and VALUE_REF_PATTERN.fullmatch(param_value):
                resolved[rule_param] = get_value_from_reference(param_value, self.values)
            else:
                resolved[rule_param] = param_value
        return resolved

    def __validate_rule_parsed_values(self):
        """Check that parsed parameter names equal the rule's declared ones."""
        parsed_values_values = set(self.parsed_values.keys())
        if set(self.rule_params) != parsed_values_values:
            raise RuleParsedValuesMismatch(f"Rule Params: {self.rule_params}, Parsed Values: {parsed_values_values}")

    def add_to_instance(self, field_instance: "BaseField"):
        """Apply this rule to ``field_instance`` by calling the rule setter.

        Raises:
            InvalidFieldTypeError: If ``field_instance`` is not an instance
                of the field class this rule was registered for.
        """
        field_class = RuleRegistry.get_type(self.field_type)
        if not isinstance(field_instance, field_class):
            raise InvalidFieldTypeError(
                f"Cannot add rule to {type(field_instance).__name__}, expected {self.field_type}."
            )
        self.rule_setter(self=field_instance, **self.resolved_values)
@@ -0,0 +1,54 @@
1
+ from typing import Callable, Dict, Optional
2
+
3
+ from parse import with_pattern, Parser
4
+ from parse_type import TypeBuilder
5
+
6
+ from .parser_utils import REF_PATTERN, get_value_from_reference, get_key_from_reference
7
+ from .alias_parameters_parser import NotCompatibleTypes, alias_parameters_types
8
+ from ..Rule import Rule
9
+ from ..MatchedRule import MatchedRule
10
+
11
+
12
+ CASE_SENSITIVE_RULES = False
13
+
14
+
15
class RuleParser:
    """Matches rule strings against rule patterns, with reference support.

    Each parameter type alias is extended so that a ``$values.<key>``
    reference is accepted wherever a literal of that type is accepted.
    Compiled parsers are cached per rule pattern.
    """

    values: dict
    aliases: dict
    parsers: Dict[str, Parser]

    def __init__(self, values: dict):
        """Keep ``values`` for reference lookups and build the type aliases."""
        self.values = values
        self.parsers = {}
        self.aliases = self.get_aliases_with_reference_support()

    def match(self, rule: Rule, parsed_rule: str) -> Optional[MatchedRule]:
        """Return a ``MatchedRule`` if ``parsed_rule`` fits ``rule``, else None."""
        outcome = self.get_parser_for_rule(rule).parse(parsed_rule)
        if outcome is not None:
            return MatchedRule(rule, parsed_rule, outcome.named, self.values)
        return None

    def get_parser_for_rule(self, rule: Rule) -> Parser:
        """Return the cached parser for the rule's pattern, creating it once."""
        pattern = rule.field_rule
        try:
            return self.parsers[pattern]
        except KeyError:
            compiled = Parser(pattern, extra_types=self.aliases, case_sensitive=CASE_SENSITIVE_RULES)
            self.parsers[pattern] = compiled
            return compiled

    def parse_reference_of(self, type_name: str, type_parser: Callable):
        """Build a converter accepting a reference to a value of ``type_name``.

        The converter looks the reference up in ``self.values``, checks that
        the ``repr`` of the referenced value parses as ``type_name`` and
        returns the reference text itself (not the referenced value).
        """
        type_check = Parser(f"{{value:{type_name}}}", extra_types={type_name: type_parser})

        @with_pattern(REF_PATTERN)
        def parse_reference(text):
            referenced = get_value_from_reference(text, self.values)
            if type_check.parse(repr(referenced)) is None:
                key = get_key_from_reference(text)
                raise NotCompatibleTypes(f"The reference value of '{key}' is not compatible with '{type_name}'.")
            return text

        return parse_reference

    def get_aliases_with_reference_support(self):
        """Return each alias type as a variant: literal parser or reference."""
        extended = {}
        for param_type, parser_func in alias_parameters_types.items():
            reference_parser = self.parse_reference_of(param_type, parser_func)
            extended[param_type] = TypeBuilder.make_variant([parser_func, reference_parser])
        return extended
@@ -0,0 +1,8 @@
1
+ from .RuleParser import RuleParser
2
+ from .parser_utils import get_key_from_reference, get_value_from_reference
3
+
4
+ __all__ = [
5
+ "RuleParser",
6
+ "get_key_from_reference",
7
+ "get_value_from_reference",
8
+ ]
@@ -0,0 +1,78 @@
1
+ from typing import Callable
2
+ from parse import with_pattern, Parser
3
+
4
+ from parse_type import TypeBuilder
5
+
6
+
7
+ class NotCompatibleTypes(Exception):
8
+ pass
9
+
10
+
11
+ @with_pattern(r"-?\d+")
12
+ def parse_int(text):
13
+ return int(text)
14
+
15
+
16
# The dot is escaped so it matches only a literal decimal point; a bare "."
# would match any character.
@with_pattern(r"-?\d*\.\d+")
def parse_float(text):
    """Convert text such as ``-1.5`` or ``.25`` to ``float``."""
    return float(text)
19
+
20
+
21
# The dot is escaped so it matches only a literal decimal point; a bare "."
# would let the pattern swallow an arbitrary character after the digits.
@with_pattern(r"-?\d+\.?\d*")
def parse_number(text):
    """Convert numeric text to ``float`` if it has a decimal point, else ``int``."""
    if "." in text:
        return float(text)
    return int(text)
26
+
27
+
28
+ @with_pattern(r"|".join([r'"[^"]*"', "'[^']*'"]))
29
+ def parse_string(text: str):
30
+ return text[1:-1]
31
+
32
+
33
def parse_array_of(type_name: str, type_parser: Callable):
    """Build a converter for a bracketed, comma-separated list of items.

    Args:
        type_name: Alias name of the item type, used in the inner format.
        type_parser: Converter for a single item; must expose ``pattern``.

    Returns:
        A converter that strips the first and last character of the matched
        text and parses the remainder as zero or more items.
    """
    many_items = TypeBuilder.with_many0(type_parser, type_parser.pattern, listsep=",")
    inner_parser = Parser(f"{{value:{type_name}}}", extra_types={type_name: many_items})
    bracketed_pattern = rf"\[{many_items.pattern}\]"

    @with_pattern(bracketed_pattern)
    def parse_list(text: str):
        # The pattern guarantees surrounding brackets; drop them first.
        parsed = inner_parser.parse(text[1:-1])
        if parsed is not None:
            return parsed['value']
        raise NotCompatibleTypes(f"This shouldn't happens but items of the array '{type_name}' are not compatible?.")

    return parse_list
47
+
48
+
49
+ alias_parameters_types = {
50
+ "Integer": parse_int,
51
+ "Integers": parse_array_of("Integer", parse_int),
52
+ "Float": parse_float,
53
+ "Floats": parse_array_of("Float", parse_float),
54
+ "Number": parse_number,
55
+ "Numbers": parse_array_of("Number", parse_number),
56
+ "String": parse_string,
57
+ "Strings": parse_array_of("String", parse_string),
58
+ }
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+ # class Store()
67
+ # pass
68
+
69
+
70
+ # values = {"classes": ["UNCLASSIFIED"], "min_length": 5, "max_length": 50}
71
+
72
+
73
+ # alias_parser = Parser("Value in {possible_values:Strings}", extra_types=alias_parameters_types)
74
+ # # print(alias_parser.parse("Value In ['UNCLASSIFIED', 'CLASSIFIED']"))
75
+
76
+ # with Store(values=values) as store:
77
+ # print(alias_parser.parse("Value In $values.classes"))
78
+ # print(store.)
@@ -0,0 +1,27 @@
1
+ import re
2
+
3
+
4
+ REF_PATTERN = r'\$values\.([a-zA-Z0-9_]+)'
5
+ VALUE_REF_PATTERN = re.compile(REF_PATTERN)
6
+
7
+
8
+ class MalformedReference(Exception):
9
+ pass
10
+
11
+ class ReferenceNotFound(Exception):
12
+ pass
13
+
14
+
15
def get_key_from_reference(reference: str):
    """Return the key of a ``$values.<key>`` reference.

    Raises:
        MalformedReference: If ``reference`` is not entirely a reference.
    """
    found = VALUE_REF_PATTERN.fullmatch(reference)
    if found is not None:
        return found.group(1)
    raise MalformedReference(f"Unrecognised Reference: {reference}")
21
+
22
+
23
def get_value_from_reference(reference: str, values: dict):
    """Look up the value a ``$values.<key>`` reference points to.

    Raises:
        MalformedReference: If ``reference`` is not a valid reference.
        ReferenceNotFound: If the referenced key is not in ``values``.
    """
    key = get_key_from_reference(reference)
    if key in values:
        return values[key]
    raise ReferenceNotFound(f"Reference '{key}' not found in values.")
@@ -0,0 +1,44 @@
1
+ import string
2
+ from inspect import signature
3
+ from typing import Callable
4
+
5
+
6
+ class NotAClassMethod(Exception):
7
+ pass
8
+
9
+
10
+ class RuleFunctionParamsMismatch(Exception):
11
+ pass
12
+
13
+
14
class Rule:
    """A rule pattern bound to the field method that applies it.

    Attributes:
        field_type: Name of the field class the rule belongs to.
        field_rule: Rule pattern, e.g. ``"Minimum {min_val:Number}"``.
        rule_setter: Function taking ``self`` plus one argument per pattern
            parameter.
    """

    field_type: str
    field_rule: str
    rule_setter: Callable

    def __init__(self, field_type: str, field_rule: str, rule_setter: Callable) -> None:
        """Store the three fields and check the setter signature.

        Raises:
            NotAClassMethod: If ``rule_setter`` has no ``self`` parameter.
            RuleFunctionParamsMismatch: If the setter's other parameters do
                not equal the parameters named in ``field_rule``.
        """
        # Only these three instance attributes are set here.
        self.field_type = field_type
        self.field_rule = field_rule
        self.rule_setter = rule_setter
        self.__validate_rule_function_params()

    def __repr__(self):
        return self.field_rule

    @property
    def rule_params(self) -> dict:
        """Map each parameter name in the pattern to its declared type."""
        collected = {}
        for _literal, param, param_type, _conversion in string.Formatter().parse(self.field_rule):
            if param is not None:
                collected[param] = param_type
        return collected

    def __get_rule_setter_params(self) -> set:
        """Return the setter's parameter names as a set."""
        return set(signature(self.rule_setter).parameters.keys())

    def __validate_rule_function_params(self):
        """Ensure the setter takes ``self`` and exactly the pattern parameters."""
        rule_setter_params = self.__get_rule_setter_params()
        if "self" not in rule_setter_params:
            raise NotAClassMethod()

        rule_setter_params.remove("self")
        if set(self.rule_params) == rule_setter_params:
            return
        raise RuleFunctionParamsMismatch(f"Rule Params: {self.rule_params}, Setter Params: {rule_setter_params}")
@@ -0,0 +1,65 @@
1
+ from itertools import chain
2
+ from collections import defaultdict
3
+ from typing import TYPE_CHECKING, Dict, List, Type
4
+
5
+ from .Rule import Rule
6
+ from ..utils.logger_config import get_logger
7
+
8
+
9
+ if TYPE_CHECKING:
10
+ from field_types.BaseField import BaseField
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class RuleRegistry:
    """Process-wide registry of field classes and the rules defined on them.

    Attributes:
        rules: Field class name -> rules registered on that class.
        type_map: Field class name -> field class.
    """

    rules: Dict[str, List[Rule]] = defaultdict(list)
    type_map: Dict[str, Type["BaseField"]] = {}

    @classmethod
    def register_rule(cls, field_rule: str):
        """Return a decorator registering a field method under ``field_rule``.

        The owning field type is taken from the method's ``__qualname__``.
        The decorated function is returned unchanged.
        """
        def _register(func: callable):
            # Keep only the last two components ("<Class>.<method>") so that
            # a class nested in a function or another class still works.
            *_, field_type, func_name = func.__qualname__.split(".")
            logger.debug("Registering function '%s' for %s. Rule: %s", func_name, field_type, field_rule)

            rule = Rule(field_type, field_rule, func)
            cls.rules[field_type].append(rule)
            logger.debug("Function '%s' Registered", func_name)
            return func

        return _register

    @classmethod
    def register_field(cls, field_class: Type["BaseField"]) -> Type["BaseField"]:
        """Register ``field_class`` under its class name and return it."""
        cls.type_map[field_class.__name__] = field_class
        return field_class

    @classmethod
    def get_type(cls, field_type: str) -> Type["BaseField"]:
        """Return the field class registered as ``field_type``, or None."""
        return cls.type_map.get(field_type)

    @classmethod
    def get_rules_for(cls, field_class: Type["BaseField"]):
        """Return the rules of ``field_class`` followed by inherited rules.

        A new list is returned, so callers cannot mutate the registry, and
        each rule appears once even when an ancestor is reached twice.
        """
        # .get() avoids inserting empty entries into the defaultdict.
        collected = list(cls.rules.get(field_class.__name__, []))
        if field_class.__name__ == "BaseField":
            return collected

        seen = {id(rule) for rule in collected}
        for parent in field_class.get_parents():
            for rule in cls.get_rules_for(parent):
                if id(rule) not in seen:
                    seen.add(id(rule))
                    collected.append(rule)
        return collected

    @classmethod
    def get_rules_definition(cls):
        """Describe every registered field: name, ancestors and own rules."""
        return [
            {
                "field": field_name,
                "parent_field": [p.__name__ for p in field_class.get_parents()],
                "rules": list(cls.rules.get(field_name, []))
            }
            for field_name, field_class in cls.type_map.items()
        ]
58
+
59
+
60
+ def register_rule(rule: str):
61
+ return RuleRegistry.register_rule(rule)
62
+
63
+
64
+ def register_field(field_class: type):
65
+ return RuleRegistry.register_field(field_class)
@@ -0,0 +1,14 @@
1
+ from .Rule import Rule
2
+ from .Parser import RuleParser
3
+ from .MatchedRule import MatchedRule
4
+ from .RuleRegistry import RuleRegistry, register_rule, register_field
5
+
6
+
7
+ __all__ = [
8
+ "Rule",
9
+ "MatchedRule",
10
+ "RuleParser",
11
+ "RuleRegistry",
12
+ "register_rule",
13
+ "register_field",
14
+ ]
File without changes
@@ -0,0 +1,43 @@
1
+ import logging
2
+ import logging.config
3
+ from os import environ
4
+
5
+ from dotenv import load_dotenv
6
+
7
+
8
+ load_dotenv()
9
+
10
DEFAULT_LEVEL = "INFO"
VALID_LOG_LEVEL = ["CRITICAL", "FATAL", "ERROR", "WARN", "WARNING", "INFO", "DEBUG", "NOTSET"]
# The valid names are upper-case; normalise the environment value so that
# e.g. LOG_LEVEL=debug is honoured instead of silently becoming INFO.
LOG_LEVEL = environ.get("LOG_LEVEL", DEFAULT_LEVEL).strip().upper()

if LOG_LEVEL not in VALID_LOG_LEVEL:
    # Unknown names fall back to the default.
    LOG_LEVEL = DEFAULT_LEVEL
16
+
17
+
18
+ def configure_logging():
19
+ logging.config.dictConfig(
20
+ {
21
+ "version": 1,
22
+ "disable_existing_loggers": False,
23
+ "formatters": {
24
+ "standard": {
25
+ "format": "[%(asctime)s] %(name)s [%(levelname)s] %(funcName)s - %(filename)s:%(lineno)d - %(message)s"
26
+ },
27
+ },
28
+ "handlers": {
29
+ "default": {
30
+ "level": LOG_LEVEL,
31
+ "formatter": "standard",
32
+ "class": "logging.StreamHandler",
33
+ "stream": "ext://sys.stdout", # Use standard output
34
+ },
35
+ },
36
+ "loggers": {"": {"handlers": ["default"], "level": LOG_LEVEL, "propagate": True}}, # root logger
37
+ }
38
+ )
39
+
40
+ configure_logging()
41
+
42
+ def get_logger(name: str):
43
+ return logging.getLogger(name)
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.2
2
+ Name: data-sitter
3
+ Version: 0.1.0
4
+ Summary: A Python library that reads data contracts and generates Pydantic models for seamless data validation.
5
+ Author-email: Lázaro Pereira Candea <lazaro@candea.es>
6
+ Requires-Dist: python-dotenv==1.0.1
7
+ Requires-Dist: parse_type==0.6.4
8
+ Requires-Dist: pydantic==2.10.6
@@ -0,0 +1,29 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ data_sitter/Contract.py
5
+ data_sitter/FieldResolver.py
6
+ data_sitter/__init__.py
7
+ data_sitter/cli.py
8
+ data_sitter.egg-info/PKG-INFO
9
+ data_sitter.egg-info/SOURCES.txt
10
+ data_sitter.egg-info/dependency_links.txt
11
+ data_sitter.egg-info/entry_points.txt
12
+ data_sitter.egg-info/requires.txt
13
+ data_sitter.egg-info/top_level.txt
14
+ data_sitter/field_types/BaseField.py
15
+ data_sitter/field_types/FloatField.py
16
+ data_sitter/field_types/IntegerField.py
17
+ data_sitter/field_types/NumericField.py
18
+ data_sitter/field_types/StringField.py
19
+ data_sitter/field_types/__init__.py
20
+ data_sitter/rules/MatchedRule.py
21
+ data_sitter/rules/Rule.py
22
+ data_sitter/rules/RuleRegistry.py
23
+ data_sitter/rules/__init__.py
24
+ data_sitter/rules/Parser/RuleParser.py
25
+ data_sitter/rules/Parser/__init__.py
26
+ data_sitter/rules/Parser/alias_parameters_parser.py
27
+ data_sitter/rules/Parser/parser_utils.py
28
+ data_sitter/utils/__init__.py
29
+ data_sitter/utils/logger_config.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ data-sitter = data_sitter.cli:main
@@ -0,0 +1,3 @@
1
+ python-dotenv==1.0.1
2
+ parse_type==0.6.4
3
+ pydantic==2.10.6
@@ -0,0 +1 @@
1
+ data_sitter
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["setuptools>=76", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = 'data-sitter'
7
+ version = "0.1.0"
8
+ description = "A Python library that reads data contracts and generates Pydantic models for seamless data validation."
9
+ authors = [
10
+ {name = 'Lázaro Pereira Candea', email = 'lazaro@candea.es'},
11
+ ]
12
+ dependencies = [
13
+ # Keep this in sync with setup.py
14
+ "python-dotenv==1.0.1",
15
+ "parse_type==0.6.4",
16
+ "pydantic==2.10.6",
17
+ ]
18
+
19
+ [project.scripts]
20
+ data-sitter = "data_sitter.cli:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ from setuptools import setup, find_packages
2
+
3
+
4
+ setup(
5
+ name='data-sitter',
6
+ version='0.1.0',
7
+ packages=find_packages(),
8
+ install_requires=[
9
+ # Keep this in sync with pyproject.toml
10
+ "python-dotenv==1.0.1",
11
+ "parse_type==0.6.4",
12
+ "pydantic==2.10.6",
13
+ ],
14
+ entry_points={
15
+ 'console_scripts': [
16
+ 'data-sitter=data_sitter.cli:main',
17
+ ],
18
+ },
19
+ )