data-sitter 0.1.3__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data_sitter/Contract.py CHANGED
@@ -1,3 +1,5 @@
1
+ import json
2
+ import yaml
1
3
  from typing import Any, Dict, List, NamedTuple
2
4
  from functools import cached_property
3
5
 
@@ -6,7 +8,7 @@ from pydantic import BaseModel
6
8
  from .Validation import Validation
7
9
  from .field_types import BaseField
8
10
  from .FieldResolver import FieldResolver
9
- from .rules import MatchedRule, RuleRegistry, RuleParser
11
+ from .rules import ProcessedRule, RuleRegistry, RuleParser
10
12
 
11
13
 
12
14
  class ContractWithoutFields(Exception):
@@ -18,9 +20,9 @@ class ContractWithoutName(Exception):
18
20
 
19
21
 
20
22
class Field(NamedTuple):
    """One field declaration of a contract.

    ``rules`` is handed unchanged to ``FieldResolver.get_field_validator`` and
    ``FieldResolver.get_processed_rules``, which accept plain rule strings as
    well as single-key dicts describing logical rules, so the element type is
    not restricted to ``str``.
    """
    # Field name; used as the key of the generated validators and rules.
    name: str
    # Name of the field type, resolved through ``RuleRegistry.get_type``.
    type: str
    # Rule strings and/or logical-rule dicts, in declaration order.
    rules: List[Any]
24
26
 
25
27
 
26
28
  class Contract:
@@ -35,8 +37,8 @@ class Contract:
35
37
  self.fields = fields
36
38
  self.rule_parser = RuleParser(values)
37
39
  self.field_resolvers = {
38
- field_type: FieldResolver(RuleRegistry.get_type(field_type), self.rule_parser)
39
- for field_type in list({field.field_type for field in self.fields}) # Unique types
40
+ _type: FieldResolver(RuleRegistry.get_type(_type), self.rule_parser)
41
+ for _type in list({field.type for field in self.fields}) # Unique types
40
42
  }
41
43
 
42
44
  @classmethod
@@ -52,25 +54,30 @@ class Contract:
52
54
  values=contract_dict.get("values", {}),
53
55
  )
54
56
 
57
@classmethod
def from_json(cls, contract_json: str):
    """Build a contract from a JSON document.

    The text is decoded with ``json.loads`` and the decoded object is passed
    to ``cls.from_dict``, whose result is returned.
    """
    contract_dict = json.loads(contract_json)
    return cls.from_dict(contract_dict)
60
+
61
@classmethod
def from_yaml(cls, contract_yaml: str):
    """Build a contract from a YAML document.

    The text is parsed and the resulting object is passed to
    ``cls.from_dict``, whose result is returned.

    SECURITY: this used to call ``yaml.load(contract_yaml, yaml.Loader)``.
    The full loader can instantiate arbitrary Python objects from tagged
    YAML, which is unsafe for contract text that comes from outside the
    program. The safe loader only builds plain YAML types.
    NOTE(review): contracts relying on Python-specific YAML tags would no
    longer load - confirm none exist.
    """
    contract_dict = yaml.safe_load(contract_yaml)
    return cls.from_dict(contract_dict)
64
+
55
65
@cached_property
def field_validators(self) -> Dict[str, BaseField]:
    """Map each field name to the validator built for it.

    For every entry of ``self.fields`` the resolver registered under the
    field's type builds the validator from the field's name and rules.
    The result is cached on the instance.
    """
    return {
        field.name: self.field_resolvers[field.type].get_field_validator(field.name, field.rules)
        for field in self.fields
    }
62
72
 
63
73
@cached_property
def rules(self) -> Dict[str, List[ProcessedRule]]:
    """Map each field name to the processed rules declared for it.

    The resolver registered under the field's type turns the field's raw
    rules into processed rules. The result is cached on the instance.
    """
    return {
        field.name: self.field_resolvers[field.type].get_processed_rules(field.rules)
        for field in self.fields
    }
70
80
 
71
- def model_validate(self, item: dict):
72
- return self.pydantic_model.model_validate(item).model_dump()
73
-
74
81
def validate(self, item: dict) -> Validation:
    """Validate ``item`` against the model generated for this contract.

    Delegates to ``Validation.validate`` with ``self.pydantic_model`` and
    returns its result.
    """
    model = self.pydantic_model
    return Validation.validate(model, item)
76
83
 
@@ -78,29 +85,45 @@ class Contract:
78
85
def pydantic_model(self) -> BaseModel:
    """Create the pydantic model class for this contract.

    A new class named ``self.name`` deriving from ``BaseModel`` is created
    with ``type``; its ``__annotations__`` hold the annotation produced by
    each field validator, keyed by field name.
    """
    annotations = {}
    for name, field_validator in self.field_validators.items():
        annotations[name] = field_validator.get_annotation()
    namespace = {"__annotations__": annotations}
    return type(self.name, (BaseModel,), namespace)
85
92
 
86
- def get_front_end_contract(self):
93
@cached_property
def contract(self) -> dict:
    """Return the contract as a plain dictionary.

    The dictionary has the keys ``name``, ``fields`` and ``values``. Each
    entry of ``fields`` carries the field name, the value of the
    validator's ``type_name`` and the ``parsed_rule`` of every processed
    rule of that field. The result is cached on the instance.
    """
    document = {"name": self.name, "fields": []}
    for name, field_validator in self.field_validators.items():
        parsed_rules = [rule.parsed_rule for rule in self.rules.get(name, [])]
        document["fields"].append({
            "name": name,
            "type": field_validator.type_name.value,
            "rules": parsed_rules,
        })
    document["values"] = self.rule_parser.values
    return document
107
+
108
def get_json_contract(self, indent: int=2) -> str:
    """Serialise ``self.contract`` to a JSON string.

    ``indent`` is forwarded to ``json.dumps``.
    """
    contract_dict = self.contract
    return json.dumps(contract_dict, indent=indent)
110
+
111
def get_yaml_contract(self, indent: int=2) -> str:
    """Serialise ``self.contract`` to a YAML string.

    ``indent`` is forwarded to ``yaml.dump``; key sorting is disabled so the
    keys are emitted in the order they have in the contract dictionary.
    """
    contract_dict = self.contract
    return yaml.dump(contract_dict, Dumper=yaml.Dumper, indent=indent, sort_keys=False)
113
+
114
def get_front_end_contract(self) -> dict:
    """Return the contract in the shape consumed by a front end.

    Same layout as ``contract`` (``name``, ``fields``, ``values``), except
    that each rule is represented by the result of its
    ``get_front_end_repr()`` method instead of its parsed rule text.
    """
    document = {"name": self.name, "fields": []}
    for name, field_validator in self.field_validators.items():
        rule_reprs = [rule.get_front_end_repr() for rule in self.rules.get(name, [])]
        document["fields"].append({
            "name": name,
            "type": field_validator.type_name.value,
            "rules": rule_reprs,
        })
    document["values"] = self.rule_parser.values
    return document
@@ -1,8 +1,7 @@
1
-
2
- from typing import Dict, List, Type
1
+ from typing import Dict, List, Type, Union
3
2
 
4
3
  from .field_types import BaseField
5
- from .rules import MatchedRule, Rule, RuleRegistry
4
+ from .rules import Rule, ProcessedRule, LogicalRule, MatchedRule, RuleRegistry, LogicalOperator
6
5
  from .rules.Parser import RuleParser
7
6
 
8
7
 
@@ -10,6 +9,10 @@ class RuleNotFoundError(Exception):
10
9
  """No matching rule found for the given parsed rule."""
11
10
 
12
11
 
12
class MalformedLogicalRuleError(Exception):
    """Logical rule structure not recognised.

    Raised by ``FieldResolver.get_processed_rules`` when a dict rule does not
    have exactly one key, or when that key is not a ``LogicalOperator``.
    """
14
+
15
+
13
16
  class FieldResolver:
14
17
  field_class: Type[BaseField]
15
18
  rule_parser: RuleParser
@@ -22,24 +25,32 @@ class FieldResolver:
22
25
  self.rules = RuleRegistry.get_rules_for(field_class)
23
26
  self._match_rule_cache = {}
24
27
 
25
- def get_matched_rules(self, parsed_rules: List[str]) -> List[MatchedRule]:
26
- matched_rules = []
28
def get_field_validator(self, name: str, parsed_rules: List[Union[str, dict]]) -> BaseField:
    """Create a field instance named ``name`` and attach its validators.

    The field is instantiated from ``self.field_class``; every processed
    rule then contributes one validator through ``get_validator(field)``,
    and the resulting list is stored on ``field.validators``.
    """
    field_validator = self.field_class(name)
    field_validator.validators = [
        processed_rule.get_validator(field_validator)
        for processed_rule in self.get_processed_rules(parsed_rules)
    ]
    return field_validator
34
+
35
def get_processed_rules(self, parsed_rules: List[Union[str, dict]]) -> List[ProcessedRule]:
    """Turn raw contract rules into processed rule objects.

    Each entry of ``parsed_rules`` is either a rule string, matched through
    ``self._match_rule``, or a dict with exactly one key naming a
    ``LogicalOperator`` whose value holds the nested rules. Nested rules are
    processed recursively and wrapped in a ``LogicalRule``.

    Raises:
        MalformedLogicalRuleError: a dict rule does not have exactly one
            logical-operator key, or its operand is a bare string/dict where
            a list of rules is required.
        RuleNotFoundError: a rule string matches no registered rule.
        TypeError: an entry is neither a string nor a dict.
    """
    processed_rules = []
    for parsed_rule in parsed_rules:
        if isinstance(parsed_rule, dict):
            # NOTE(review): `str in LogicalOperator` depends on how LogicalOperator
            # implements containment (Enum value lookup only exists from
            # Python 3.12) - confirm against the supported Python versions.
            if len(keys := tuple(parsed_rule)) != 1 or (operator := keys[0]) not in LogicalOperator:
                raise MalformedLogicalRuleError(
                    f"Logical rule must be a dict with exactly one logical operator key: {parsed_rule!r}"
                )
            operands = parsed_rule[operator]
            if operator == LogicalOperator.NOT and not isinstance(operands, list):
                operands = [operands]  # NOT operator can be a single rule
            if isinstance(operands, (str, dict)):
                # Iterating a str/dict would treat characters or keys as rules and
                # surface later as a misleading RuleNotFoundError.
                raise MalformedLogicalRuleError(
                    f"Logical operator '{operator}' expects a list of rules: {parsed_rule!r}"
                )
            processed_rule = LogicalRule(operator, self.get_processed_rules(operands))
        elif isinstance(parsed_rule, str):
            processed_rule = self._match_rule(parsed_rule)
            if not processed_rule:
                raise RuleNotFoundError(f"Rule not found for parsed rule: '{parsed_rule}'")
        else:
            raise TypeError(f'Parsed Rule type not recognised: {type(parsed_rule)}')
        processed_rules.append(processed_rule)
    return processed_rules
52
+
53
+ def _match_rule(self, parsed_rule: str) -> MatchedRule:
43
54
  if parsed_rule in self._match_rule_cache:
44
55
  return self._match_rule_cache[parsed_rule]
45
56
 
data_sitter/Validation.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  from collections import defaultdict
3
2
  from typing import Any, Dict, List, Type
4
3
 
@@ -6,25 +5,35 @@ from pydantic import BaseModel, ValidationError
6
5
 
7
6
 
8
7
class Validation():
    """Outcome of validating one input item against a pydantic model."""
    # Dumped model on success; on failure, the None-filled input that was validated.
    item: Dict[str, Any]
    # Error messages grouped by field name; None when there are no errors.
    errors: Dict[str, List[str]]
    # Input keys that are not fields of the model; None when there are none.
    unknowns: Dict[str, Any]

    def __init__(self, item: dict, errors: dict = None, unknowns: dict = None):
        """Store the result; empty ``errors``/``unknowns`` are normalised to None."""
        self.item = item
        self.errors = errors or None
        self.unknowns = unknowns or None

    def to_dict(self) -> dict:
        """Return the non-empty attributes among item, errors and unknowns."""
        return {key: value for key in ["item", "errors", "unknowns"] if (value := getattr(self, key))}

    @classmethod
    def validate(cls, PydanticModel: "Type[BaseModel]", input_item: dict) -> "Validation":
        """Validate ``input_item`` against ``PydanticModel``.

        Keys declared by the model but missing from the input are filled with
        None before validation; input keys unknown to the model are collected
        in ``unknowns`` and not passed to the model. Validation failures are
        reported through ``errors`` rather than raised.
        """
        model_keys = PydanticModel.model_json_schema()['properties'].keys()
        item = {key: None for key in model_keys}  # Filling not present values with Nones
        errors = defaultdict(list)
        unknowns = {}
        for key, value in input_item.items():
            if key in item:
                item[key] = value
            else:
                unknowns[key] = value
        try:
            validated = PydanticModel(**item).model_dump()
        except ValidationError as e:
            validated = item
            for error in e.errors():
                loc = error['loc']
                # Errors not tied to a field carry an empty location; group them
                # under a synthetic key instead of failing with IndexError.
                field = loc[0] if loc else "__root__"
                errors[field].append(error['msg'])
        # Build through cls so subclasses get instances of their own type.
        return cls(item=validated, errors=dict(errors), unknowns=unknowns)
data_sitter/cli.py CHANGED
@@ -44,5 +44,5 @@ def main():
44
44
  print(f"The file {args.file} pass the contract {args.contract}")
45
45
 
46
46
 
47
- if __name__ == '__main__':
47
+ if __name__ == '__main__': # pragma: no cover
48
48
  main()
@@ -1,18 +1,24 @@
1
1
  from abc import ABC
2
- from typing import Annotated, List, Optional, Type
2
+ from typing import Annotated, Callable, List, Optional, Type
3
3
 
4
4
  from pydantic import AfterValidator
5
+
6
+ from .FieldTypes import FieldTypes
5
7
  from ..rules import register_rule, register_field
6
8
 
7
9
 
8
- def aggregated_validator(validators: List[callable], is_optional: bool):
9
- def _validator(value):
10
class NotInitialisedError(Exception):
    """The field instance is initialised without validators.

    Raised by ``BaseField.validate`` and ``BaseField.get_annotation`` while the
    field's ``validators`` attribute is still None.
    """
12
+
13
+
14
def aggregated_validator(validators: List[Callable], is_optional: bool):
    """Combine several validators into a single callable.

    The returned function calls every validator with the value, in order,
    and returns the value unchanged. When ``is_optional`` is true, a None
    value is returned immediately without running any validator.
    """
    def validator(value):
        skip_checks = is_optional and value is None
        if not skip_checks:
            for check in validators:
                check(value)
        return value
    return validator
16
22
 
17
23
  @register_field
18
24
  class BaseField(ABC):
@@ -20,38 +26,42 @@ class BaseField(ABC):
20
26
  is_optional: bool
21
27
  validators = None
22
28
  field_type = None
29
+ type_name = FieldTypes.BASE
23
30
 
24
- def __init__(self, name: str, is_optional: bool) -> None:
31
def __init__(self, name: str) -> None:
    """Create a field called ``name`` with no validators attached yet."""
    self.name = name
    # Fields start optional; the "Is not null" rule factory flips this to False.
    self.is_optional = True
    # None means "not initialised": validate() and get_annotation() raise
    # NotInitialisedError until a list of validators is assigned.
    self.validators = None
28
35
 
29
- @register_rule("Validate Not Null")
36
@register_rule("Is not null")
def validator_not_null(self):
    """Rule factory: the value must not be None.

    Calling the factory also marks the field as non-optional, so None is no
    longer skipped when the field's validators are aggregated.
    """
    self.is_optional = False

    def validator(value):
        if value is not None:
            return value
        raise ValueError("Value cannot be null.")

    return validator
39
45
 
40
46
def validate(self, value):
    """Run every attached validator against ``value``.

    Raises ``NotInitialisedError`` when no validators have been assigned
    yet; otherwise any exception raised by a validator propagates.
    """
    checks = self.validators
    if checks is None:
        raise NotInitialisedError()
    for check in checks:
        check(value)
51
 
44
52
def get_annotation(self):
    """Return the ``Annotated`` type used for this field in a pydantic model.

    The base type is ``self.field_type``, wrapped in ``Optional`` when the
    field is optional, and it is annotated with an ``AfterValidator`` that
    runs the aggregated validators. Raises ``NotInitialisedError`` when no
    validators have been assigned yet.
    """
    if self.validators is None:
        raise NotInitialisedError()
    if self.is_optional:
        field_type = Optional[self.field_type]
    else:
        field_type = self.field_type
    combined = aggregated_validator(self.validators, self.is_optional)
    return Annotated[field_type, AfterValidator(combined)]
47
57
 
48
58
@classmethod
def get_parents(cls: Type["BaseField"]) -> List[Type["BaseField"]]:
    """Return every field class this class inherits from, without duplicates.

    ``BaseField`` itself has no parents. For any other class the direct bases
    that are field classes are collected together with their own parents.
    The order is deterministic: bases are visited in ``__bases__`` order and
    each base is followed by its ancestors, first occurrence wins.
    """
    if cls == BaseField:
        return []
    # A dict keeps insertion order, unlike a set of classes whose iteration
    # order can change from one run to the next.
    ancestors = {}
    for base in cls.__bases__:
        if issubclass(base, BaseField):
            ancestors[base] = None
            ancestors.update(dict.fromkeys(base.get_parents()))
    return list(ancestors)
@@ -0,0 +1,9 @@
1
+ from enum import StrEnum
2
+
3
+
4
class FieldTypes(StrEnum):
    """Display names of the available field types.

    Each field class exposes one member as its ``type_name``; the member's
    value is what ``Contract`` writes into the ``type`` entry of an exported
    contract.
    """
    BASE = "Base"
    INT = "Integer"
    FLOAT = "Float"
    STRING = "String"
    NUMERIC = "Numeric"
@@ -1,7 +1,26 @@
1
+ from .FieldTypes import FieldTypes
1
2
  from .NumericField import NumericField
2
- from ..rules import register_field
3
+ from ..rules import register_field, register_rule
4
+ from decimal import Decimal
3
5
 
4
6
 
5
7
@register_field
class FloatField(NumericField):
    """Numeric field whose values are validated as ``float``."""
    field_type = float
    type_name = FieldTypes.FLOAT

    @register_rule("Has at most {decimal_places:Integer} decimal places")
    def validate_max_decimal_places(self, decimal_places: int):
        """Rule factory: the value may have at most ``decimal_places`` decimals.

        The returned validator raises ``ValueError`` when the value needs more
        fractional digits than allowed and returns the value otherwise.
        Trailing zeros do not count as decimal places.
        """
        def validator(value):
            normalized = Decimal(str(value)).normalize()
            # Count decimals from the exponent, not from the string form: str()
            # switches to scientific notation for small or large magnitudes
            # ("1E-7", "1.5E-7", "1.2345678901234568E+17"), which has either no
            # "." at all or digits mixed with the exponent after it.
            exponent = normalized.as_tuple().exponent
            if isinstance(exponent, int):
                decimal_places_count = max(0, -exponent)
            else:
                # NaN / Infinity have a symbolic exponent; they have no decimals.
                decimal_places_count = 0

            if decimal_places_count > decimal_places:
                raise ValueError(f"Value must have at most {decimal_places} decimal places.")
            return value
        return validator
@@ -1,3 +1,4 @@
1
+ from .FieldTypes import FieldTypes
1
2
  from .NumericField import NumericField
2
3
  from ..rules import register_field
3
4
 
@@ -5,3 +6,4 @@ from ..rules import register_field
5
6
@register_field
class IntegerField(NumericField):
    """Numeric field restricted to ``int`` values; inherits the NumericField rules."""
    # Python type placed in the field's annotation.
    field_type = int
    # Name reported as the field's type in an exported contract.
    type_name = FieldTypes.INT
@@ -1,6 +1,7 @@
1
1
  from typing import Union
2
2
 
3
3
  from .BaseField import BaseField
4
+ from .FieldTypes import FieldTypes
4
5
  from ..rules import register_rule, register_field
5
6
 
6
7
  Numeric = Union[int, float]
@@ -9,67 +10,72 @@ Numeric = Union[int, float]
9
10
@register_field
class NumericField(BaseField):
    """Field for int/float values with comparison-based rules.

    Every rule method is a factory: it returns a validator that raises
    ``ValueError`` when the value breaks the rule and returns the value
    unchanged otherwise.
    """
    field_type = Numeric
    type_name = FieldTypes.NUMERIC

    @register_rule("Is not zero")
    def validate_non_zero(self):
        """Rule factory: the value must differ from zero."""
        def validator(value: Numeric):
            is_zero = value == 0
            if is_zero:
                raise ValueError("Value cannot be zero.")
            return value
        return validator

    @register_rule("Is positive")
    def validate_positive(self):
        """Rule factory: the value must be strictly greater than zero."""
        def validator(value: Numeric):
            not_positive = value <= 0
            if not_positive:
                raise ValueError("Value must be positive.")
            return value
        return validator

    @register_rule("Is negative")
    def validate_negative(self):
        """Rule factory: the value must be strictly less than zero."""
        def validator(value: Numeric):
            not_negative = value >= 0
            if not_negative:
                raise ValueError("Value must be less than zero.")
            return value
        return validator

    @register_rule("Is at least {min_val:Number}")
    def validate_min(self, min_val: Numeric):
        """Rule factory: the value must be greater than or equal to ``min_val``."""
        def validator(value: Numeric):
            too_small = value < min_val
            if too_small:
                raise ValueError(f"Value must be at least {min_val}.")
            return value
        return validator

    @register_rule("Is at most {max_val:Number}")
    def validate_max(self, max_val: Numeric):
        """Rule factory: the value must be less than or equal to ``max_val``."""
        def validator(value: Numeric):
            too_large = value > max_val
            if too_large:
                raise ValueError(f"Value must not exceed {max_val}.")
            return value
        return validator

    @register_rule("Is greater than {threshold:Number}")
    def validate_greater_than(self, threshold: Numeric):
        """Rule factory: the value must be strictly greater than ``threshold``."""
        def validator(value: Numeric):
            not_above = value <= threshold
            if not_above:
                raise ValueError(f"Value must be greater than {threshold}.")
            return value
        return validator

    @register_rule("Is less than {threshold:Number}")
    def validate_less_than(self, threshold: Numeric):
        """Rule factory: the value must be strictly less than ``threshold``."""
        def validator(value: Numeric):
            not_below = value >= threshold
            if not_below:
                raise ValueError(f"Value must be less than {threshold}.")
            return value
        return validator

    @register_rule("Is between {min_val:Number} and {max_val:Number}", fixed_params={"negative": False})
    @register_rule("Is not between {min_val:Number} and {max_val:Number}", fixed_params={"negative": True})
    def validate_between(self, min_val: Numeric, max_val: Numeric, negative: bool):
        """Rule factory: the value must lie strictly between the bounds.

        Both bounds are exclusive. With ``negative`` set, the rule is
        inverted and values strictly inside the range are rejected instead.
        """
        def validator(value: Numeric):
            inside = (min_val < value < max_val)
            if negative:
                if inside:
                    raise ValueError(f"Value must not be between {min_val} and {max_val}.")
            elif not inside:
                raise ValueError(f"Value must be between {min_val} and {max_val}.")
            return value
        return validator