data-sitter 0.1.3__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_sitter/Contract.py +51 -28
- data_sitter/FieldResolver.py +31 -20
- data_sitter/Validation.py +21 -12
- data_sitter/cli.py +1 -1
- data_sitter/field_types/BaseField.py +29 -19
- data_sitter/field_types/FieldTypes.py +9 -0
- data_sitter/field_types/FloatField.py +20 -1
- data_sitter/field_types/IntegerField.py +2 -0
- data_sitter/field_types/NumericField.py +33 -27
- data_sitter/field_types/StringField.py +75 -40
- data_sitter/rules/Enums.py +7 -0
- data_sitter/rules/LogicalRule.py +68 -0
- data_sitter/rules/MatchedRule.py +17 -14
- data_sitter/rules/Parser/alias_parameters_parser.py +0 -20
- data_sitter/rules/ProcessedRule.py +24 -0
- data_sitter/rules/Rule.py +19 -2
- data_sitter/rules/RuleRegistry.py +50 -29
- data_sitter/rules/__init__.py +7 -1
- data_sitter/utils/logger_config.py +1 -1
- data_sitter-0.1.6.dist-info/METADATA +220 -0
- data_sitter-0.1.6.dist-info/RECORD +30 -0
- {data_sitter-0.1.3.dist-info → data_sitter-0.1.6.dist-info}/WHEEL +1 -1
- data_sitter-0.1.3.dist-info/METADATA +0 -8
- data_sitter-0.1.3.dist-info/RECORD +0 -26
- {data_sitter-0.1.3.dist-info → data_sitter-0.1.6.dist-info}/entry_points.txt +0 -0
- {data_sitter-0.1.3.dist-info → data_sitter-0.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,220 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: data-sitter
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: A Python library that reads data contracts and generates Pydantic models for seamless data validation.
|
5
|
+
Author-email: Lázaro Pereira Candea <lazaro@candea.es>
|
6
|
+
Requires-Python: >=3.8
|
7
|
+
Description-Content-Type: text/markdown
|
8
|
+
Requires-Dist: python-dotenv==1.0.1
|
9
|
+
Requires-Dist: PyYAML==6.0.2
|
10
|
+
Requires-Dist: parse_type==0.6.4
|
11
|
+
Requires-Dist: pydantic==2.10.5
|
12
|
+
Provides-Extra: dev
|
13
|
+
Requires-Dist: pytest==8.3.5; extra == "dev"
|
14
|
+
Requires-Dist: pytest-cov==6.0.0; extra == "dev"
|
15
|
+
Requires-Dist: pytest-mock==3.14.0; extra == "dev"
|
16
|
+
Requires-Dist: twine==6.1.0; extra == "dev"
|
17
|
+
Requires-Dist: build==1.2.2.post1; extra == "dev"
|
18
|
+
|
19
|
+
# Data-Sitter
|
20
|
+
|
21
|
+

|
22
|
+
|
23
|
+
## Overview
|
24
|
+
|
25
|
+
Data-Sitter is a Python library designed to simplify data validation by converting data contracts into Pydantic models. This allows for easy and efficient validation of structured data, ensuring compliance with predefined rules and constraints.
|
26
|
+
|
27
|
+
## Features
|
28
|
+
|
29
|
+
- Define structured data contracts in JSON format.
|
30
|
+
- Generate Pydantic models automatically from contracts.
|
31
|
+
- Enforce validation rules at the field level.
|
32
|
+
- Support for rule references within the contract.
|
33
|
+
|
34
|
+
## Installation
|
35
|
+
|
36
|
+
```sh
|
37
|
+
pip install data-sitter
|
38
|
+
```
|
39
|
+
|
40
|
+
## Development and Deployment
|
41
|
+
|
42
|
+
### CI/CD Pipeline
|
43
|
+
|
44
|
+
The project uses GitHub Actions for continuous integration and deployment:
|
45
|
+
|
46
|
+
1. **Pull Request Checks**
|
47
|
+
- Automatically checks if the version has been bumped in `pyproject.toml`
|
48
|
+
- Fails if the version is the same as in the main branch
|
49
|
+
- Ensures every PR includes a version update
|
50
|
+
|
51
|
+
2. **Automatic Releases**
|
52
|
+
- When code is merged to the main branch:
|
53
|
+
- Builds the package
|
54
|
+
- Publishes to PyPI automatically
|
55
|
+
- Uses a PyPI API token for secure authentication
|
56
|
+
|
57
|
+
To set up the CI/CD pipeline:
|
58
|
+
|
59
|
+
1. Create a PyPI API token:
|
60
|
+
- Go to [PyPI Account Settings](https://pypi.org/manage/account/)
|
61
|
+
- Create a new API token with the "Upload packages" permission (scope it to this project once the project exists on PyPI)
|
62
|
+
- Copy the token
|
63
|
+
|
64
|
+
2. Add the token to GitHub:
|
65
|
+
- Go to your repository's Settings > Secrets and variables > Actions
|
66
|
+
- Create a new secret named `PYPI_API_TOKEN`
|
67
|
+
- Paste your PyPI API token
|
68
|
+
|
69
|
+
### Setting Up Development Environment
|
70
|
+
|
71
|
+
To set up a development environment with all the necessary tools, install the package with development dependencies:
|
72
|
+
|
73
|
+
```sh
|
74
|
+
pip install -e ".[dev]"
|
75
|
+
```
|
76
|
+
|
77
|
+
This will install:
|
78
|
+
- The package in editable mode
|
79
|
+
- Testing tools (pytest, pytest-cov, pytest-mock)
|
80
|
+
- Build tools (build, twine)
|
81
|
+
|
82
|
+
### Building the Package
|
83
|
+
|
84
|
+
To build the package, run:
|
85
|
+
|
86
|
+
```sh
|
87
|
+
python -m build
|
88
|
+
```
|
89
|
+
|
90
|
+
This will create a `dist` directory containing both a source distribution (`.tar.gz`) and a wheel (`.whl`).
|
91
|
+
|
92
|
+
### Deploying to PyPI
|
93
|
+
|
94
|
+
To upload to PyPI:
|
95
|
+
|
96
|
+
```sh
|
97
|
+
twine upload dist/*
|
98
|
+
```
|
99
|
+
|
100
|
+
You'll be prompted for credentials. PyPI no longer accepts account passwords for uploads, so enter `__token__` as the username and your PyPI API token (including the `pypi-` prefix) as the password.
|
101
|
+
|
102
|
+
## Usage
|
103
|
+
|
104
|
+
### Creating a Pydantic Model from a Contract
|
105
|
+
|
106
|
+
To convert a data contract into a Pydantic model, follow these steps:
|
107
|
+
|
108
|
+
```python
|
109
|
+
from data_sitter import Contract
|
110
|
+
|
111
|
+
contract_dict = {
|
112
|
+
"name": "test",
|
113
|
+
"fields": [
|
114
|
+
{
|
115
|
+
"name": "FID",
|
116
|
+
"type": "Integer",
|
117
|
+
"rules": ["Is positive"]
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"name": "SECCLASS",
|
121
|
+
"type": "String",
|
122
|
+
"rules": [
|
123
|
+
"Is not null",
|
124
|
+
"Is one of ['UNCLASSIFIED', 'CLASSIFIED']",
|
125
|
+
]
|
126
|
+
}
|
127
|
+
],
|
128
|
+
}
|
129
|
+
|
130
|
+
contract = Contract.from_dict(contract_dict)
|
131
|
+
pydantic_contract = contract.pydantic_model
|
132
|
+
```
|
133
|
+
|
134
|
+
### Using Rule References
|
135
|
+
|
136
|
+
Data-Sitter allows you to define reusable values under the contract's top-level `values` key and reference them in field rules as `$values.<key>`, where `<key>` is the name of the entry. For example:
|
137
|
+
|
138
|
+
```json
|
139
|
+
{
|
140
|
+
"name": "example_contract",
|
141
|
+
"fields": [
|
142
|
+
{
|
143
|
+
"name": "CATEGORY",
|
144
|
+
"type": "String",
|
145
|
+
"rules": ["Is one of $values.categories"]
|
146
|
+
},
|
147
|
+
{
|
148
|
+
"name": "NAME",
|
149
|
+
"type": "String",
|
150
|
+
"rules": [
|
151
|
+
"Has length between $values.min_length and $values.max_length"
|
152
|
+
]
|
153
|
+
}
|
154
|
+
|
155
|
+
],
|
156
|
+
"values": {"categories": ["A", "B", "C"], "min_length": 5, "max_length": 50}
|
157
|
+
}
|
158
|
+
```
|
159
|
+
|
160
|
+
## Available Rules
|
161
|
+
|
162
|
+
The available validation rules can be retrieved programmatically:
|
163
|
+
|
164
|
+
```python
|
165
|
+
from data_sitter import RuleRegistry
|
166
|
+
|
167
|
+
rules = RuleRegistry.get_rules_definition()
|
168
|
+
print(rules)
|
169
|
+
```
|
170
|
+
|
171
|
+
### Rule Definitions
|
172
|
+
|
173
|
+
Below are the available rules grouped by field type:
|
174
|
+
|
175
|
+
#### Base
|
176
|
+
|
177
|
+
- Is not null
|
178
|
+
|
179
|
+
#### String - (Inherits from `Base`)
|
180
|
+
|
181
|
+
- Is not empty
|
182
|
+
- Starts with {prefix:String}
|
183
|
+
- Ends with {suffix:String}
|
184
|
+
- Is not one of {possible_values:Strings}
|
185
|
+
- Is one of {possible_values:Strings}
|
186
|
+
- Has length between {min_val:Integer} and {max_val:Integer}
|
187
|
+
- Has maximum length {max_len:Integer}
|
188
|
+
- Has minimum length {min_len:Integer}
|
189
|
+
- Is uppercase
|
190
|
+
- Is lowercase
|
191
|
+
- Matches regex {pattern:String}
|
192
|
+
- Is valid email
|
193
|
+
- Is valid URL
|
194
|
+
- Has no digits
|
195
|
+
|
196
|
+
#### Numeric - (Inherits from `Base`)
|
197
|
+
|
198
|
+
- Is not zero
|
199
|
+
- Is positive
|
200
|
+
- Is negative
|
201
|
+
- Is at least {min_val:Number}
|
202
|
+
- Is at most {max_val:Number}
|
203
|
+
- Is greater than {threshold:Number}
|
204
|
+
- Is less than {threshold:Number}
|
205
|
+
- Is not between {min_val:Number} and {max_val:Number}
|
206
|
+
- Is between {min_val:Number} and {max_val:Number}
|
207
|
+
|
208
|
+
#### Integer - (Inherits from `Numeric`)
|
209
|
+
|
210
|
+
#### Float - (Inherits from `Numeric`)
|
211
|
+
|
212
|
+
- Has at most {decimal_places:Integer} decimal places
|
213
|
+
|
214
|
+
## Contributing
|
215
|
+
|
216
|
+
Contributions are welcome! Feel free to submit issues or pull requests in the [GitHub repository](https://github.com/lcandea/data-sitter).
|
217
|
+
|
218
|
+
## License
|
219
|
+
|
220
|
+
Data-Sitter is licensed under the MIT License.
|
@@ -0,0 +1,30 @@
|
|
1
|
+
data_sitter/Contract.py,sha256=ykeBA_gr7r7MO4FYvdDrstUzGiq7dyIIipkOZRk8qkA,4042
|
2
|
+
data_sitter/FieldResolver.py,sha256=Bh7_MTTO7E87S31dQq3tvkL9E_K-4EDlh3NJSn0eLU0,2732
|
3
|
+
data_sitter/Validation.py,sha256=5jdIQZyTrEmXZ_SJP0lq-EEFKrGbYH6z4EQ56oFR7Ck,1474
|
4
|
+
data_sitter/__init__.py,sha256=qbE-wU8ELMFwOMG4UTK0lmzn5XF2MK3rc22E8ROgypo,113
|
5
|
+
data_sitter/cli.py,sha256=SBmxNC508qt8-C4x2IS6XNZLihtfeALw34QLLYr_p_Q,1686
|
6
|
+
data_sitter/field_types/BaseField.py,sha256=2s5wJjz1NoNWvSa-69mMxVf7f5wptuxr6UGVubM6MAQ,1997
|
7
|
+
data_sitter/field_types/FieldTypes.py,sha256=ntuguQtLnVon1cB2YvG0p2c1r0zk67qw6o4qfJHxLlY,158
|
8
|
+
data_sitter/field_types/FloatField.py,sha256=gHmiSg8Eft57T_J8covrEctMEGa0zuT6R3xJP2UlyIY,945
|
9
|
+
data_sitter/field_types/IntegerField.py,sha256=Ll6eool8Rwo2pzyXQz95TjJpzdgW9ShtCK1uHmOi_pQ,213
|
10
|
+
data_sitter/field_types/NumericField.py,sha256=uJ6ZB8vJg0xsY1grrVL6k8Heygm1O84fQjEtVg1uJjw,2916
|
11
|
+
data_sitter/field_types/StringField.py,sha256=ayxo5d_9xzNR9rAWS6dHNKiRapkFdsCxMtT4K5Qb7ek,4493
|
12
|
+
data_sitter/field_types/__init__.py,sha256=GdssttQCJksGcZn7oPM53vOsqOL6R5xRiRJDEtr38Ww,293
|
13
|
+
data_sitter/rules/Enums.py,sha256=W-3vXP7NWgkgZexrn8J9EytqZoaWK3EwIX0q5O3QJKY,105
|
14
|
+
data_sitter/rules/LogicalRule.py,sha256=4HRPw62HSpmMz_Yr2en4s1eVijlPurPDBHfwSliBS3o,2794
|
15
|
+
data_sitter/rules/MatchedRule.py,sha256=DudYNI50EpeIvsFQ7cMpKI1fziBG_yfMPNUZ7OWdygo,2162
|
16
|
+
data_sitter/rules/ProcessedRule.py,sha256=5EWjTJ6wgDih1cm8uxJOghLDQs3XK_ywGTGF5hhNly8,640
|
17
|
+
data_sitter/rules/Rule.py,sha256=xE31dUwLHD3IzcmPfdqLuXS_rmw-kFdIaCKHvpIArig,2115
|
18
|
+
data_sitter/rules/RuleRegistry.py,sha256=1ZMl-5u-6DZ1AGYZA77s6DpAxeN64EFEF-XKVKeQy-U,2698
|
19
|
+
data_sitter/rules/__init__.py,sha256=SHQZYp4VNzuygP-ZPegEXwmgj-_oUqEchi9TdbasA6U,465
|
20
|
+
data_sitter/rules/Parser/RuleParser.py,sha256=7biF5N3Cf3Rf5bgB4pXUpBaZ4r5EL1I9YHvSTjdydBA,2127
|
21
|
+
data_sitter/rules/Parser/__init__.py,sha256=F8qJ7luwq0C65e7pNOzBHB2sF1lMcvIFYfDNJj6XQTc,205
|
22
|
+
data_sitter/rules/Parser/alias_parameters_parser.py,sha256=xUgOFJCm42w1eUmZOQ2OsOhCsKGHev5g4gsm_kizciA,1529
|
23
|
+
data_sitter/rules/Parser/parser_utils.py,sha256=ypI021uYJTsHAoKGShAfnhd5xQGtqqTGTHozleefsLQ,642
|
24
|
+
data_sitter/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
|
+
data_sitter/utils/logger_config.py,sha256=fBbDNZOsmDnO6TtLHI1tty4EXi2AnbrA-OgkhXcm1Aw,1235
|
26
|
+
data_sitter-0.1.6.dist-info/METADATA,sha256=H3QVZTUxe4F2FtWVEhe0JEmlgZG4n8krQCyBqp5XgdQ,5597
|
27
|
+
data_sitter-0.1.6.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
28
|
+
data_sitter-0.1.6.dist-info/entry_points.txt,sha256=1I7xxqFZvA78wmDx7NGavttAb8JFWM3Wxgehftx_5C4,53
|
29
|
+
data_sitter-0.1.6.dist-info/top_level.txt,sha256=Q7N21PYeqIdRbDvZQCJXhbbv0PFIf876gu1_DpInH_E,12
|
30
|
+
data_sitter-0.1.6.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: data-sitter
|
3
|
-
Version: 0.1.3
|
4
|
-
Summary: A Python library that reads data contracts and generates Pydantic models for seamless data validation.
|
5
|
-
Author-email: Lázaro Pereira Candea <lazaro@candea.es>
|
6
|
-
Requires-Dist: python-dotenv==1.0.1
|
7
|
-
Requires-Dist: parse_type==0.6.4
|
8
|
-
Requires-Dist: pydantic==2.10.5
|
@@ -1,26 +0,0 @@
|
|
1
|
-
data_sitter/Contract.py,sha256=E3VYrCQZhGk79coHRTV0hCvLAV8uEhYRBMCnCuF_e48,3494
|
2
|
-
data_sitter/FieldResolver.py,sha256=aSavmk3V8QCphLRL6i3T_V2DIsWfEKBdcCnZC71hrx0,1895
|
3
|
-
data_sitter/Validation.py,sha256=MHwPMK06XO9YqdWMQMZ0QSFSk7UfkfmE19TmCTUfi3c,940
|
4
|
-
data_sitter/__init__.py,sha256=qbE-wU8ELMFwOMG4UTK0lmzn5XF2MK3rc22E8ROgypo,113
|
5
|
-
data_sitter/cli.py,sha256=1ICrtokqV5RvvWhzWKAeS5ZUSUpiviQyy2JSK71ER10,1666
|
6
|
-
data_sitter/field_types/BaseField.py,sha256=_Pg6a7gdmQFwb4f7LDyOxElX8j0NnTYZGOJJr_jddt8,1797
|
7
|
-
data_sitter/field_types/FloatField.py,sha256=pWU449uUFzlpnIpZI-2WxN1YKv7PxIiYe_c7W91VqCc,147
|
8
|
-
data_sitter/field_types/IntegerField.py,sha256=o__5z3bg6wsx7FIfJbBYZW5b760-WSZw_05J-OSKXR0,147
|
9
|
-
data_sitter/field_types/NumericField.py,sha256=ncPSrUOOgU5MPcA7in_lR6Luqb1xOo8m7UL6TW1qqGw,2665
|
10
|
-
data_sitter/field_types/StringField.py,sha256=XVN_0SE7bVlJijEiVIe_Ce22olY-DI0lmmZq3LU7TVE,3444
|
11
|
-
data_sitter/field_types/__init__.py,sha256=GdssttQCJksGcZn7oPM53vOsqOL6R5xRiRJDEtr38Ww,293
|
12
|
-
data_sitter/rules/MatchedRule.py,sha256=uHXuo7Np-Bq7IOHaHMYFmYFPRT8aYDEKCdFKcvWf4DM,1946
|
13
|
-
data_sitter/rules/Rule.py,sha256=UzZku6qo6GEUmDT86KS1Ee-2S9f_TTYlHC_O87V-984,1327
|
14
|
-
data_sitter/rules/RuleRegistry.py,sha256=YwBTsGY8S0ZDuwHuyQwcFgUWYORXK0Q44mV8v2oFVKc,2052
|
15
|
-
data_sitter/rules/__init__.py,sha256=_cTO0SUkW_WW2VBx2NGd8n5TUio7gptkBr9MorW2ZZk,289
|
16
|
-
data_sitter/rules/Parser/RuleParser.py,sha256=7biF5N3Cf3Rf5bgB4pXUpBaZ4r5EL1I9YHvSTjdydBA,2127
|
17
|
-
data_sitter/rules/Parser/__init__.py,sha256=F8qJ7luwq0C65e7pNOzBHB2sF1lMcvIFYfDNJj6XQTc,205
|
18
|
-
data_sitter/rules/Parser/alias_parameters_parser.py,sha256=jsx_JWzkA4lY2nq4hzc4fG7_nnh7yLxmVj6WIP1Mm68,1933
|
19
|
-
data_sitter/rules/Parser/parser_utils.py,sha256=ypI021uYJTsHAoKGShAfnhd5xQGtqqTGTHozleefsLQ,642
|
20
|
-
data_sitter/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
data_sitter/utils/logger_config.py,sha256=w9E4jWfGJnkC9tZz4qrolSqglKm4jEB8l6vjC-qfj8A,1215
|
22
|
-
data_sitter-0.1.3.dist-info/METADATA,sha256=cKlUMxk_rNht0G8bHiotyRn7w6Ho6Vila8-0N7yjVhs,324
|
23
|
-
data_sitter-0.1.3.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
24
|
-
data_sitter-0.1.3.dist-info/entry_points.txt,sha256=1I7xxqFZvA78wmDx7NGavttAb8JFWM3Wxgehftx_5C4,53
|
25
|
-
data_sitter-0.1.3.dist-info/top_level.txt,sha256=Q7N21PYeqIdRbDvZQCJXhbbv0PFIf876gu1_DpInH_E,12
|
26
|
-
data_sitter-0.1.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|