excel-validator 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 EU H2020 AGENT project
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ prune src/excel_validator/config/agent
@@ -0,0 +1,67 @@
1
+ Metadata-Version: 2.1
2
+ Name: excel-validator
3
+ Version: 0.0.1
4
+ Summary: Validation of template based Excel files
5
+ Author-email: Matthijs Brouwer <matthijs.brouwer@wur.nl>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2024 EU H2020 AGENT project
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/matthijsbrouwer/excel-validator
29
+ Classifier: Topic :: Internet :: Proxy Servers
30
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
31
+ Classifier: Programming Language :: Python :: 3.11
32
+ Classifier: Operating System :: OS Independent
33
+ Requires-Python: >=3.11
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: frictionless>=5.17.0
37
+ Requires-Dist: jsonschema>=4.22.0
38
+ Requires-Dist: numpy>=1.26.4
39
+ Requires-Dist: openpyxl>=3.1.2
40
+ Requires-Dist: pandas>=2.2.2
41
+ Requires-Dist: rich>=13.7.1
42
+ Requires-Dist: tqdm>=4.66.4
43
+
44
+ # Excel Validator
45
+
46
+ Excel Validator is a Python package designed to validate Excel files (.xlsx) based on configured schemas. The tool ensures your Excel files adhere to specified schemas and generates detailed reports in case of validation errors. Built on the robust Frictionless library, Excel Validator also allows for dynamic schema creation, where fields are included based on row data from other sheets.
47
+
48
+ ## Features
49
+
50
+ * Validate Excel files against predefined schemas.
51
+ * Generate detailed reports highlighting any validation issues.
52
+ * Dynamic schema creation based on data from other sheets.
53
+ * Easy integration with your existing data processing workflows.
54
+ * Built on top of the Frictionless library for reliable and extensible validation.
55
+
56
+ ## Installation
57
+
58
+ You can install Excel Validator via pip:
59
+
60
+ ```
61
+ pip install excel-validator
62
+ ```
63
+
64
+
65
+ ---
66
+ This software has been developed for the [AGENT](https://www.agent-project.eu/) project.
67
+
@@ -0,0 +1,24 @@
1
+ # Excel Validator
2
+
3
+ Excel Validator is a Python package designed to validate Excel files (.xlsx) based on configured schemas. The tool ensures your Excel files adhere to specified schemas and generates detailed reports in case of validation errors. Built on the robust Frictionless library, Excel Validator also allows for dynamic schema creation, where fields are included based on row data from other sheets.
4
+
5
+ ## Features
6
+
7
+ * Validate Excel files against predefined schemas.
8
+ * Generate detailed reports highlighting any validation issues.
9
+ * Dynamic schema creation based on data from other sheets.
10
+ * Easy integration with your existing data processing workflows.
11
+ * Built on top of the Frictionless library for reliable and extensible validation.
12
+
13
+ ## Installation
14
+
15
+ You can install Excel Validator via pip:
16
+
17
+ ```
18
+ pip install excel-validator
19
+ ```
20
+
21
+
22
+ ---
23
+ This software has been developed for the [AGENT](https://www.agent-project.eu/) project.
24
+
@@ -0,0 +1,40 @@
1
+ [project]
2
+ name = "excel-validator"
3
+ authors = [
4
+ { name="Matthijs Brouwer", email="matthijs.brouwer@wur.nl" },
5
+ ]
6
+ description = "Validation of template based Excel files"
7
+ readme = "README.md"
8
+ license = {file = "LICENSE"}
9
+ requires-python = ">=3.11"
10
+ dynamic = ["dependencies","version"]
11
+ classifiers = [
12
+ "Topic :: Internet :: Proxy Servers",
13
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
14
+ "Programming Language :: Python :: 3.11",
15
+ "Operating System :: OS Independent"
16
+ ]
17
+
18
+ [project.urls]
19
+ Homepage = "https://github.com/matthijsbrouwer/excel-validator"
20
+
21
+ [project.scripts]
22
+ excel_validator = "excel_validator.script.service:service"
23
+
24
+ [build-system]
25
+ requires = [
26
+ "read_version[toml] ~= 0.3.0",
27
+ "setuptools >= 42.0.0",
28
+ "wheel"
29
+ ]
30
+ build-backend = "setuptools.build_meta"
31
+
32
+ [tool.setuptools.package-data]
33
+ excel_validator = ["**/*.json"]
34
+
35
+ [tool.setuptools.dynamic]
36
+ dependencies = {file = ["requirements.txt"]}
37
+
38
+ [tool.read_version]
39
+ version = "src.excel_validator._version:__version__"
40
+
@@ -0,0 +1,7 @@
1
+ frictionless>=5.17.0
2
+ jsonschema>=4.22.0
3
+ numpy>=1.26.4
4
+ openpyxl>=3.1.2
5
+ pandas>=2.2.2
6
+ rich>=13.7.1
7
+ tqdm>=4.66.4
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ from ._version import __version__
2
+ from .validator import Validate
3
+ from .module import AbstractValidationModule
4
+
5
+ __author__ = "Matthijs Brouwer"
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,7 @@
1
+ {
2
+ "settings": {
3
+ "allowAdditionalSheets": true,
4
+ "schemaPath": "schema"
5
+ },
6
+ "sheets":[]
7
+ }
@@ -0,0 +1,291 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://www.plantbreeding.wur.nl/xlsx-validator/configuration.schema.json",
4
+ "title": "Configuration xlsx-validator",
5
+ "description": "A configuration file for the xlsx-validator package",
6
+ "type": "object",
7
+ "properties": {
8
+ "settings": {
9
+ "type": "object",
10
+ "description": "General settings",
11
+ "properties": {
12
+ "allowAdditionalSheets": {
13
+ "type": "boolean",
14
+ "description": "Allow presence of additional sheets",
15
+ "default": false
16
+ },
17
+ "requireSheetOrder": {
18
+ "type": "boolean",
19
+ "description": "Require the sheets to be in the defined order",
20
+ "default": false
21
+ },
22
+ "adjustTypeForStringColumns": {
23
+ "type": "boolean",
24
+ "description": "Automatically adjust type for string columns",
25
+ "default": false
26
+ },
27
+ "removeEmptyRows": {
28
+ "type": "boolean",
29
+ "description": "Automatically remove empty rows",
30
+ "default": false
31
+ },
32
+ "removeEmptyColumns": {
33
+ "type": "boolean",
34
+ "description": "Automatically remove empty columns",
35
+ "default": false
36
+ },
37
+ "schemaPath": {
38
+ "type": "string",
39
+ "description": "Location of the schema files"
40
+ },
42
+ "modulesPath": {
43
+ "type": "string",
44
+ "description": "Location of additional modules"
45
+ }
47
+ },
48
+ "additionalProperties": false
49
+ },
50
+ "sheets": {
51
+ "type": "array",
52
+ "items": {
53
+ "type": "object",
54
+ "properties": {
55
+ "name": {"type": "string", "description": "Name of the sheet"},
56
+ "optional": {"type": "boolean"},
57
+ "dependencies": {
58
+ "type": "array",
59
+ "items": {
60
+ "type": "string"
61
+ }
62
+ },
63
+ "resource": {
64
+ "type": "string",
65
+ "description": "Name of the resource; must start with a lowercase alphanumeric character and can only contain lowercase alphanumeric characters plus '.', '-' and '_'",
66
+ "pattern": "^[a-z0-9][a-z0-9._-]*$"},
67
+ "adjustTypeForStringColumns": {
68
+ "type": "boolean",
69
+ "description": "Automatically adjust type for string columns; overrides the general setting"
70
+ },
71
+ "removeEmptyRows": {
72
+ "type": "boolean",
73
+ "description": "Automatically remove empty rows; overrides the general setting"
74
+ },
75
+ "removeEmptyColumns": {
76
+ "type": "boolean",
77
+ "description": "Automatically remove empty columns; overrides the general setting"
78
+ },
79
+ "schema": {
80
+ "type": "object",
81
+ "properties": {
82
+ "file": {"type": "string"},
83
+ "data": {"type": "object"},
84
+ "dynamic": {
85
+ "type": "array",
86
+ "items": {
87
+ "type": "object",
88
+ "properties": {
89
+ "position": {"type": "string", "enum": ["before","after"]},
90
+ "field": {"type": "string"},
91
+ "dynamicResources": {
92
+ "type": "object",
93
+ "additionalProperties": {
94
+ "type": "object",
95
+ "properties": {
96
+ "resource": {"type": "string"},
97
+ "condition": {"$ref": "#/$defs/conditionResource"}
98
+ },
99
+ "required": ["resource"],
100
+ "additionalProperties": false
101
+ }
102
+ },
103
+ "linkedResources": {
104
+ "type": "object",
105
+ "additionalProperties": {
106
+ "type": "object",
107
+ "properties": {
108
+ "resource": {"type": "string"},
109
+ "condition": {"$ref": "#/$defs/conditionLinkedResource"}
110
+ },
111
+ "required": ["resource"],
112
+ "additionalProperties": false
113
+ }
114
+ },
115
+ "mappings": {
116
+ "type": "object",
117
+ "additionalProperties": {
118
+ "type": "object",
119
+ "properties": {
120
+ "default": {"type": "string"},
121
+ "map": {
122
+ "type": "object",
123
+ "additionalProperties": {"$ref": "#/$defs/anyValue"}
124
+ }
125
+ },
126
+ "required": ["map"],
127
+ "additionalProperties": false
128
+ }
129
+ },
130
+ "fields": {
131
+ "type": "array",
132
+ "items": {
133
+ "type": "object",
134
+ "patternProperties": {
135
+ "^(name|type|rdfType|title|format|example|description)$": {"oneOf": [
136
+ {"type": "string"},
137
+ {"$ref": "#/$defs/fieldProperty"}
138
+ ]}
139
+ },
140
+ "properties": {
141
+ "constraints": {
142
+ "type": "object",
143
+ "patternProperties": {
144
+ "^(required|unique)$": {"oneOf": [
145
+ {"type": "boolean"},
146
+ {"$ref": "#/$defs/fieldProperty"}]
147
+ },
148
+ "^(minLength|maxLength)$": {"oneOf": [
149
+ {"type": "integer"},
150
+ {"$ref": "#/$defs/fieldProperty"}
151
+ ]},
152
+ "^(minimum|maximum)$": {"oneOf": [
153
+ {"type": "number"},
154
+ {"$ref": "#/$defs/fieldProperty"}
155
+ ]},
156
+ "^pattern$": {"oneOf": [
157
+ {"type": "string"},
158
+ {"$ref": "#/$defs/fieldProperty"}
159
+ ]},
160
+ "^enum$": {"oneOf": [
161
+ {"type": "array", "items": {"$ref": "#/$defs/anyValue"}},
162
+ {"$ref": "#/$defs/fieldProperty"}
163
+ ]}
164
+ },
165
+ "additionalProperties": false
166
+ }
167
+ },
168
+ "required": ["name"],
169
+ "additionalProperties": false
170
+ }
171
+ }
172
+ },
173
+ "required": ["position","fields"],
174
+ "additionalProperties": false
175
+ }
176
+ }
177
+ },
178
+ "oneOf": [
179
+ {"required": ["file"], "not": {"required": ["data"]}},
180
+ {"required": ["data"], "not": {"required": ["file"]}}
181
+ ],
182
+ "additionalProperties": false
183
+ },
184
+ "checklist": {
185
+ "type": "object",
186
+ "properties": {
187
+ "file": {"type": "string"},
188
+ "data": {"type": "object"}
189
+ },
190
+ "oneOf": [
191
+ {"required": ["file"], "not": {"required": ["data"]}},
192
+ {"required": ["data"], "not": {"required": ["file"]}}
193
+ ],
194
+ "additionalProperties": false
195
+ },
196
+ "modules": {
197
+ "type": "array",
198
+ "items": {
199
+ "type": "object",
200
+ "properties": {
201
+ "name": {"type": "string", "description": "Name of the module"},
202
+ "config": {"type": "object", "description": "Configuration"}
203
+ },
204
+ "required": ["name"],
205
+ "additionalProperties": false
206
+ }
207
+ }
208
+ },
209
+ "required": ["name"],
210
+ "additionalProperties": false
211
+ }
212
+ }
213
+ },
214
+ "required": ["settings","sheets"],
215
+ "additionalProperties": false,
216
+ "$defs": {
217
+ "anyValue": {
218
+ "type": ["string","number"]
219
+ },
220
+ "conditionResource": {
221
+ "type": "array",
222
+ "items": {
223
+ "type": "object",
224
+ "properties": {
225
+ "field": {"type": "string"},
226
+ "value": {"$ref": "#/$defs/anyValue"}
227
+ },
228
+ "required": ["field","value"]
229
+ }
230
+ },
231
+ "conditionLinkedResource": {
232
+ "type": "array",
233
+ "description": "condition on linkedResource based on value or dynamicResource",
234
+ "items": {
235
+ "type": "object",
236
+ "properties": {
237
+ "field": {"type": "string"},
238
+ "value": {
239
+ "oneOf": [
240
+ {"$ref": "#/$defs/anyValue"},
241
+ {
242
+ "type": "object",
243
+ "properties": {
244
+ "field": {"type": "string"},
245
+ "dynamicResource": {"type": "string"}
246
+ },
247
+ "required": ["field","dynamicResource"],
248
+ "additionalProperties": false
249
+ }
250
+ ]
251
+ }
252
+ },
253
+ "required": ["field","value"]
254
+ }
255
+ },
256
+ "conditionFieldProperty": {
257
+ "type": "array",
258
+ "description": "condition on dynamicResource to be satisfied to add field property",
259
+ "items": {
260
+ "type": "object",
261
+ "properties": {
262
+ "dynamicResource": {"type": "string"},
263
+ "field": {"type": "string"},
264
+ "value": {"oneOf": [
265
+ {"type": "array", "items": {"$ref": "#/$defs/anyValue"}},
266
+ {"$ref": "#/$defs/anyValue"}
267
+ ]}
268
+ },
269
+ "required": ["dynamicResource","field","value"]
270
+ }
271
+ },
272
+ "fieldProperty": {
273
+ "type": "object",
274
+ "description": "field property with value generated from linkedResource or dynamicResource",
275
+ "properties": {
276
+ "field": {"type": "string"},
277
+ "linkedResource": {"type": "string"},
278
+ "dynamicResource": {"type": "string"},
279
+ "condition": {"$ref": "#/$defs/conditionFieldProperty"},
280
+ "mapping": {"type": "string"},
281
+ "prefix": {"type": "string"},
282
+ "postfix": {"type": "string"}
283
+ },
284
+ "oneOf": [
285
+ {"required": ["field","linkedResource"], "not": {"required": ["dynamicResource"]}},
286
+ {"required": ["field","dynamicResource"], "not": {"required": ["linkedResource"]}}
287
+ ],
288
+ "additionalProperties": false
289
+ }
290
+ }
291
+ }