excel-validator 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ from ._version import __version__
2
+ from .validator import Validate
3
+ from .module import AbstractValidationModule
4
+
5
+ __author__ = "Matthijs Brouwer"
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,7 @@
1
+ {
2
+ "settings": {
3
+ "allowAdditionalSheets": true,
4
+ "schemaPath": "schema"
5
+ },
6
+ "sheets":[]
7
+ }
@@ -0,0 +1,291 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://www.plantbreeding.wur.nl/xlsx-validator/configuration.schema.json",
4
+ "title": "Configuration xlsx-validator",
5
+ "description": "A configuration file for the xlsx-validator package",
6
+ "type": "object",
7
+ "properties": {
8
+ "settings": {
9
+ "type": "object",
10
+ "description": "General settings",
11
+ "properties": {
12
+ "allowAdditionalSheets": {
13
+ "type": "boolean",
14
+ "description": "Allow presence of additional sheets",
15
+ "default": false
16
+ },
17
+ "requireSheetOrder": {
18
+ "type": "boolean",
19
+ "description": "Require the sheets to be in the defined order",
20
+ "default": false
21
+ },
22
+ "adjustTypeForStringColumns": {
23
+ "type": "boolean",
24
+ "description": "Automatically adjust type for string columns",
25
+ "default": false
26
+ },
27
+ "removeEmptyRows": {
28
+ "type": "boolean",
29
+ "description": "Automatically remove empty rows",
30
+ "default": false
31
+ },
32
+ "removeEmptyColumns": {
33
+ "type": "boolean",
34
+ "description": "Automatically remove empty columns",
35
+ "default": false
36
+ },
37
+ "schemaPath": {
38
+ "type": "string",
39
+ "description": "Location additional modules",
40
+ "default": false
41
+ },
42
+ "modulesPath": {
43
+ "type": "string",
44
+ "description": "Location additional modules",
45
+ "default": false
46
+ }
47
+ },
48
+ "additionalProperties": false
49
+ },
50
+ "sheets": {
51
+ "type": "array",
52
+ "items": {
53
+ "type": "object",
54
+ "properties": {
55
+ "name": {"type": "string", "description": "Name of the sheet"},
56
+ "optional": {"type": "boolean"},
57
+ "dependencies": {
58
+ "type": "array",
59
+ "items": {
60
+ "type": "string"
61
+ }
62
+ },
63
+ "resource": {
64
+ "type": "string",
65
+ "description": "Name of the resource; can only contain lowercase alphanumeric characters plus ., - and _",
66
+ "pattern": "^[a-z0-9][a-z0-9\\.,\\-_]*$"},
67
+ "adjustTypeForStringColumns": {
68
+ "type": "boolean",
69
+ "description": "Automatically adjust type for string columns, does override settings"
70
+ },
71
+ "removeEmptyRows": {
72
+ "type": "boolean",
73
+ "description": "Automatically remove empty rows, does override settings"
74
+ },
75
+ "removeEmptyColumns": {
76
+ "type": "boolean",
77
+ "description": "Automatically remove empty columns, does override settings"
78
+ },
79
+ "schema": {
80
+ "type": "object",
81
+ "properties": {
82
+ "file": {"type": "string"},
83
+ "data": {"type": "object"},
84
+ "dynamic": {
85
+ "type": "array",
86
+ "items": {
87
+ "type": "object",
88
+ "properties": {
89
+ "position": {"type": "string", "enum": ["before","after"]},
90
+ "field": {"type": "string"},
91
+ "dynamicResources": {
92
+ "type": "object",
93
+ "additionalProperties": {
94
+ "type": "object",
95
+ "properties": {
96
+ "resource": {"type": "string"},
97
+ "condition": {"$ref": "#/$defs/conditionResource"}
98
+ },
99
+ "required": ["resource"],
100
+ "additionalProperties": false
101
+ }
102
+ },
103
+ "linkedResources": {
104
+ "type": "object",
105
+ "additionalProperties": {
106
+ "type": "object",
107
+ "properties": {
108
+ "resource": {"type": "string"},
109
+ "condition": {"$ref": "#/$defs/conditionLinkedResource"}
110
+ },
111
+ "required": ["resource"],
112
+ "additionalProperties": false
113
+ }
114
+ },
115
+ "mappings": {
116
+ "type": "object",
117
+ "additionalProperties": {
118
+ "type": "object",
119
+ "properties": {
120
+ "default": {"type": "string"},
121
+ "map": {
122
+ "type": "object",
123
+ "additionalProperties": {"$ref": "#/$defs/anyValue"}
124
+ }
125
+ },
126
+ "required": ["map"],
127
+ "additionalProperties": false
128
+ }
129
+ },
130
+ "fields": {
131
+ "type": "array",
132
+ "items": {
133
+ "type": "object",
134
+ "patternProperties": {
135
+ "name|type|rdfType|title|format|example|description": {"oneOf": [
136
+ {"type": "string"},
137
+ {"$ref": "#/$defs/fieldProperty"}
138
+ ]}
139
+ },
140
+ "properties": {
141
+ "constraints": {
142
+ "type": "object",
143
+ "patternProperties": {
144
+ "required|unique": {"oneOf": [
145
+ {"type": "boolean"},
146
+ {"$ref": "#/$defs/fieldProperty"}]
147
+ },
148
+ "minLength|maxLength": {"oneOf": [
149
+ {"type": "integer"},
150
+ {"$ref": "#/$defs/fieldProperty"}
151
+ ]},
152
+ "minimum|maximum": {"oneOf": [
153
+ {"type": "number"},
154
+ {"$ref": "#/$defs/fieldProperty"}
155
+ ]},
156
+ "pattern": {"oneOf": [
157
+ {"type": "string"},
158
+ {"$ref": "#/$defs/fieldProperty"}
159
+ ]},
160
+ "enum": {"oneOf": [
161
+ {"type": "array", "items": {"$ref": "#/$defs/anyValue"}},
162
+ {"$ref": "#/$defs/fieldProperty"}
163
+ ]}
164
+ },
165
+ "additionalProperties": false
166
+ }
167
+ },
168
+ "required": ["name"],
169
+ "additionalProperties": false
170
+ }
171
+ }
172
+ },
173
+ "required": ["position","fields"],
174
+ "additionalProperties": false
175
+ }
176
+ }
177
+ },
178
+ "oneOf": [
179
+ {"required": ["file"], "not": {"required": ["data"]}},
180
+ {"required": ["data"], "not": {"required": ["file"]}}
181
+ ],
182
+ "additionalProperties": false
183
+ },
184
+ "checklist": {
185
+ "type": "object",
186
+ "properties": {
187
+ "file": {"type": "string"},
188
+ "data": {"type": "object"}
189
+ },
190
+ "oneOf": [
191
+ {"required": ["file"], "not": {"required": ["data"]}},
192
+ {"required": ["data"], "not": {"required": ["file"]}}
193
+ ],
194
+ "additionalProperties": false
195
+ },
196
+ "modules": {
197
+ "type": "array",
198
+ "items": {
199
+ "type": "object",
200
+ "properties": {
201
+ "name": {"type": "string", "description": "Name of the module"},
202
+ "config": {"type": "object", "description": "Configuration"}
203
+ }
204
+ },
205
+ "required": ["name"],
206
+ "additionalProperties": false
207
+ }
208
+ },
209
+ "required": ["name"],
210
+ "additionalProperties": false
211
+ }
212
+ }
213
+ },
214
+ "required": ["settings","sheets"],
215
+ "additionalProperties": false,
216
+ "$defs": {
217
+ "anyValue": {
218
+ "type": ["string","number"]
219
+ },
220
+ "conditionResource": {
221
+ "type": "array",
222
+ "items": {
223
+ "type": "object",
224
+ "properties": {
225
+ "field": {"type": "string"},
226
+ "value": {"$ref": "#/$defs/anyValue"}
227
+ }
228
+ },
229
+ "required": ["field","value"]
230
+ },
231
+ "conditionLinkedResource": {
232
+ "type": "array",
233
+ "description": "condition on linkedResource based on value or dynamicResource",
234
+ "items": {
235
+ "type": "object",
236
+ "properties": {
237
+ "field": {"type": "string"},
238
+ "value": {
239
+ "oneOf": [
240
+ {"$ref": "#/$defs/anyValue"},
241
+ {
242
+ "type": "object",
243
+ "properties": {
244
+ "field": {"type": "string"},
245
+ "dynamicResource": {"type": "string"}
246
+ },
247
+ "required": ["field","dynamicResource"],
248
+ "additionalProperties": false
249
+ }
250
+ ]
251
+ }
252
+ }
253
+ },
254
+ "required": ["field","value"]
255
+ },
256
+ "conditionFieldProperty": {
257
+ "type": "array",
258
+ "description": "condition on dynamicResource to be satisfied to add field property",
259
+ "items": {
260
+ "type": "object",
261
+ "properties": {
262
+ "dynamicResource": {"type": "string"},
263
+ "field": {"type": "string"},
264
+ "value": {"oneOf": [
265
+ {"type": "array", "items": {"$ref": "#/$defs/anyValue"}},
266
+ {"$ref": "#/$defs/anyValue"}
267
+ ]}
268
+ }
269
+ },
270
+ "required": ["dynamicResource","field","value"]
271
+ },
272
+ "fieldProperty": {
273
+ "type": "object",
274
+ "description": "field property with value generated from linkedResource or dynamicResource",
275
+ "properties": {
276
+ "field": {"type": "string"},
277
+ "linkedResource": {"type": "string"},
278
+ "dynamicResource": {"type": "string"},
279
+ "condition": {"$ref": "#/$defs/conditionFieldProperty"},
280
+ "mapping": {"type": "string"},
281
+ "prefix": {"type": "string"},
282
+ "postfix": {"type": "string"}
283
+ },
284
+ "oneOf": [
285
+ {"required": ["field","linkedResource"], "not": {"required": ["dynamicResource"]}},
286
+ {"required": ["field","dynamicResource"], "not": {"required": ["linkedResource"]}}
287
+ ],
288
+ "additionalProperties": false
289
+ }
290
+ }
291
+ }
@@ -0,0 +1,217 @@
1
+ # pylint: disable=line-too-long,invalid-name,consider-using-f-string
2
+ """Additional functions."""
3
+
4
+ import numpy as np
5
+ from numbers import Number
6
+
7
def checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
    """
    check value condition for descriptor

    Returns True when ``key`` is present in ``entry`` and every condition
    listed under ``entry[key]["condition"]`` (if any) is satisfied by
    ``dynamicResourcesEntry``. Returns False when the key is missing, when a
    condition fails, or when an exception occurs (which is logged).

    A condition is a dict with the keys "dynamicResource", "field" and
    "value". The value found in ``dynamicResourcesEntry`` may be a scalar or a
    list; the condition "value" may be a scalar or a list of accepted values.
    The condition holds when at least one found value equals the condition
    value or is contained in the list of accepted values.
    """
    try:
        if key not in entry:
            return False
        definition = entry[key]
        if isinstance(definition,dict) and "condition" in definition:
            for condition in definition["condition"]:
                value = dynamicResourcesEntry.get(condition["dynamicResource"],{}).get(condition["field"],None)
                if value is None:
                    return False
                expected = condition["value"]
                candidates = value if isinstance(value,list) else [value]
                # a list of accepted values could never match with a plain
                # equality test, so membership is checked as well
                if not any(
                    candidate==expected or (isinstance(expected,list) and candidate in expected)
                    for candidate in candidates
                ):
                    return False
        return True
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return False
27
+
28
def setDescriptorValueDynamicString(key,descriptor,entry,dynamicResourcesEntry,mappings,linkedResources,logger):
    """
    resolve entry[key] to a string and store it in descriptor[key]

    The value is either a literal string, a field of a dynamic resource or the
    first value that is not None in a column of a linked resource. An optional
    mapping translates the value, and an optional prefix/postfix is wrapped
    around the result. Nothing is stored when the condition check fails or no
    value could be resolved. Exceptions are logged; the descriptor is always
    returned.
    """
    try:
        if not checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
            return descriptor
        source = entry[key]
        isDefinition = isinstance(source,dict)
        resolved = None
        if isinstance(source,str):
            resolved = source
        elif "dynamicResource" in source:
            resolved = dynamicResourcesEntry.get(source["dynamicResource"],{}).get(source["field"],None)
        elif "linkedResource" in source and source["linkedResource"] in linkedResources:
            frame = linkedResources[source["linkedResource"]]
            if frame.shape[0]>0:
                cells = list(frame[source["field"]].values)
                resolved = next((str(cell) for cell in cells if cell is not None),None)
        if resolved is None:
            return descriptor
        #mapping
        if isDefinition and "mapping" in source:
            text = None
            if source["mapping"] in mappings:
                table = mappings[source["mapping"]]
                if resolved in table["map"]:
                    text = str(table["map"][resolved])
                elif "default" in table:
                    text = str(table["default"])
        else:
            text = str(resolved)
        #prefix/postfix
        if text is not None:
            prefix = source.get("prefix","") if isDefinition else ""
            postfix = source.get("postfix","") if isDefinition else ""
            descriptor[key] = "%s%s%s" % (prefix,text,postfix)
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return descriptor
66
+
67
def setDescriptorValueDynamicInteger(key,descriptor,entry,dynamicResourcesEntry,mappings,linkedResources,logger):
    """
    resolve entry[key] to an integer and store it in descriptor[key]

    The value is either a literal int, a field of a dynamic resource or the
    first value that is not None in a column of a linked resource. An optional
    mapping translates the value. Nothing is stored when the condition check
    fails or no value could be resolved. Exceptions are logged; the descriptor
    is always returned.
    """
    try:
        if not checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
            return descriptor
        source = entry[key]
        number = None
        if isinstance(source,int):
            number = source
        elif "dynamicResource" in source:
            number = dynamicResourcesEntry.get(source["dynamicResource"],{}).get(source["field"],None)
        elif "linkedResource" in source and source["linkedResource"] in linkedResources:
            frame = linkedResources[source["linkedResource"]]
            if frame.shape[0]>0:
                cells = list(frame[source["field"]].values)
                number = next((int(cell) for cell in cells if cell is not None),None)
        if number is None:
            return descriptor
        if isinstance(source,dict) and "mapping" in source:
            if source["mapping"] in mappings:
                table = mappings[source["mapping"]]
                if number in table["map"]:
                    descriptor[key] = int(table["map"][number])
                elif "default" in table:
                    descriptor[key] = int(table["default"])
        else:
            descriptor[key] = int(number)
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return descriptor
98
+
99
def setDescriptorValueDynamicNumber(key,descriptor,entry,dynamicResourcesEntry,mappings,linkedResources,logger):
    """
    resolve entry[key] to a number and store it in descriptor[key]

    The value is either a literal Number, a field of a dynamic resource or the
    first value that is not None in a column of a linked resource. An optional
    mapping translates the value; no type conversion is applied. Nothing is
    stored when the condition check fails or no value could be resolved.
    Exceptions are logged; the descriptor is always returned.
    """
    try:
        if not checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
            return descriptor
        source = entry[key]
        number = None
        if isinstance(source,Number):
            number = source
        elif "dynamicResource" in source:
            number = dynamicResourcesEntry.get(source["dynamicResource"],{}).get(source["field"],None)
        elif "linkedResource" in source and source["linkedResource"] in linkedResources:
            frame = linkedResources[source["linkedResource"]]
            if frame.shape[0]>0:
                cells = list(frame[source["field"]].values)
                number = next((cell for cell in cells if cell is not None),None)
        if number is None:
            return descriptor
        if isinstance(source,dict) and "mapping" in source:
            if source["mapping"] in mappings:
                table = mappings[source["mapping"]]
                if number in table["map"]:
                    descriptor[key] = table["map"][number]
                elif "default" in table:
                    descriptor[key] = table["default"]
        else:
            descriptor[key] = number
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return descriptor
130
+
131
def setDescriptorValueDynamicBoolean(key,descriptor,entry,dynamicResourcesEntry,mappings,linkedResources,logger):
    """
    resolve entry[key] to a boolean and store it in descriptor[key]

    The value is either a literal bool, a field of a dynamic resource or the
    first value that is not None in a column of a linked resource. An optional
    mapping translates the value. Nothing is stored when the condition check
    fails or no value could be resolved. Exceptions are logged; the descriptor
    is always returned.
    """
    try:
        if not checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
            return descriptor
        source = entry[key]
        flag = None
        if isinstance(source,bool):
            flag = source
        elif "dynamicResource" in source:
            flag = dynamicResourcesEntry.get(source["dynamicResource"],{}).get(source["field"],None)
        elif "linkedResource" in source and source["linkedResource"] in linkedResources:
            frame = linkedResources[source["linkedResource"]]
            if frame.shape[0]>0:
                cells = list(frame[source["field"]].values)
                flag = next((bool(cell) for cell in cells if cell is not None),None)
        if flag is None:
            return descriptor
        if isinstance(source,dict) and "mapping" in source:
            if source["mapping"] in mappings:
                table = mappings[source["mapping"]]
                if flag in table["map"]:
                    descriptor[key] = bool(table["map"][flag])
                elif "default" in table:
                    descriptor[key] = bool(table["default"])
        else:
            descriptor[key] = bool(flag)
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return descriptor
162
+
163
def setDescriptorValueDynamicList(key,descriptor,entry,dynamicResourcesEntry,mappings,linkedResources,logger):
    """
    set descriptor key/value for list

    The list is either a literal list in ``entry[key]`` or all values of a
    column of a linked resource; a dynamicResource reference does not produce
    a list. When ``entry[key]`` names a mapping that exists in ``mappings``,
    each item is replaced by its mapped value or by the mapping default; items
    with neither are dropped. Nothing is stored when the condition check fails
    or no list could be resolved. Exceptions are logged; the descriptor is
    always returned.
    """
    try:
        if checkDescriptorValueCondition(key,entry,dynamicResourcesEntry,logger):
            source = entry[key]
            value = None
            if isinstance(source,list):
                # a built-in list has no tolist(); copy it instead
                value = list(source)
            elif "dynamicResource" in source:
                # no list value is derived from a dynamic resource
                value = None
            elif "linkedResource" in source:
                if source["linkedResource"] in linkedResources:
                    frame = linkedResources[source["linkedResource"]]
                    if frame.shape[0]>0:
                        value = list(frame[source["field"]].values.tolist())
            if value is not None:
                if isinstance(source,dict) and "mapping" in source:
                    if source["mapping"] in mappings:
                        mapping = mappings[source["mapping"]]
                        mapped = []
                        for item in value:
                            if item in mapping["map"]:
                                mapped.append(mapping["map"][item])
                            elif "default" in mapping:
                                mapped.append(mapping["default"])
                        # assign once, so a failure halfway leaves the descriptor untouched
                        descriptor[key] = mapped
                else:
                    descriptor[key] = value
    except Exception as ex:
        logger.error("%s : %s, line %s" % (type(ex).__name__, __file__, ex.__traceback__.tb_lineno))
    return descriptor
193
+
194
def excelCoordinates(error):
    """
    compute excel coordinates for frictionless error

    Reads "row_number" and "field_number" from ``error`` via ``get_defined``
    and returns:

    - "<letters><row>" (e.g. "AA12") when both are available,
    - "column <letters>" when only the field number is available,
    - "row <row>" when only the row number is available,
    - None when neither is available or when an exception occurs.

    A field number below 1 is treated as not available, as it has no column
    letter.
    """
    try:
        row = error.get_defined(name="row_number",default=None)
        col = error.get_defined(name="field_number",default=None)
        LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        hasCol = col is not None and not np.isnan(col) and int(col)>=1
        hasRow = row is not None and not np.isnan(row)
        if hasCol:
            # bijective base-26 conversion: 1 -> A, 26 -> Z, 27 -> AA
            result = []
            remaining = int(col)
            while remaining>0:
                remaining, rem = divmod(remaining-1, 26)
                result.insert(0,LETTERS[rem])
            if hasRow:
                return "{}{}".format("".join(result),int(row))
            return "column {}".format("".join(result))
        if hasRow:
            return "row {}".format(int(row))
        return None
    except Exception:
        # best effort: coordinates are optional information
        return None
@@ -0,0 +1,11 @@
1
+
2
+
3
class AbstractValidationModule:
    """
    module xlsx validation

    Base class for validation modules. Stores the package and the name of the
    resource the module operates on.
    """

    def __init__(self, package, resourceName):
        """
        store package and resource name

        Raises AssertionError when ``package.has_resource(resourceName)`` is
        false. The check is raised explicitly instead of using an ``assert``
        statement, so it is still performed when Python runs with -O.
        """
        self._package = package
        self._resourceName = resourceName
        if not self._package.has_resource(resourceName):
            raise AssertionError("resource '%s' not found in package" % resourceName)