sapiopycommons 2024.3.18a156__py3-none-any.whl → 2025.1.17a402__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- sapiopycommons/callbacks/__init__.py +0 -0
- sapiopycommons/callbacks/callback_util.py +2041 -0
- sapiopycommons/callbacks/field_builder.py +545 -0
- sapiopycommons/chem/IndigoMolecules.py +52 -5
- sapiopycommons/chem/Molecules.py +114 -30
- sapiopycommons/customreport/__init__.py +0 -0
- sapiopycommons/customreport/column_builder.py +60 -0
- sapiopycommons/customreport/custom_report_builder.py +137 -0
- sapiopycommons/customreport/term_builder.py +315 -0
- sapiopycommons/datatype/attachment_util.py +17 -15
- sapiopycommons/datatype/data_fields.py +61 -0
- sapiopycommons/datatype/pseudo_data_types.py +440 -0
- sapiopycommons/eln/experiment_handler.py +390 -90
- sapiopycommons/eln/experiment_report_util.py +649 -0
- sapiopycommons/eln/plate_designer.py +152 -0
- sapiopycommons/files/complex_data_loader.py +31 -0
- sapiopycommons/files/file_bridge.py +153 -25
- sapiopycommons/files/file_bridge_handler.py +555 -0
- sapiopycommons/files/file_data_handler.py +633 -0
- sapiopycommons/files/file_util.py +270 -158
- sapiopycommons/files/file_validator.py +569 -0
- sapiopycommons/files/file_writer.py +377 -0
- sapiopycommons/flowcyto/flow_cyto.py +77 -0
- sapiopycommons/flowcyto/flowcyto_data.py +75 -0
- sapiopycommons/general/accession_service.py +375 -0
- sapiopycommons/general/aliases.py +259 -18
- sapiopycommons/general/audit_log.py +185 -0
- sapiopycommons/general/custom_report_util.py +252 -31
- sapiopycommons/general/directive_util.py +86 -0
- sapiopycommons/general/exceptions.py +69 -7
- sapiopycommons/general/popup_util.py +85 -18
- sapiopycommons/general/sapio_links.py +50 -0
- sapiopycommons/general/storage_util.py +148 -0
- sapiopycommons/general/time_util.py +97 -7
- sapiopycommons/multimodal/multimodal.py +146 -0
- sapiopycommons/multimodal/multimodal_data.py +490 -0
- sapiopycommons/processtracking/__init__.py +0 -0
- sapiopycommons/processtracking/custom_workflow_handler.py +406 -0
- sapiopycommons/processtracking/endpoints.py +192 -0
- sapiopycommons/recordmodel/record_handler.py +653 -149
- sapiopycommons/rules/eln_rule_handler.py +89 -8
- sapiopycommons/rules/on_save_rule_handler.py +89 -12
- sapiopycommons/sftpconnect/__init__.py +0 -0
- sapiopycommons/sftpconnect/sftp_builder.py +70 -0
- sapiopycommons/webhook/webhook_context.py +39 -0
- sapiopycommons/webhook/webhook_handlers.py +617 -69
- sapiopycommons/webhook/webservice_handlers.py +317 -0
- {sapiopycommons-2024.3.18a156.dist-info → sapiopycommons-2025.1.17a402.dist-info}/METADATA +5 -4
- sapiopycommons-2025.1.17a402.dist-info/RECORD +60 -0
- {sapiopycommons-2024.3.18a156.dist-info → sapiopycommons-2025.1.17a402.dist-info}/WHEEL +1 -1
- sapiopycommons-2024.3.18a156.dist-info/RECORD +0 -28
- {sapiopycommons-2024.3.18a156.dist-info → sapiopycommons-2025.1.17a402.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from abc import abstractmethod
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from sapiopylib.rest.User import SapioUser
|
|
8
|
+
from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
|
|
9
|
+
from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
|
|
10
|
+
AbstractVeloxFieldDefinition
|
|
11
|
+
|
|
12
|
+
from sapiopycommons.callbacks.callback_util import CallbackUtil
|
|
13
|
+
from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
|
|
14
|
+
from sapiopycommons.general.aliases import UserIdentifier, AliasUtil
|
|
15
|
+
from sapiopycommons.general.custom_report_util import CustomReportUtil
|
|
16
|
+
from sapiopycommons.general.exceptions import SapioUserCancelledException
|
|
17
|
+
from sapiopycommons.general.time_util import TimeUtil
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FileValidator:
    """
    Checks the rows of a user-provided file against a set of validation rules. For example, you may require that no
    cell in a column is blank, that values fall within a certain range, or that no two values in a column are the
    same. Rules are registered with add_rule and run against the file data given at construction. When the file is
    validated, the index of every row that failed a rule is returned together with the rules that it failed, so that
    the specifics of each failure can be reported to the user.

    Consider using this in combination with FileDataHandler to prepare files for the FileValidator and for use in
    data record fields.
    """
    file_data: list[dict[str, Any]]
    rules: list[ValidationRule]

    def __init__(self, file_data: list[dict[str, Any]]):
        """
        :param file_data: A list of dictionaries. Every dictionary in the list is expected to have the same keys.
            FileUtil.tokenize_csv and tokenize_xlsx can be used to convert a file into such a list.
            CustomReportUtil can also generate lists of dictionaries that match this criteria.
        """
        self.file_data = file_data
        self.rules = []

    def add_rule(self, rule: ValidationRule) -> None:
        """
        Add a new validation rule to this validator. When the validate_file function is called, each added rule will
        be run on the file. Rules are evaluated in the same order that they are added to the validator.

        Custom validation rules can be created by defining a class that extends RowRule or ColumnRule and implements
        the validation method.

        :param rule: A validation rule to be run when the file is validated.
        """
        self.rules.append(rule)

    def validate_file(self) -> dict[int, list[ValidationRule]]:
        """
        Validate the file, returning any rule failures that are encountered. Rules that are neither a RowRule nor a
        ColumnRule are not evaluated.

        :return: A dictionary mapping row indices to a list of the rules that they failed. This can then be used to
            report errors back to the user who uploaded the file by checking the class type of the rules.
        """
        failed_rows: dict[int, list[ValidationRule]] = {}

        for rule in self.rules:
            if isinstance(rule, RowRule):
                # Row rules run on a per-row basis and return a boolean for whether the row passed, so the
                # whitelist/blacklist filtering has to be applied here before each row is handed to the rule.
                for index, row in enumerate(self.file_data):
                    if FileDataHandler.skip_row(index, row, whitelist=rule.whitelist, blacklist=rule.blacklist):
                        continue
                    if not rule.validate(row):
                        failed_rows.setdefault(index, []).append(rule)
            elif isinstance(rule, ColumnRule):
                # Column rules receive the entire file and return the indices of the rows that failed.
                for index in rule.validate(self.file_data):
                    failed_rows.setdefault(index, []).append(rule)

        return failed_rows

    def build_violation_report(self, context: UserIdentifier,
                               rule_violations: dict[int, list[ValidationRule]]) -> None:
        """
        Display a simple report of any rule violations in the file to the user as a table dialog. Nothing is
        displayed if there are no violations.

        :param context: The current webhook context or a user object to send requests from.
        :param rule_violations: A dict of rule violations generated by a call to validate_file.
        """
        if not rule_violations:
            return

        # The text placed in each cell is cut to the max length declared on its column definition so that the
        # reported values never exceed what the columns say they can hold.
        value_max_length: int = 500
        reason_max_length: int = 2000

        file_handler = FileDataHandler(self.file_data)
        columns: list[AbstractVeloxFieldDefinition] = [
            VeloxIntegerFieldDefinition("Errors", "RowNum", "Row Number"),
            VeloxStringFieldDefinition("Errors", "Header", "Header"),
            VeloxStringFieldDefinition("Errors", "Value", "Value", max_length=value_max_length),
            VeloxStringFieldDefinition("Errors", "Reason", "Reason", max_length=reason_max_length,
                                       default_table_column_width=500)
        ]
        rows: list[dict[str, Any]] = []
        for index, violations in rule_violations.items():
            file_row: dict[str, Any] = file_handler.get_row(index)
            for violation in violations:
                if isinstance(violation, ColumnRule):
                    # Column rules always act upon a specific header, so list the header and the value for this
                    # row under that header alongside the reason.
                    header: str = violation.header
                    value: str = str(file_row.get(violation.header))[:value_max_length]
                elif isinstance(violation, RowRule):
                    # We can't know what header(s) a row rule was looking at, so just leave header and value
                    # blank and trust that the reason describes enough what the issue was.
                    header = "N/A"
                    value = "N/A"
                else:
                    # Only row and column rules are reported.
                    continue
                rows.append({
                    # NOTE(review): presumably turns the 0-based data index into the row number of a sheet that
                    # has a header row - confirm.
                    "RowNum": index + 2,
                    "Header": header,
                    "Value": value,
                    "Reason": violation.reason[:reason_max_length]
                })

        callback = CallbackUtil(context)
        callback.table_dialog("Errors", "The following rule violations were encountered in the provided file.",
                              columns, rows)

    def validate_and_report_errors(self, context: UserIdentifier) -> None:
        """
        Validate the file. If any rule violations are found, display a simple report of any rule violations in the
        file to the user as a table dialog and throw a SapioUserCancelledException once the dialog call returns to
        end the webhook interaction.

        Shorthand for calling validate_file() and then build_violation_report() if there are any errors.

        :param context: The current webhook context or a user object to send requests from.
        """
        violations = self.validate_file()
        if violations:
            self.build_violation_report(context, violations)
            raise SapioUserCancelledException()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class ValidationRule:
    """
    Common parent of every rule. A rule carries the reason to show when a file violates it, plus an optional
    whitelist and blacklist that control which rows the rule is checked against.

    Do not extend this class directly. To create a custom rule, extend RowRule or ColumnRule instead.
    """
    reason: str
    whitelist: FilterList
    blacklist: FilterList

    def __init__(self, reason: str, whitelist: FilterList, blacklist: FilterList):
        """
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.whitelist = whitelist
        self.blacklist = blacklist
        self.reason = reason
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class RowRule(ValidationRule):
    """
    Parent class of every row rule. A row rule is not bound to a single header; it may inspect several cells of the
    row it is given. Extend this class to create your own custom file validation rules.
    """
    def __init__(self, reason: str, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        super().__init__(reason, whitelist, blacklist)

    @abstractmethod
    def validate(self, row: dict[str, Any]) -> bool:
        """
        Check a single row against the requirements of this rule.

        :param row: A row from the file.
        :return: Whether the given row has passed the rule validation.
        """
        ...
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class ColumnRule(ValidationRule):
    """
    Parent class of every column rule. A column rule is bound to a specific header and is given every row of the
    file so that it can check the cells under that header. Extend this class to create your own custom file
    validation rules.
    """
    header: str

    def __init__(self, header: str, reason: str, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.header = header
        super().__init__(reason, whitelist, blacklist)

    @abstractmethod
    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        Check a whole column against the requirements of this rule.

        :param rows: Every row from the file.
        :return: A list of the indices of all rows that failed the rule validation.
        """
        ...
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class NoBlanksRule(ColumnRule):
    """
    Column rule that rejects blank cells: every cell under the header must hold a value.
    """
    def __init__(self, header: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason is used, but you may want to provide more detailed reasoning than the default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = "This value is not allowed to be blank."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices that FileDataHandler.empty_cells reports for this rule's header.
        """
        return FileDataHandler(rows).empty_cells(self.header, whitelist=self.whitelist, blacklist=self.blacklist)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class NoDuplicatesRule(ColumnRule):
    """
    Column rule that rejects repeated values: no two cells under the header may hold the same value.
    """
    def __init__(self, header: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason is used, but you may want to provide more detailed reasoning than the default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        default_reason = "This value is a duplicate of another value in the same column."
        super().__init__(header, default_reason if reason is None else reason, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: Every row index listed by FileDataHandler.get_duplicates for this rule's header, flattened into a
            single list.
        """
        by_value: dict[Any, list[int]] = FileDataHandler(rows).get_duplicates(self.header,
                                                                              whitelist=self.whitelist,
                                                                              blacklist=self.blacklist)
        # The handler groups row indices per duplicated value; the caller only needs the indices themselves.
        return [row_index for group in by_value.values() for row_index in group]
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class AllowedValuesRule(ColumnRule):
    """
    Column rule that restricts cells to a fixed set: every cell under the header must hold a value from the given
    list.
    """
    values: list[Any]

    def __init__(self, header: str, values: list[Any], *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param values: A list of any values.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason listing the allowed values is used, but you may want to provide more detailed reasoning
            than the default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.values = values
        if reason is None:
            allowed = ", ".join(str(item) for item in values)
            reason = f"This value is not one of the allowed values: {allowed}"
        super().__init__(header, reason, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices that FileDataHandler.get_not_in_list reports for this rule's header and values.
        """
        return FileDataHandler(rows).get_not_in_list(self.header, self.values,
                                                     whitelist=self.whitelist, blacklist=self.blacklist)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class MatchesPatternRule(ColumnRule):
    """
    Column rule that checks cells against a regex: every cell under the header must hold a value that matches the
    pattern.
    """
    pattern: str | re.Pattern[str]

    def __init__(self, header: str, pattern: str | re.Pattern[str], *, reason: str | None = None,
                 whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param pattern: A regex pattern.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason quoting the pattern is used, but you may want to provide more detailed reasoning than
            the default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.pattern = pattern
        message = f"This value does not match the expected format: {pattern}." if reason is None else reason
        super().__init__(header, message, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices that FileDataHandler.get_mismatches reports for this rule's header and pattern.
        """
        return FileDataHandler(rows).get_mismatches(self.header, self.pattern,
                                                    whitelist=self.whitelist, blacklist=self.blacklist)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class MatchesDateFormatRule(ColumnRule):
    """
    Column rule that checks cells against a date format: every cell under the header must hold a value that matches
    the given format.
    """
    time_format: str

    def __init__(self, header: str, time_format: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param time_format: A date/time format. See TimeUtil for more specifics.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason quoting the format is used, but you may want to provide more detailed reasoning than
            the default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.time_format = time_format
        message = f"This value does not match the expected format: {time_format}." if reason is None else reason
        super().__init__(header, message, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of the rows selected by FileDataHandler.get_by_function, using a check that is true
            when TimeUtil.str_matches_format rejects the cell under this rule's header.
        """
        def has_wrong_format(index: int, row: dict[str, Any]) -> bool:
            # Select the rows that do NOT match, since those are the ones that violate the rule.
            return not TimeUtil.str_matches_format(row.get(self.header), self.time_format)

        return FileDataHandler(rows).get_by_function(has_wrong_format,
                                                     whitelist=self.whitelist, blacklist=self.blacklist)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
class ValueIsCastableRule(ColumnRule):
    """
    Column rule that checks whether cells can be converted to a type: every cell under the header must hold a value
    that can be cast to the given type. Intended for determining if string values can be cast to ints or floats, but
    you may provide other types as well. Works by checking if cast_type(value) raises an exception, so if the type
    you provide wouldn't raise an exception for a bad string, then this rule will never fail.
    """
    cast_type: type

    def __init__(self, header: str, cast_type: type, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param cast_type: A type to attempt to cast the values under the header to.
        :param reason: A string explaining the reason why a violation occurred for this rule. When omitted, a
            default reason naming the type is used, but you may want to provide more detailed reasoning than the
            default.
        :param whitelist: Rows that don't match the whitelist are skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: Rows that match the blacklist are skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        self.cast_type = cast_type
        message = f"This value cannot be casted to the type {cast_type}" if reason is None else reason
        super().__init__(header, message, whitelist, blacklist)

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of the rows selected by FileDataHandler.get_by_function, using a check that is true
            when casting the cell under this rule's header raises an exception.
        """
        def cannot_cast(index: int, row: dict[str, Any]) -> bool:
            # Any exception raised by the conversion counts as "not castable".
            try:
                self.cast_type(row.get(self.header))
            except Exception:
                return True
            return False

        return FileDataHandler(rows).get_by_function(cannot_cast, whitelist=self.whitelist, blacklist=self.blacklist)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class ValueInsideRangeRule(ColumnRule):
    """
    Requires that every cell in a column has a value that is inside the given range (inclusive).
    """
    min_val: float | int
    max_val: float | int

    def __init__(self, header: str, min_val: float | int, max_val: float | int,
                 *, reason: str | None = None, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param min_val: The minimum allowed value of the cell.
        :param max_val: The maximum allowed value of the cell.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning
            than the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"This value is not within the range [{min_val}, {max_val}]."
        super().__init__(header, reason, whitelist, blacklist)
        self.min_val = min_val
        self.max_val = max_val

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of the rows that violate this rule, i.e. the rows whose value under the header is
            outside of the range.
        """
        handler = FileDataHandler(rows)
        # A column rule returns the rows that FAILED it. The rows that fail an "inside the range" requirement are
        # the ones outside of the range, so the outside-range query is the one to report.
        # NOTE(review): assumes FileDataHandler.get_outside_range returns the rows whose value is below min_val or
        # above max_val, mirroring how get_in_list/get_not_in_list are used by the other rules - confirm.
        return handler.get_outside_range(self.header, self.min_val, self.max_val,
                                         whitelist=self.whitelist, blacklist=self.blacklist)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class ValueOutsideRangeRule(ColumnRule):
    """
    Requires that every cell in a column has a value that is outside the given range (exclusive).
    """
    min_val: float | int
    max_val: float | int

    def __init__(self, header: str, min_val: float | int, max_val: float | int,
                 *, reason: str | None = None, whitelist: FilterList = None, blacklist: FilterList = None):
        """
        :param header: The header that this rule acts upon.
        :param min_val: The value that the cell may be below.
        :param max_val: The value that the cell may be above.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning
            than the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"This value is not outside of the range [{min_val}, {max_val}]"
        super().__init__(header, reason, whitelist, blacklist)
        self.min_val = min_val
        self.max_val = max_val

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices of the rows that violate this rule, i.e. the rows whose value under the header is
            inside of the range.
        """
        handler = FileDataHandler(rows)
        # A column rule returns the rows that FAILED it. The rows that fail an "outside the range" requirement are
        # the ones inside of the range, so the inside-range query is the one to report.
        # NOTE(review): assumes FileDataHandler.get_inside_range returns the rows whose value lies between min_val
        # and max_val, mirroring how get_in_list/get_not_in_list are used by the other rules - confirm.
        return handler.get_inside_range(self.header, self.min_val, self.max_val,
                                        whitelist=self.whitelist, blacklist=self.blacklist)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
class ContainsSubstringFromCellRule(RowRule):
    """
    Requires that the cell in the first column contains the value in the cell of the second column.
    """
    first: str
    second: str

    def __init__(self, first: str, second: str, *, reason: str | None = None, whitelist: FilterList = None,
                 blacklist: FilterList = None):
        """
        :param first: The header of the cell whose contents are searched.
        :param second: The header of the cell whose value must be found inside the first cell.
        :param reason: A string explaining the reason why a violation occurred for this rule. A default reason is
            provided if none is given by this initialization, but you may want to provide more detailed reasoning
            than the default.
        :param whitelist: If a row doesn't match the whitelist, it will be skipped over. See the FilterList alias
            description for the forms that a whitelist can take.
        :param blacklist: If a row matches the blacklist, it will be skipped over. See the FilterList alias
            description for the forms that a blacklist can take.
        """
        if reason is None:
            reason = f"The value in column {first} does not contain the value in the column {second}."
        super().__init__(reason, whitelist, blacklist)
        self.first = first
        self.second = second

    def validate(self, row: dict[str, Any]) -> bool:
        """
        :param row: A row from the file.
        :return: True if the text of the second cell appears within the text of the first cell. False if it does
            not, or if either cell is missing from the row or holds None.
        """
        container: Any = row.get(self.first)
        fragment: Any = row.get(self.second)
        # A missing cell can neither contain nor be contained, so report it as a violation rather than letting the
        # "in" operator raise a TypeError and abort the whole validation.
        if container is None or fragment is None:
            return False
        # Compare as text so that non-string cell values (e.g. numbers) don't raise a TypeError either.
        return str(fragment) in str(container)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class UniqueSystemValueRule(ColumnRule):
    """
    Column rule that rejects values the system already knows: every cell under the header must hold a value that is
    not already in use in the system for a given data type and field name.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
                 data_field_name: str):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        """
        super().__init__(header, "This value already exists in the system.")
        self.user = AliasUtil.to_sapio_user(context)
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices that FileDataHandler.get_in_list reports when this rule's header is compared against
            the field values returned by the quick report.
        """
        file_handler = FileDataHandler(rows)
        file_values: list[str] = file_handler.get_values_list(self.header)

        # Run a quick report for all records of this type that match these field values.
        search_value: str = "{" + ",".join(file_values) + "}"
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             search_value)
        report_rows: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
        values_in_system: list[Any] = [record.get(self.data_field_name) for record in report_rows]
        return file_handler.get_in_list(self.header, values_in_system)
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
class ExistingSystemValueRule(ColumnRule):
    """
    Column rule that rejects values the system does not know: every cell under the header must hold a value that is
    already in use in the system for a given data type and field name.
    """
    user: SapioUser
    data_type_name: str
    data_field_name: str

    def __init__(self, context: UserIdentifier, header: str, data_type_name: str,
                 data_field_name: str):
        """
        :param context: The current webhook context or a user object to send requests from.
        :param header: The header that this rule acts upon.
        :param data_type_name: The data type name to search on.
        :param data_field_name: The data field name to search on. This is expected to be a string field.
        """
        super().__init__(header, "This value doesn't exist in the system.")
        self.user = AliasUtil.to_sapio_user(context)
        self.data_type_name = data_type_name
        self.data_field_name = data_field_name

    def validate(self, rows: list[dict[str, Any]]) -> list[int]:
        """
        :param rows: Every row from the file.
        :return: The indices that FileDataHandler.get_not_in_list reports when this rule's header is compared
            against the field values returned by the quick report.
        """
        file_handler = FileDataHandler(rows)
        file_values: list[str] = file_handler.get_values_list(self.header)

        # Run a quick report for all records of this type that match these field values.
        search_value: str = "{" + ",".join(file_values) + "}"
        term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
                             search_value)
        report_rows: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
        values_in_system: list[Any] = [record.get(self.data_field_name) for record in report_rows]
        return file_handler.get_not_in_list(self.header, values_in_system)
|