sapiopycommons 2025.6.19a564__py3-none-any.whl → 2026.1.22a847__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sapiopycommons/ai/__init__.py +0 -0
- sapiopycommons/ai/agent_service_base.py +2051 -0
- sapiopycommons/ai/converter_service_base.py +163 -0
- sapiopycommons/ai/external_credentials.py +131 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2.py +87 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2.pyi +282 -0
- sapiopycommons/ai/protoapi/agent/agent_pb2_grpc.py +154 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2.py +49 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2.pyi +40 -0
- sapiopycommons/ai/protoapi/agent/entry_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2.py +61 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2.pyi +181 -0
- sapiopycommons/ai/protoapi/agent/item/item_container_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.py +41 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.pyi +36 -0
- sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.py +51 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.pyi +59 -0
- sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.py +123 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.pyi +599 -0
- sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.py +59 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.pyi +68 -0
- sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2_grpc.py +149 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2.py +69 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2.pyi +109 -0
- sapiopycommons/ai/protoapi/pipeline/script/script_pb2_grpc.py +153 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2.py +49 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2.pyi +56 -0
- sapiopycommons/ai/protoapi/pipeline/step_output_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2.py +43 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2.pyi +44 -0
- sapiopycommons/ai/protoapi/pipeline/step_pb2_grpc.py +24 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.py +39 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.pyi +33 -0
- sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2_grpc.py +24 -0
- sapiopycommons/ai/protobuf_utils.py +583 -0
- sapiopycommons/ai/request_validation.py +561 -0
- sapiopycommons/ai/server.py +152 -0
- sapiopycommons/ai/test_client.py +534 -0
- sapiopycommons/callbacks/callback_util.py +53 -24
- sapiopycommons/eln/experiment_handler.py +12 -5
- sapiopycommons/files/assay_plate_reader.py +93 -0
- sapiopycommons/files/file_text_converter.py +207 -0
- sapiopycommons/files/file_util.py +128 -1
- sapiopycommons/files/temp_files.py +82 -0
- sapiopycommons/flowcyto/flow_cyto.py +2 -24
- sapiopycommons/general/accession_service.py +2 -28
- sapiopycommons/general/aliases.py +4 -1
- sapiopycommons/general/macros.py +172 -0
- sapiopycommons/general/time_util.py +199 -4
- sapiopycommons/multimodal/multimodal.py +2 -24
- sapiopycommons/recordmodel/record_handler.py +200 -111
- sapiopycommons/rules/eln_rule_handler.py +3 -0
- sapiopycommons/rules/on_save_rule_handler.py +3 -0
- sapiopycommons/webhook/webhook_handlers.py +6 -4
- sapiopycommons/webhook/webservice_handlers.py +1 -1
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/METADATA +2 -2
- sapiopycommons-2026.1.22a847.dist-info/RECORD +113 -0
- sapiopycommons-2025.6.19a564.dist-info/RECORD +0 -68
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/WHEEL +0 -0
- {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
from sapiopylib.rest.pojo.DataRecord import DataRecord
|
|
7
|
+
|
|
8
|
+
from sapiopycommons.ai.agent_service_base import AgentBase, ContainerType
|
|
9
|
+
from sapiopycommons.general.aliases import AliasUtil
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class InputValidation(ABC):
    """
    Common settings shared by every per-input validation requirement.

    Subclasses add the options that are specific to one container type (binary, CSV, JSON, data records, text).
    """
    # Position of the input that this requirement applies to.
    index: int
    # Upper bound on the number of entries in the input; None disables the limit.
    max_entries: int | None
    # Whether the input as a whole may contain no entries.
    allow_empty_input: bool
    # Whether individual entries of the input may be empty.
    allow_empty_entries: bool

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        """
        self.index, self.max_entries = index, max_entries
        self.allow_empty_input, self.allow_empty_entries = allow_empty_input, allow_empty_entries
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BinaryValidation(InputValidation):
    """
    Validation requirement for an input whose entries are raw bytes.
    """
    # Optional per-entry check; returns the error messages for one entry (empty list when valid).
    func: Callable[[bytes], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 func: Callable[[bytes], list[str]] | None = None):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        :param func: An optional callable invoked with each entry. It returns a list of error messages for
            that entry, or an empty list when the entry is valid. It is not called for an empty input or an
            empty entry.
        """
        super().__init__(index=index, max_entries=max_entries,
                         allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
        self.func = func
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class CsvValidation(InputValidation):
    """
    Validation requirement for an input whose entries are CSV rows.
    """
    # Headers that the CSV must contain; None disables the header check.
    required_headers: list[str] | None = None

    # Optional per-row check; returns the error messages for one row (empty list when valid).
    func: Callable[[dict[str, Any]], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 required_headers: list[str] | None = None,
                 func: Callable[[dict[str, Any]], list[str]] | None = None):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        :param required_headers: Headers that must be present in the CSV input. None means that no header
            validation is performed.
        :param func: An optional callable invoked with each entry. It returns a list of error messages for
            that entry, or an empty list when the entry is valid. It is not called for an empty input or an
            empty entry.
        """
        super().__init__(index=index, max_entries=max_entries,
                         allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
        self.func = func
        self.required_headers = required_headers
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class DataRecordValidation(InputValidation):
    """
    Validation requirement for an input whose entries are data records.
    """
    # Optional per-record check; returns the error messages for one record (empty list when valid).
    func: Callable[[DataRecord], list[str]] | None
    # Accepted data type names, stripped of surrounding whitespace; None disables the data type check.
    data_type_names: list[str] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 data_type_names: list[str] | str | None = None,
                 func: Callable[[DataRecord], list[str]] | None = None):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        :param data_type_names: A single data type name or a list of names that the records are expected to
            match. None means that no data type validation is performed.
        :param func: An optional callable invoked with each entry. It returns a list of error messages for
            that entry, or an empty list when the entry is valid. It is not called for an empty input or an
            empty entry.
        """
        super().__init__(index=index, max_entries=max_entries,
                         allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
        self.func = func

        if data_type_names is None:
            self.data_type_names = None
            return
        # A lone name is treated as a one-element list so both forms share the same clean-up.
        candidates = [data_type_names] if isinstance(data_type_names, str) else data_type_names
        self.data_type_names = [name.strip() for name in candidates]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class JsonValidation(InputValidation):
    """
    Validation requirement for an input whose entries are JSON objects.
    """
    # Key requirements, indexed by the key they apply to.
    json_requirements: dict[str, JsonKeyValidation]

    # Optional per-object check; returns the error messages for one object (empty list when valid).
    func: Callable[[dict[str, Any]], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 json_requirements: list[JsonKeyValidation] | None = None,
                 func: Callable[[dict[str, Any]], list[str]] | None = None):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        :param json_requirements: Key requirements to apply to the JSON input. Each one names a key, the
            expected type of its value, and any nested requirements for that key. Two requirements may not
            share the same key.
        :param func: An optional callable invoked with each entry. It returns a list of error messages for
            that entry, or an empty list when the entry is valid. It is not called for an empty input or an
            empty entry.
        :raises ValueError: If two of the given requirements use the same key.
        """
        super().__init__(index=index, max_entries=max_entries,
                         allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)

        by_key = self.json_requirements = {}
        for requirement in json_requirements or ():
            name = requirement.key
            if name in by_key:
                raise ValueError(f"Duplicate JSON requirement key {name} for input index {index}.")
            by_key[name] = requirement

        self.func = func
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class JsonKeyValidation:
    """
    A class representing a validation requirement for a specific key in a JSON input.
    """
    key: str
    json_type: type
    required: bool
    allow_empty: bool

    list_type: type | None = None
    nested_requirements: dict[str, JsonKeyValidation]

    func: Callable[[str, Any], list[str]] | None = None

    def __init__(self, key: str, json_type: type, required: bool = True, allow_empty: bool = False,
                 list_type: type | None = None, nested_requirements: list[JsonKeyValidation] | None = None,
                 func: Callable[[str, Any], list[str]] | None = None):
        """
        :param key: The key in the JSON input to validate.
        :param json_type: The expected type of the value for this key. This should be one of: str, int, float, bool,
            list, or dict.
        :param required: If true, then this key must be present in the JSON input. If false, then the key is optional,
            but if present, it must still match the other expected criteria.
        :param allow_empty: If true, then the value for this key can be empty (e.g., an empty string, list, or dict).
            If false, then the value must not be empty.
        :param list_type: The expected type of the entries in the list if json_type is list. When given, it must
            be one of the same types that are accepted for json_type.
        :param nested_requirements: A list of nested JSON requirements to validate for this key if it is a dict. Each
            requirement specifies a key to validate, the expected type of the value for that key, and any nested
            requirements for that key. Only applicable if json_type is dict, or if json_type is list and list_type is
            dict.
        :param func: An optional function to run on the value for this key. The function should take the path and the
            value as arguments, and return a list of error messages if the value is not valid. If the value is valid,
            the function should return an empty list.
        :raises ValueError: If two nested requirements share a key, or if json_type or list_type is not one of
            the supported types.
        """
        self.key = key
        self.json_type = json_type
        self.required = required
        self.allow_empty = allow_empty

        self.list_type = list_type
        self.nested_requirements = {}
        if nested_requirements:
            for req in nested_requirements:
                if req.key in self.nested_requirements:
                    raise ValueError(f"Duplicate nested requirement key {req.key} for JSON key {key}.")
                self.nested_requirements[req.key] = req

        self.func = func

        # An ordered tuple (rather than a set) keeps the "Must be one of" text identical on every run;
        # iterating a set of type objects has no guaranteed order.
        allowed_types: tuple[type, ...] = (str, int, float, bool, list, dict)
        allowed_names: str = ', '.join(t.__name__ for t in allowed_types)
        if self.json_type not in allowed_types:
            raise ValueError(f"Invalid json_type {self.json_type} for key {key}. Must be one of: "
                             f"{allowed_names}.")
        if self.list_type is not None and self.list_type not in allowed_types:
            raise ValueError(f"Invalid list_type {self.list_type} for key {key}. Must be one of: "
                             f"{allowed_names}.")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class TextValidation(InputValidation):
    """
    Validation requirement for an input whose entries are text.
    """
    # Whether the input is flattened before it is validated.
    flatten: bool
    # Characters that may not occur in any entry; None disables the check.
    disallowed_characters: str | None = None
    # Regular expression that every entry must fully match; None disables the check.
    regex: str | None = None

    # Optional per-entry check; returns the error messages for one entry (empty list when valid).
    func: Callable[[str], list[str]] | None = None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False, flatten: bool = False,
                 disallow_characters: str | None = None, regex: str | None = None,
                 func: Callable[[str], list[str]] | None = None):
        """
        :param index: The index of the input that this requirement validates.
        :param max_entries: The largest number of entries the input may contain. None means unlimited.
        :param allow_empty_input: Whether the input is allowed to be completely empty.
        :param allow_empty_entries: Whether individual entries in the input are allowed to be empty.
        :param flatten: Whether the input should be flattened before validation.
        :param disallow_characters: A string of characters that may not appear in any entry. None means that
            no character validation is performed. Stored on the instance as ``disallowed_characters``. Not
            used for an empty input or an empty entry.
        :param regex: An optional regular expression that each entry must fully match. None means that no
            regex validation is performed. Not used for an empty input or an empty entry.
        :param func: An optional callable invoked with each entry as its argument. It returns a list of error
            messages for that entry, or an empty list when the entry is valid. It is only called for entries
            that passed the preceding checks (not empty, no disallowed characters, matches the regex).
        """
        super().__init__(index=index, max_entries=max_entries,
                         allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
        self.func = func
        self.regex = regex
        # The constructor argument and the attribute are deliberately spelled differently.
        self.disallowed_characters = disallow_characters
        self.flatten = flatten
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class InputValidator:
|
|
249
|
+
"""
|
|
250
|
+
A class for validating the inputs to an agent based on their container types and specified validation requirements.
|
|
251
|
+
"""
|
|
252
|
+
agent: AgentBase
|
|
253
|
+
requirements: dict[int, InputValidation]
|
|
254
|
+
|
|
255
|
+
def __init__(self, agent: AgentBase, requirements: list[InputValidation] | None = None):
|
|
256
|
+
"""
|
|
257
|
+
:param agent: The agent to validate the request of.
|
|
258
|
+
:param requirements: A list of validation requirements to apply to the request. If a validation object is
|
|
259
|
+
not provided for a given input, then default validation will be applied. Default validation requires that
|
|
260
|
+
the input is not empty, and that the entries in the input are not empty.
|
|
261
|
+
"""
|
|
262
|
+
self.agent = agent
|
|
263
|
+
self.requirements = {}
|
|
264
|
+
if requirements:
|
|
265
|
+
self.add_requirements(requirements)
|
|
266
|
+
|
|
267
|
+
def add_requirements(self, requirements: list[InputValidation]) -> None:
|
|
268
|
+
for req in requirements:
|
|
269
|
+
if req.index < 0 or req.index >= len(self.agent.input_configs):
|
|
270
|
+
raise ValueError(f"Validation requirement index {req.index} is out of range for agent "
|
|
271
|
+
f"{self.agent} with {len(self.agent.input_configs)} inputs.")
|
|
272
|
+
if req.index in self.requirements:
|
|
273
|
+
raise ValueError(f"Duplicate validation requirement index {req.index} for agent {self.agent}.")
|
|
274
|
+
self.requirements[req.index] = req
|
|
275
|
+
|
|
276
|
+
def run(self) -> list[str]:
|
|
277
|
+
"""
|
|
278
|
+
Run simple validation on all the inputs based on their container types. This requires the following:
|
|
279
|
+
- The input may not be empty.
|
|
280
|
+
- The entries in the input may not be empty, unless allow_empty is set to true.
|
|
281
|
+
- If provided, the number of entries in the input may not exceed a maximum size.
|
|
282
|
+
- If provided, certain keys must be present in the JSON input, and they must match the above behavior.
|
|
283
|
+
|
|
284
|
+
:return: A list of the error messages if the request is not valid. If the request is valid, return an empty
|
|
285
|
+
list.
|
|
286
|
+
"""
|
|
287
|
+
errors: list[str] = []
|
|
288
|
+
for i, (input_type, input_config) in enumerate(zip(self.agent.input_container_types, self.agent.input_configs)):
|
|
289
|
+
match input_type:
|
|
290
|
+
case ContainerType.BINARY:
|
|
291
|
+
r: InputValidation = self.requirements.get(i, BinaryValidation(i))
|
|
292
|
+
if not isinstance(r, BinaryValidation):
|
|
293
|
+
raise ValueError(f"Validation requirement for binary input at index {i} must be a "
|
|
294
|
+
f"BinaryValidation object. Got {type(r)} instead.")
|
|
295
|
+
errors.extend(self.validate_input_binary(i, r))
|
|
296
|
+
case ContainerType.CSV:
|
|
297
|
+
r: InputValidation = self.requirements.get(i, CsvValidation(i))
|
|
298
|
+
if not isinstance(r, CsvValidation):
|
|
299
|
+
raise ValueError(f"Validation requirement for CSV input at index {i} must be a "
|
|
300
|
+
f"CsvValidation object. Got {type(r)} instead.")
|
|
301
|
+
errors.extend(self.validate_input_csv(i, r))
|
|
302
|
+
case ContainerType.JSON:
|
|
303
|
+
r: InputValidation = self.requirements.get(i, JsonValidation(i))
|
|
304
|
+
if not isinstance(r, JsonValidation):
|
|
305
|
+
raise ValueError(f"Validation requirement for JSON input at index {i} must be a "
|
|
306
|
+
f"JsonValidation object. Got {type(r)} instead.")
|
|
307
|
+
errors.extend(self.validate_input_json(i, r))
|
|
308
|
+
case ContainerType.DATA_RECORDS:
|
|
309
|
+
data_type_name: str | None = input_config.base_config.data_type_name
|
|
310
|
+
if data_type_name and data_type_name != "Any":
|
|
311
|
+
base_validation = DataRecordValidation(i, data_type_names=[data_type_name])
|
|
312
|
+
else:
|
|
313
|
+
base_validation = DataRecordValidation(i)
|
|
314
|
+
r: InputValidation = self.requirements.get(i, base_validation)
|
|
315
|
+
if not isinstance(r, DataRecordValidation):
|
|
316
|
+
raise ValueError(f"Validation requirement for data record input at index {i} must be a "
|
|
317
|
+
f"DataRecordValidation object. Got {type(r)} instead.")
|
|
318
|
+
errors.extend(self.validate_input_data_records(i, r))
|
|
319
|
+
case ContainerType.TEXT:
|
|
320
|
+
r: InputValidation = self.requirements.get(i, TextValidation(i))
|
|
321
|
+
if not isinstance(r, TextValidation):
|
|
322
|
+
raise ValueError(f"Validation requirement for text input at index {i} must be a "
|
|
323
|
+
f"TextValidation object. Got {type(r)} instead.")
|
|
324
|
+
errors.extend(self.validate_input_text(i, r))
|
|
325
|
+
return errors
|
|
326
|
+
|
|
327
|
+
def validate_input_binary(self, index: int, r: BinaryValidation) -> list[str]:
|
|
328
|
+
"""
|
|
329
|
+
Run simple validation on the binary input at the given index.
|
|
330
|
+
|
|
331
|
+
:param index: The index of the input to validate.
|
|
332
|
+
:param r: The validation requirement to use for this input.
|
|
333
|
+
:return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
|
|
334
|
+
"""
|
|
335
|
+
input_files: list[bytes] = self.agent.get_input_binary(index)
|
|
336
|
+
errors: list[str] = []
|
|
337
|
+
if not input_files:
|
|
338
|
+
if not r.allow_empty_input:
|
|
339
|
+
errors.append(f"Input {index} is empty.")
|
|
340
|
+
elif r.max_entries is not None and len(input_files) > r.max_entries:
|
|
341
|
+
errors.append(f"Input {index} contains {len(input_files)} entries, which exceeds the maximum allowed "
|
|
342
|
+
f"number of {r.max_entries}.")
|
|
343
|
+
elif not r.allow_empty_entries or r.func:
|
|
344
|
+
for i, entry in enumerate(input_files):
|
|
345
|
+
if not entry.strip():
|
|
346
|
+
if not r.allow_empty_entries:
|
|
347
|
+
errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
|
|
348
|
+
elif r.func:
|
|
349
|
+
func_errors: list[str] = r.func(entry)
|
|
350
|
+
if func_errors:
|
|
351
|
+
for error in func_errors:
|
|
352
|
+
errors.append(f"Error in entry {i} of input {index}: {error}")
|
|
353
|
+
return errors
|
|
354
|
+
|
|
355
|
+
def validate_input_csv(self, index: int, r: CsvValidation) -> list[str]:
|
|
356
|
+
"""
|
|
357
|
+
Run simple validation on the CSV input at the given index.
|
|
358
|
+
|
|
359
|
+
:param index: The index of the input to validate.
|
|
360
|
+
:param r: The validation requirement to use for this input.
|
|
361
|
+
:return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
|
|
362
|
+
"""
|
|
363
|
+
headers, csv = self.agent.get_input_csv(index)
|
|
364
|
+
headers: list[str]
|
|
365
|
+
csv: list[dict[str, Any]]
|
|
366
|
+
|
|
367
|
+
errors: list[str] = []
|
|
368
|
+
if r.required_headers:
|
|
369
|
+
missing_headers: list[str] = [h for h in r.required_headers if h not in headers]
|
|
370
|
+
if missing_headers:
|
|
371
|
+
errors.append(f"Input {index} is missing required headers: {', '.join(missing_headers)}.")
|
|
372
|
+
|
|
373
|
+
if not csv:
|
|
374
|
+
if not r.allow_empty_input:
|
|
375
|
+
errors.append(f"Input {index} is empty.")
|
|
376
|
+
elif r.max_entries is not None and len(csv) > r.max_entries:
|
|
377
|
+
errors.append(f"Input {index} contains {len(csv)} entries, which exceeds the maximum allowed "
|
|
378
|
+
f"number of {r.max_entries}.")
|
|
379
|
+
elif not r.allow_empty_entries or r.func:
|
|
380
|
+
for i, entry in enumerate(csv):
|
|
381
|
+
if not entry or all(not cell.strip() for cell in entry):
|
|
382
|
+
if not r.allow_empty_entries:
|
|
383
|
+
errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
|
|
384
|
+
elif r.func:
|
|
385
|
+
func_errors: list[str] = r.func(entry)
|
|
386
|
+
if func_errors:
|
|
387
|
+
for error in func_errors:
|
|
388
|
+
errors.append(f"Error in entry {i} of input {index}: {error}")
|
|
389
|
+
return errors
|
|
390
|
+
|
|
391
|
+
def validate_input_json(self, index: int, r: JsonValidation) -> list[str]:
|
|
392
|
+
"""
|
|
393
|
+
Run simple validation on the JSON input at the given index.
|
|
394
|
+
|
|
395
|
+
:param index: The index of the input to validate.
|
|
396
|
+
:param r: The validation requirement to use for this input.
|
|
397
|
+
:return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
|
|
398
|
+
"""
|
|
399
|
+
input_json: list[dict[str, Any]] = self.agent.get_input_json(index)
|
|
400
|
+
errors: list[str] = []
|
|
401
|
+
if not input_json:
|
|
402
|
+
if not r.allow_empty_input:
|
|
403
|
+
errors.append(f"Input {index} is empty.")
|
|
404
|
+
elif r.max_entries is not None and len(input_json) > r.max_entries:
|
|
405
|
+
errors.append(f"Input {index} contains {len(input_json)} entries, which exceeds the maximum allowed "
|
|
406
|
+
f"number of {r.max_entries}.")
|
|
407
|
+
elif not r.allow_empty_entries or r.func:
|
|
408
|
+
for i, entry in enumerate(input_json):
|
|
409
|
+
if not entry:
|
|
410
|
+
if not r.allow_empty_entries:
|
|
411
|
+
errors.append(f"Entry {i} of input {index} is empty.")
|
|
412
|
+
elif r.func:
|
|
413
|
+
func_errors: list[str] = r.func(entry)
|
|
414
|
+
if func_errors:
|
|
415
|
+
for error in func_errors:
|
|
416
|
+
errors.append(f"Error in entry {i} of input {index}: {error}")
|
|
417
|
+
|
|
418
|
+
for key, rk in r.json_requirements.items():
|
|
419
|
+
for i, entry in enumerate(input_json):
|
|
420
|
+
errors.extend(self.validate_input_json_key(entry, rk, f"input[{index}][{i}]"))
|
|
421
|
+
|
|
422
|
+
return errors
|
|
423
|
+
|
|
424
|
+
def validate_input_json_key(self, data: dict[str, Any], rk: JsonKeyValidation, path: str) -> list[str]:
|
|
425
|
+
"""
|
|
426
|
+
Recursively validate a JSON key in a JSON object.
|
|
427
|
+
|
|
428
|
+
:param data: The JSON object to validate.
|
|
429
|
+
:param rk: The JSON key validation requirement to use.
|
|
430
|
+
:param path: The path to the current JSON object, for error reporting.
|
|
431
|
+
:return: A list of error messages if the JSON object is not valid. If the JSON object is valid, return an empty
|
|
432
|
+
list.
|
|
433
|
+
"""
|
|
434
|
+
errors: list[str] = []
|
|
435
|
+
if rk.key not in data:
|
|
436
|
+
if rk.required:
|
|
437
|
+
errors.append(f"Missing required key '{rk.key}' at path '{path}'.")
|
|
438
|
+
return errors
|
|
439
|
+
|
|
440
|
+
value: Any = data[rk.key]
|
|
441
|
+
if not isinstance(value, rk.json_type):
|
|
442
|
+
errors.append(f"Key '{rk.key}' at path '{path}' is expected to be of type "
|
|
443
|
+
f"{rk.json_type.__name__}, but got {type(value).__name__}.")
|
|
444
|
+
return errors
|
|
445
|
+
|
|
446
|
+
if isinstance(value, (str, list, dict)) and not value:
|
|
447
|
+
if not rk.allow_empty:
|
|
448
|
+
errors.append(f"Key '{rk.key}' at path '{path}' is empty, but empty values are not allowed.")
|
|
449
|
+
return errors
|
|
450
|
+
|
|
451
|
+
correct_type: bool = True
|
|
452
|
+
if rk.json_type is list and rk.list_type is not None:
|
|
453
|
+
if not isinstance(value, list):
|
|
454
|
+
raise RuntimeError("This should never happen; value was already checked to be of type list.")
|
|
455
|
+
for i, item in enumerate(value):
|
|
456
|
+
if not isinstance(item, rk.list_type):
|
|
457
|
+
errors.append(f"Entry {i} of list key '{rk.key}' at path '{path}' is expected to be of type "
|
|
458
|
+
f"{rk.list_type.__name__}, but got {type(item).__name__}.")
|
|
459
|
+
correct_type = False
|
|
460
|
+
elif rk.list_type is dict and rk.nested_requirements:
|
|
461
|
+
if not isinstance(item, dict):
|
|
462
|
+
raise RuntimeError("This should never happen; item was already checked to be of type dict.")
|
|
463
|
+
for nk, nrk in rk.nested_requirements.items():
|
|
464
|
+
errors.extend(self.validate_input_json_key(item, nrk, f"{path}.{rk.key}[{i}]"))
|
|
465
|
+
|
|
466
|
+
elif rk.json_type is dict and rk.nested_requirements:
|
|
467
|
+
if not isinstance(value, dict):
|
|
468
|
+
raise RuntimeError("This should never happen; value was already checked to be of type dict.")
|
|
469
|
+
for nk, nrk in rk.nested_requirements.items():
|
|
470
|
+
errors.extend(self.validate_input_json_key(value, nrk, f"{path}.{rk.key}"))
|
|
471
|
+
|
|
472
|
+
if rk.func and correct_type:
|
|
473
|
+
errors.extend(rk.func(f"{path}.{rk.key}", value))
|
|
474
|
+
|
|
475
|
+
return errors
|
|
476
|
+
|
|
477
|
+
def validate_input_data_records(self, index: int, r: DataRecordValidation) -> list[str]:
|
|
478
|
+
"""
|
|
479
|
+
Run simple validation on the data record input at the given index.
|
|
480
|
+
|
|
481
|
+
:param index: The index of the input to validate.
|
|
482
|
+
:param r: The validation requirement to use for this input.
|
|
483
|
+
:return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
|
|
484
|
+
"""
|
|
485
|
+
records: list[DataRecord] = self.agent.get_input_records(index)
|
|
486
|
+
errors: list[str] = []
|
|
487
|
+
if not records:
|
|
488
|
+
if not r.allow_empty_input:
|
|
489
|
+
errors.append(f"Input {index} is empty.")
|
|
490
|
+
elif r.max_entries is not None and len(records) > r.max_entries:
|
|
491
|
+
errors.append(f"Input {index} contains {len(records)} entries, which exceeds the maximum allowed "
|
|
492
|
+
f"number of {r.max_entries}.")
|
|
493
|
+
elif r.func or r.data_type_names:
|
|
494
|
+
for i, record in enumerate(records):
|
|
495
|
+
entry_errors: list[str] = []
|
|
496
|
+
if r.data_type_names:
|
|
497
|
+
record_dt: str = AliasUtil.to_data_type_name(record)
|
|
498
|
+
if record_dt not in r.data_type_names:
|
|
499
|
+
entry_errors.append(f"Entry {i} of input {index} has data type \"{record_dt}\", "
|
|
500
|
+
f"expected one of {r.data_type_names}.")
|
|
501
|
+
if r.func and not entry_errors:
|
|
502
|
+
func_errors: list[str] = r.func(record)
|
|
503
|
+
if func_errors:
|
|
504
|
+
for error in func_errors:
|
|
505
|
+
entry_errors.append(f"Error in entry {i} of input {index}: {error}")
|
|
506
|
+
errors.extend(entry_errors)
|
|
507
|
+
return errors
|
|
508
|
+
|
|
509
|
+
def validate_input_text(self, index: int, r: TextValidation) -> list[str]:
|
|
510
|
+
"""
|
|
511
|
+
Run simple validation on the binary input at the given index.
|
|
512
|
+
|
|
513
|
+
:param index: The index of the input to validate.
|
|
514
|
+
:param r: The validation requirement to use for this input.
|
|
515
|
+
:return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
|
|
516
|
+
"""
|
|
517
|
+
input_text: list[str] = self.agent.get_input_text(index)
|
|
518
|
+
if r.flatten:
|
|
519
|
+
input_text = self.agent.flatten_text(input_text)
|
|
520
|
+
|
|
521
|
+
errors: list[str] = []
|
|
522
|
+
if not input_text:
|
|
523
|
+
if not r.allow_empty_input:
|
|
524
|
+
errors.append(f"Input {index} is empty.")
|
|
525
|
+
elif r.max_entries is not None and len(input_text) > r.max_entries:
|
|
526
|
+
errors.append(f"Input {index} contains {len(input_text)} entries, which exceeds the maximum allowed "
|
|
527
|
+
f"number of {r.max_entries}.")
|
|
528
|
+
elif not r.allow_empty_entries or r.regex or r.func:
|
|
529
|
+
for i, entry in enumerate(input_text):
|
|
530
|
+
entry_errors: list[str] = []
|
|
531
|
+
if not entry.strip():
|
|
532
|
+
if not r.allow_empty_entries:
|
|
533
|
+
entry_errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
|
|
534
|
+
continue
|
|
535
|
+
if r.disallowed_characters:
|
|
536
|
+
for c in r.disallowed_characters:
|
|
537
|
+
# Replace special characters with their escaped versions for better error messages.
|
|
538
|
+
if c == "\r":
|
|
539
|
+
c = r"\r"
|
|
540
|
+
elif c == '\n':
|
|
541
|
+
c = r"\n"
|
|
542
|
+
elif c == "\t":
|
|
543
|
+
c = r"\t"
|
|
544
|
+
if c in entry:
|
|
545
|
+
entry_errors.append(f"Entry {i} of input {index} contains disallowed character '{c}'.")
|
|
546
|
+
if r.regex:
|
|
547
|
+
import re
|
|
548
|
+
if not re.fullmatch(r.regex, entry):
|
|
549
|
+
entry_errors.append(f"Entry {i} of input {index} does not fully match the expected regex format "
|
|
550
|
+
f"{r.regex}.")
|
|
551
|
+
if r.func and not entry_errors:
|
|
552
|
+
func_errors: list[str] = r.func(entry)
|
|
553
|
+
if func_errors:
|
|
554
|
+
for error in func_errors:
|
|
555
|
+
entry_errors.append(f"Error in entry {i} of input {index}: {error}")
|
|
556
|
+
errors.extend(entry_errors)
|
|
557
|
+
if errors and r.flatten:
|
|
558
|
+
errors.append(f"Note that input flattening is enabled for input {index}, which may increase the number "
|
|
559
|
+
f"of entries reported in the above errors. Flattening splits each entry on newlines, removes "
|
|
560
|
+
f"empty lines, and iterates over every line in the input as opposed to each entry as a whole.")
|
|
561
|
+
return errors
|