sapiopycommons 2025.6.19a564__py3-none-any.whl → 2026.1.22a847__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. sapiopycommons/ai/__init__.py +0 -0
  2. sapiopycommons/ai/agent_service_base.py +2051 -0
  3. sapiopycommons/ai/converter_service_base.py +163 -0
  4. sapiopycommons/ai/external_credentials.py +131 -0
  5. sapiopycommons/ai/protoapi/agent/agent_pb2.py +87 -0
  6. sapiopycommons/ai/protoapi/agent/agent_pb2.pyi +282 -0
  7. sapiopycommons/ai/protoapi/agent/agent_pb2_grpc.py +154 -0
  8. sapiopycommons/ai/protoapi/agent/entry_pb2.py +49 -0
  9. sapiopycommons/ai/protoapi/agent/entry_pb2.pyi +40 -0
  10. sapiopycommons/ai/protoapi/agent/entry_pb2_grpc.py +24 -0
  11. sapiopycommons/ai/protoapi/agent/item/item_container_pb2.py +61 -0
  12. sapiopycommons/ai/protoapi/agent/item/item_container_pb2.pyi +181 -0
  13. sapiopycommons/ai/protoapi/agent/item/item_container_pb2_grpc.py +24 -0
  14. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.py +41 -0
  15. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.pyi +36 -0
  16. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2_grpc.py +24 -0
  17. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.py +51 -0
  18. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.pyi +59 -0
  19. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2_grpc.py +24 -0
  20. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.py +123 -0
  21. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.pyi +599 -0
  22. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2_grpc.py +24 -0
  23. sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.py +59 -0
  24. sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2.pyi +68 -0
  25. sapiopycommons/ai/protoapi/pipeline/converter/converter_pb2_grpc.py +149 -0
  26. sapiopycommons/ai/protoapi/pipeline/script/script_pb2.py +69 -0
  27. sapiopycommons/ai/protoapi/pipeline/script/script_pb2.pyi +109 -0
  28. sapiopycommons/ai/protoapi/pipeline/script/script_pb2_grpc.py +153 -0
  29. sapiopycommons/ai/protoapi/pipeline/step_output_pb2.py +49 -0
  30. sapiopycommons/ai/protoapi/pipeline/step_output_pb2.pyi +56 -0
  31. sapiopycommons/ai/protoapi/pipeline/step_output_pb2_grpc.py +24 -0
  32. sapiopycommons/ai/protoapi/pipeline/step_pb2.py +43 -0
  33. sapiopycommons/ai/protoapi/pipeline/step_pb2.pyi +44 -0
  34. sapiopycommons/ai/protoapi/pipeline/step_pb2_grpc.py +24 -0
  35. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.py +39 -0
  36. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.pyi +33 -0
  37. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2_grpc.py +24 -0
  38. sapiopycommons/ai/protobuf_utils.py +583 -0
  39. sapiopycommons/ai/request_validation.py +561 -0
  40. sapiopycommons/ai/server.py +152 -0
  41. sapiopycommons/ai/test_client.py +534 -0
  42. sapiopycommons/callbacks/callback_util.py +53 -24
  43. sapiopycommons/eln/experiment_handler.py +12 -5
  44. sapiopycommons/files/assay_plate_reader.py +93 -0
  45. sapiopycommons/files/file_text_converter.py +207 -0
  46. sapiopycommons/files/file_util.py +128 -1
  47. sapiopycommons/files/temp_files.py +82 -0
  48. sapiopycommons/flowcyto/flow_cyto.py +2 -24
  49. sapiopycommons/general/accession_service.py +2 -28
  50. sapiopycommons/general/aliases.py +4 -1
  51. sapiopycommons/general/macros.py +172 -0
  52. sapiopycommons/general/time_util.py +199 -4
  53. sapiopycommons/multimodal/multimodal.py +2 -24
  54. sapiopycommons/recordmodel/record_handler.py +200 -111
  55. sapiopycommons/rules/eln_rule_handler.py +3 -0
  56. sapiopycommons/rules/on_save_rule_handler.py +3 -0
  57. sapiopycommons/webhook/webhook_handlers.py +6 -4
  58. sapiopycommons/webhook/webservice_handlers.py +1 -1
  59. {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/METADATA +2 -2
  60. sapiopycommons-2026.1.22a847.dist-info/RECORD +113 -0
  61. sapiopycommons-2025.6.19a564.dist-info/RECORD +0 -68
  62. {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/WHEEL +0 -0
  63. {sapiopycommons-2025.6.19a564.dist-info → sapiopycommons-2026.1.22a847.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,561 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from typing import Any, Callable
5
+
6
+ from sapiopylib.rest.pojo.DataRecord import DataRecord
7
+
8
+ from sapiopycommons.ai.agent_service_base import AgentBase, ContainerType
9
+ from sapiopycommons.general.aliases import AliasUtil
10
+
11
+
12
class InputValidation(ABC):
    """
    Shared settings for a validation requirement that targets one input of an agent.

    Subclasses add the checks that are specific to a container type.
    """
    index: int
    max_entries: int | None
    allow_empty_input: bool
    allow_empty_entries: bool

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        """
        # Which input is targeted and how large it may be.
        self.index, self.max_entries = index, max_entries
        # Emptiness policy for the whole input and for its individual entries.
        self.allow_empty_input, self.allow_empty_entries = allow_empty_input, allow_empty_entries
33
+
34
+
35
class BinaryValidation(InputValidation):
    """
    Validation requirement for an input whose entries are raw bytes.
    """
    func: Callable[[bytes], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 func: Callable[[bytes], list[str]] | None = None):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        :param func: Optional per-entry callback. It receives one entry and returns a list of error messages,
            or an empty list when the entry is acceptable. It is not invoked for empty inputs or empty entries.
        """
        self.func = func
        super().__init__(index, max_entries=max_entries, allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
55
+
56
+
57
class CsvValidation(InputValidation):
    """
    Validation requirement for an input that holds CSV rows.
    """
    required_headers: list[str] | None = None

    func: Callable[[dict[str, Any]], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 required_headers: list[str] | None = None,
                 func: Callable[[dict[str, Any]], list[str]] | None = None):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        :param required_headers: Headers that the CSV input has to contain. None disables the header check.
        :param func: Optional per-entry callback. It receives one entry and returns a list of error messages,
            or an empty list when the entry is acceptable. It is not invoked for empty inputs or empty entries.
        """
        self.func = func
        self.required_headers = required_headers
        super().__init__(index, max_entries=max_entries, allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
83
+
84
+
85
class DataRecordValidation(InputValidation):
    """
    Validation requirement for an input that holds data records.
    """
    func: Callable[[DataRecord], list[str]] | None
    data_type_names: list[str] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 data_type_names: list[str] | str | None = None,
                 func: Callable[[DataRecord], list[str]] | None = None):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        :param data_type_names: A single data type name or a list of names that the records should match.
            None disables the data type check. Surrounding whitespace is stripped from every name.
        :param func: Optional per-entry callback. It receives one record and returns a list of error messages,
            or an empty list when the record is acceptable. It is not invoked for empty inputs.
        """
        super().__init__(index, max_entries, allow_empty_input, allow_empty_entries)
        self.func = func

        # Normalize a lone name to a one-element list so that both spellings share one code path.
        names = [data_type_names] if isinstance(data_type_names, str) else data_type_names
        self.data_type_names = None if names is None else [name.strip() for name in names]
115
+
116
+
117
class JsonValidation(InputValidation):
    """
    Validation requirement for an input that holds JSON objects.
    """
    json_requirements: dict[str, JsonKeyValidation]

    func: Callable[[dict[str, Any]], list[str]] | None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False,
                 json_requirements: list[JsonKeyValidation] | None = None,
                 func: Callable[[dict[str, Any]], list[str]] | None = None):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        :param json_requirements: Per-key requirements for the JSON entries. Each one names a key, the type
            expected for its value, and optionally nested requirements. Stored internally keyed by the
            requirement's key; two requirements for the same key raise a ValueError.
        :param func: Optional per-entry callback. It receives one entry and returns a list of error messages,
            or an empty list when the entry is acceptable. It is not invoked for empty inputs or empty entries.
        """
        super().__init__(index, max_entries, allow_empty_input, allow_empty_entries)

        by_key: dict[str, JsonKeyValidation] = {}
        self.json_requirements = by_key
        for requirement in json_requirements or []:
            if requirement.key in by_key:
                raise ValueError(f"Duplicate JSON requirement key {requirement.key} for input index {index}.")
            by_key[requirement.key] = requirement

        self.func = func
150
+
151
+
152
class JsonKeyValidation:
    """
    A class representing a validation requirement for a specific key in a JSON input.
    """
    key: str
    json_type: type
    required: bool
    allow_empty: bool

    list_type: type | None = None
    nested_requirements: dict[str, JsonKeyValidation]

    func: Callable[[str, Any], list[str]] | None = None

    def __init__(self, key: str, json_type: type, required: bool = True, allow_empty: bool = False,
                 list_type: type | None = None, nested_requirements: list[JsonKeyValidation] | None = None,
                 func: Callable[[str, Any], list[str]] | None = None):
        """
        :param key: The key in the JSON input to validate.
        :param json_type: The expected type of the value for this key. This should be one of: str, int, float, bool,
            list, or dict.
        :param required: If true, then this key must be present in the JSON input. If false, then the key is optional,
            but if present, it must still match the other expected criteria.
        :param allow_empty: If true, then the value for this key can be empty (e.g., an empty string, list, or dict).
            If false, then the value must not be empty.
        :param list_type: The expected type of the entries in the list if json_type is list. Must be one of the
            same types that json_type accepts.
        :param nested_requirements: A list of nested JSON requirements to validate for this key if it is a dict. Each
            requirement specifies a key to validate, the expected type of the value for that key, and any nested
            requirements for that key. Only applicable if json_type is dict, or if json_type is list and list_type is
            dict.
        :param func: An optional function to run on the value for this key. The function should take the path and the
            value as arguments, and return a list of error messages if the value is not valid. If the value is valid,
            the function should return an empty list.
        :raises ValueError: If two nested requirements share a key, or if json_type or list_type is not one of the
            supported JSON types.
        """
        self.key = key
        self.json_type = json_type
        self.required = required
        self.allow_empty = allow_empty

        self.list_type = list_type
        self.nested_requirements = {}
        if nested_requirements:
            for req in nested_requirements:
                if req.key in self.nested_requirements:
                    raise ValueError(f"Duplicate nested requirement key {req.key} for JSON key {key}.")
                self.nested_requirements[req.key] = req

        self.func = func

        # An ordered tuple (rather than a set) keeps the type list in the error messages stable between runs,
        # and lets an unhashable argument reach the ValueError below instead of failing with a TypeError.
        allowed_types: tuple[type, ...] = (str, int, float, bool, list, dict)
        allowed_names: str = ", ".join(t.__name__ for t in allowed_types)
        if not any(self.json_type is t for t in allowed_types):
            raise ValueError(f"Invalid json_type {self.json_type} for key {key}. Must be one of: "
                             f"{allowed_names}.")
        if self.list_type is not None and not any(self.list_type is t for t in allowed_types):
            raise ValueError(f"Invalid list_type {self.list_type} for key {key}. Must be one of: "
                             f"{allowed_names}.")
209
+
210
+
211
class TextValidation(InputValidation):
    """
    Validation requirement for an input that holds text entries.
    """
    flatten: bool
    disallowed_characters: str | None = None
    regex: str | None = None

    func: Callable[[str], list[str]] | None = None

    def __init__(self, index: int, max_entries: int | None = None,
                 allow_empty_input: bool = False, allow_empty_entries: bool = False, flatten: bool = False,
                 disallow_characters: str | None = None, regex: str | None = None,
                 func: Callable[[str], list[str]] | None = None):
        """
        :param index: Position of the input that this requirement applies to.
        :param max_entries: Upper bound on the number of entries in the input. None means unlimited.
        :param allow_empty_input: Whether the input as a whole is permitted to be empty.
        :param allow_empty_entries: Whether single entries inside the input are permitted to be empty.
        :param flatten: Whether the input is flattened before it is validated.
        :param disallow_characters: Characters that no entry may contain. None disables the character check.
            Stored on the instance as ``disallowed_characters``.
        :param regex: Regular expression that every entry has to fully match. None disables the regex check.
        :param func: Optional per-entry callback. It receives one entry and returns a list of error messages,
            or an empty list when the entry is acceptable. It only runs for entries that passed the earlier
            checks (not empty, no disallowed characters, regex matched).
        """
        super().__init__(index, max_entries=max_entries, allow_empty_input=allow_empty_input,
                         allow_empty_entries=allow_empty_entries)
        self.func = func
        self.regex = regex
        # The public parameter is spelled "disallow_characters"; the attribute keeps the "disallowed" spelling.
        self.disallowed_characters = disallow_characters
        self.flatten = flatten
246
+
247
+
248
+ class InputValidator:
249
+ """
250
+ A class for validating the inputs to an agent based on their container types and specified validation requirements.
251
+ """
252
+ agent: AgentBase
253
+ requirements: dict[int, InputValidation]
254
+
255
+ def __init__(self, agent: AgentBase, requirements: list[InputValidation] | None = None):
256
+ """
257
+ :param agent: The agent to validate the request of.
258
+ :param requirements: A list of validation requirements to apply to the request. If a validation object is
259
+ not provided for a given input, then default validation will be applied. Default validation requires that
260
+ the input is not empty, and that the entries in the input are not empty.
261
+ """
262
+ self.agent = agent
263
+ self.requirements = {}
264
+ if requirements:
265
+ self.add_requirements(requirements)
266
+
267
+ def add_requirements(self, requirements: list[InputValidation]) -> None:
268
+ for req in requirements:
269
+ if req.index < 0 or req.index >= len(self.agent.input_configs):
270
+ raise ValueError(f"Validation requirement index {req.index} is out of range for agent "
271
+ f"{self.agent} with {len(self.agent.input_configs)} inputs.")
272
+ if req.index in self.requirements:
273
+ raise ValueError(f"Duplicate validation requirement index {req.index} for agent {self.agent}.")
274
+ self.requirements[req.index] = req
275
+
276
+ def run(self) -> list[str]:
277
+ """
278
+ Run simple validation on all the inputs based on their container types. This requires the following:
279
+ - The input may not be empty.
280
+ - The entries in the input may not be empty, unless allow_empty is set to true.
281
+ - If provided, the number of entries in the input may not exceed a maximum size.
282
+ - If provided, certain keys must be present in the JSON input, and they must match the above behavior.
283
+
284
+ :return: A list of the error messages if the request is not valid. If the request is valid, return an empty
285
+ list.
286
+ """
287
+ errors: list[str] = []
288
+ for i, (input_type, input_config) in enumerate(zip(self.agent.input_container_types, self.agent.input_configs)):
289
+ match input_type:
290
+ case ContainerType.BINARY:
291
+ r: InputValidation = self.requirements.get(i, BinaryValidation(i))
292
+ if not isinstance(r, BinaryValidation):
293
+ raise ValueError(f"Validation requirement for binary input at index {i} must be a "
294
+ f"BinaryValidation object. Got {type(r)} instead.")
295
+ errors.extend(self.validate_input_binary(i, r))
296
+ case ContainerType.CSV:
297
+ r: InputValidation = self.requirements.get(i, CsvValidation(i))
298
+ if not isinstance(r, CsvValidation):
299
+ raise ValueError(f"Validation requirement for CSV input at index {i} must be a "
300
+ f"CsvValidation object. Got {type(r)} instead.")
301
+ errors.extend(self.validate_input_csv(i, r))
302
+ case ContainerType.JSON:
303
+ r: InputValidation = self.requirements.get(i, JsonValidation(i))
304
+ if not isinstance(r, JsonValidation):
305
+ raise ValueError(f"Validation requirement for JSON input at index {i} must be a "
306
+ f"JsonValidation object. Got {type(r)} instead.")
307
+ errors.extend(self.validate_input_json(i, r))
308
+ case ContainerType.DATA_RECORDS:
309
+ data_type_name: str | None = input_config.base_config.data_type_name
310
+ if data_type_name and data_type_name != "Any":
311
+ base_validation = DataRecordValidation(i, data_type_names=[data_type_name])
312
+ else:
313
+ base_validation = DataRecordValidation(i)
314
+ r: InputValidation = self.requirements.get(i, base_validation)
315
+ if not isinstance(r, DataRecordValidation):
316
+ raise ValueError(f"Validation requirement for data record input at index {i} must be a "
317
+ f"DataRecordValidation object. Got {type(r)} instead.")
318
+ errors.extend(self.validate_input_data_records(i, r))
319
+ case ContainerType.TEXT:
320
+ r: InputValidation = self.requirements.get(i, TextValidation(i))
321
+ if not isinstance(r, TextValidation):
322
+ raise ValueError(f"Validation requirement for text input at index {i} must be a "
323
+ f"TextValidation object. Got {type(r)} instead.")
324
+ errors.extend(self.validate_input_text(i, r))
325
+ return errors
326
+
327
+ def validate_input_binary(self, index: int, r: BinaryValidation) -> list[str]:
328
+ """
329
+ Run simple validation on the binary input at the given index.
330
+
331
+ :param index: The index of the input to validate.
332
+ :param r: The validation requirement to use for this input.
333
+ :return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
334
+ """
335
+ input_files: list[bytes] = self.agent.get_input_binary(index)
336
+ errors: list[str] = []
337
+ if not input_files:
338
+ if not r.allow_empty_input:
339
+ errors.append(f"Input {index} is empty.")
340
+ elif r.max_entries is not None and len(input_files) > r.max_entries:
341
+ errors.append(f"Input {index} contains {len(input_files)} entries, which exceeds the maximum allowed "
342
+ f"number of {r.max_entries}.")
343
+ elif not r.allow_empty_entries or r.func:
344
+ for i, entry in enumerate(input_files):
345
+ if not entry.strip():
346
+ if not r.allow_empty_entries:
347
+ errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
348
+ elif r.func:
349
+ func_errors: list[str] = r.func(entry)
350
+ if func_errors:
351
+ for error in func_errors:
352
+ errors.append(f"Error in entry {i} of input {index}: {error}")
353
+ return errors
354
+
355
+ def validate_input_csv(self, index: int, r: CsvValidation) -> list[str]:
356
+ """
357
+ Run simple validation on the CSV input at the given index.
358
+
359
+ :param index: The index of the input to validate.
360
+ :param r: The validation requirement to use for this input.
361
+ :return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
362
+ """
363
+ headers, csv = self.agent.get_input_csv(index)
364
+ headers: list[str]
365
+ csv: list[dict[str, Any]]
366
+
367
+ errors: list[str] = []
368
+ if r.required_headers:
369
+ missing_headers: list[str] = [h for h in r.required_headers if h not in headers]
370
+ if missing_headers:
371
+ errors.append(f"Input {index} is missing required headers: {', '.join(missing_headers)}.")
372
+
373
+ if not csv:
374
+ if not r.allow_empty_input:
375
+ errors.append(f"Input {index} is empty.")
376
+ elif r.max_entries is not None and len(csv) > r.max_entries:
377
+ errors.append(f"Input {index} contains {len(csv)} entries, which exceeds the maximum allowed "
378
+ f"number of {r.max_entries}.")
379
+ elif not r.allow_empty_entries or r.func:
380
+ for i, entry in enumerate(csv):
381
+ if not entry or all(not cell.strip() for cell in entry):
382
+ if not r.allow_empty_entries:
383
+ errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
384
+ elif r.func:
385
+ func_errors: list[str] = r.func(entry)
386
+ if func_errors:
387
+ for error in func_errors:
388
+ errors.append(f"Error in entry {i} of input {index}: {error}")
389
+ return errors
390
+
391
+ def validate_input_json(self, index: int, r: JsonValidation) -> list[str]:
392
+ """
393
+ Run simple validation on the JSON input at the given index.
394
+
395
+ :param index: The index of the input to validate.
396
+ :param r: The validation requirement to use for this input.
397
+ :return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
398
+ """
399
+ input_json: list[dict[str, Any]] = self.agent.get_input_json(index)
400
+ errors: list[str] = []
401
+ if not input_json:
402
+ if not r.allow_empty_input:
403
+ errors.append(f"Input {index} is empty.")
404
+ elif r.max_entries is not None and len(input_json) > r.max_entries:
405
+ errors.append(f"Input {index} contains {len(input_json)} entries, which exceeds the maximum allowed "
406
+ f"number of {r.max_entries}.")
407
+ elif not r.allow_empty_entries or r.func:
408
+ for i, entry in enumerate(input_json):
409
+ if not entry:
410
+ if not r.allow_empty_entries:
411
+ errors.append(f"Entry {i} of input {index} is empty.")
412
+ elif r.func:
413
+ func_errors: list[str] = r.func(entry)
414
+ if func_errors:
415
+ for error in func_errors:
416
+ errors.append(f"Error in entry {i} of input {index}: {error}")
417
+
418
+ for key, rk in r.json_requirements.items():
419
+ for i, entry in enumerate(input_json):
420
+ errors.extend(self.validate_input_json_key(entry, rk, f"input[{index}][{i}]"))
421
+
422
+ return errors
423
+
424
+ def validate_input_json_key(self, data: dict[str, Any], rk: JsonKeyValidation, path: str) -> list[str]:
425
+ """
426
+ Recursively validate a JSON key in a JSON object.
427
+
428
+ :param data: The JSON object to validate.
429
+ :param rk: The JSON key validation requirement to use.
430
+ :param path: The path to the current JSON object, for error reporting.
431
+ :return: A list of error messages if the JSON object is not valid. If the JSON object is valid, return an empty
432
+ list.
433
+ """
434
+ errors: list[str] = []
435
+ if rk.key not in data:
436
+ if rk.required:
437
+ errors.append(f"Missing required key '{rk.key}' at path '{path}'.")
438
+ return errors
439
+
440
+ value: Any = data[rk.key]
441
+ if not isinstance(value, rk.json_type):
442
+ errors.append(f"Key '{rk.key}' at path '{path}' is expected to be of type "
443
+ f"{rk.json_type.__name__}, but got {type(value).__name__}.")
444
+ return errors
445
+
446
+ if isinstance(value, (str, list, dict)) and not value:
447
+ if not rk.allow_empty:
448
+ errors.append(f"Key '{rk.key}' at path '{path}' is empty, but empty values are not allowed.")
449
+ return errors
450
+
451
+ correct_type: bool = True
452
+ if rk.json_type is list and rk.list_type is not None:
453
+ if not isinstance(value, list):
454
+ raise RuntimeError("This should never happen; value was already checked to be of type list.")
455
+ for i, item in enumerate(value):
456
+ if not isinstance(item, rk.list_type):
457
+ errors.append(f"Entry {i} of list key '{rk.key}' at path '{path}' is expected to be of type "
458
+ f"{rk.list_type.__name__}, but got {type(item).__name__}.")
459
+ correct_type = False
460
+ elif rk.list_type is dict and rk.nested_requirements:
461
+ if not isinstance(item, dict):
462
+ raise RuntimeError("This should never happen; item was already checked to be of type dict.")
463
+ for nk, nrk in rk.nested_requirements.items():
464
+ errors.extend(self.validate_input_json_key(item, nrk, f"{path}.{rk.key}[{i}]"))
465
+
466
+ elif rk.json_type is dict and rk.nested_requirements:
467
+ if not isinstance(value, dict):
468
+ raise RuntimeError("This should never happen; value was already checked to be of type dict.")
469
+ for nk, nrk in rk.nested_requirements.items():
470
+ errors.extend(self.validate_input_json_key(value, nrk, f"{path}.{rk.key}"))
471
+
472
+ if rk.func and correct_type:
473
+ errors.extend(rk.func(f"{path}.{rk.key}", value))
474
+
475
+ return errors
476
+
477
+ def validate_input_data_records(self, index: int, r: DataRecordValidation) -> list[str]:
478
+ """
479
+ Run simple validation on the data record input at the given index.
480
+
481
+ :param index: The index of the input to validate.
482
+ :param r: The validation requirement to use for this input.
483
+ :return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
484
+ """
485
+ records: list[DataRecord] = self.agent.get_input_records(index)
486
+ errors: list[str] = []
487
+ if not records:
488
+ if not r.allow_empty_input:
489
+ errors.append(f"Input {index} is empty.")
490
+ elif r.max_entries is not None and len(records) > r.max_entries:
491
+ errors.append(f"Input {index} contains {len(records)} entries, which exceeds the maximum allowed "
492
+ f"number of {r.max_entries}.")
493
+ elif r.func or r.data_type_names:
494
+ for i, record in enumerate(records):
495
+ entry_errors: list[str] = []
496
+ if r.data_type_names:
497
+ record_dt: str = AliasUtil.to_data_type_name(record)
498
+ if record_dt not in r.data_type_names:
499
+ entry_errors.append(f"Entry {i} of input {index} has data type \"{record_dt}\", "
500
+ f"expected one of {r.data_type_names}.")
501
+ if r.func and not entry_errors:
502
+ func_errors: list[str] = r.func(record)
503
+ if func_errors:
504
+ for error in func_errors:
505
+ entry_errors.append(f"Error in entry {i} of input {index}: {error}")
506
+ errors.extend(entry_errors)
507
+ return errors
508
+
509
+ def validate_input_text(self, index: int, r: TextValidation) -> list[str]:
510
+ """
511
+ Run simple validation on the binary input at the given index.
512
+
513
+ :param index: The index of the input to validate.
514
+ :param r: The validation requirement to use for this input.
515
+ :return: A list of error messages if the input is not valid. If the input is valid, return an empty list.
516
+ """
517
+ input_text: list[str] = self.agent.get_input_text(index)
518
+ if r.flatten:
519
+ input_text = self.agent.flatten_text(input_text)
520
+
521
+ errors: list[str] = []
522
+ if not input_text:
523
+ if not r.allow_empty_input:
524
+ errors.append(f"Input {index} is empty.")
525
+ elif r.max_entries is not None and len(input_text) > r.max_entries:
526
+ errors.append(f"Input {index} contains {len(input_text)} entries, which exceeds the maximum allowed "
527
+ f"number of {r.max_entries}.")
528
+ elif not r.allow_empty_entries or r.regex or r.func:
529
+ for i, entry in enumerate(input_text):
530
+ entry_errors: list[str] = []
531
+ if not entry.strip():
532
+ if not r.allow_empty_entries:
533
+ entry_errors.append(f"Entry {i} of input {index} is empty or contains only whitespace.")
534
+ continue
535
+ if r.disallowed_characters:
536
+ for c in r.disallowed_characters:
537
+ # Replace special characters with their escaped versions for better error messages.
538
+ if c == "\r":
539
+ c = r"\r"
540
+ elif c == '\n':
541
+ c = r"\n"
542
+ elif c == "\t":
543
+ c = r"\t"
544
+ if c in entry:
545
+ entry_errors.append(f"Entry {i} of input {index} contains disallowed character '{c}'.")
546
+ if r.regex:
547
+ import re
548
+ if not re.fullmatch(r.regex, entry):
549
+ entry_errors.append(f"Entry {i} of input {index} does not fully match the expected regex format "
550
+ f"{r.regex}.")
551
+ if r.func and not entry_errors:
552
+ func_errors: list[str] = r.func(entry)
553
+ if func_errors:
554
+ for error in func_errors:
555
+ entry_errors.append(f"Error in entry {i} of input {index}: {error}")
556
+ errors.extend(entry_errors)
557
+ if errors and r.flatten:
558
+ errors.append(f"Note that input flattening is enabled for input {index}, which may increase the number "
559
+ f"of entries reported in the above errors. Flattening splits each entry on newlines, removes "
560
+ f"empty lines, and iterates over every line in the input as opposed to each entry as a whole.")
561
+ return errors