sapiopycommons 2025.10.17a787__py3-none-any.whl → 2025.10.20a789__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sapiopycommons might be problematic. Click here for more details.

Files changed (50) hide show
  1. sapiopycommons/ai/agent_service_base.py +1226 -0
  2. sapiopycommons/ai/converter_service_base.py +163 -0
  3. sapiopycommons/ai/external_credentials.py +128 -0
  4. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.py +41 -0
  5. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2.pyi +35 -0
  6. sapiopycommons/ai/protoapi/externalcredentials/external_credentials_pb2_grpc.py +24 -0
  7. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.py +43 -0
  8. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2.pyi +31 -0
  9. sapiopycommons/ai/protoapi/fielddefinitions/fields_pb2_grpc.py +24 -0
  10. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.py +123 -0
  11. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2.pyi +598 -0
  12. sapiopycommons/ai/protoapi/fielddefinitions/velox_field_def_pb2_grpc.py +24 -0
  13. sapiopycommons/ai/protoapi/plan/converter/converter_pb2.py +51 -0
  14. sapiopycommons/ai/protoapi/plan/converter/converter_pb2.pyi +63 -0
  15. sapiopycommons/ai/protoapi/plan/converter/converter_pb2_grpc.py +149 -0
  16. sapiopycommons/ai/protoapi/plan/item/item_container_pb2.py +55 -0
  17. sapiopycommons/ai/protoapi/plan/item/item_container_pb2.pyi +90 -0
  18. sapiopycommons/ai/protoapi/plan/item/item_container_pb2_grpc.py +24 -0
  19. sapiopycommons/ai/protoapi/plan/script/script_pb2.py +61 -0
  20. sapiopycommons/ai/protoapi/plan/script/script_pb2.pyi +108 -0
  21. sapiopycommons/ai/protoapi/plan/script/script_pb2_grpc.py +153 -0
  22. sapiopycommons/ai/protoapi/plan/step_output_pb2.py +45 -0
  23. sapiopycommons/ai/protoapi/plan/step_output_pb2.pyi +42 -0
  24. sapiopycommons/ai/protoapi/plan/step_output_pb2_grpc.py +24 -0
  25. sapiopycommons/ai/protoapi/plan/step_pb2.py +43 -0
  26. sapiopycommons/ai/protoapi/plan/step_pb2.pyi +43 -0
  27. sapiopycommons/ai/protoapi/plan/step_pb2_grpc.py +24 -0
  28. sapiopycommons/ai/protoapi/plan/tool/entry_pb2.py +41 -0
  29. sapiopycommons/ai/protoapi/plan/tool/entry_pb2.pyi +35 -0
  30. sapiopycommons/ai/protoapi/plan/tool/entry_pb2_grpc.py +24 -0
  31. sapiopycommons/ai/protoapi/plan/tool/tool_pb2.py +79 -0
  32. sapiopycommons/ai/protoapi/plan/tool/tool_pb2.pyi +261 -0
  33. sapiopycommons/ai/protoapi/plan/tool/tool_pb2_grpc.py +154 -0
  34. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.py +39 -0
  35. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2.pyi +32 -0
  36. sapiopycommons/ai/protoapi/session/sapio_conn_info_pb2_grpc.py +24 -0
  37. sapiopycommons/ai/protobuf_utils.py +504 -0
  38. sapiopycommons/ai/request_validation.py +478 -0
  39. sapiopycommons/ai/server.py +152 -0
  40. sapiopycommons/ai/test_client.py +446 -0
  41. sapiopycommons/callbacks/callback_util.py +4 -18
  42. sapiopycommons/files/file_util.py +128 -1
  43. sapiopycommons/files/temp_files.py +82 -0
  44. sapiopycommons/general/aliases.py +0 -3
  45. sapiopycommons/webhook/webservice_handlers.py +1 -1
  46. {sapiopycommons-2025.10.17a787.dist-info → sapiopycommons-2025.10.20a789.dist-info}/METADATA +1 -1
  47. {sapiopycommons-2025.10.17a787.dist-info → sapiopycommons-2025.10.20a789.dist-info}/RECORD +49 -9
  48. sapiopycommons/ai/tool_of_tools.py +0 -917
  49. {sapiopycommons-2025.10.17a787.dist-info → sapiopycommons-2025.10.20a789.dist-info}/WHEEL +0 -0
  50. {sapiopycommons-2025.10.17a787.dist-info → sapiopycommons-2025.10.20a789.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1226 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import io
5
+ import json
6
+ import logging
7
+ import re
8
+ import subprocess
9
+ import traceback
10
+ from abc import abstractmethod, ABC
11
+ from logging import Logger
12
+ from os import PathLike
13
+ from subprocess import CompletedProcess
14
+ from typing import Any, Iterable, Mapping, Sequence
15
+
16
+ from grpc import ServicerContext
17
+ from sapiopylib.rest.User import SapioUser, ensure_logger_initialized
18
+ from sapiopylib.rest.pojo.datatype.FieldDefinition import AbstractVeloxFieldDefinition
19
+
20
+ from sapiopycommons.ai.external_credentials import ExternalCredentials
21
+ from sapiopycommons.ai.protoapi.externalcredentials.external_credentials_pb2 import ExternalCredentialsPbo
22
+ from sapiopycommons.ai.protoapi.fielddefinitions.fields_pb2 import FieldValueMapPbo, FieldValuePbo
23
+ from sapiopycommons.ai.protoapi.fielddefinitions.velox_field_def_pb2 import VeloxFieldDefPbo, FieldTypePbo, \
24
+ SelectionPropertiesPbo, IntegerPropertiesPbo, DoublePropertiesPbo, BooleanPropertiesPbo, StringPropertiesPbo, \
25
+ FieldValidatorPbo, DatePropertiesPbo
26
+ from sapiopycommons.ai.protoapi.plan.item.item_container_pb2 import ContentTypePbo
27
+ from sapiopycommons.ai.protoapi.plan.tool.entry_pb2 import StepOutputBatchPbo, StepItemContainerPbo, \
28
+ StepBinaryContainerPbo, StepCsvContainerPbo, StepCsvHeaderRowPbo, StepCsvRowPbo, StepJsonContainerPbo, \
29
+ StepTextContainerPbo
30
+ from sapiopycommons.ai.protoapi.plan.tool.tool_pb2 import ToolDetailsRequestPbo, ToolDetailsResponsePbo, \
31
+ ToolDetailsPbo, ProcessStepRequestPbo, ProcessStepResponsePbo, ToolOutputDetailsPbo, ToolIoConfigBasePbo, \
32
+ ToolInputDetailsPbo, ExampleContainerPbo, ProcessStepResponseStatusPbo, ToolCitationPbo
33
+ from sapiopycommons.ai.protoapi.plan.tool.tool_pb2_grpc import ToolServiceServicer
34
+ from sapiopycommons.ai.protoapi.session.sapio_conn_info_pb2 import SapioUserSecretTypePbo, SapioConnectionInfoPbo
35
+ from sapiopycommons.ai.protobuf_utils import ProtobufUtils
36
+ from sapiopycommons.ai.test_client import ContainerType
37
+ from sapiopycommons.files.file_util import FileUtil
38
+ from sapiopycommons.files.temp_files import TempFileHandler
39
+ from sapiopycommons.general.aliases import FieldMap, FieldValue
40
+
41
+
42
+ # FR-47422: Created classes.
43
class SapioAgentResult(ABC):
    """
    Abstract base type for every kind of result a Sapio agent can return. It cannot be instantiated directly;
    create one of its concrete subclasses to build a result object.
    """

    @abstractmethod
    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        Translate this result into its protobuf representation.

        :return: Either a single StepOutputBatchPbo or a list of FieldValueMapPbo objects, depending on the subclass.
        """
        ...
54
+
55
+
56
class BinaryResult(SapioAgentResult):
    """
    An agent result whose payload is raw binary data.
    """
    binary_data: list[bytes]
    content_type: str
    file_extensions: list[str]
    name: str

    def __init__(self, binary_data: list[bytes], content_type: str = "binary", file_extensions: list[str] = None,
                 name: str | None = None):
        """
        :param binary_data: The payload, given as a list of bytes objects.
        :param content_type: The content type that describes the payload.
        :param file_extensions: The file extensions that this binary data may be saved with.
        :param name: An optional identifying name for this result, made available to the next agent.
        """
        self.binary_data = binary_data
        self.content_type = content_type
        # Any falsy value (None or an empty list) is replaced by a fresh empty list.
        self.file_extensions = file_extensions or []
        self.name = name

    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        :return: A StepOutputBatchPbo whose item container wraps the binary payload.
        """
        content = ContentTypePbo(name=self.content_type, extensions=self.file_extensions)
        payload = StepBinaryContainerPbo(items=self.binary_data)
        container = StepItemContainerPbo(content_type=content, container_name=self.name, binary_container=payload)
        return StepOutputBatchPbo(item_container=container)
86
+
87
+
88
class CsvResult(SapioAgentResult):
    """
    A class representing CSV results from a Sapio agent.
    """
    csv_data: list[dict[str, Any]]
    content_type: str
    file_extensions: list[str]
    name: str | None

    def __init__(self, csv_data: list[dict[str, Any]], content_type: str = "csv",
                 file_extensions: list[str] | None = None, name: str | None = None):
        """
        :param csv_data: The list of CSV data results, provided as a list of dictionaries of column name to value.
        :param content_type: The content type of the data.
        :param file_extensions: A list of file extensions that this CSV data can be saved as. Defaults to ["csv"]
            when None or empty.
        :param name: An optional identifying name for this result that will be accessible to the next agent.
        """
        self.csv_data = csv_data
        self.content_type = content_type
        self.file_extensions = file_extensions if file_extensions else ["csv"]
        self.name = name

    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        Convert the CSV rows into a StepOutputBatchPbo.

        The header is the union of the keys of every row, in first-seen order, and every row is emitted in that
        same column order. A row that lacks a column gets an empty cell for it. This keeps cells aligned with the
        header even when rows list their keys in a different order or omit some of them.

        :return: A StepOutputBatchPbo holding the CSV container, or one with no item container when there are
            no rows.
        """
        if not self.csv_data:
            return StepOutputBatchPbo(item_container=None)

        # dict.fromkeys de-duplicates while preserving the order in which the column names were first seen.
        headers: list[str] = list(dict.fromkeys(key for row in self.csv_data for key in row))
        rows = [
            StepCsvRowPbo(cells=[str(row[column]) if column in row else "" for column in headers])
            for row in self.csv_data
        ]
        return StepOutputBatchPbo(
            item_container=StepItemContainerPbo(
                content_type=ContentTypePbo(name=self.content_type, extensions=self.file_extensions),
                container_name=self.name,
                csv_container=StepCsvContainerPbo(
                    header=StepCsvHeaderRowPbo(cells=headers),
                    items=rows
                )
            )
        )
121
+
122
+
123
class FieldMapResult(SapioAgentResult):
    """
    A class representing field map results from a Sapio agent.
    """
    field_maps: list[FieldMap]

    def __init__(self, field_maps: list[FieldMap]):
        """
        :param field_maps: A list of field maps, where each map is a dictionary of field names to values. Each entry
            will create a new data record in the system, so long as the agent definition specifies an output data
            type name.
        """
        self.field_maps = field_maps

    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        Convert each field map into a FieldValueMapPbo.

        Values that are not a bool, str, int, or float (including None) produce a FieldValuePbo with no value set.

        :return: One FieldValueMapPbo per field map, in the same order as field_maps.
        """
        new_records: list[FieldValueMapPbo] = []
        for field_map in self.field_maps:
            fields: dict[str, FieldValuePbo] = {}
            for field, value in field_map.items():
                field_value = FieldValuePbo()
                # bool must be tested before int: bool is a subclass of int, so testing int first would store
                # every boolean in int_value and leave the bool branch unreachable.
                if isinstance(value, bool):
                    field_value.bool_value = value
                elif isinstance(value, str):
                    field_value.string_value = value
                elif isinstance(value, int):
                    field_value.int_value = value
                elif isinstance(value, float):
                    field_value.double_value = value
                fields[field] = field_value
            new_records.append(FieldValueMapPbo(fields=fields))
        return new_records
154
+
155
+
156
class JsonResult(SapioAgentResult):
    """
    An agent result whose payload is a list of JSON objects.
    """
    json_data: list[dict[str, Any]]
    content_type: str
    file_extensions: list[str]
    name: str

    def __init__(self, json_data: list[dict[str, Any]], content_type: str = "json", file_extensions: list[str] = None,
                 name: str | None = None):
        """
        :param json_data: The JSON results. Each list entry is a separate JSON object and must be serializable
            with json.dumps().
        :param content_type: The content type that describes the payload.
        :param file_extensions: The file extensions that this data may be saved with.
        :param name: An optional identifying name for this result, made available to the next agent.
        """
        # Reject anything that is not a list made up solely of dictionaries.
        is_list_of_dicts = isinstance(json_data, list) and all(isinstance(entry, dict) for entry in json_data)
        if not is_list_of_dicts:
            raise ValueError("json_data must be a list of dictionaries.")
        self.json_data = json_data
        self.content_type = content_type
        self.file_extensions = file_extensions or ["json"]
        self.name = name

    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        :return: A StepOutputBatchPbo whose item container holds each JSON object serialized to a string.
        """
        content = ContentTypePbo(name=self.content_type, extensions=self.file_extensions)
        serialized = [json.dumps(entry) for entry in self.json_data]
        payload = StepJsonContainerPbo(items=serialized)
        container = StepItemContainerPbo(content_type=content, container_name=self.name, json_container=payload)
        return StepOutputBatchPbo(item_container=container)
190
+
191
+
192
class TextResult(SapioAgentResult):
    """
    An agent result whose payload is plain text.
    """
    text_data: list[str]
    content_type: str
    file_extensions: list[str]
    name: str

    def __init__(self, text_data: list[str], content_type: str = "text", file_extensions: list[str] = None,
                 name: str | None = None):
        """
        :param text_data: The payload, given as a list of strings.
        :param content_type: The content type that describes the payload.
        :param file_extensions: The file extensions that this data may be saved with.
        :param name: An optional identifying name for this result, made available to the next agent.
        """
        self.text_data = text_data
        self.content_type = content_type
        self.file_extensions = file_extensions or ["txt"]
        self.name = name

    def to_proto(self) -> StepOutputBatchPbo | list[FieldValueMapPbo]:
        """
        :return: A StepOutputBatchPbo whose item container wraps the text payload.
        """
        content = ContentTypePbo(name=self.content_type, extensions=self.file_extensions)
        payload = StepTextContainerPbo(items=self.text_data)
        container = StepItemContainerPbo(content_type=content, container_name=self.name, text_container=payload)
        return StepOutputBatchPbo(item_container=container)
222
+
223
+
224
class AgentServiceBase(ToolServiceServicer, ABC):
    """
    Base class for an agent service. A subclass implements register_agents to declare which agent types the
    service exposes.
    """
    debug_mode: bool = False

    def GetToolDetails(self, request: ToolDetailsRequestPbo, context: ServicerContext) -> ToolDetailsResponsePbo:
        """
        Report the details of every registered agent.

        :param request: The details request.
        :param context: The gRPC context.
        :return: A response holding the framework version and one details entry per registered agent, or an empty
            response if anything went wrong while collecting them.
        """
        try:
            # Instantiate each registered agent type and collect its details.
            details: list[ToolDetailsPbo] = [agent_type().to_pbo() for agent_type in self.register_agents()]
            if not details:
                raise Exception("No agents registered with this service.")
            return ToolDetailsResponsePbo(tool_framework_version=self.server_version(), tool_details=details)
        except Exception as e:
            # An agent failed while merely being initialized. No agent log exists at this point, so the error
            # can only be printed.
            print(f"CRITICAL ERROR: {e}")
            print(traceback.format_exc())
            return ToolDetailsResponsePbo()

    def ProcessData(self, request: ProcessStepRequestPbo, context: ServicerContext) -> ProcessStepResponsePbo:
        """
        Run the agent named in the request and package its results.

        :param request: The request describing which agent to run and the data to run it on.
        :param context: The gRPC context.
        :return: A response with the status, status message, outputs, logs, and new records of the run.
        """
        try:
            # Build a SapioUser from the connection info carried by the request.
            user = self._create_user(request.sapio_user)
            success, msg, results, logs = self.run(user, request, context)
            # Split the converted results into output batches and new record field maps.
            output_data: list[StepOutputBatchPbo] = []
            new_records: list[FieldValueMapPbo] = []
            for result in results:
                converted: StepOutputBatchPbo | list[FieldValueMapPbo] = result.to_proto()
                if not isinstance(converted, StepOutputBatchPbo):
                    new_records.extend(converted)
                    continue
                output_data.append(converted)
            if success:
                status = ProcessStepResponseStatusPbo.SUCCESS
            else:
                status = ProcessStepResponseStatusPbo.FAILURE
            return ProcessStepResponsePbo(status=status, status_message=msg, output=output_data, log=logs,
                                          new_records=new_records)
        except Exception as e:
            # Agents are expected to handle their own exceptions; this is a last line of defense.
            print(f"CRITICAL ERROR: {e}")
            print(traceback.format_exc())
            return ProcessStepResponsePbo(status=ProcessStepResponseStatusPbo.FAILURE,
                                          status_message=f"CRITICAL ERROR: {e}",
                                          log=[traceback.format_exc()])

    @staticmethod
    def _create_user(info: SapioConnectionInfoPbo, timeout_seconds: int = 60) -> SapioUser:
        """
        Build a SapioUser from a SapioConnectionInfo proto object.

        :param info: The SapioConnectionInfo proto object.
        :param timeout_seconds: The request timeout for calls made from the returned user object.
        :return: A user authenticated with either the session token or the username and password from info.
        """
        user = SapioUser(info.webservice_url.rstrip("/"), True, timeout_seconds, guid=info.app_guid)
        secret_type = info.secret_type
        if secret_type == SapioUserSecretTypePbo.SESSION_TOKEN:
            user.api_token = info.secret
        elif secret_type == SapioUserSecretTypePbo.PASSWORD:
            # The secret is base64 of "username:password", optionally preceded by "Basic ".
            encoded: str = info.secret.removeprefix("Basic ")
            decoded: str = base64.b64decode(encoded).decode()
            credentials: list[str] = decoded.split(":", 1)
            user.username = credentials[0]
            user.password = credentials[1]
        else:
            raise Exception(f"Unexpected secret type: {info.secret_type}")
        return user

    @staticmethod
    def server_version() -> int:
        """
        :return: The framework version number that is reported as tool_framework_version by GetToolDetails.
        """
        return 1

    @abstractmethod
    def register_agents(self) -> list[type[AgentBase]]:
        """
        Declare the agent types served by this service. Each provided type should implement AgentBase.

        :return: A list of agents to register to this service.
        """
        ...

    def run(self, user: SapioUser, request: ProcessStepRequestPbo, context: ServicerContext) \
            -> tuple[bool, str, list[SapioAgentResult], list[str]]:
        """
        Execute the agent from this service that is named in the request.

        :param user: A user object that can be used to initialize manager classes using DataMgmtServer to query the
            system.
        :param request: The request object containing the input data.
        :param context: The gRPC context.
        :return: Whether or not the agent succeeded, the status message, the results of the agent, and any logs
            generated by the agent.
        """
        # Look up the requested agent by its display name.
        requested_name: str = request.tool_name
        agents_by_name: dict[str, type[AgentBase]] = {
            agent_type.name(): agent_type for agent_type in self.register_agents()
        }
        if requested_name not in agents_by_name:
            # List every registered agent so that the LLM can correct the agent it is requesting.
            known_names: str = "\n".join(agents_by_name.keys())
            msg: str = (f"Agent \"{requested_name}\" not found in the registered agents for this service. The registered "
                        f"agents for this service are: \n{known_names}")
            return False, msg, [], []

        agent: AgentBase = agents_by_name[requested_name]()
        try:
            agent.setup(user, request, context, self.debug_mode)
            # Check the number of inputs first; only validate their content when the count matches.
            msg: str = ""
            provided_count: int = len(request.input)
            expected_count: int = len(agent.input_configs)
            if provided_count != expected_count:
                msg = f"Expected {expected_count} inputs for this agent, but got {provided_count} instead."
            else:
                errors: list[str] = agent.validate_input()
                if errors:
                    msg = "\n".join(errors)
            # An empty message means that the inputs passed validation.
            success: bool = not msg
            # A dry run returns the fixed dry run output; otherwise the agent only runs on valid inputs.
            results: list[SapioAgentResult] = []
            if request.dry_run:
                results = agent.dry_run_output()
            elif success:
                results = agent.run(user)
                msg = f"{agent.name()} successfully completed."
            return success, msg, results, agent.logs
        except Exception as e:
            agent.log_exception("Exception occurred during agent execution.", e)
            return False, str(e), [], agent.logs
        finally:
            # Outside of debug mode the temporary files are removed. In debug mode they are kept and listed so
            # that they can be inspected manually.
            if not self.debug_mode:
                agent.temp_data.cleanup()
            else:
                print("Temporary files/directories created during agent execution:")
                for directory in agent.temp_data.directories:
                    print(f"\tDirectory: {directory}")
                for file in agent.temp_data.files:
                    print(f"\tFile: {file}")
376
+
377
+
378
+ class AgentBase(ABC):
379
+ """
380
+ A base class for implementing an agent.
381
+ """
382
+ input_configs: list[ToolInputDetailsPbo]
383
+ input_container_types: list[ContainerType]
384
+ output_configs: list[ToolOutputDetailsPbo]
385
+ output_container_types: list[ContainerType]
386
+ config_fields: list[VeloxFieldDefPbo]
387
+
388
+ logs: list[str]
389
+ logger: Logger
390
+ verbose_logging: bool
391
+
392
+ temp_data: TempFileHandler
393
+
394
+ user: SapioUser
395
+ request: ProcessStepRequestPbo
396
+ context: ServicerContext
397
+ debug_mode: bool
398
+
399
@classmethod
@abstractmethod
def identifier(cls):
    """
    :return: The unique identifier of the agent. The system uses it to decide which agent to update when an agent
        is re-imported, so it should stay the same after the agent's first import; changing it creates a
        duplicate agent.
    """
408
+
409
+ @staticmethod
410
+ @abstractmethod
411
+ def name() -> str:
412
+ """
413
+ :return: The display name of the agent. This should be unique across all agents in the service.
414
+ """
415
+ pass
416
+
417
+ @staticmethod
418
+ @abstractmethod
419
+ def category() -> str:
420
+ """
421
+ :return: The category of the agent. This is used to group similar agents together in the plan manager.
422
+ """
423
+ pass
424
+
425
+ @staticmethod
426
+ @abstractmethod
427
+ def description() -> str:
428
+ """
429
+ :return: A description of the agent.
430
+ """
431
+ pass
432
+
433
+ @staticmethod
434
+ @abstractmethod
435
+ def citations() -> dict[str, str]:
436
+ """
437
+ :return: Any citations or references for this agent, as a dictionary of citation name to URL.
438
+ """
439
+ pass
440
+
441
+ @staticmethod
442
+ def data_type_name() -> str | None:
443
+ """
444
+ :return: The name of the output data type of this agent, if applicable. When this agent returns
445
+ FieldMapResult objects in its run method, this name will be used to set the data type of the output data.
446
+ """
447
+ return None
448
+
449
+ @staticmethod
450
+ def license_flag() -> str | None:
451
+ """
452
+ :return: The license flag for this agent. The system must have this license in order to use this agent.
453
+ If None, the agent is not license locked.
454
+ """
455
+ return None
456
+
457
def __init__(self):
    """
    Create an agent with no declared inputs, outputs, or configuration fields, an empty log list, a new temporary
    file handler, and a logger named after the agent.
    """
    self.input_configs, self.input_container_types = [], []
    self.output_configs, self.output_container_types = [], []
    self.config_fields = []
    self.temp_data = TempFileHandler()
    self.logs = []
    agent_logger = logging.getLogger(f"AgentBase.{self.name()}")
    self.logger = agent_logger
    ensure_logger_initialized(agent_logger)
467
+
468
+ def setup(self, user: SapioUser, request: ProcessStepRequestPbo, context: ServicerContext, debug_mode: bool) -> None:
469
+ """
470
+ Setup the agent with the user, request, and context. This method can be overridden by subclasses to perform
471
+ additional setup.
472
+
473
+ :param user: A user object that can be used to initialize manager classes using DataMgmtServer to query the
474
+ system.
475
+ :param request: The request object containing the input data.
476
+ :param context: The gRPC context.
477
+ :param debug_mode: If true, the agent should run in debug mode, providing additional logging and not cleaning
478
+ up temporary files.
479
+ """
480
+ self.user = user
481
+ self.request = request
482
+ self.context = context
483
+ self.verbose_logging = request.verbose_logging
484
+ self.debug_mode = debug_mode
485
+
486
def add_input(self, container_type: ContainerType, content_type: str, display_name: str, description: str,
              structure_example: str | bytes | None = None, validation: str | None = None,
              input_count: tuple[int, int] | None = None, is_paged: bool = False,
              page_size: tuple[int, int] | None = None, max_request_bytes: int | None = None) -> None:
    """
    Register an input configuration on the agent. The registered inputs determine how many inputs the agent
    accepts in the plan manager and what they are. Each input's IO number is the number of inputs registered
    before it: 0 for the first call, 1 for the second, and so on.

    :param container_type: The container type of the input.
    :param content_type: The content type of the input.
    :param display_name: The display name of the input.
    :param description: The description of the input.
    :param structure_example: An optional example of the structure of the input, such as how the structure of a
        JSON output may look. It does not need to be entirely valid and should often be truncated for brevity.
        It is required for every container type other than BINARY.
    :param validation: An optional validation string for the input.
    :param input_count: A tuple of the minimum and maximum number of inputs allowed for this agent.
    :param is_paged: Whether this input is paged.
    :param page_size: A tuple of the minimum and maximum page size for this agent. Only has an effect when the
        input is paged.
    :param max_request_bytes: The maximum request size in bytes for this agent.
    :raises ValueError: If structure_example is None and container_type is not BINARY.
    """
    if structure_example is None and container_type != ContainerType.BINARY:
        raise ValueError("structure_example must be provided for inputs with a container_type other than BINARY.")

    # Wrap the structure example according to whether it is text or binary.
    structure: ExampleContainerPbo | None = None
    if isinstance(structure_example, str):
        structure = ExampleContainerPbo(text_example=structure_example)
    elif isinstance(structure_example, bytes):
        structure = ExampleContainerPbo(binary_example=structure_example)

    min_inputs = input_count[0] if input_count else None
    max_inputs = input_count[1] if input_count else None
    min_page = page_size[0] if page_size else None
    max_page = page_size[1] if page_size else None

    base = ToolIoConfigBasePbo(
        io_number=len(self.input_configs),
        content_type=content_type,
        display_name=display_name,
        description=description,
        structure_example=structure,
        # The testing example of an input is never used, which is why this method offers no way to set it.
        # Testing examples only matter during dry runs, where the testing_example of the previous step's output
        # is what gets passed to the next step's input validation.
        testing_example=None
    )
    details = ToolInputDetailsPbo(
        base_config=base,
        validation=validation,
        min_input_count=min_inputs,
        max_input_count=max_inputs,
        paged=is_paged,
        min_page_size=min_page,
        max_page_size=max_page,
        max_request_bytes=max_request_bytes,
    )
    self.input_configs.append(details)
    self.input_container_types.append(container_type)
538
+
539
def add_output(self, container_type: ContainerType, content_type: str, display_name: str, description: str,
               testing_example: str | bytes, structure_example: str | bytes | None = None) -> None:
    """
    Add an output configuration to the agent. This determines how many outputs this agent will produce in the plan
    manager, as well as what those outputs are. The IO number of the output will be set to the current number of
    outputs. That is, the first time this is called, the IO number will be 0, the second time it is called, the IO
    number will be 1, and so on.

    :param container_type: The container type of the output.
    :param content_type: The content type of the output.
    :param display_name: The display name of the output.
    :param description: The description of the output.
    :param testing_example: An example of the output to be used when testing this agent in the system. This must be
        an entirely valid example of what an output of this agent could look like so that it can be properly used
        to run tests with. The provided example may be a string, such as for representing JSON or CSV outputs,
        or bytes, such as for representing binary outputs like images or files.
    :param structure_example: An optional example of the structure of the output, such as how the structure of a
        JSON output may look. This does not need to be an entirely valid example, and should often be truncated for
        brevity. This must be provided for any container type other than BINARY.
    :raises ValueError: If testing_example is empty or None, or if structure_example is None for a container type
        other than BINARY.
    """
    if not testing_example:
        raise ValueError("A testing_example must be provided for the output.")
    # Wrap the testing example according to whether it is text or binary.
    testing: ExampleContainerPbo | None = None
    if isinstance(testing_example, str):
        testing = ExampleContainerPbo(text_example=testing_example)
    elif isinstance(testing_example, bytes):
        testing = ExampleContainerPbo(binary_example=testing_example)

    if container_type != ContainerType.BINARY and structure_example is None:
        # This message used to say "inputs", copied from add_input; this method registers an output.
        raise ValueError("structure_example must be provided for outputs with a container_type other than BINARY.")
    structure: ExampleContainerPbo | None = None
    if isinstance(structure_example, str):
        structure = ExampleContainerPbo(text_example=structure_example)
    elif isinstance(structure_example, bytes):
        structure = ExampleContainerPbo(binary_example=structure_example)

    self.output_configs.append(ToolOutputDetailsPbo(
        base_config=ToolIoConfigBasePbo(
            io_number=len(self.output_configs),
            content_type=content_type,
            display_name=display_name,
            description=description,
            structure_example=structure,
            testing_example=testing
        )))
    self.output_container_types.append(container_type)
585
+
586
def add_config_field(self, field: VeloxFieldDefPbo) -> None:
    """
    Register a configuration field on the agent. The plan manager uses the agent's configuration fields to
    configure it.

    :param field: The configuration field details.
    """
    registered_fields = self.config_fields
    registered_fields.append(field)
593
+
594
def add_config_field_def(self, field: AbstractVeloxFieldDefinition) -> None:
    """
    Register a configuration field on the agent from a field definition object. The definition is converted with
    ProtobufUtils.field_def_to_pbo before it is stored.

    :param field: The configuration field details.
    """
    converted = ProtobufUtils.field_def_to_pbo(field)
    self.config_fields.append(converted)
601
+
602
def add_boolean_config_field(self, field_name: str, display_name: str, description: str,
                             default_value: bool | None = None, optional: bool = False) -> None:
    """
    Register an editable boolean configuration field on the agent. The plan manager uses the agent's
    configuration fields to configure it.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default value of the field.
    :param optional: If true, this field is optional. If false, this field is required.
    """
    properties = BooleanPropertiesPbo(default_value=default_value)
    definition = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.BOOLEAN,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        required=not optional,
        editable=True,
        boolean_properties=properties
    )
    self.config_fields.append(definition)
625
+
626
def add_double_config_field(self, field_name: str, display_name: str, description: str,
                            default_value: float | None = None, min_value: float = -10.**120,
                            max_value: float = 10.**120, precision: int = 2, optional: bool = False) -> None:
    """
    Register a double (floating point) configuration field on this agent. Configuration fields are used to
    configure the agent in the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default value of the field.
    :param min_value: The minimum value of the field. Defaults to -1e120.
    :param max_value: The maximum value of the field. Defaults to 1e120.
    :param precision: The precision of the field. Defaults to 2.
    :param optional: If true, this field is optional. If false, this field is required.
    """
    double_props = DoublePropertiesPbo(
        default_value=default_value,
        min_value=min_value,
        max_value=max_value,
        precision=precision,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.DOUBLE,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        double_properties=double_props,
    )
    self.config_fields.append(field_def)
656
+
657
def add_integer_config_field(self, field_name: str, display_name: str, description: str,
                             default_value: int | None = None, min_value: int = -2**31, max_value: int = 2**31-1,
                             optional: bool = False) -> None:
    """
    Register an integer configuration field on this agent. Configuration fields are used to configure the agent
    in the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default value of the field.
    :param min_value: The minimum value of the field. Defaults to -2**31.
    :param max_value: The maximum value of the field. Defaults to 2**31 - 1.
    :param optional: If true, this field is optional. If false, this field is required.
    """
    integer_props = IntegerPropertiesPbo(
        default_value=default_value,
        min_value=min_value,
        max_value=max_value,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.INTEGER,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        integer_properties=integer_props,
    )
    self.config_fields.append(field_def)
685
+
686
def add_string_config_field(self, field_name: str, display_name: str, description: str,
                            default_value: str | None = None, max_length: int = 1000, optional: bool = False,
                            validation_regex: str | None = None, error_msg: str | None = None) -> None:
    """
    Register a string configuration field on this agent. Configuration fields are used to configure the agent in
    the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default value of the field.
    :param max_length: The maximum length of the field. Defaults to 1000.
    :param optional: If true, this field is optional. If false, this field is required.
    :param validation_regex: An optional regex that the field value must match.
    :param error_msg: An optional error message to display if the field value does not match the regex. Only
        used when a validation regex is given.
    """
    # A validator is only attached when a (non-empty) regex was supplied.
    validator = None
    if validation_regex:
        validator = FieldValidatorPbo(validation_regex=validation_regex, error_message=error_msg)

    string_props = StringPropertiesPbo(
        default_value=default_value,
        max_length=max_length,
        field_validator=validator,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.STRING,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        string_properties=string_props,
    )
    self.config_fields.append(field_def)
715
+
716
def add_list_config_field(self, field_name: str, display_name: str, description: str,
                          default_value: str | None = None, allowed_values: list[str] | None = None,
                          direct_edit: bool = False, optional: bool = False,
                          validation_regex: str | None = None, error_msg: str | None = None) -> None:
    """
    Register a single-value selection list configuration field on this agent. Configuration fields are used to
    configure the agent in the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default value of the field.
    :param allowed_values: The list of allowed values for the field.
    :param direct_edit: If true, the user can enter a value that is not in the list of allowed values. If false,
        the user can only select from the list of allowed values.
    :param optional: If true, this field is optional. If false, this field is required.
    :param validation_regex: An optional regex that the field value must match.
    :param error_msg: An optional error message to display if the field value does not match the regex. Only
        used when a validation regex is given.
    """
    # A validator is only attached when a (non-empty) regex was supplied.
    validator = None
    if validation_regex:
        validator = FieldValidatorPbo(validation_regex=validation_regex, error_message=error_msg)

    selection_props = SelectionPropertiesPbo(
        default_value=default_value,
        static_list_values=allowed_values,
        direct_edit=direct_edit,
        field_validator=validator,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.SELECTION,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        selection_properties=selection_props,
    )
    self.config_fields.append(field_def)
749
+
750
def add_multi_list_config_field(self, field_name: str, display_name: str, description: str,
                                default_value: list[str] | None = None, allowed_values: list[str] | None = None,
                                direct_edit: bool = False, optional: bool = False,
                                validation_regex: str | None = None, error_msg: str | None = None) -> None:
    """
    Register a multi-select selection list configuration field on this agent. Configuration fields are used to
    configure the agent in the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param default_value: The default values of the field. They are stored on the definition as a single
        comma-joined string.
    :param allowed_values: The list of allowed values for the field.
    :param direct_edit: If true, the user can enter a value that is not in the list of allowed values. If false,
        the user can only select from the list of allowed values.
    :param optional: If true, this field is optional. If false, this field is required.
    :param validation_regex: An optional regex that the field value must match.
    :param error_msg: An optional error message to display if the field value does not match the regex. Only
        used when a validation regex is given.
    """
    # An empty or missing default list means "no default" rather than an empty string.
    joined_default = None
    if default_value:
        joined_default = ",".join(default_value)

    # A validator is only attached when a (non-empty) regex was supplied.
    validator = None
    if validation_regex:
        validator = FieldValidatorPbo(validation_regex=validation_regex, error_message=error_msg)

    selection_props = SelectionPropertiesPbo(
        default_value=joined_default,
        static_list_values=allowed_values,
        multi_select=True,
        direct_edit=direct_edit,
        field_validator=validator,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.SELECTION,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        selection_properties=selection_props,
    )
    self.config_fields.append(field_def)
784
+
785
def add_date_config_field(self, field_name: str, display_name: str, description: str, optional: bool = False,
                          date_time_format: str = "MMM dd, yyyy", default_to_today: bool = False,
                          is_static_date: bool = False) -> None:
    """
    Register a date configuration field on this agent. Configuration fields are used to configure the agent in
    the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param optional: If true, this field is optional. If false, this field is required.
    :param date_time_format: The format that this date field should appear in. The date format is Java-style.
        See https://docs.oracle.com/en/java/javase/18/docs/api/java.base/java/text/SimpleDateFormat.html for more
        details.
    :param default_to_today: If true, the default value of the field will be set to today's date. If false, the
        default value will be None.
    :param is_static_date: If true, the user will input the date as UTC. If false, the user will input the date
        as local time.
    """
    # "@Today" is the default-value token used to request today's date.
    default_token = "@Today" if default_to_today else None
    date_props = DatePropertiesPbo(
        default_value=default_token,
        static_date=is_static_date,
        date_time_format=date_time_format,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.DATE,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        date_properties=date_props,
    )
    self.config_fields.append(field_def)
817
+
818
def add_credentials_config_field(self, field_name: str, display_name: str, description: str, optional: bool = False,
                                 category: str | None = None) -> None:
    """
    Register a selection list field that asks the user to choose which credentials to use. Configuration fields
    are used to configure the agent in the plan manager. The field is always registered as editable.

    :param field_name: The name of the field.
    :param display_name: The display name of the field.
    :param description: The description of the field.
    :param optional: If true, this field is optional. If false, this field is required.
    :param category: If provided, only credentials in this category will be shown to the user. Surrounding
        whitespace is stripped before the category is used.
    """
    # A credentials field is just a selection field with its list mode set to [ExternalCredentials], optionally
    # followed by the category to restrict the choices to.
    credentials_list_mode = f"[ExternalCredentials]{category.strip() if category else ''}"
    selection_props = SelectionPropertiesPbo(
        list_mode=credentials_list_mode,
        multi_select=False,
        default_value=None,
        direct_edit=False,
    )
    field_def = VeloxFieldDefPbo(
        data_field_type=FieldTypePbo.SELECTION,
        data_field_name=field_name,
        display_name=display_name,
        description=description,
        # The definition stores "required", which is the inverse of the caller-facing "optional" flag.
        required=not optional,
        editable=True,
        selection_properties=selection_props,
    )
    self.config_fields.append(field_def)
845
+
846
def to_pbo(self) -> ToolDetailsPbo:
    """
    Build the protobuf description of this agent from its descriptor methods and the input, output, and
    configuration definitions registered on it.

    :return: The ToolDetailsPbo proto object representing this agent.
    """
    # Gather the descriptor values in the same order the message fields are listed below.
    import_id = self.identifier()
    agent_name = self.name()
    agent_description = self.description()
    agent_category = self.category()
    # citations() maps a citation title to its URL.
    citation_pbos = [ToolCitationPbo(title=title, url=url) for title, url in self.citations().items()]
    data_type_name = self.data_type_name()
    license_info = self.license_flag()

    return ToolDetailsPbo(
        import_id=import_id,
        name=agent_name,
        description=agent_description,
        category=agent_category,
        citation=citation_pbos,
        input_configs=self.input_configs,
        output_configs=self.output_configs,
        output_data_type_name=data_type_name,
        config_fields=self.config_fields,
        license_info=license_info,
    )
862
+
863
@abstractmethod
def validate_input(self) -> list[str] | None:
    """
    Check that the request given to this agent is validly formatted, without processing it.

    Implementations should only inspect the inputs and configurations provided in the request; no actual
    processing of the request should happen here.

    The request inputs can be accessed using the self.get_input_*() methods.
    The request settings can be accessed using the self.get_config_fields() method.
    The request itself can be accessed using self.request.

    :return: A list of error messages describing what is wrong with the request if it is not valid. If the
        request is valid, return an empty list or None.
    """
881
+
882
+ def dry_run_output(self) -> list[SapioAgentResult]:
883
+ """
884
+ Provide fixed results for a dry run of this agent. This method should not perform any actual processing of the
885
+ request. It should only return example outputs that can be used to test the next agent in the plan.
886
+
887
+ The default implementation of this method looks at the testing_example field of each output configuration
888
+ and returns a SapioAgentResult object based on the content type of the output.
889
+
890
+ :return: A list of SapioAgentResult objects containing example outputs for this agent. Each result in the list
891
+ corresponds to a separate output from the agent.
892
+ """
893
+ results: list[SapioAgentResult] = []
894
+ for output, container_type in zip(self.output_configs, self.output_container_types):
895
+ config: ToolIoConfigBasePbo = output.base_config
896
+ example: ExampleContainerPbo = config.testing_example
897
+ content_type: str = config.content_type
898
+ match container_type:
899
+ case ContainerType.BINARY:
900
+ example: bytes = example.binary_example
901
+ results.append(BinaryResult(binary_data=[example], content_type=content_type))
902
+ case ContainerType.CSV:
903
+ example: str = example.text_example
904
+ results.append(CsvResult(FileUtil.tokenize_csv(example.encode())[0], content_type=content_type))
905
+ case ContainerType.JSON:
906
+ # The example may be in the JSONL format instead of plain JSON, so we need to use Pandas to parse
907
+ # the example into plain JSON.
908
+ example: str = example.text_example
909
+ # Format the JSONL in a way that Pandas likes. Collapse everything into a single line, and then
910
+ # split it back into multiple lines where each line is a single JSON list or dictionary.
911
+ example: str = re.sub("([]}])\s*([\[{])", r"\1\n\2", example.replace("\n", "")).strip()
912
+ # Read the JSONL into a Pandas DataFrame and convert it back to plain JSON.
913
+ import pandas as pd
914
+ with io.StringIO(example) as stream:
915
+ example: str = pd.read_json(path_or_buf=stream, lines=True).to_json()
916
+ data = json.loads(example)
917
+ if not isinstance(data, list):
918
+ data = [data]
919
+ results.append(JsonResult(json_data=data, content_type=content_type))
920
+ case ContainerType.TEXT:
921
+ example: str = example.text_example
922
+ results.append(TextResult(text_data=[example], content_type=content_type))
923
+ return results
924
+
925
@abstractmethod
def run(self, user: SapioUser) -> list[SapioAgentResult]:
    """
    Execute this agent and produce its results.

    The request inputs can be accessed using the self.get_input_*() methods.
    The request settings can be accessed using the self.get_config_fields() method.
    The request itself can be accessed using self.request.

    :param user: A user object that can be used to initialize manager classes using DataMgmtServer to query the
        system.
    :return: A list of SapioAgentResult objects containing the response data. Each result in the list corresponds
        to a separate output from the agent. Field map results do not appear as agent output in the plan manager,
        instead appearing as records related to the plan step during the run.
    """
941
+
942
def get_credentials(self, category: str | None = None, host: str | None = None) -> ExternalCredentials:
    """
    Get the single set of credentials from the request that matches the given category and host. Both filters
    are optional, compared case-insensitively, and have surrounding whitespace removed; a filter that is None or
    blank is not applied.

    :param category: The category of the credentials to retrieve.
    :param host: The host for which to retrieve the credentials. It is compared against the hostname parsed from
        each credential's URL.
    :return: An ExternalCredentials object containing the credentials for the given category and host.
    :raises ValueError: If no credentials match, or if more than one set of credentials matches.
    """
    from urllib.parse import urlparse

    # Remove leading/trailing whitespace. The stripped values are also what the error messages report.
    category = category.strip() if category else None
    host = host.strip() if host else None
    # Normalize the filters once instead of on every credential.
    wanted_category: str | None = category.lower() if category else None
    wanted_host: str | None = host.lower() if host else None

    matching_creds: list[ExternalCredentialsPbo] = []
    for cred in self.request.external_credential:
        if wanted_category and cred.category.lower() != wanted_category:
            continue
        if wanted_host:
            # urlparse only reports a hostname for URLs that contain a "//" network location, so credentials
            # whose URL lacks one never match a host filter.
            hostname = urlparse(cred.url).hostname
            if hostname is None or hostname.lower() != wanted_host:
                continue
        matching_creds.append(cred)

    if not matching_creds:
        raise ValueError(f"No credentials found for category '{category}' and host '{host}'.")
    if len(matching_creds) > 1:
        raise ValueError(f"Multiple credentials found for category '{category}' and host '{host}'.")

    return ExternalCredentials.from_pbo(matching_creds[0])
973
+
974
def get_credentials_from_config(self, value: str) -> ExternalCredentials:
    """
    Look up the credentials in the request that a credentials config field value refers to.

    :param value: The value of the credentials config field, in the format "Name (Identifier)".
    :return: An ExternalCredentials object containing the credentials whose ID equals the identifier.
    :raises ValueError: If the value is not in the expected format, or if no credentials in the request have
        the identifier.
    """
    # Values should be of the format "Name (Identifier)". The identifier is the content of the last
    # parenthesized group, since the name part is matched greedily.
    parsed = re.match(r"^(.*) \((.*)\)$", value)
    if parsed is None:
        raise ValueError(f"Invalid credentials value '{value}'. Expected format 'Name (Identifier)'.")

    identifier: str = parsed.group(2)
    found = next((cred for cred in self.request.external_credential if cred.id == identifier), None)
    if found is None:
        raise ValueError(f"No credentials found with identifier '{identifier}'.")
    return ExternalCredentials.from_pbo(found)
990
+
991
def call_subprocess(self,
                    args: str | bytes | PathLike[str] | PathLike[bytes] | Sequence[str | bytes | PathLike[str] | PathLike[bytes]],
                    cwd: str | bytes | PathLike[str] | PathLike[bytes] | None = None,
                    **kwargs) -> CompletedProcess[str]:
    """
    Call a subprocess with the given arguments, logging the command and any errors that occur.
    This function will raise an exception if the return code of the subprocess is non-zero. The output of the
    subprocess will be captured as text and returned as part of the CompletedProcess object.

    :param args: The command to run: either a single program/command, or a sequence of arguments. Elements may be
        strings, bytes, or path-like objects.
    :param cwd: The working directory to run the subprocess in. If None, the current working directory is used.
    :param kwargs: Additional keyword arguments to pass to subprocess.run(). check, capture_output, text, and cwd
        are already set by this method, so they must not be passed here.
    :return: The CompletedProcess object returned by subprocess.run().
    :raises subprocess.CalledProcessError: If the subprocess exits with a non-zero return code. The return code,
        stdout, and stderr are logged as errors before the exception is re-raised.
    """
    import os

    # Build a printable form of the command. A single str/bytes/path is logged as-is (joining it would split a
    # string into characters), and sequence elements are decoded so that bytes and path-like arguments don't make
    # the join fail before the subprocess is even started.
    if isinstance(args, (str, bytes, os.PathLike)):
        command: str = os.fsdecode(args)
    else:
        command = ' '.join(os.fsdecode(arg) for arg in args)
    self.log_info(f"Running subprocess with command: {command}")

    try:
        return subprocess.run(args, check=True, capture_output=True, text=True, cwd=cwd, **kwargs)
    except subprocess.CalledProcessError as e:
        self.log_error(f"Error running subprocess. Return code: {e.returncode}")
        self.log_error(f"STDOUT: {e.stdout}")
        self.log_error(f"STDERR: {e.stderr}")
        raise
1013
+
1014
def log_info(self, message: str) -> None:
    """
    Log an info message for this agent. The message always goes to the agent's logger; it is additionally added
    to the logs returned to the caller only when verbose logging is enabled. Empty/None inputs will not be logged.

    :param message: The message to log.
    """
    if message:
        if self.verbose_logging:
            self.logs.append(f"INFO: {self.name()}: {message}")
        self.logger.info(message)
1026
+
1027
def log_warning(self, message: str) -> None:
    """
    Log a warning message for this agent. The message is added to the logs returned to the caller and sent to
    the agent's logger. Empty/None inputs will not be logged.

    :param message: The message to log.
    """
    if message:
        self.logs.append(f"WARNING: {self.name()}: {message}")
        self.logger.warning(message)
1038
+
1039
def log_error(self, message: str) -> None:
    """
    Log an error message for this agent. The message is added to the logs returned to the caller and sent to the
    agent's logger. Empty/None inputs will not be logged.

    :param message: The message to log.
    """
    if message:
        self.logs.append(f"ERROR: {self.name()}: {message}")
        self.logger.error(message)
1050
+
1051
def log_exception(self, message: str, e: Exception) -> None:
    """
    Log an exception for this agent. The message and the exception text are added to the logs returned to the
    caller, and the message together with the exception's traceback is sent to the agent's logger as an error.
    Nothing is logged when both the message and the exception are empty/None.

    :param message: The message to log.
    :param e: The exception to log.
    """
    if not message and not e:
        return
    self.logs.append(f"EXCEPTION: {self.name()}: {message} - {e}")
    if isinstance(e, BaseException):
        # Format the exception that was actually passed in. traceback.format_exc() only knows about the
        # exception currently being handled, so it reports "NoneType: None" (or an unrelated exception) when
        # this method is called outside of the matching except block.
        trace: str = "".join(traceback.format_exception(type(e), e, e.__traceback__))
    else:
        # No exception object was provided; fall back to whatever is currently being handled, if anything.
        trace = traceback.format_exc()
    self.logger.error(f"{message}\n{trace}")
1063
+
1064
def is_input_partial(self, index: int = 0) -> bool:
    """
    Report whether the request marks the input at the given index as partial.

    :param index: The index of the input to check. Defaults to 0. Used for agents that accept multiple inputs.
    :return: True if the input is marked as partial, False otherwise.
    """
    selected_input = self.request.input[index]
    return selected_input.is_partial
1072
+
1073
def get_input_name(self, index: int = 0) -> str | None:
    """
    Read the container name of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: The name of the input from the request object, or None if no name is set.
    """
    container = self.request.input[index].item_container
    return container.container_name
1081
+
1082
def get_input_content_type(self, index: int = 0) -> ContentTypePbo:
    """
    Read the content type of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: The content type of the input from the request object.
    """
    container = self.request.input[index].item_container
    return container.content_type
1090
+
1091
def get_input_binary(self, index: int = 0) -> list[bytes]:
    """
    Read the binary data of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: The items of the input's binary container, copied into a new list.
    :raises Exception: If the input does not contain a binary container.
    """
    container: StepItemContainerPbo = self.request.input[index].item_container
    if container.HasField("binary_container"):
        return list(container.binary_container.items)
    raise Exception(f"Input {index} does not contain a binary container.")
1102
+
1103
def get_input_csv(self, index: int = 0) -> tuple[list[str], list[dict[str, str]]]:
    """
    Read the CSV data of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: A tuple containing the header row and the data rows. The header row is a list of strings representing
        the column names, and the data rows are a list of dictionaries where each dictionary represents a row in
        the CSV with the column names as keys and the corresponding values as strings. Headers and cells are
        paired by position, so a row only has entries for the positions present in both.
    :raises Exception: If the input does not contain a CSV container.
    """
    container: StepItemContainerPbo = self.request.input[index].item_container
    if not container.HasField("csv_container"):
        raise Exception(f"Input {index} does not contain a CSV container.")

    csv_data = container.csv_container
    column_names: Iterable[str] = csv_data.header.cells
    rows: list[dict[str, str]] = [dict(zip(column_names, row.cells)) for row in csv_data.items]
    return list(column_names), rows
1123
+
1124
def get_input_json(self, index: int = 0) -> list[dict[str, Any]]:
    """
    Read and parse the JSON data of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: A list of parsed JSON objects, which are represented as dictionaries.
    :raises Exception: If the input does not contain a JSON container, or if any parsed item is not a dictionary.
    """
    container: StepItemContainerPbo = self.request.input[index].item_container
    if not container.HasField("json_container"):
        raise Exception(f"Input {index} does not contain a JSON container.")

    # Parse every item before validating any of them, so that malformed JSON is reported first.
    input_json: list[Any] = list(map(json.loads, container.json_container.items))

    # Verify that the given JSON actually is a list of dictionaries. If they aren't then the previous step provided
    # bad input. Agents are enforced to result in a list of dictionaries when returning JSON data, so this is likely
    # an error caused by a script or static input step.
    for i, entry in enumerate(input_json):
        if isinstance(entry, dict):
            continue
        raise Exception(f"Element {i} of input {index} is not a dictionary object. All top-level JSON inputs "
                        f"are expected to be dictionaries.")
    return input_json
1143
+
1144
def get_input_text(self, index: int = 0) -> list[str]:
    """
    Read the text data of the input at the given index from the request object.

    :param index: The index of the input to read. Defaults to 0. Used for agents that accept multiple inputs.
    :return: The items of the input's text container, copied into a new list of strings.
    :raises Exception: If the input does not contain a text container.
    """
    container: StepItemContainerPbo = self.request.input[index].item_container
    if container.HasField("text_container"):
        return list(container.text_container.items)
    raise Exception(f"Input {index} does not contain a text container.")
1155
+
1156
def get_config_defs(self) -> dict[str, VeloxFieldDefPbo]:
    """
    Get the config field definitions for this agent, keyed by field name. The definitions are read from the
    config_fields of the object returned by self.to_pbo(). If several definitions share a name, the last one wins.

    :return: A dictionary of field definitions, where the keys are the field names and the values are the
        VeloxFieldDefPbo objects representing the field definitions.
    """
    return {field_def.data_field_name: field_def for field_def in self.to_pbo().config_fields}
1167
+
1168
def get_config_fields(self) -> dict[str, FieldValue | list[str]]:
    """
    Get the configuration field values from the request object. If a field is not present in the request,
    the default value from the config definition will be returned.

    :return: A dictionary of configuration field names and their values. For multi-select selection list fields,
        a list of strings will be returned (an empty list when the field has neither a value nor a default). For
        all other field types, the value will match the field type (bool for boolean fields, float for double
        fields, int for short, integer, long, and enum fields, and string for everything else).
    """
    config_fields: dict[str, Any] = {}
    raw_configs: Mapping[str, FieldValuePbo] = self.request.config_field_values
    for field_name, field_def in self.get_config_defs().items():
        field_value: FieldValue = None
        # If the field is present in the request, convert the protobuf value to a Python value.
        if field_name in raw_configs:
            field_value = ProtobufUtils.field_pbo_to_value(raw_configs[field_name])
        # If the field isn't present or is None, use the default value from the field definition.
        if field_value is None:
            field_value = ProtobufUtils.field_def_pbo_to_default_value(field_def)
        # If the field is a multi-select selection list, split the value by commas and strip whitespace.
        if field_def.data_field_type == FieldTypePbo.SELECTION and field_def.selection_properties.multi_select:
            # Guard against a field with no value and no default, so that it yields an empty selection instead
            # of failing on None. NOTE(review): whether the default-value helper can return None is not visible
            # here; confirm against ProtobufUtils.
            field_value = [x.strip() for x in (field_value or "").split(',') if x.strip()]
        config_fields[field_name] = field_value
    return config_fields
1193
+
1194
+ @staticmethod
1195
+ def read_from_json(json_data: list[dict[str, Any]], key: str) -> list[Any]:
1196
+ """
1197
+ From a list of dictionaries, return a list of values for the given key from each dictionary. Skips null values.
1198
+
1199
+ :param json_data: The JSON data to read from.
1200
+ :param key: The key to read the values from.
1201
+ :return: A list of values corresponding to the given key in the JSON data.
1202
+ """
1203
+ ret_val: list[Any] = []
1204
+ for entry in json_data:
1205
+ if key in entry:
1206
+ value = entry[key]
1207
+ if isinstance(value, list):
1208
+ ret_val.extend(value)
1209
+ elif value is not None:
1210
+ ret_val.append(value)
1211
+ return ret_val
1212
+
1213
+ @staticmethod
1214
+ def flatten_text(text_data: list[str]) -> list[str]:
1215
+ """
1216
+ From a list of strings that come from a text input, flatten the list by splitting each string on newlines and
1217
+ stripping whitespace. Empty lines will be removed.
1218
+
1219
+ :param text_data: The text data to flatten.
1220
+ :return: A flattened list of strings.
1221
+ """
1222
+ ret_val: list[str] = []
1223
+ for entry in text_data:
1224
+ lines: list[str] = [x.strip() for x in entry.splitlines() if x.strip()]
1225
+ ret_val.extend(lines)
1226
+ return ret_val