azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,266 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ """
5
+ File operation utilities for Red Team Agent.
6
+
7
+ This module provides centralized file handling, path operations, and
8
+ data serialization utilities used across the red team components.
9
+ """
10
+
11
+ import json
12
+ import os
13
+ import uuid
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Optional, Union
17
+
18
+ # Try to import DefaultOpenEncoding, fallback to standard encoding
19
+ try:
20
+ from azure.ai.evaluation._common._utils import DefaultOpenEncoding
21
+
22
+ DEFAULT_ENCODING = DefaultOpenEncoding.WRITE
23
+ except ImportError:
24
+ DEFAULT_ENCODING = "utf-8"
25
+
26
+
27
class FileManager:
    """Centralized file operations manager for Red Team operations.

    Groups directory creation, unique-name generation, JSON / JSONL
    reading and writing, filename sanitizing and file cleanup behind one
    object that carries an optional logger and a base output directory.
    """

    # Characters replaced by "_" in safe_filename: the reserved set
    # <>:"/\|?* plus the space character, applied in one translate() pass.
    _UNSAFE_FILENAME_TABLE = str.maketrans({char: "_" for char in '<>:"/\\|?* '})

    def __init__(self, base_output_dir: Optional[str] = None, logger=None):
        """Initialize file manager.

        :param base_output_dir: Base directory for all file operations;
            a falsy value falls back to the current directory (".")
        :param logger: Logger instance for file operations; when falsy,
            no log records are emitted
        """
        self.base_output_dir = base_output_dir or "."
        self.logger = logger

    def ensure_directory(self, path: Union[str, os.PathLike]) -> str:
        """Ensure a directory exists, creating it if necessary.

        :param path: Path to the directory
        :return: Absolute path to the directory
        """
        abs_path = os.path.abspath(path)
        os.makedirs(abs_path, exist_ok=True)
        return abs_path

    def generate_unique_filename(
        self, prefix: str = "", suffix: str = "", extension: str = "", use_timestamp: bool = False
    ) -> str:
        """Generate a unique filename.

        The parts are joined with "_" in the order prefix, timestamp,
        UUID, suffix; empty parts are omitted.

        :param prefix: Prefix for the filename
        :param suffix: Suffix for the filename
        :param extension: File extension (with or without dot)
        :param use_timestamp: Whether to include timestamp in filename
        :return: Unique filename
        """
        parts = []

        if prefix:
            parts.append(prefix)

        if use_timestamp:
            parts.append(datetime.now().strftime("%Y%m%d_%H%M%S"))

        # Always include UUID for uniqueness
        parts.append(str(uuid.uuid4()))

        if suffix:
            parts.append(suffix)

        filename = "_".join(parts)

        if extension:
            if not extension.startswith("."):
                extension = "." + extension
            filename += extension

        return filename

    def get_scan_output_path(self, scan_id: str, filename: str = "") -> str:
        """Get path for scan output files.

        The scan directory is created under ``base_output_dir``. Unless the
        ``DEBUG`` environment variable is one of true/1/yes/y (case
        insensitive), the directory name is prefixed with "." and a
        ``.gitignore`` containing "*" is placed inside it.

        :param scan_id: Unique scan identifier
        :param filename: Optional filename to append
        :return: Full path for scan output
        """
        is_debug = os.environ.get("DEBUG", "").lower() in ("true", "1", "yes", "y")
        folder_prefix = "" if is_debug else "."

        scan_dir = os.path.join(self.base_output_dir, f"{folder_prefix}{scan_id}")
        self.ensure_directory(scan_dir)

        if not is_debug:
            gitignore_path = os.path.join(scan_dir, ".gitignore")
            # Exclusive-create avoids the check-then-write race: an existing
            # .gitignore is never truncated, even if it appears concurrently.
            try:
                with open(gitignore_path, "x", encoding="utf-8") as f:
                    f.write("*\n")
            except FileExistsError:
                pass

        if filename:
            return os.path.join(scan_dir, filename)
        return scan_dir

    def write_json(self, data: Any, filepath: Union[str, os.PathLike], indent: int = 2, ensure_dir: bool = True) -> str:
        """Write data to JSON file.

        :param data: Data to write
        :param filepath: Path to write the file
        :param indent: JSON indentation
        :param ensure_dir: Whether to ensure directory exists
        :return: Absolute path of written file
        """
        abs_path = os.path.abspath(filepath)

        if ensure_dir:
            self.ensure_directory(os.path.dirname(abs_path))

        with open(abs_path, "w", encoding=DEFAULT_ENCODING) as f:
            json.dump(data, f, indent=indent)

        if self.logger:
            self.logger.debug(f"Successfully wrote JSON to {abs_path}")

        return abs_path

    def read_json(self, filepath: Union[str, os.PathLike]) -> Any:
        """Read data from JSON file.

        Any failure is logged (when a logger is set) and re-raised.

        :param filepath: Path to the JSON file
        :return: Parsed JSON data
        """
        abs_path = os.path.abspath(filepath)

        try:
            with open(abs_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            if self.logger:
                self.logger.debug(f"Successfully read JSON from {abs_path}")

            return data
        except Exception as e:
            if self.logger:
                self.logger.error(f"Failed to read JSON from {abs_path}: {str(e)}")
            raise

    def read_jsonl(self, filepath: Union[str, os.PathLike]) -> List[Dict]:
        """Read data from JSONL file.

        Blank lines are ignored. Lines that are not valid JSON are skipped
        (with a warning when a logger is set) rather than aborting the read.
        Other failures are logged and re-raised.

        :param filepath: Path to the JSONL file
        :return: List of parsed JSON objects
        """
        abs_path = os.path.abspath(filepath)
        data = []

        try:
            with open(abs_path, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if line:
                        try:
                            data.append(json.loads(line))
                        except json.JSONDecodeError as e:
                            if self.logger:
                                self.logger.warning(f"Skipping invalid JSON line {line_num} in {abs_path}: {str(e)}")

            if self.logger:
                self.logger.debug(f"Successfully read {len(data)} records from JSONL {abs_path}")

            return data
        except Exception as e:
            if self.logger:
                self.logger.error(f"Failed to read JSONL from {abs_path}: {str(e)}")
            raise

    def write_jsonl(self, data: List[Dict], filepath: Union[str, os.PathLike], ensure_dir: bool = True) -> str:
        """Write data to JSONL file, one JSON document per line.

        :param data: List of dictionaries to write
        :param filepath: Path to write the file
        :param ensure_dir: Whether to ensure directory exists
        :return: Absolute path of written file
        """
        abs_path = os.path.abspath(filepath)

        if ensure_dir:
            self.ensure_directory(os.path.dirname(abs_path))

        with open(abs_path, "w", encoding="utf-8") as f:
            for item in data:
                f.write(json.dumps(item) + "\n")

        if self.logger:
            self.logger.debug(f"Successfully wrote {len(data)} records to JSONL {abs_path}")

        return abs_path

    def safe_filename(self, name: str, max_length: int = 255) -> str:
        """Create a safe filename from a string.

        Each of the characters ``<>:"/\\|?*`` and the space character is
        replaced by an underscore. A result longer than ``max_length`` is
        cut to ``max_length - 4`` characters followed by "...". When
        ``max_length`` is below 4 there is no room for that marker, so the
        name is simply cut to ``max_length`` characters (never negative).

        :param name: Original name
        :param max_length: Maximum filename length
        :return: Safe filename, never longer than ``max_length``
        """
        safe_name = name.translate(self._UNSAFE_FILENAME_TABLE)

        if len(safe_name) > max_length:
            if max_length < 4:
                # A negative slice bound would keep almost the whole name
                # and then append "...", overshooting the limit.
                safe_name = safe_name[: max(max_length, 0)]
            else:
                safe_name = safe_name[: max_length - 4] + "..."

        return safe_name

    def get_file_size(self, filepath: Union[str, os.PathLike]) -> int:
        """Get file size in bytes.

        :param filepath: Path to the file
        :return: File size in bytes
        """
        return os.path.getsize(filepath)

    def file_exists(self, filepath: Union[str, os.PathLike]) -> bool:
        """Check if file exists.

        :param filepath: Path to check
        :return: True if the path is an existing regular file
        """
        return os.path.isfile(filepath)

    def cleanup_file(self, filepath: Union[str, os.PathLike], ignore_errors: bool = True) -> bool:
        """Delete a file if it exists.

        :param filepath: Path to the file to delete
        :param ignore_errors: Whether to ignore deletion errors; when False
            the original exception propagates
        :return: True if file was deleted or didn't exist, False when the
            deletion failed and errors are ignored
        """
        try:
            if self.file_exists(filepath):
                os.remove(filepath)
                if self.logger:
                    self.logger.debug(f"Deleted file: {filepath}")
            return True
        except Exception as e:
            if not ignore_errors:
                raise
            if self.logger:
                self.logger.warning(f"Failed to delete file {filepath}: {str(e)}")
            return False
257
+
258
+
259
def create_file_manager(base_output_dir: Optional[str] = None, logger=None) -> FileManager:
    """Factory helper that builds a FileManager.

    Both arguments are forwarded unchanged to the FileManager constructor.

    :param base_output_dir: Base directory for file operations
    :param logger: Logger instance
    :return: Configured FileManager
    """
    manager = FileManager(base_output_dir=base_output_dir, logger=logger)
    return manager
@@ -0,0 +1,365 @@
1
+ """
2
+ Utility functions for formatting, conversion, and processing in Red Team Agent.
3
+ """
4
+
5
+ import json
6
+ import math
7
+ import itertools
8
+ import os
9
+ import logging
10
+ from typing import Dict, List, Union, Any
11
+ from pathlib import Path
12
+ from pyrit.models import ChatMessage
13
+ from pyrit.memory import CentralMemory
14
+ from .._attack_strategy import AttackStrategy
15
+ from .._red_team_result import RedTeamResult
16
+
17
+
18
def message_to_dict(
    message: ChatMessage, context: str = None, tool_calls: List[Any] = None, token_usage: Dict[str, Any] = None
) -> Dict[str, Any]:
    """Build a plain dictionary from a ChatMessage plus its side information.

    The result always carries the ``role``, ``content``, ``context`` and
    ``tool_calls`` keys; ``token_usage`` is added only when a truthy value
    is supplied.

    :param message: The chat message to convert
    :type message: ChatMessage
    :param context: Additional context stored under ``context``
    :type context: str
    :param tool_calls: Tool calls stored under ``tool_calls``
    :type tool_calls: List[Any]
    :param token_usage: Token usage information from the callback
    :type token_usage: Dict[str, Any]
    :return: Dictionary representation of the message
    :rtype: Dict[str, Any]
    """
    entry: Dict[str, Any] = dict(
        role=message.role,
        content=message.content,
        context=context,
        tool_calls=tool_calls,
    )
    if not token_usage:
        return entry
    entry["token_usage"] = token_usage
    return entry
38
+
39
+
40
def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
    """Derive a display name from one attack strategy or a list of them.

    A single strategy yields the string form of its ``value``; a list yields
    the members' values joined with underscores.

    :param attack_strategy: The attack strategy or list of strategies
    :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
    :return: A string name for the strategy
    :rtype: str
    """
    if not isinstance(attack_strategy, list):
        return str(attack_strategy.value)
    return "_".join(str(member.value) for member in attack_strategy)
52
+
53
+
54
def get_flattened_attack_strategies(
    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
    """Expand complexity groups and de-duplicate the requested strategies.

    The ``EASY``, ``MODERATE`` and ``DIFFICULT`` markers are replaced by the
    concrete strategies they stand for (appended after the strategies that
    were requested explicitly), ``Baseline`` is always added, and duplicates
    are dropped while keeping first-seen order. The input is not modified.

    :param attack_strategies: List of attack strategies to flatten
    :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
    :return: Flattened list of attack strategies; composed strategies stay
        as lists
    :rtype: List[Union[AttackStrategy, List[AttackStrategy]]]
    """
    pending = list(attack_strategies)

    # Each group marker is filtered out completely. list.remove() would drop
    # only the first occurrence, so a marker listed twice would otherwise
    # survive and be returned as if it were a concrete strategy.
    if AttackStrategy.EASY in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.EASY]
        pending.extend([AttackStrategy.Base64, AttackStrategy.Flip, AttackStrategy.Morse])

    if AttackStrategy.MODERATE in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.MODERATE]
        pending.extend([AttackStrategy.Tense])

    if AttackStrategy.DIFFICULT in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.DIFFICULT]
        # NOTE(review): Compose presumably returns a list of strategies; it is
        # treated as a composed strategy by the isinstance check below.
        pending.append(AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Base64]))

    # Baseline is always included
    pending.append(AttackStrategy.Baseline)

    flattened_strategies = []
    seen_strategies = set()
    for strategy in pending:
        if isinstance(strategy, list):
            # Composed strategies are lists; a tuple makes them hashable.
            composed_key = tuple(strategy)
            if composed_key not in seen_strategies:
                flattened_strategies.append(list(strategy))
                seen_strategies.add(composed_key)
        elif isinstance(strategy, AttackStrategy) and strategy not in seen_strategies:
            flattened_strategies.append(strategy)
            seen_strategies.add(strategy)

    return flattened_strategies
97
+
98
+
99
def get_attack_success(result: str) -> bool:
    """Tell whether an evaluation result denotes a successful attack.

    The attack counts as successful when ``result`` equals the value stored
    under the ``False`` key of EVALUATION_PASS_FAIL_MAPPING (``"true"`` is
    used if that key is absent).

    :param result: Evaluation result label to classify
    :type result: str
    :return: True if the result denotes attack success
    :rtype: bool
    """
    # Imported at call time, as in the original implementation.
    from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING

    failure_label = EVALUATION_PASS_FAIL_MAPPING.get(False, "true")
    return result == failure_label
106
+
107
+
108
def format_scorecard(redteam_result: RedTeamResult) -> str:
    """Render the scorecard of a RedTeamResult as a plain-text table.

    The output starts with the overall attack success rate (ASR) and attack
    counts, followed by one row per entry of ``joint_risk_attack_summary``
    showing the baseline, easy, moderate and difficult complexity ASR.
    Missing or NaN rates are shown as ``N/A``.

    :param redteam_result: The RedTeamResult object to format
    :type redteam_result: Dict[str, Any]
    :return: A formatted scorecard as a string
    :rtype: str
    """

    def _as_percent(rate):
        # Missing/NaN rates are displayed as a placeholder, not "nan%".
        if is_none_or_nan(rate):
            return "N/A"
        return f"{rate}%"

    scorecard = redteam_result["scorecard"]
    category_summaries = scorecard["risk_category_summary"]
    # Only the first summary entry is used for the overall figures.
    risk_summary = category_summaries[0] if category_summaries else {}

    overall_asr = risk_summary.get("overall_asr", 0)
    overall_successes = risk_summary.get("overall_successful_attacks", 0)
    overall_total = risk_summary.get("overall_total", 0)
    separator = "-" * 132

    rows = [
        f"Overall ASR: {overall_asr}%",
        f"Attack Success: {overall_successes}/{overall_total} attacks were successful",
        separator,
        f"{'Risk Category':<21}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}",
        separator,
    ]

    for item in scorecard["joint_risk_attack_summary"]:
        risk_category = item["risk_category"].replace("_", "-").capitalize()
        baseline = _as_percent(item.get("baseline_asr"))
        easy = _as_percent(item.get("easy_complexity_asr"))
        moderate = _as_percent(item.get("moderate_complexity_asr"))
        difficult = _as_percent(item.get("difficult_complexity_asr"))
        rows.append(f"{risk_category:<21}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")

    return "\n".join(rows)
151
+
152
+
153
def is_none_or_nan(value: Any) -> bool:
    """Return True when ``value`` is None or a float NaN, False otherwise."""
    return value is None or (isinstance(value, float) and math.isnan(value))
160
+
161
+
162
def list_mean_nan_safe(data_list: List[Any]) -> float:
    """Average the usable entries of a list, ignoring None and NaN.

    :param data_list: List of values to calculate mean for
    :type data_list: List[Any]
    :return: Mean of the remaining values, or 0.0 when nothing remains
        after filtering
    :rtype: float
    """
    usable_values = [entry for entry in data_list if not is_none_or_nan(entry)]
    return sum(usable_values) / len(usable_values) if usable_values else 0.0
174
+
175
+
176
def format_content_by_modality(content: str, modality: str) -> str:
    """Format content based on the specified modality, returning one random format.

    Each recognised modality has a fixed set of wrappers; one of them is
    picked with ``random.choice``. Matching on ``modality`` is
    case-insensitive. An empty, ``None`` or unrecognised modality returns
    ``content`` unchanged.

    :param content: The content to format
    :type content: str
    :param modality: The modality type (html, markdown, footnote, code, text)
    :type modality: str
    :return: Formatted content
    :rtype: str
    """
    import html
    import random

    def format_as_html(text):
        # Escape first so the content cannot break out of the surrounding
        # tags or the attribute value of the <meta> variant.
        html_text = html.escape(text)
        return [
            f"<p>{html_text}</p>",
            f'<div style="display:none">{html_text}</div>',
            f'<span style="display:none">{html_text}</span>',
            f"<div hidden>{html_text}</div>",
            f'<meta content="{html_text}">',
        ]

    def format_as_markdown(text):
        # A Markdown hard line break needs at least two trailing spaces
        # before the newline; a single space is rendered as a soft break.
        markdown_text = text.replace("\n", "  \n")
        return [
            f"\n**{markdown_text}**\n",  # Bold
            f"\n*{markdown_text}*\n",  # Italic
            f"\n`{markdown_text}`\n",  # Inline code
            f"\n```{markdown_text}```\n",  # Code block
            f"\n[Link]({markdown_text})\n",  # Link
        ]

    def format_as_footnote(text):
        footnote_text = text.replace("\n", " ")
        return [
            f"[^{footnote_text}]",  # Footnote reference
            f"\n[1] {footnote_text}\n",  # Footnote reference with number
            f"<sup>{footnote_text}</sup>",  # Superscript
            f'<span class="footnote">{footnote_text}</span>',  # Custom footnote span
            f'<div class="footnote">{footnote_text}</div>',  # Custom footnote div
        ]

    def format_as_code(text):
        code_text = text.replace("\n", " ")
        return [
            f"`{code_text}`",  # Inline code
            f"```\n{code_text}\n```",  # Code block
            f'"""\n{code_text}\n"""',  # Triple-quoted string
            f"# {code_text}",  # Inline comment
            f'def function():\n print("{code_text}")',  # Function call
        ]

    def format_as_text(text):
        return [f"<document>{text}</document>"]  # Return text in document tags

    # Mapping of modality types to formatting functions
    modality_formatters = {
        "html": format_as_html,
        "markdown": format_as_markdown,
        "footnote": format_as_footnote,
        "code": format_as_code,
        "text": format_as_text,
    }

    formatter = modality_formatters.get(modality.lower()) if modality else None
    if formatter is None:
        # Return plain text if modality not recognized
        return content

    # Return one random format from the available options
    return random.choice(formatter(content))
250
+
251
+
252
def _conversations_to_json_lines(conversations: List[Any]) -> str:
    """Serialize grouped conversations to JSON Lines text, skipping system-led ones."""
    serialized = []
    for conversation in conversations:
        if conversation[0][0].role == "system":
            # Skip system messages in the output
            continue
        conv_dict = {
            "conversation": {
                "messages": [
                    message_to_dict(chat_message, context, tool_calls, token_usage)
                    for chat_message, context, tool_calls, _risk_sub_type, token_usage in conversation
                ]
            }
        }
        # Add risk_sub_type if present (taken from the first message's label)
        risk_sub_type = conversation[0][3]
        if risk_sub_type:
            conv_dict["risk_sub_type"] = risk_sub_type
        serialized.append(json.dumps(conv_dict) + "\n")
    return "".join(serialized)


def write_pyrit_outputs_to_file(
    *,
    output_path: str,
    logger: logging.Logger,
    prompt_to_context: Dict[str, str],
) -> str:
    """Write the PyRIT conversations recorded for ``output_path`` as JSON Lines.

    Prompt request pieces labelled with ``risk_strategy_path == output_path``
    are read from PyRIT's central memory, grouped by conversation id and
    written one conversation per line. Conversations whose first message has
    the ``system`` role are omitted.

    If ``output_path`` already exists it is replaced only when memory holds
    more conversations than the file has lines; otherwise it is left as is.

    :param output_path: Path to write the output file
    :type output_path: str
    :param logger: Logger instance for logging
    :type logger: logging.Logger
    :param prompt_to_context: Mapping of prompts to their context
    :type prompt_to_context: Dict[str, str]
    :return: Path to the output file
    :rtype: str
    :raises IOError: If a new output file cannot be written
    :raises PermissionError: If there are insufficient permissions to create the output file
    :raises Exception: For other unexpected errors during memory retrieval or
        while creating a new file. Errors raised while inspecting or replacing
        an already existing file are logged as a warning instead of raised.
    """

    logger.debug(f"Writing PyRIT outputs to file: {output_path}")
    memory = CentralMemory.get_memory_instance()

    memory_label = {"risk_strategy_path": output_path}

    prompts_request_pieces = memory.get_prompt_request_pieces(labels=memory_label)

    # NOTE(review): groupby only merges adjacent items, so this relies on the
    # pieces of one conversation being returned contiguously - confirm against
    # the ordering guaranteed by get_prompt_request_pieces.
    conversations = [
        [
            (
                item.to_chat_message(),
                prompt_to_context.get(item.original_value, "") or item.labels.get("context", ""),
                item.labels.get("tool_calls", []),
                item.labels.get("risk_sub_type"),
                item.labels.get("token_usage"),
            )
            for item in group
        ]
        for conv_id, group in itertools.groupby(prompts_request_pieces, key=lambda x: x.conversation_id)
    ]

    # Check if we should overwrite existing file with more conversations
    if os.path.exists(output_path):
        try:
            with open(output_path, "r") as existing_file:
                existing_line_count = sum(1 for _ in existing_file)

            if len(conversations) <= existing_line_count:
                logger.debug(
                    f"Existing file has {existing_line_count} lines, new data has {len(conversations)} prompts. Keeping existing file."
                )
                return str(output_path)

            logger.debug(
                f"Found more prompts ({len(conversations)}) than existing file lines ({existing_line_count}). Replacing content."
            )
            # Serialize before opening for write so a serialization error
            # cannot leave the existing file truncated.
            json_lines = _conversations_to_json_lines(conversations)
            with Path(output_path).open("w") as f:
                f.write(json_lines)
            logger.debug(
                f"Successfully wrote {len(conversations)-existing_line_count} new conversation(s) to {output_path}"
            )
        except Exception as e:
            # Best effort: any failure while reading or replacing the existing
            # file is reported and the path is still returned.
            logger.warning(f"Failed to read existing file {output_path}: {str(e)}")
    else:
        logger.debug(f"Creating new file: {output_path}")
        json_lines = _conversations_to_json_lines(conversations)
        with Path(output_path).open("w") as f:
            f.write(json_lines)
        logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")
    return str(output_path)