azure-ai-evaluation 1.11.2__py3-none-any.whl → 1.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +2 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +69 -28
- azure/ai/evaluation/_aoai/label_grader.py +14 -13
- azure/ai/evaluation/_aoai/python_grader.py +15 -13
- azure/ai/evaluation/_aoai/score_model_grader.py +13 -10
- azure/ai/evaluation/_aoai/string_check_grader.py +13 -13
- azure/ai/evaluation/_aoai/text_similarity_grader.py +16 -25
- azure/ai/evaluation/_common/__init__.py +2 -1
- azure/ai/evaluation/_common/constants.py +109 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +5 -5
- azure/ai/evaluation/_common/onedp/__init__.py +2 -2
- azure/ai/evaluation/_common/onedp/_client.py +44 -14
- azure/ai/evaluation/_common/onedp/_configuration.py +9 -7
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1 -1
- azure/ai/evaluation/_common/onedp/_validation.py +18 -2
- azure/ai/evaluation/_common/onedp/aio/__init__.py +2 -2
- azure/ai/evaluation/_common/onedp/aio/_client.py +44 -14
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +9 -7
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +12 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +3942 -1631
- azure/ai/evaluation/_common/onedp/models/__init__.py +196 -6
- azure/ai/evaluation/_common/onedp/models/_enums.py +217 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +3876 -603
- azure/ai/evaluation/_common/onedp/operations/__init__.py +12 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5422 -2577
- azure/ai/evaluation/_common/rai_service.py +299 -2
- azure/ai/evaluation/_common/utils.py +173 -39
- azure/ai/evaluation/_constants.py +100 -0
- azure/ai/evaluation/_eval_mapping.py +10 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +10 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +1125 -9
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +330 -51
- azure/ai/evaluation/_evaluate/_utils.py +17 -6
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -17
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +7 -1
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +80 -4
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +181 -3
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +7 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +28 -13
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +23 -4
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -7
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +36 -19
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +43 -20
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +7 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +7 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +32 -6
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +23 -127
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -19
- azure/ai/evaluation/_exceptions.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +14 -1
- azure/ai/evaluation/_legacy/prompty/_prompty.py +2 -1
- azure/ai/evaluation/_legacy/prompty/_utils.py +54 -6
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_objective_generator.py +3 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +45 -14
- azure/ai/evaluation/red_team/_evaluation_processor.py +129 -12
- azure/ai/evaluation/red_team/_mlflow_integration.py +144 -36
- azure/ai/evaluation/red_team/_orchestrator_manager.py +309 -51
- azure/ai/evaluation/red_team/_red_team.py +503 -37
- azure/ai/evaluation/red_team/_red_team_result.py +264 -15
- azure/ai/evaluation/red_team/_result_processor.py +953 -31
- azure/ai/evaluation/red_team/_utils/constants.py +1 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +126 -25
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +10 -7
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +3 -25
- azure/ai/evaluation/simulator/_adversarial_simulator.py +1 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +25 -2
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -0
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/METADATA +44 -10
- {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/RECORD +102 -84
- {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/licenses/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/top_level.txt +0 -0
|
@@ -14,63 +14,139 @@ if TYPE_CHECKING:
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
from ._models import ( # type: ignore
|
|
17
|
-
|
|
17
|
+
AIContent,
|
|
18
|
+
AgentClusterInsightResult,
|
|
19
|
+
AgentClusterInsightsRequest,
|
|
18
20
|
AgentEvaluation,
|
|
19
21
|
AgentEvaluationRedactionConfiguration,
|
|
20
22
|
AgentEvaluationRequest,
|
|
21
23
|
AgentEvaluationResult,
|
|
22
24
|
AgentEvaluationSamplingConfiguration,
|
|
25
|
+
AgentTaxonomyInput,
|
|
23
26
|
AnnotationDTO,
|
|
24
27
|
ApiKeyCredentials,
|
|
25
28
|
AssetCredentialRequest,
|
|
26
29
|
AssetCredentialResponse,
|
|
30
|
+
AssistantMessage,
|
|
31
|
+
AttackMessage,
|
|
27
32
|
AttackObjective,
|
|
33
|
+
AzureAIAgentTarget,
|
|
34
|
+
AzureAIEvaluator,
|
|
28
35
|
AzureAISearchIndex,
|
|
36
|
+
AzureOpenAIModelConfiguration,
|
|
29
37
|
BaseCredentials,
|
|
38
|
+
BlobReference,
|
|
30
39
|
BlobReferenceForConsumption,
|
|
40
|
+
ChartCoordinate,
|
|
31
41
|
ChatChoice,
|
|
32
42
|
ChatCompletions,
|
|
33
43
|
ChatResponseMessage,
|
|
44
|
+
ClusterInsightResult,
|
|
45
|
+
ClusterTokenUsage,
|
|
46
|
+
CodeBasedEvaluatorDefinition,
|
|
34
47
|
CompletionsUsage,
|
|
35
48
|
CompletionsUsageDetails,
|
|
36
49
|
Connection,
|
|
37
50
|
Content,
|
|
51
|
+
ContinuousEvaluationRuleAction,
|
|
38
52
|
CosmosDBIndex,
|
|
53
|
+
CreateEvalJsonlRunDataSource,
|
|
54
|
+
CronTrigger,
|
|
39
55
|
CustomCredential,
|
|
56
|
+
CustomInference,
|
|
40
57
|
CustomizationParameters,
|
|
58
|
+
DailyRecurrenceSchedule,
|
|
41
59
|
DatasetVersion,
|
|
42
60
|
Deployment,
|
|
61
|
+
DeveloperMessage,
|
|
43
62
|
EmbeddingConfiguration,
|
|
44
63
|
EntraIDCredentials,
|
|
64
|
+
EvalCompareReport,
|
|
65
|
+
EvalJsonlFileContent,
|
|
66
|
+
EvalJsonlFileContentItem,
|
|
67
|
+
EvalJsonlFileContentSource,
|
|
68
|
+
EvalResult,
|
|
69
|
+
EvalRunOutputItem,
|
|
70
|
+
EvalRunResultCompareItem,
|
|
71
|
+
EvalRunResultComparison,
|
|
72
|
+
EvalRunResultSummary,
|
|
45
73
|
Evaluation,
|
|
74
|
+
EvaluationComparisonRequest,
|
|
46
75
|
EvaluationResult,
|
|
76
|
+
EvaluationResultSample,
|
|
77
|
+
EvaluationRule,
|
|
78
|
+
EvaluationRuleAction,
|
|
79
|
+
EvaluationRuleFilter,
|
|
80
|
+
EvaluationRunClusterInsightResult,
|
|
81
|
+
EvaluationRunClusterInsightsRequest,
|
|
82
|
+
EvaluationScheduleTask,
|
|
47
83
|
EvaluationTarget,
|
|
84
|
+
EvaluationTaxonomy,
|
|
85
|
+
EvaluationTaxonomyInput,
|
|
48
86
|
EvaluationUpload,
|
|
49
87
|
EvaluatorConfiguration,
|
|
88
|
+
EvaluatorDefinition,
|
|
89
|
+
EvaluatorMessage,
|
|
90
|
+
EvaluatorMetric,
|
|
91
|
+
EvaluatorVersion,
|
|
92
|
+
FieldMapping,
|
|
50
93
|
FileDatasetVersion,
|
|
51
94
|
FolderDatasetVersion,
|
|
95
|
+
HourlyRecurrenceSchedule,
|
|
96
|
+
HumanEvaluationRuleAction,
|
|
97
|
+
ImageSource,
|
|
98
|
+
ImageUrlContent,
|
|
52
99
|
Index,
|
|
53
100
|
InputData,
|
|
54
101
|
InputDataset,
|
|
102
|
+
Insight,
|
|
103
|
+
InsightCluster,
|
|
104
|
+
InsightModelConfiguration,
|
|
105
|
+
InsightRequest,
|
|
106
|
+
InsightResult,
|
|
107
|
+
InsightSample,
|
|
108
|
+
InsightScheduleTask,
|
|
109
|
+
InsightSummary,
|
|
110
|
+
InsightsMetadata,
|
|
55
111
|
LongRunningResponse,
|
|
56
|
-
MAASModelConfig,
|
|
57
112
|
ManagedAzureAISearchIndex,
|
|
58
113
|
Message,
|
|
59
114
|
Metadata,
|
|
60
115
|
ModelDeployment,
|
|
116
|
+
ModelResponseGenerationTarget,
|
|
117
|
+
MonthlyRecurrenceSchedule,
|
|
61
118
|
NoAuthenticationCredentials,
|
|
119
|
+
OneTimeTrigger,
|
|
62
120
|
PendingUploadRequest,
|
|
63
121
|
PendingUploadResponse,
|
|
122
|
+
PromptBasedEvaluatorDefinition,
|
|
64
123
|
PromptUsageDetails,
|
|
124
|
+
QueryResponseInlineMessage,
|
|
125
|
+
RecurrenceSchedule,
|
|
126
|
+
RecurrenceTrigger,
|
|
65
127
|
RedTeam,
|
|
66
128
|
RedTeamUpload,
|
|
67
129
|
SASCredentials,
|
|
68
130
|
SasCredential,
|
|
131
|
+
Schedule,
|
|
132
|
+
ScheduleRun,
|
|
133
|
+
ScheduleTask,
|
|
69
134
|
SimulationDTO,
|
|
70
135
|
Sku,
|
|
136
|
+
SyncEvalInput,
|
|
71
137
|
SystemData,
|
|
138
|
+
SystemMessage,
|
|
139
|
+
TargetConfig,
|
|
72
140
|
TargetHarm,
|
|
73
|
-
|
|
141
|
+
TaxonomyCategory,
|
|
142
|
+
TaxonomySubCategory,
|
|
143
|
+
TextContent,
|
|
144
|
+
ToolCallContent,
|
|
145
|
+
ToolDescription,
|
|
146
|
+
ToolResultContent,
|
|
147
|
+
Trigger,
|
|
148
|
+
UserMessage,
|
|
149
|
+
WeeklyRecurrenceSchedule,
|
|
74
150
|
)
|
|
75
151
|
|
|
76
152
|
from ._enums import ( # type: ignore
|
|
@@ -80,89 +156,203 @@ from ._enums import ( # type: ignore
|
|
|
80
156
|
ConnectionType,
|
|
81
157
|
CredentialType,
|
|
82
158
|
DatasetType,
|
|
159
|
+
DayOfWeek,
|
|
83
160
|
DeploymentType,
|
|
161
|
+
EvaluationRuleActionType,
|
|
162
|
+
EvaluationRuleEventType,
|
|
163
|
+
EvaluationTargetType,
|
|
164
|
+
EvaluationTaxonomyInputType,
|
|
165
|
+
EvaluatorCategory,
|
|
166
|
+
EvaluatorDefinitionType,
|
|
167
|
+
EvaluatorMetricDirection,
|
|
168
|
+
EvaluatorMetricType,
|
|
169
|
+
EvaluatorType,
|
|
84
170
|
IndexType,
|
|
171
|
+
InsightType,
|
|
85
172
|
ListViewType,
|
|
173
|
+
OperationState,
|
|
86
174
|
PendingUploadType,
|
|
175
|
+
RecurrenceType,
|
|
87
176
|
ResultType,
|
|
88
177
|
RiskCategory,
|
|
178
|
+
SampleType,
|
|
179
|
+
ScheduleProvisioningStatus,
|
|
180
|
+
ScheduleTaskType,
|
|
89
181
|
SimulationType,
|
|
182
|
+
TargetType,
|
|
183
|
+
TreatmentEffectType,
|
|
184
|
+
TriggerType,
|
|
90
185
|
)
|
|
91
186
|
from ._patch import __all__ as _patch_all
|
|
92
187
|
from ._patch import *
|
|
93
188
|
from ._patch import patch_sdk as _patch_sdk
|
|
94
189
|
|
|
95
190
|
__all__ = [
|
|
96
|
-
"
|
|
191
|
+
"AIContent",
|
|
192
|
+
"AgentClusterInsightResult",
|
|
193
|
+
"AgentClusterInsightsRequest",
|
|
97
194
|
"AgentEvaluation",
|
|
98
195
|
"AgentEvaluationRedactionConfiguration",
|
|
99
196
|
"AgentEvaluationRequest",
|
|
100
197
|
"AgentEvaluationResult",
|
|
101
198
|
"AgentEvaluationSamplingConfiguration",
|
|
199
|
+
"AgentTaxonomyInput",
|
|
102
200
|
"AnnotationDTO",
|
|
103
201
|
"ApiKeyCredentials",
|
|
104
202
|
"AssetCredentialRequest",
|
|
105
203
|
"AssetCredentialResponse",
|
|
204
|
+
"AssistantMessage",
|
|
205
|
+
"AttackMessage",
|
|
106
206
|
"AttackObjective",
|
|
207
|
+
"AzureAIAgentTarget",
|
|
208
|
+
"AzureAIEvaluator",
|
|
107
209
|
"AzureAISearchIndex",
|
|
210
|
+
"AzureOpenAIModelConfiguration",
|
|
108
211
|
"BaseCredentials",
|
|
212
|
+
"BlobReference",
|
|
109
213
|
"BlobReferenceForConsumption",
|
|
214
|
+
"ChartCoordinate",
|
|
110
215
|
"ChatChoice",
|
|
111
216
|
"ChatCompletions",
|
|
112
217
|
"ChatResponseMessage",
|
|
218
|
+
"ClusterInsightResult",
|
|
219
|
+
"ClusterTokenUsage",
|
|
220
|
+
"CodeBasedEvaluatorDefinition",
|
|
113
221
|
"CompletionsUsage",
|
|
114
222
|
"CompletionsUsageDetails",
|
|
115
223
|
"Connection",
|
|
116
224
|
"Content",
|
|
225
|
+
"ContinuousEvaluationRuleAction",
|
|
117
226
|
"CosmosDBIndex",
|
|
227
|
+
"CreateEvalJsonlRunDataSource",
|
|
228
|
+
"CronTrigger",
|
|
118
229
|
"CustomCredential",
|
|
230
|
+
"CustomInference",
|
|
119
231
|
"CustomizationParameters",
|
|
232
|
+
"DailyRecurrenceSchedule",
|
|
120
233
|
"DatasetVersion",
|
|
121
234
|
"Deployment",
|
|
235
|
+
"DeveloperMessage",
|
|
122
236
|
"EmbeddingConfiguration",
|
|
123
237
|
"EntraIDCredentials",
|
|
238
|
+
"EvalCompareReport",
|
|
239
|
+
"EvalJsonlFileContent",
|
|
240
|
+
"EvalJsonlFileContentItem",
|
|
241
|
+
"EvalJsonlFileContentSource",
|
|
242
|
+
"EvalResult",
|
|
243
|
+
"EvalRunOutputItem",
|
|
244
|
+
"EvalRunResultCompareItem",
|
|
245
|
+
"EvalRunResultComparison",
|
|
246
|
+
"EvalRunResultSummary",
|
|
124
247
|
"Evaluation",
|
|
248
|
+
"EvaluationComparisonRequest",
|
|
125
249
|
"EvaluationResult",
|
|
250
|
+
"EvaluationResultSample",
|
|
251
|
+
"EvaluationRule",
|
|
252
|
+
"EvaluationRuleAction",
|
|
253
|
+
"EvaluationRuleFilter",
|
|
254
|
+
"EvaluationRunClusterInsightResult",
|
|
255
|
+
"EvaluationRunClusterInsightsRequest",
|
|
256
|
+
"EvaluationScheduleTask",
|
|
126
257
|
"EvaluationTarget",
|
|
258
|
+
"EvaluationTaxonomy",
|
|
259
|
+
"EvaluationTaxonomyInput",
|
|
127
260
|
"EvaluationUpload",
|
|
128
261
|
"EvaluatorConfiguration",
|
|
262
|
+
"EvaluatorDefinition",
|
|
263
|
+
"EvaluatorMessage",
|
|
264
|
+
"EvaluatorMetric",
|
|
265
|
+
"EvaluatorVersion",
|
|
266
|
+
"FieldMapping",
|
|
129
267
|
"FileDatasetVersion",
|
|
130
268
|
"FolderDatasetVersion",
|
|
269
|
+
"HourlyRecurrenceSchedule",
|
|
270
|
+
"HumanEvaluationRuleAction",
|
|
271
|
+
"ImageSource",
|
|
272
|
+
"ImageUrlContent",
|
|
131
273
|
"Index",
|
|
132
274
|
"InputData",
|
|
133
275
|
"InputDataset",
|
|
276
|
+
"Insight",
|
|
277
|
+
"InsightCluster",
|
|
278
|
+
"InsightModelConfiguration",
|
|
279
|
+
"InsightRequest",
|
|
280
|
+
"InsightResult",
|
|
281
|
+
"InsightSample",
|
|
282
|
+
"InsightScheduleTask",
|
|
283
|
+
"InsightSummary",
|
|
284
|
+
"InsightsMetadata",
|
|
134
285
|
"LongRunningResponse",
|
|
135
|
-
"MAASModelConfig",
|
|
136
286
|
"ManagedAzureAISearchIndex",
|
|
137
287
|
"Message",
|
|
138
288
|
"Metadata",
|
|
139
289
|
"ModelDeployment",
|
|
290
|
+
"ModelResponseGenerationTarget",
|
|
291
|
+
"MonthlyRecurrenceSchedule",
|
|
140
292
|
"NoAuthenticationCredentials",
|
|
293
|
+
"OneTimeTrigger",
|
|
141
294
|
"PendingUploadRequest",
|
|
142
295
|
"PendingUploadResponse",
|
|
296
|
+
"PromptBasedEvaluatorDefinition",
|
|
143
297
|
"PromptUsageDetails",
|
|
298
|
+
"QueryResponseInlineMessage",
|
|
299
|
+
"RecurrenceSchedule",
|
|
300
|
+
"RecurrenceTrigger",
|
|
144
301
|
"RedTeam",
|
|
145
302
|
"RedTeamUpload",
|
|
146
303
|
"SASCredentials",
|
|
147
304
|
"SasCredential",
|
|
305
|
+
"Schedule",
|
|
306
|
+
"ScheduleRun",
|
|
307
|
+
"ScheduleTask",
|
|
148
308
|
"SimulationDTO",
|
|
149
309
|
"Sku",
|
|
310
|
+
"SyncEvalInput",
|
|
150
311
|
"SystemData",
|
|
312
|
+
"SystemMessage",
|
|
313
|
+
"TargetConfig",
|
|
151
314
|
"TargetHarm",
|
|
152
|
-
"
|
|
315
|
+
"TaxonomyCategory",
|
|
316
|
+
"TaxonomySubCategory",
|
|
317
|
+
"TextContent",
|
|
318
|
+
"ToolCallContent",
|
|
319
|
+
"ToolDescription",
|
|
320
|
+
"ToolResultContent",
|
|
321
|
+
"Trigger",
|
|
322
|
+
"UserMessage",
|
|
323
|
+
"WeeklyRecurrenceSchedule",
|
|
153
324
|
"AttackStrategy",
|
|
154
325
|
"ChatRole",
|
|
155
326
|
"CompletionsFinishReason",
|
|
156
327
|
"ConnectionType",
|
|
157
328
|
"CredentialType",
|
|
158
329
|
"DatasetType",
|
|
330
|
+
"DayOfWeek",
|
|
159
331
|
"DeploymentType",
|
|
332
|
+
"EvaluationRuleActionType",
|
|
333
|
+
"EvaluationRuleEventType",
|
|
334
|
+
"EvaluationTargetType",
|
|
335
|
+
"EvaluationTaxonomyInputType",
|
|
336
|
+
"EvaluatorCategory",
|
|
337
|
+
"EvaluatorDefinitionType",
|
|
338
|
+
"EvaluatorMetricDirection",
|
|
339
|
+
"EvaluatorMetricType",
|
|
340
|
+
"EvaluatorType",
|
|
160
341
|
"IndexType",
|
|
342
|
+
"InsightType",
|
|
161
343
|
"ListViewType",
|
|
344
|
+
"OperationState",
|
|
162
345
|
"PendingUploadType",
|
|
346
|
+
"RecurrenceType",
|
|
163
347
|
"ResultType",
|
|
164
348
|
"RiskCategory",
|
|
349
|
+
"SampleType",
|
|
350
|
+
"ScheduleProvisioningStatus",
|
|
351
|
+
"ScheduleTaskType",
|
|
165
352
|
"SimulationType",
|
|
353
|
+
"TargetType",
|
|
354
|
+
"TreatmentEffectType",
|
|
355
|
+
"TriggerType",
|
|
166
356
|
]
|
|
167
357
|
__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
|
|
168
358
|
_patch_sdk()
|
|
@@ -149,6 +149,25 @@ class DatasetType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
149
149
|
"""URI folder."""
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
class DayOfWeek(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
153
|
+
"""Days of the week for recurrence schedule."""
|
|
154
|
+
|
|
155
|
+
SUNDAY = "Sunday"
|
|
156
|
+
"""Sunday."""
|
|
157
|
+
MONDAY = "Monday"
|
|
158
|
+
"""Monday."""
|
|
159
|
+
TUESDAY = "Tuesday"
|
|
160
|
+
"""Tuesday."""
|
|
161
|
+
WEDNESDAY = "Wednesday"
|
|
162
|
+
"""Wednesday."""
|
|
163
|
+
THURSDAY = "Thursday"
|
|
164
|
+
"""Thursday."""
|
|
165
|
+
FRIDAY = "Friday"
|
|
166
|
+
"""Friday."""
|
|
167
|
+
SATURDAY = "Saturday"
|
|
168
|
+
"""Saturday."""
|
|
169
|
+
|
|
170
|
+
|
|
152
171
|
class DeploymentType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
153
172
|
"""Type of DeploymentType."""
|
|
154
173
|
|
|
@@ -156,6 +175,99 @@ class DeploymentType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
156
175
|
"""Model deployment"""
|
|
157
176
|
|
|
158
177
|
|
|
178
|
+
class EvaluationRuleActionType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
179
|
+
"""Type of the evaluation action."""
|
|
180
|
+
|
|
181
|
+
CONTINUOUS_EVALUATION = "continuousEvaluation"
|
|
182
|
+
"""Continuous evaluation."""
|
|
183
|
+
HUMAN_EVALUATION = "humanEvaluation"
|
|
184
|
+
"""Human evaluation."""
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class EvaluationRuleEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
188
|
+
"""Type of the evaluation rule event."""
|
|
189
|
+
|
|
190
|
+
RESPONSE_COMPLETED = "response.completed"
|
|
191
|
+
"""Response completed."""
|
|
192
|
+
MANUAL = "manual"
|
|
193
|
+
"""Manual trigger."""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class EvaluationTargetType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
197
|
+
"""Allowed types of evaluation targets."""
|
|
198
|
+
|
|
199
|
+
MODEL_RESPONSE_GENERATION = "modelResponseGeneration"
|
|
200
|
+
"""Evaluation target that uses a model for response generation."""
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class EvaluationTaxonomyInputType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
204
|
+
"""Type of the evaluation taxonomy input."""
|
|
205
|
+
|
|
206
|
+
AGENT = "agent"
|
|
207
|
+
"""Agent"""
|
|
208
|
+
POLICY = "policy"
|
|
209
|
+
"""Policy."""
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class EvaluatorCategory(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
213
|
+
"""The category of the evaluator."""
|
|
214
|
+
|
|
215
|
+
QUALITY = "quality"
|
|
216
|
+
"""Quality"""
|
|
217
|
+
SAFETY = "safety"
|
|
218
|
+
"""Risk & Safety"""
|
|
219
|
+
AGENTS = "agents"
|
|
220
|
+
"""Agents"""
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class EvaluatorDefinitionType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
224
|
+
"""The type of evaluator definition."""
|
|
225
|
+
|
|
226
|
+
PROMPT = "prompt"
|
|
227
|
+
"""Prompt-based definition"""
|
|
228
|
+
CODE = "code"
|
|
229
|
+
"""Code-based definition"""
|
|
230
|
+
PROMPT_AND_CODE = "prompt_and_code"
|
|
231
|
+
"""Prompt & Code Based definition"""
|
|
232
|
+
SERVICE = "service"
|
|
233
|
+
"""Service-based evaluator"""
|
|
234
|
+
OPENAI_GRADERS = "openai_graders"
|
|
235
|
+
"""OpenAI graders"""
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class EvaluatorMetricDirection(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
239
|
+
"""The direction of the metric indicating whether a higher value is better, a lower value is
|
|
240
|
+
better, or neutral.
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
INCREASE = "increase"
|
|
244
|
+
"""It indicates a higher value is better for this metric"""
|
|
245
|
+
DECREASE = "decrease"
|
|
246
|
+
"""It indicates a lower value is better for this metric"""
|
|
247
|
+
NEUTRAL = "neutral"
|
|
248
|
+
"""It indicates no preference for this metric direction"""
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class EvaluatorMetricType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
252
|
+
"""The type of the evaluator."""
|
|
253
|
+
|
|
254
|
+
ORDINAL = "ordinal"
|
|
255
|
+
"""Ordinal metric representing categories that can be ordered or ranked."""
|
|
256
|
+
CONTINUOUS = "continuous"
|
|
257
|
+
"""Continuous metric representing values in a continuous range."""
|
|
258
|
+
BOOLEAN = "boolean"
|
|
259
|
+
"""Boolean metric representing true/false values"""
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class EvaluatorType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
263
|
+
"""The type of the evaluator."""
|
|
264
|
+
|
|
265
|
+
BUILT_IN = "builtin"
|
|
266
|
+
"""Built-in evaluator (Microsoft provided)"""
|
|
267
|
+
CUSTOM = "custom"
|
|
268
|
+
"""Custom evaluator"""
|
|
269
|
+
|
|
270
|
+
|
|
159
271
|
class IndexType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
160
272
|
"""Type of IndexType."""
|
|
161
273
|
|
|
@@ -167,6 +279,17 @@ class IndexType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
167
279
|
"""Managed Azure Search"""
|
|
168
280
|
|
|
169
281
|
|
|
282
|
+
class InsightType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
283
|
+
"""The request of the insights."""
|
|
284
|
+
|
|
285
|
+
EVALUATION_RUN_CLUSTER_INSIGHT = "EvaluationRunClusterInsight"
|
|
286
|
+
"""Insights on an Evaluation run result."""
|
|
287
|
+
AGENT_CLUSTER_INSIGHT = "AgentClusterInsight"
|
|
288
|
+
"""Cluster Insight on an Agent."""
|
|
289
|
+
EVALUATION_COMPARISON = "EvaluationComparison"
|
|
290
|
+
"""Evaluation Comparison."""
|
|
291
|
+
|
|
292
|
+
|
|
170
293
|
class ListViewType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
171
294
|
"""List View Type Definition."""
|
|
172
295
|
|
|
@@ -178,15 +301,45 @@ class ListViewType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
178
301
|
"""List all items."""
|
|
179
302
|
|
|
180
303
|
|
|
304
|
+
class OperationState(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
305
|
+
"""Enum describing allowed operation states."""
|
|
306
|
+
|
|
307
|
+
NOT_STARTED = "NotStarted"
|
|
308
|
+
"""The operation has not started."""
|
|
309
|
+
RUNNING = "Running"
|
|
310
|
+
"""The operation is in progress."""
|
|
311
|
+
SUCCEEDED = "Succeeded"
|
|
312
|
+
"""The operation has completed successfully."""
|
|
313
|
+
FAILED = "Failed"
|
|
314
|
+
"""The operation has failed."""
|
|
315
|
+
CANCELED = "Canceled"
|
|
316
|
+
"""The operation has been canceled by the user."""
|
|
317
|
+
|
|
318
|
+
|
|
181
319
|
class PendingUploadType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
182
320
|
"""The type of pending upload."""
|
|
183
321
|
|
|
184
322
|
NONE = "None"
|
|
185
323
|
"""No pending upload."""
|
|
324
|
+
BLOB_REFERENCE = "BlobReference"
|
|
325
|
+
"""Blob Reference is the only supported type."""
|
|
186
326
|
TEMPORARY_BLOB_REFERENCE = "TemporaryBlobReference"
|
|
187
327
|
"""Temporary Blob Reference is the only supported type."""
|
|
188
328
|
|
|
189
329
|
|
|
330
|
+
class RecurrenceType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
331
|
+
"""Recurrence type."""
|
|
332
|
+
|
|
333
|
+
HOURLY = "Hourly"
|
|
334
|
+
"""Hourly recurrence pattern."""
|
|
335
|
+
DAILY = "Daily"
|
|
336
|
+
"""Daily recurrence pattern."""
|
|
337
|
+
WEEKLY = "Weekly"
|
|
338
|
+
"""Weekly recurrence pattern."""
|
|
339
|
+
MONTHLY = "Monthly"
|
|
340
|
+
"""Monthly recurrence pattern."""
|
|
341
|
+
|
|
342
|
+
|
|
190
343
|
class ResultType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
191
344
|
"""Type of Evaluation result."""
|
|
192
345
|
|
|
@@ -219,6 +372,37 @@ class RiskCategory(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
219
372
|
"""Represents content with ungrounded attributes."""
|
|
220
373
|
|
|
221
374
|
|
|
375
|
+
class SampleType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
376
|
+
"""The type of sample used in the analysis."""
|
|
377
|
+
|
|
378
|
+
EVALUATION_RESULT_SAMPLE = "EvaluationResultSample"
|
|
379
|
+
"""A sample from the evaluation result."""
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class ScheduleProvisioningStatus(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
383
|
+
"""Schedule provisioning status."""
|
|
384
|
+
|
|
385
|
+
CREATING = "Creating"
|
|
386
|
+
"""Represents the creation status of the schedule."""
|
|
387
|
+
UPDATING = "Updating"
|
|
388
|
+
"""Represents the updating status of the schedule."""
|
|
389
|
+
DELETING = "Deleting"
|
|
390
|
+
"""Represents the deleting status of the schedule."""
|
|
391
|
+
SUCCEEDED = "Succeeded"
|
|
392
|
+
"""Represents the succeeded status of the schedule."""
|
|
393
|
+
FAILED = "Failed"
|
|
394
|
+
"""Represents the failed status of the schedule."""
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
class ScheduleTaskType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
398
|
+
"""Type of the task."""
|
|
399
|
+
|
|
400
|
+
EVALUATION = "Evaluation"
|
|
401
|
+
"""Evaluation task."""
|
|
402
|
+
INSIGHT = "Insight"
|
|
403
|
+
"""Insight task."""
|
|
404
|
+
|
|
405
|
+
|
|
222
406
|
class SimulationType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
223
407
|
"""Simulation type."""
|
|
224
408
|
|
|
@@ -228,3 +412,36 @@ class SimulationType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
228
412
|
"""Custom persona simulation type."""
|
|
229
413
|
HARM_TURN_GENERATOR = "HarmTurnGenerator"
|
|
230
414
|
"""Harm turn generator simulation type."""
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class TargetType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
418
|
+
"""Target type."""
|
|
419
|
+
|
|
420
|
+
AZURE_AI_AGENT = "AzureAIAgent"
|
|
421
|
+
"""Azure AI Agent Target"""
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
class TreatmentEffectType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
425
|
+
"""Treatment Effect Type."""
|
|
426
|
+
|
|
427
|
+
TOO_FEW_SAMPLES = "TooFewSamples"
|
|
428
|
+
"""Not enough samples to determine treatment effect."""
|
|
429
|
+
INCONCLUSIVE = "Inconclusive"
|
|
430
|
+
"""No significant difference between treatment and baseline."""
|
|
431
|
+
CHANGED = "Changed"
|
|
432
|
+
"""Indicates the metric changed with statistical significance, but the direction is neutral."""
|
|
433
|
+
IMPROVED = "Improved"
|
|
434
|
+
"""Indicates the treatment significantly improved the metric compared to baseline."""
|
|
435
|
+
DEGRADED = "Degraded"
|
|
436
|
+
"""Indicates the treatment significantly degraded the metric compared to baseline."""
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
class TriggerType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
440
|
+
"""Type of the trigger."""
|
|
441
|
+
|
|
442
|
+
CRON = "Cron"
|
|
443
|
+
"""Cron based trigger."""
|
|
444
|
+
RECURRENCE = "Recurrence"
|
|
445
|
+
"""Recurrence based trigger."""
|
|
446
|
+
ONE_TIME = "OneTime"
|
|
447
|
+
"""One-time trigger."""
|