arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arthur_common/aggregations/aggregator.py +73 -9
- arthur_common/aggregations/functions/agentic_aggregations.py +260 -85
- arthur_common/aggregations/functions/categorical_count.py +15 -15
- arthur_common/aggregations/functions/confusion_matrix.py +24 -26
- arthur_common/aggregations/functions/inference_count.py +5 -9
- arthur_common/aggregations/functions/inference_count_by_class.py +16 -27
- arthur_common/aggregations/functions/inference_null_count.py +10 -13
- arthur_common/aggregations/functions/mean_absolute_error.py +12 -18
- arthur_common/aggregations/functions/mean_squared_error.py +12 -18
- arthur_common/aggregations/functions/multiclass_confusion_matrix.py +13 -20
- arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +1 -1
- arthur_common/aggregations/functions/numeric_stats.py +13 -15
- arthur_common/aggregations/functions/numeric_sum.py +12 -15
- arthur_common/aggregations/functions/shield_aggregations.py +457 -215
- arthur_common/models/common_schemas.py +214 -0
- arthur_common/models/connectors.py +10 -2
- arthur_common/models/constants.py +24 -0
- arthur_common/models/datasets.py +0 -9
- arthur_common/models/enums.py +177 -0
- arthur_common/models/metric_schemas.py +63 -0
- arthur_common/models/metrics.py +2 -9
- arthur_common/models/request_schemas.py +870 -0
- arthur_common/models/response_schemas.py +785 -0
- arthur_common/models/schema_definitions.py +6 -1
- arthur_common/models/task_job_specs.py +3 -12
- arthur_common/tools/duckdb_data_loader.py +34 -2
- arthur_common/tools/duckdb_utils.py +3 -6
- arthur_common/tools/schema_inferer.py +3 -6
- {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/METADATA +12 -4
- arthur_common-2.4.13.dist-info/RECORD +49 -0
- arthur_common/models/shield.py +0 -642
- arthur_common-2.1.58.dist-info/RECORD +0 -44
- {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,785 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Any, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
5
|
+
|
|
6
|
+
from arthur_common.models.common_schemas import (
|
|
7
|
+
AuthUserRole,
|
|
8
|
+
VariableTemplateValue,
|
|
9
|
+
ExamplesConfig,
|
|
10
|
+
KeywordsConfig,
|
|
11
|
+
PIIConfig,
|
|
12
|
+
RegexConfig,
|
|
13
|
+
ToxicityConfig,
|
|
14
|
+
)
|
|
15
|
+
from arthur_common.models.enums import (
|
|
16
|
+
AgenticAnnotationType,
|
|
17
|
+
ContinuousEvalRunStatus,
|
|
18
|
+
InferenceFeedbackTarget,
|
|
19
|
+
MetricType,
|
|
20
|
+
PIIEntityTypes,
|
|
21
|
+
RuleResultEnum,
|
|
22
|
+
RuleScope,
|
|
23
|
+
RuleType,
|
|
24
|
+
ToxicityViolationType,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class HTTPError(BaseModel):
    # Error payload carrying a single human-readable `detail` message.
    model_config = ConfigDict(
        json_schema_extra={"example": {"detail": "HTTPException raised."}},
    )

    detail: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RuleResponse(BaseModel):
    # Response schema describing one rule: identity, type, where it applies,
    # scope, timestamps and an optional type-specific configuration.
    id: str = Field(description="ID of the Rule")
    name: str = Field(description="Name of the Rule")
    type: RuleType = Field(description="Type of Rule")
    apply_to_prompt: bool = Field(description="Rule applies to prompt")
    apply_to_response: bool = Field(description="Rule applies to response")
    enabled: bool | None = Field(
        default=None,
        description="Rule is enabled for the task",
    )
    scope: RuleScope = Field(
        description="Scope of the rule. The rule can be set at default level or task level.",
    )
    # Both timestamps are integers in UNIX-millisecond format.
    created_at: int = Field(description="Time the rule was created in unix milliseconds")
    updated_at: int = Field(description="Time the rule was updated in unix milliseconds")
    # Member order of the union is kept exactly as originally declared.
    config: Optional[
        Union[KeywordsConfig, RegexConfig, ExamplesConfig, ToxicityConfig, PIIConfig]
    ] = Field(default=None, description="Config of the rule")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class HallucinationClaimResponse(BaseModel):
    # A single claim with its validity flag and the accompanying reason.
    claim: str
    valid: bool
    reason: str
    # Falls back to -1 when no explicit ordering position is supplied.
    order_number: int | None = Field(
        description="This field is a helper for ordering the claims",
        default=-1,
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class PIIEntitySpanResponse(BaseModel):
    # One detected PII entity: its type, the matched text and a confidence.
    entity: PIIEntityTypes
    span: str = Field(
        description="The subtext within the input string that was identified as PII.",
    )
    # Left optional purely for backwards compatibility with older inferences.
    confidence: float | None = Field(
        default=None,
        description="Float value representing the confidence score of a given PII identification.",
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class KeywordSpanResponse(BaseModel):
    # A single keyword hit reported by a keyword rule.
    keyword: str = Field(description="The keyword from the rule that matched within the input string.")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class RegexSpanResponse(BaseModel):
    # A single regex hit: the matched text and, optionally, the pattern.
    matching_text: str = Field(
        description="The subtext within the input string that matched the regex rule.",
    )
    pattern: str | None = Field(
        default=None,
        description="Pattern that yielded the match.",
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class BaseDetailsResponse(BaseModel):
    # Fields shared by every rule-details payload; both may be absent.
    score: bool | None = None
    message: str | None = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class HallucinationDetailsResponse(BaseDetailsResponse):
    # Base details plus the (required) list of evaluated claims.
    claims: List[HallucinationClaimResponse]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class PIIDetailsResponse(BaseDetailsResponse):
    # Base details plus the (required) list of detected PII entity spans.
    pii_entities: List[PIIEntitySpanResponse]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ToxicityDetailsResponse(BaseDetailsResponse):
    # Base details plus toxicity score and violation type.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    toxicity_score: float | None = None
    toxicity_violation_type: ToxicityViolationType
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class KeywordDetailsResponse(BaseDetailsResponse):
    # Base details plus the keyword matches; defaults to an empty list.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    keyword_matches: List[KeywordSpanResponse] = Field(
        default=[],
        description="Each keyword in this list corresponds to a keyword that was both configured in the rule that was "
        "run and found in the input text.",
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class RegexDetailsResponse(BaseDetailsResponse):
    # Base details plus the regex matches; defaults to an empty list.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    regex_matches: List[RegexSpanResponse] = Field(
        default=[],
        description="Each string in this list corresponds to a matching span from the input text that matches the "
        "configured regex rule.",
    )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class ExternalRuleResult(BaseModel):
    # Outcome of running one rule: which rule ran, its result, how long it
    # took, and optional rule-specific details.
    id: str = Field(description="ID of the rule")
    name: str = Field(description="Name of the rule")
    rule_type: RuleType = Field(description="Type of the rule")
    scope: RuleScope = Field(
        description="Scope of the rule. The rule can be set at default level or task level.",
    )
    result: RuleResultEnum = Field(description="Result of the rule")
    latency_ms: int = Field(description="Duration in milliseconds of rule execution")

    # The super class (BaseDetailsResponse) must come last in this ordering, otherwise the fastapi serializer will
    # pick it for the more specific types and you'll waste time figuring out why type1 is being serialized as type2.
    # https://github.com/tiangolo/fastapi/issues/2783#issuecomment-776662347
    details: Optional[
        Union[
            KeywordDetailsResponse,
            RegexDetailsResponse,
            HallucinationDetailsResponse,
            PIIDetailsResponse,
            ToxicityDetailsResponse,
            BaseDetailsResponse,
        ]
    ] = Field(description="Details of the rule output", default=None)

    # The example lists every required field so that it validates against
    # this model; `details` is optional and therefore omitted.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
                "name": "PII Rule",
                "rule_type": "PIIDataRule",
                "scope": "default",
                "result": "Pass",
                "latency_ms": 73,
            },
        },
    )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ValidationResult(BaseModel):
    # Result of a validation call: the inference it belongs to, the per-rule
    # results, and optional user / model identification. Every field is
    # optional and defaults to None.
    inference_id: str | None = Field(default=None, description="ID of the inference")
    rule_results: list[ExternalRuleResult] | None = Field(
        default=None,
        description="List of rule results",
    )
    user_id: str | None = Field(
        default=None,
        description="The user ID this prompt belongs to",
    )
    model_name: str | None = Field(
        default=None,
        description="The model name and version used for this validation (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
    )

    # NOTE(review): the rule results in this example are abbreviated; they
    # show only id / name / result.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "inference_id": "4dd1fae1-34b9-4aec-8abe-fe7bf12af31d",
                "rule_results": [
                    {
                        "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
                        "name": "PII Check",
                        "result": "Pass",
                    },
                    {
                        "id": "946c4a44-b367-4229-84d4-1a8e461cb132",
                        "name": "Sensitive Data Check",
                        "result": "Pass",
                    },
                ],
            },
        },
    )
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class ExternalInferencePrompt(BaseModel):
    # Prompt side of an inference: message text, overall result, the rule
    # results computed for the prompt, and an optional token count.
    id: str
    inference_id: str
    result: RuleResultEnum
    created_at: int
    updated_at: int
    message: str
    prompt_rule_results: list[ExternalRuleResult]
    tokens: Optional[int] = None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class ExternalInferenceResponse(BaseModel):
    # Response side of an inference: message text, optional context, overall
    # result, the rule results computed for the response, and optional token
    # count / model name.
    id: str
    inference_id: str
    result: RuleResultEnum
    created_at: int
    updated_at: int
    message: str
    context: str | None = None
    response_rule_results: list[ExternalRuleResult]
    tokens: Optional[int] = None
    model_name: str | None = Field(
        default=None,
        description="The model name and version used for this response (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
    )
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class InferenceFeedbackResponse(BaseModel):
    # One feedback entry attached to an inference. Unlike the inference
    # models, the timestamps here are `datetime` values, not integers.
    id: str
    inference_id: str
    target: InferenceFeedbackTarget
    score: int
    reason: str | None = None
    user_id: str | None = None
    created_at: datetime
    updated_at: datetime
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class QueryFeedbackResponse(BaseModel):
    # One page of feedback items together with pagination bookkeeping.
    feedback: list[InferenceFeedbackResponse] = Field(
        description="List of feedback items matching the search filters. Length is less than or equal to page_size parameter",
    )
    page: int = Field(description="The current page number")
    page_size: int = Field(description="The number of feedback items per page")
    total_pages: int = Field(description="The total number of pages")
    total_count: int = Field(
        description="The total number of feedback items matching the query parameters",
    )

    # `score` is shown as an integer, matching InferenceFeedbackResponse.score.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "feedback": [
                    {
                        "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
                        "inference_id": "81437d71-9557-4611-981b-9283d1c98643",
                        "target": "context",
                        "score": 0,
                        "reason": "good reason",
                        "user_id": "user_1",
                        "created_at": "2024-06-06T06:37:46.123-04:00",
                        "updated_at": "2024-06-06T06:37:46.123-04:00",
                    },
                    {
                        "id": "248381c2-543b-4de0-98cd-d7511fee6241",
                        "inference_id": "bcbc7ca0-4cfc-4f67-9cf8-26cb2291ba33",
                        "target": "response_results",
                        "score": 1,
                        "reason": "some reason",
                        "user_id": "user_2",
                        "created_at": "2023-05-05T05:26:35.987-04:00",
                        "updated_at": "2023-05-05T05:26:35.987-04:00",
                    },
                ],
                "page": 1,
                "page_size": 10,
                "total_pages": 1,
                "total_count": 2,
            },
        },
    )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class ExternalInference(BaseModel):
    # A complete inference record: overall result, integer timestamps, the
    # prompt (required), the response (optional), attached feedback, and
    # optional task / conversation / user / model identifiers.
    id: str
    result: RuleResultEnum
    created_at: int
    updated_at: int
    task_id: str | None = None
    task_name: Optional[str] = None
    conversation_id: str | None = None
    inference_prompt: ExternalInferencePrompt
    inference_response: ExternalInferenceResponse | None = None
    inference_feedback: list[InferenceFeedbackResponse]
    user_id: Optional[str] = None
    model_name: str | None = Field(
        default=None,
        description="The model name and version used for this inference (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
    )
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class QueryInferencesResponse(BaseModel):
    # Result of an inference query: the total match count plus the
    # inferences returned for the current page.
    count: int = Field(
        description="The total number of inferences matching the query parameters",
    )
    inferences: list[ExternalInference] = Field(
        description="List of inferences matching the search filters. Length is less than or equal to page_size parameter",
    )

    # The hallucination `details` example contains only keys defined by
    # HallucinationDetailsResponse (score, message, claims).
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "count": 1,
                "inferences": [
                    {
                        "id": "957df309-c907-4b77-abe5-15dd00c081f7",
                        "result": "Pass",
                        "created_at": 1723204737120,
                        "updated_at": 1723204787050,
                        "task_id": "957df309-c907-4b77-abe5-15dd00c081f8",
                        "task_name": "My task name",
                        "conversation_id": "957df309-c907-4b77-abe5-15dd00c08112",
                        "inference_prompt": {
                            "id": "834f7ebd-cd6b-4691-9473-8bc350f8922c",
                            "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
                            "result": "Pass",
                            "created_at": 1723204737121,
                            "updated_at": 1723204737121,
                            "message": "How many stars are in the solar system?",
                            "prompt_rule_results": [
                                {
                                    "id": "bc599a56-2e31-4cb7-910d-9e5ed6455db2",
                                    "name": "My_PII_Rule",
                                    "rule_type": "PIIDataRule",
                                    "scope": "default",
                                    "result": "Pass",
                                    "latency_ms": 73,
                                    "details": None,
                                },
                            ],
                            "tokens": 100,
                        },
                        "inference_response": {
                            "id": "ec765a75-1479-4938-8e1c-6334b7deb8ce",
                            "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
                            "result": "Pass",
                            "created_at": 1723204786599,
                            "updated_at": 1723204786599,
                            "message": "There is one star in solar system.",
                            "context": "Solar system contains one star.",
                            "response_rule_results": [
                                {
                                    "id": "a45267c5-96d9-4de2-a871-debf2c8fdb86",
                                    "name": "My_another_PII_Rule",
                                    "rule_type": "PIIDataRule",
                                    "scope": "default",
                                    "result": "Pass",
                                    "latency_ms": 107,
                                    "details": None,
                                },
                                {
                                    "id": "92b7b46e-eaf2-4226-82d4-be12ceb3e4b7",
                                    "name": "My_Hallucination_Rule",
                                    "rule_type": "ModelHallucinationRuleV2",
                                    "scope": "default",
                                    "result": "Pass",
                                    "latency_ms": 700,
                                    "details": {
                                        "score": True,
                                        "message": "All claims were supported by the context!",
                                        "claims": [
                                            {
                                                "claim": "There is one star in solar system.",
                                                "valid": True,
                                                "reason": "No hallucination detected!",
                                                "order_number": 0,
                                            },
                                        ],
                                    },
                                },
                            ],
                            "tokens": 100,
                        },
                        "inference_feedback": [
                            {
                                "id": "0d602e5c-4ae6-4fc9-a610-68a1d8928ad7",
                                "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
                                "target": "context",
                                "score": 100,
                                "reason": "Perfect answer.",
                                "user_id": "957df309-2137-4b77-abe5-15dd00c081f8",
                                "created_at": "2024-08-09T12:08:34.847381",
                                "updated_at": "2024-08-09T12:08:34.847386",
                            },
                        ],
                        "user_id": "957df309-2137-4b77-abe5-15dd00c081f8",
                    },
                ],
            },
        },
    )
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
class MetricResponse(BaseModel):
    # Response schema describing one metric: identity, type, metadata,
    # optional JSON-serialized config, timestamps and enabled flag.
    id: str = Field(description="ID of the Metric")
    name: str = Field(description="Name of the Metric")
    type: MetricType = Field(description="Type of the Metric")
    metric_metadata: str = Field(description="Metadata of the Metric")
    config: Optional[str] = Field(
        description="JSON-serialized configuration for the Metric",
        default=None,
    )
    # These are `datetime` fields, so the descriptions no longer claim
    # "unix milliseconds" as the integer-timestamp models do.
    created_at: datetime = Field(
        description="Time the Metric was created",
    )
    updated_at: datetime = Field(
        description="Time the Metric was updated",
    )
    enabled: Optional[bool] = Field(
        description="Whether the Metric is enabled",
        default=None,
    )
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
class TaskResponse(BaseModel):
    # Response schema describing one task: identity, integer timestamps,
    # optional agentic flag, its rules and (optionally) its metrics.
    id: str = Field(description="ID of the task")
    name: str = Field(description="Name of the task")
    created_at: int = Field(
        description="Time the task was created in unix milliseconds",
    )
    updated_at: int = Field(
        description="Time the task was updated in unix milliseconds",
    )
    is_agentic: Optional[bool] = Field(
        description="Whether the task is agentic or not",
        default=None,
    )
    rules: List[RuleResponse] = Field(description="List of all the rules for the task.")
    metrics: Optional[List[MetricResponse]] = Field(
        description="List of all the metrics for the task.",
        default=None,
    )
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
class SearchTasksResponse(BaseModel):
    # Result of a task search: total match count plus the returned tasks.
    count: int = Field(
        description="The total number of tasks matching the parameters",
    )
    tasks: List[TaskResponse] = Field(
        description="List of tasks matching the search filters. Length is less than or equal to page_size parameter",
    )
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
class SearchRulesResponse(BaseModel):
    # Result of a rule search: total match count plus the returned rules.
    count: int = Field(
        description="The total number of rules matching the parameters",
    )
    rules: List[RuleResponse] = Field(
        description="List of rules matching the search filters. Length is less than or equal to page_size parameter",
    )
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
class FileUploadResult(BaseModel):
    # Outcome of a file upload; all five fields are required.
    id: str
    name: str
    type: str
    word_count: int
    success: bool
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
class ExternalDocument(BaseModel):
    # Document descriptor: id, name, type and the owner's id (all required).
    id: str
    name: str
    type: str
    owner_id: str
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class ChatDocumentContext(BaseModel):
    # One retrieved context chunk: an id, a sequence number and the text.
    id: str
    seq_num: int
    context: str
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
class ChatResponse(BaseModel):
    # Result of a chat call: identifiers, timestamp, retrieved context, the
    # LLM's answer, and rule results for both the prompt and the response.
    inference_id: str = Field(description="ID of the inference sent to the chat")
    conversation_id: str = Field(description="ID of the conversation session")
    timestamp: int = Field(description="Time the inference was made in unix milliseconds")
    retrieved_context: list[ChatDocumentContext] = Field(
        description="related sections of documents that were most relevant to the inference prompt. "
        "Formatted as a list of retrieved context chunks which include document name, seq num, and context.",
    )
    llm_response: str = Field(description="response from the LLM for the original user prompt")
    prompt_results: list[ExternalRuleResult] = Field(
        description="list of rule results for the user prompt",
    )
    response_results: list[ExternalRuleResult] = Field(
        description="list of rule results for the llm response",
    )
    model_name: str | None = Field(
        default=None,
        description="The model name and version used for this chat response (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
    )
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
class TokenUsageCount(BaseModel):
    # Token counters. The first three fields are the current names; the last
    # three are deprecated aliases (flagged via json_schema_extra) whose
    # descriptions point at their replacements.
    inference: int = Field(
        description="Number of inference tokens sent to Arthur.",
    )
    eval_prompt: int = Field(description="Number of Prompt tokens incurred by Arthur rules.")
    eval_completion: int = Field(description="Number of Completion tokens incurred by Arthur rules.")
    user_input: int = Field(
        json_schema_extra={"deprecated": True},
        description="Number of user input tokens sent to Arthur. This field is deprecated and will be removed in the future. Use inference instead.",
    )
    prompt: int = Field(
        json_schema_extra={"deprecated": True},
        description="Number of Prompt tokens incurred by Arthur rules. This field is deprecated and will be removed in the future. Use eval_prompt instead.",
    )
    completion: int = Field(
        json_schema_extra={"deprecated": True},
        description="Number of Completion tokens incurred by Arthur rules. This field is deprecated and will be removed in the future. Use eval_completion instead.",
    )
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
class TokenUsageResponse(BaseModel):
    # Token usage counts, optionally labelled with a rule type and/or task id.
    rule_type: str | None = None
    task_id: str | None = None
    count: TokenUsageCount
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
class ApiKeyResponse(BaseModel):
    # Response schema describing one API key: id, optional key value and
    # description, activity status, timestamps, optional message and roles.
    id: str = Field(description="ID of the key")
    key: str | None = Field(
        default=None,
        description="The generated GenAI Engine API key. The key is displayed on key creation request only.",
    )
    description: str | None = Field(
        default=None,
        description="Description of the API key",
    )
    is_active: bool = Field(description="Status of the key.")
    created_at: datetime = Field(description="Creation time of the key")
    deactivated_at: datetime | None = Field(
        default=None,
        description="Deactivation time of the key",
    )
    message: str | None = Field(default=None, description="Optional Message")
    roles: List[str] = Field(
        default=[],
        description="Roles of the API key",
    )
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
class UserResponse(BaseModel):
    # User record: id, email, optional first/last name and a list of roles.
    id: str
    email: str
    first_name: str | None = None
    last_name: str | None = None
    roles: List[AuthUserRole]
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
class ConversationBaseResponse(BaseModel):
    # Minimal conversation record: its id and last-update time.
    id: str
    updated_at: datetime
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
class ConversationResponse(ConversationBaseResponse):
    # Conversation record extended with its (required) list of inferences.
    inferences: List[ExternalInference]
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
class HealthResponse(BaseModel):
    # Health payload: a message and an optional build version string.
    message: str
    build_version: str | None = None
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
class ChatDefaultTaskResponse(BaseModel):
    # Carries a single required task id.
    task_id: str
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
class MetricResultResponse(BaseModel):
    # One metric result: type, optional JSON-serialized details, token and
    # latency figures, the span and metric it belongs to, and timestamps.
    id: str = Field(description="ID of the metric result")
    metric_type: MetricType = Field(description="Type of the metric")
    details: str | None = Field(
        default=None,
        description="JSON-serialized metric details",
    )
    prompt_tokens: int = Field(
        description="Number of prompt tokens used",
    )
    completion_tokens: int = Field(
        description="Number of completion tokens used",
    )
    latency_ms: int = Field(
        description="Latency in milliseconds",
    )
    span_id: str = Field(
        description="ID of the span this result belongs to",
    )
    metric_id: str = Field(
        description="ID of the metric that generated this result",
    )
    created_at: datetime = Field(
        description="Time the result was created",
    )
    updated_at: datetime = Field(
        description="Time the result was last updated",
    )
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
class TokenCountCostSchema(BaseModel):
    """Base schema for responses that include token count and cost information.

    These fields represent LLM token usage and associated costs.
    None values indicate data is not available.
    """

    # Token counts (integers), each defaulting to None.
    prompt_token_count: int | None = Field(
        description="Number of prompt tokens",
        default=None,
    )
    completion_token_count: int | None = Field(
        description="Number of completion tokens",
        default=None,
    )
    total_token_count: int | None = Field(
        description="Total number of tokens",
        default=None,
    )
    # Costs (floats, described as USD), each defaulting to None.
    prompt_token_cost: float | None = Field(
        description="Cost of prompt tokens in USD",
        default=None,
    )
    completion_token_cost: float | None = Field(
        description="Cost of completion tokens in USD",
        default=None,
    )
    total_token_cost: float | None = Field(
        description="Total cost in USD",
        default=None,
    )
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
class SpanWithMetricsResponse(TokenCountCostSchema):
    # A flat span record with its metric results, on top of the inherited
    # token count / cost fields.
    id: str
    trace_id: str
    span_id: str
    parent_span_id: str | None = None
    span_kind: str | None = None
    span_name: str | None = None
    start_time: datetime
    end_time: datetime
    task_id: str | None = None
    session_id: str | None = None
    status_code: str = Field(
        description="Status code for the span (Unset, Error, Ok)",
    )
    created_at: datetime
    updated_at: datetime
    raw_data: dict[str, Any]
    # OpenInference standard input/output fields (computed on-demand from raw_data)
    input_content: str | None = Field(
        default=None,
        description="Span input value from raw_data.attributes.input.value",
    )
    output_content: str | None = Field(
        default=None,
        description="Span output value from raw_data.attributes.output.value",
    )
    metric_results: List[MetricResultResponse] = Field(
        default=[],
        description="List of metric results for this span",
    )
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
class NestedSpanWithMetricsResponse(TokenCountCostSchema):
    """Nested span response with children for building span trees"""

    id: str
    trace_id: str
    span_id: str
    parent_span_id: str | None = None
    span_kind: str | None = None
    span_name: str | None = None
    start_time: datetime
    end_time: datetime
    task_id: str | None = None
    session_id: str | None = None
    status_code: str = Field(
        description="Status code for the span (Unset, Error, Ok)",
    )
    created_at: datetime
    updated_at: datetime
    raw_data: dict[str, Any]
    # OpenInference standard input/output fields (computed on-demand from raw_data)
    input_content: str | None = Field(
        default=None,
        description="Span input value from raw_data.attributes.input.value",
    )
    output_content: str | None = Field(
        default=None,
        description="Span output value from raw_data.attributes.output.value",
    )
    metric_results: List[MetricResultResponse] = Field(
        default=[],
        description="List of metric results for this span",
    )
    # Self-referential: each child is itself a NestedSpanWithMetricsResponse,
    # hence the quoted forward reference.
    children: List["NestedSpanWithMetricsResponse"] = Field(
        default=[],
        description="Child spans nested under this span",
    )
|
|
713
|
+
|
|
714
|
+
class AgenticAnnotationResponse(BaseModel):
    # Full annotation record for a trace, including the optional
    # continuous-eval fields (id, input variables, run status, cost).
    id: str = Field(description="ID of the annotation")
    annotation_type: AgenticAnnotationType = Field(description="Type of annotation")
    trace_id: str = Field(description="ID of the trace this annotation belongs to")
    continuous_eval_id: str | None = Field(
        description="ID of the continuous eval this annotation belongs to",
        default=None,
    )
    annotation_score: int | None = Field(
        description="Binary score for a positive or negative annotation.",
        default=None,
    )
    annotation_description: str | None = Field(
        description="Description of the annotation.",
        default=None,
    )
    input_variables: list[VariableTemplateValue] | None = Field(
        description="Input variables for the continuous eval",
        default=None,
    )
    run_status: ContinuousEvalRunStatus | None = Field(
        description="Status of the continuous eval run",
        default=None,
    )
    cost: float | None = Field(
        description="Cost of the continuous eval run",
        default=None,
    )
    created_at: datetime = Field(description="Time the annotation was created")
    updated_at: datetime = Field(description="Time the annotation was last updated")
|
|
726
|
+
|
|
727
|
+
class ListAgenticAnnotationsResponse(BaseModel):
    # Wrapper around a list of full annotation records.
    annotations: List[AgenticAnnotationResponse] = Field(
        description="List of annotations",
    )
|
|
729
|
+
|
|
730
|
+
class AgenticAnnotationMetadataResponse(BaseModel):
    # Slimmer annotation record: no description and no input variables.
    id: str = Field(description="ID of the annotation")
    annotation_type: AgenticAnnotationType = Field(description="Type of annotation")
    trace_id: str = Field(description="ID of the trace this annotation belongs to")
    continuous_eval_id: str | None = Field(
        description="ID of the continuous eval this annotation belongs to",
        default=None,
    )
    annotation_score: int | None = Field(
        description="Binary score for a positive or negative annotation.",
        default=None,
    )
    # NOTE(review): typed as a plain string here, whereas
    # AgenticAnnotationResponse.run_status uses ContinuousEvalRunStatus —
    # confirm whether the difference is intentional.
    run_status: str | None = Field(
        description="Status of the continuous eval run",
        default=None,
    )
    cost: float | None = Field(
        description="Cost of the continuous eval run",
        default=None,
    )
    created_at: datetime = Field(description="Time the annotation was created")
    updated_at: datetime = Field(description="Time the annotation was last updated")
|
|
740
|
+
|
|
741
|
+
class ListAgenticAnnotationsMetadataResponse(BaseModel):
    # Wrapper around a list of annotation-metadata records plus a total count.
    annotations: List[AgenticAnnotationMetadataResponse] = Field(
        description="List of annotations",
    )
    count: int = Field(
        description="Total number of annotations",
    )
|
|
744
|
+
|
|
745
|
+
class TraceResponse(TokenCountCostSchema):
    """Response model for a single trace containing nested spans"""

    trace_id: str = Field(description="ID of the trace")
    start_time: datetime = Field(description="Start time of the earliest span in this trace")
    end_time: datetime = Field(
        description="End time of the latest span in this trace",
    )
    input_content: str | None = Field(
        default=None,
        description="Root span input value from trace metadata",
    )
    output_content: str | None = Field(
        default=None,
        description="Root span output value from trace metadata",
    )
    root_spans: List[NestedSpanWithMetricsResponse] = Field(
        default=[],
        description="Root spans (spans with no parent) in this trace, with children nested",
    )
    annotations: list[AgenticAnnotationMetadataResponse] | None = Field(
        description="Annotations for this trace.",
        default=None,
    )
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
class QueryTracesWithMetricsResponse(BaseModel):
    """New response format that groups spans into traces with nested structure"""

    # NOTE(review): the description counts *spans* although the payload is a
    # list of traces — confirm which unit `count` actually reports.
    count: int = Field(description="The total number of spans matching the query parameters")
    traces: List[TraceResponse] = Field(
        description="List of traces containing nested spans matching the search filters",
    )
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
class QuerySpansResponse(BaseModel):
    # Result of a span query: total match count plus the returned spans.
    count: int = Field(description="The total number of spans matching the query parameters")
    spans: List[SpanWithMetricsResponse] = Field(
        description="List of spans with metrics matching the search filters",
    )
|