arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. arthur_common/aggregations/aggregator.py +73 -9
  2. arthur_common/aggregations/functions/agentic_aggregations.py +260 -85
  3. arthur_common/aggregations/functions/categorical_count.py +15 -15
  4. arthur_common/aggregations/functions/confusion_matrix.py +24 -26
  5. arthur_common/aggregations/functions/inference_count.py +5 -9
  6. arthur_common/aggregations/functions/inference_count_by_class.py +16 -27
  7. arthur_common/aggregations/functions/inference_null_count.py +10 -13
  8. arthur_common/aggregations/functions/mean_absolute_error.py +12 -18
  9. arthur_common/aggregations/functions/mean_squared_error.py +12 -18
  10. arthur_common/aggregations/functions/multiclass_confusion_matrix.py +13 -20
  11. arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +1 -1
  12. arthur_common/aggregations/functions/numeric_stats.py +13 -15
  13. arthur_common/aggregations/functions/numeric_sum.py +12 -15
  14. arthur_common/aggregations/functions/shield_aggregations.py +457 -215
  15. arthur_common/models/common_schemas.py +214 -0
  16. arthur_common/models/connectors.py +10 -2
  17. arthur_common/models/constants.py +24 -0
  18. arthur_common/models/datasets.py +0 -9
  19. arthur_common/models/enums.py +177 -0
  20. arthur_common/models/metric_schemas.py +63 -0
  21. arthur_common/models/metrics.py +2 -9
  22. arthur_common/models/request_schemas.py +870 -0
  23. arthur_common/models/response_schemas.py +785 -0
  24. arthur_common/models/schema_definitions.py +6 -1
  25. arthur_common/models/task_job_specs.py +3 -12
  26. arthur_common/tools/duckdb_data_loader.py +34 -2
  27. arthur_common/tools/duckdb_utils.py +3 -6
  28. arthur_common/tools/schema_inferer.py +3 -6
  29. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/METADATA +12 -4
  30. arthur_common-2.4.13.dist-info/RECORD +49 -0
  31. arthur_common/models/shield.py +0 -642
  32. arthur_common-2.1.58.dist-info/RECORD +0 -44
  33. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/WHEEL +0 -0
arthur_common/models/response_schemas.py (new file)
@@ -0,0 +1,785 @@
+ from datetime import datetime
+ from typing import Any, List, Optional, Union
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from arthur_common.models.common_schemas import (
+     AuthUserRole,
+     VariableTemplateValue,
+     ExamplesConfig,
+     KeywordsConfig,
+     PIIConfig,
+     RegexConfig,
+     ToxicityConfig,
+ )
+ from arthur_common.models.enums import (
+     AgenticAnnotationType,
+     ContinuousEvalRunStatus,
+     InferenceFeedbackTarget,
+     MetricType,
+     PIIEntityTypes,
+     RuleResultEnum,
+     RuleScope,
+     RuleType,
+     ToxicityViolationType,
+ )
+
+
+ class HTTPError(BaseModel):
+     detail: str
+
+     model_config = ConfigDict(
+         json_schema_extra={
+             "example": {"detail": "HTTPException raised."},
+         },
+     )
+
+
+ class RuleResponse(BaseModel):
+     id: str = Field(description="ID of the Rule")
+     name: str = Field(description="Name of the Rule")
+     type: RuleType = Field(description="Type of Rule")
+     apply_to_prompt: bool = Field(description="Rule applies to prompt")
+     apply_to_response: bool = Field(description="Rule applies to response")
+     enabled: Optional[bool] = Field(
+         description="Rule is enabled for the task",
+         default=None,
+     )
+     scope: RuleScope = Field(
+         description="Scope of the rule. The rule can be set at default level or task level.",
+     )
+     # UNIX millis format
+     created_at: int = Field(
+         description="Time the rule was created in unix milliseconds",
+     )
+     updated_at: int = Field(
+         description="Time the rule was updated in unix milliseconds",
+     )
+     config: (
+         KeywordsConfig
+         | RegexConfig
+         | ExamplesConfig
+         | ToxicityConfig
+         | PIIConfig
+         | None
+     ) = Field(description="Config of the rule", default=None)
+
+
+ class HallucinationClaimResponse(BaseModel):
+     claim: str
+     valid: bool
+     reason: str
+     order_number: Optional[int] = Field(
+         default=-1,
+         description="This field is a helper for ordering the claims",
+     )
+
+
+ class PIIEntitySpanResponse(BaseModel):
+     entity: PIIEntityTypes
+     span: str = Field(
+         description="The subtext within the input string that was identified as PII.",
+     )
+     # Only optional to keep reverse compatibility with old inferences
+     confidence: Optional[float] = Field(
+         description="Float value representing the confidence score of a given PII identification.",
+         default=None,
+     )
+
+
+ class KeywordSpanResponse(BaseModel):
+     keyword: str = Field(
+         description="The keyword from the rule that matched within the input string.",
+     )
+
+
+ class RegexSpanResponse(BaseModel):
+     matching_text: str = Field(
+         description="The subtext within the input string that matched the regex rule.",
+     )
+     pattern: Optional[str] = Field(
+         description="Pattern that yielded the match.",
+         default=None,
+     )
+
+
+ class BaseDetailsResponse(BaseModel):
+     score: Optional[bool] = None
+     message: Optional[str] = None
+
+
+ class HallucinationDetailsResponse(BaseDetailsResponse):
+     claims: list[HallucinationClaimResponse]
+
+
+ class PIIDetailsResponse(BaseDetailsResponse):
+     pii_entities: list[PIIEntitySpanResponse]
+
+
+ class ToxicityDetailsResponse(BaseDetailsResponse):
+     toxicity_score: Optional[float] = None
+     toxicity_violation_type: ToxicityViolationType
+
+     model_config = ConfigDict(extra="forbid")
+
+
+ class KeywordDetailsResponse(BaseDetailsResponse):
+     keyword_matches: list[KeywordSpanResponse] = Field(
+         [],
+         description="Each keyword in this list corresponds to a keyword that was both configured in the rule that was "
+         "run and found in the input text.",
+     )
+
+     model_config = ConfigDict(extra="forbid")
+
+
+ class RegexDetailsResponse(BaseDetailsResponse):
+     regex_matches: list[RegexSpanResponse] = Field(
+         [],
+         description="Each string in this list corresponds to a matching span from the input text that matches the "
+         "configured regex rule.",
+     )
+
+     model_config = ConfigDict(extra="forbid")
+
+
+ class ExternalRuleResult(BaseModel):
+     id: str = Field(description="ID of the rule")
+     name: str = Field(description="Name of the rule")
+     rule_type: RuleType = Field(description="Type of the rule")
+     scope: RuleScope = Field(
+         description="Scope of the rule. The rule can be set at default level or task level.",
+     )
+     result: RuleResultEnum = Field(description="Result of the rule")
+     latency_ms: int = Field(description="Duration in milliseconds of rule execution")
+
+     # The superclass (BaseDetailsResponse) must come last in this ordering, otherwise the fastapi serializer will pick
+     # it for the less specific types and you'll waste time figuring out why type1 is being serialized as type2
+     # https://github.com/tiangolo/fastapi/issues/2783#issuecomment-776662347
+     details: Optional[
+         Union[
+             KeywordDetailsResponse,
+             RegexDetailsResponse,
+             HallucinationDetailsResponse,
+             PIIDetailsResponse,
+             ToxicityDetailsResponse,
+             BaseDetailsResponse,
+         ]
+     ] = Field(description="Details of the rule output", default=None)
+
+     model_config = ConfigDict(
+         json_schema_extra={
+             "example": {
+                 "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
+                 "name": "PII Rule",
+                 "result": "Pass",
+             },
+         },
+     )
+
+
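The ordering comment in ExternalRuleResult above deserves an illustration. A minimal sketch of the pitfall, assuming plain Pydantic v2 and an explicit left-to-right union; the Base/Specific model names are hypothetical and not part of arthur_common:

from typing import Union

from pydantic import BaseModel, Field


class Base(BaseModel):
    score: bool | None = None


class Specific(Base):
    claims: list[str] = []


class BadOrder(BaseModel):
    # Superclass first: the left-to-right match stops at Base and the
    # extra "claims" key is silently dropped.
    details: Union[Base, Specific] = Field(union_mode="left_to_right")


class GoodOrder(BaseModel):
    # Most specific type first, superclass last, as in ExternalRuleResult.
    details: Union[Specific, Base] = Field(union_mode="left_to_right")


payload = {"details": {"score": True, "claims": ["supported by context"]}}
print(BadOrder.model_validate(payload).details)   # score=True (claims lost)
print(GoodOrder.model_validate(payload).details)  # score=True claims=['supported by context']
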
+ class ValidationResult(BaseModel):
+     inference_id: Optional[str] = Field(description="ID of the inference", default=None)
+     rule_results: Optional[List[ExternalRuleResult]] = Field(
+         description="List of rule results",
+         default=None,
+     )
+     user_id: Optional[str] = Field(
+         description="The user ID this prompt belongs to",
+         default=None,
+     )
+     model_name: Optional[str] = Field(
+         description="The model name and version used for this validation (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
+         default=None,
+     )
+     model_config = ConfigDict(
+         json_schema_extra={
+             "example": {
+                 "inference_id": "4dd1fae1-34b9-4aec-8abe-fe7bf12af31d",
+                 "rule_results": [
+                     {
+                         "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
+                         "name": "PII Check",
+                         "result": "Pass",
+                     },
+                     {
+                         "id": "946c4a44-b367-4229-84d4-1a8e461cb132",
+                         "name": "Sensitive Data Check",
+                         "result": "Pass",
+                     },
+                 ],
+             },
+         },
+     )
+
+
+ class ExternalInferencePrompt(BaseModel):
+     id: str
+     inference_id: str
+     result: RuleResultEnum
+     created_at: int
+     updated_at: int
+     message: str
+     prompt_rule_results: List[ExternalRuleResult]
+     tokens: int | None = None
+
+
+ class ExternalInferenceResponse(BaseModel):
+     id: str
+     inference_id: str
+     result: RuleResultEnum
+     created_at: int
+     updated_at: int
+     message: str
+     context: Optional[str] = None
+     response_rule_results: List[ExternalRuleResult]
+     tokens: int | None = None
+     model_name: Optional[str] = Field(
+         description="The model name and version used for this response (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
+         default=None,
+     )
+
+
+ class InferenceFeedbackResponse(BaseModel):
+     id: str
+     inference_id: str
+     target: InferenceFeedbackTarget
+     score: int
+     reason: Optional[str] = None
+     user_id: Optional[str] = None
+     created_at: datetime
+     updated_at: datetime
+
+
+ class QueryFeedbackResponse(BaseModel):
+     feedback: list[InferenceFeedbackResponse] = Field(
+         description="List of feedback items matching the search filters. Length is less than or equal to the page_size parameter",
+     )
+     page: int = Field(description="The current page number")
+     page_size: int = Field(description="The number of feedback items per page")
+     total_pages: int = Field(description="The total number of pages")
+     total_count: int = Field(
+         description="The total number of feedback items matching the query parameters",
+     )
+
+     model_config = ConfigDict(
+         json_schema_extra={
+             "example": {
+                 "feedback": [
+                     {
+                         "id": "90f18c69-d793-4913-9bde-a0c7f3643de0",
+                         "inference_id": "81437d71-9557-4611-981b-9283d1c98643",
+                         "target": "context",
+                         "score": 0,
+                         "reason": "good reason",
+                         "user_id": "user_1",
+                         "created_at": "2024-06-06T06:37:46.123-04:00",
+                         "updated_at": "2024-06-06T06:37:46.123-04:00",
+                     },
+                     {
+                         "id": "248381c2-543b-4de0-98cd-d7511fee6241",
+                         "inference_id": "bcbc7ca0-4cfc-4f67-9cf8-26cb2291ba33",
+                         "target": "response_results",
+                         "score": 1,
+                         "reason": "some reason",
+                         "user_id": "user_2",
+                         "created_at": "2023-05-05T05:26:35.987-04:00",
+                         "updated_at": "2023-05-05T05:26:35.987-04:00",
+                     },
+                 ],
+                 "page": 1,
+                 "page_size": 10,
+                 "total_pages": 1,
+                 "total_count": 2,
+             },
+         },
+     )
+
+
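The pagination fields above are tied together by simple arithmetic: total_pages is the ceiling of total_count over page_size. A small consumer-side sketch (the helper name is hypothetical, not part of the package):

import math


def expected_total_pages(total_count: int, page_size: int) -> int:
    # Ceiling division; a non-positive page_size would mean no paging at all.
    return math.ceil(total_count / page_size) if page_size > 0 else 0


# Matches the example payload above: 2 feedback items at 10 per page -> 1 page.
assert expected_total_pages(total_count=2, page_size=10) == 1
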
+ class ExternalInference(BaseModel):
+     id: str
+     result: RuleResultEnum
+     created_at: int
+     updated_at: int
+     task_id: Optional[str] = None
+     task_name: str | None = None
+     conversation_id: Optional[str] = None
+     inference_prompt: ExternalInferencePrompt
+     inference_response: Optional[ExternalInferenceResponse] = None
+     inference_feedback: List[InferenceFeedbackResponse]
+     user_id: str | None = None
+     model_name: Optional[str] = Field(
+         description="The model name and version used for this inference (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
+         default=None,
+     )
+
+
+ class QueryInferencesResponse(BaseModel):
+     count: int = Field(
+         description="The total number of inferences matching the query parameters",
+     )
+     inferences: list[ExternalInference] = Field(
+         description="List of inferences matching the search filters. Length is less than or equal to the page_size parameter",
+     )
+
+     model_config = ConfigDict(
+         json_schema_extra={
+             "example": {
+                 "count": 1,
+                 "inferences": [
+                     {
+                         "id": "957df309-c907-4b77-abe5-15dd00c081f7",
+                         "result": "Pass",
+                         "created_at": 1723204737120,
+                         "updated_at": 1723204787050,
+                         "task_id": "957df309-c907-4b77-abe5-15dd00c081f8",
+                         "task_name": "My task name",
+                         "conversation_id": "957df309-c907-4b77-abe5-15dd00c08112",
+                         "inference_prompt": {
+                             "id": "834f7ebd-cd6b-4691-9473-8bc350f8922c",
+                             "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
+                             "result": "Pass",
+                             "created_at": 1723204737121,
+                             "updated_at": 1723204737121,
+                             "message": "How many stars are in the solar system?",
+                             "prompt_rule_results": [
+                                 {
+                                     "id": "bc599a56-2e31-4cb7-910d-9e5ed6455db2",
+                                     "name": "My_PII_Rule",
+                                     "rule_type": "PIIDataRule",
+                                     "scope": "default",
+                                     "result": "Pass",
+                                     "latency_ms": 73,
+                                     "details": None,
+                                 },
+                             ],
+                             "tokens": 100,
+                         },
+                         "inference_response": {
+                             "id": "ec765a75-1479-4938-8e1c-6334b7deb8ce",
+                             "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
+                             "result": "Pass",
+                             "created_at": 1723204786599,
+                             "updated_at": 1723204786599,
+                             "message": "There is one star in solar system.",
+                             "context": "Solar system contains one star.",
+                             "response_rule_results": [
+                                 {
+                                     "id": "a45267c5-96d9-4de2-a871-debf2c8fdb86",
+                                     "name": "My_another_PII_Rule",
+                                     "rule_type": "PIIDataRule",
+                                     "scope": "default",
+                                     "result": "Pass",
+                                     "latency_ms": 107,
+                                     "details": None,
+                                 },
+                                 {
+                                     "id": "92b7b46e-eaf2-4226-82d4-be12ceb3e4b7",
+                                     "name": "My_Hallucination_Rule",
+                                     "rule_type": "ModelHallucinationRuleV2",
+                                     "scope": "default",
+                                     "result": "Pass",
+                                     "latency_ms": 700,
+                                     "details": {
+                                         "score": True,
+                                         "message": "All claims were supported by the context!",
+                                         "claims": [
+                                             {
+                                                 "claim": "There is one star in solar system.",
+                                                 "valid": True,
+                                                 "reason": "No hallucination detected!",
+                                                 "order_number": 0,
+                                             },
+                                         ],
+                                         "pii_results": [],
+                                         "pii_entities": [],
+                                         "toxicity_score": None,
+                                     },
+                                 },
+                             ],
+                             "tokens": 100,
+                         },
+                         "inference_feedback": [
+                             {
+                                 "id": "0d602e5c-4ae6-4fc9-a610-68a1d8928ad7",
+                                 "inference_id": "957df309-c907-4b77-abe5-15dd00c081f7",
+                                 "target": "context",
+                                 "score": 100,
+                                 "reason": "Perfect answer.",
+                                 "user_id": "957df309-2137-4b77-abe5-15dd00c081f8",
+                                 "created_at": "2024-08-09T12:08:34.847381",
+                                 "updated_at": "2024-08-09T12:08:34.847386",
+                             },
+                         ],
+                         "user_id": "957df309-2137-4b77-abe5-15dd00c081f8",
+                     },
+                 ],
+             },
+         },
+     )
+
+
+ class MetricResponse(BaseModel):
+     id: str = Field(description="ID of the Metric")
+     name: str = Field(description="Name of the Metric")
+     type: MetricType = Field(description="Type of the Metric")
+     metric_metadata: str = Field(description="Metadata of the Metric")
+     config: Optional[str] = Field(
+         description="JSON-serialized configuration for the Metric",
+         default=None,
+     )
+     created_at: datetime = Field(
+         description="Time the Metric was created",
+     )
+     updated_at: datetime = Field(
+         description="Time the Metric was last updated",
+     )
+     enabled: Optional[bool] = Field(
+         description="Whether the Metric is enabled",
+         default=None,
+     )
+
+
+ class TaskResponse(BaseModel):
+     id: str = Field(description="ID of the task")
+     name: str = Field(description="Name of the task")
+     created_at: int = Field(
+         description="Time the task was created in unix milliseconds",
+     )
+     updated_at: int = Field(
+         description="Time the task was updated in unix milliseconds",
+     )
+     is_agentic: Optional[bool] = Field(
+         description="Whether the task is agentic or not",
+         default=None,
+     )
+     rules: List[RuleResponse] = Field(description="List of all the rules for the task.")
+     metrics: Optional[List[MetricResponse]] = Field(
+         description="List of all the metrics for the task.",
+         default=None,
+     )
+
+
+ class SearchTasksResponse(BaseModel):
+     count: int = Field(description="The total number of tasks matching the parameters")
+     tasks: list[TaskResponse] = Field(
+         description="List of tasks matching the search filters. Length is less than or equal to the page_size parameter",
+     )
+
+
+ class SearchRulesResponse(BaseModel):
+     count: int = Field(description="The total number of rules matching the parameters")
+     rules: list[RuleResponse] = Field(
+         description="List of rules matching the search filters. Length is less than or equal to the page_size parameter",
+     )
+
+
+ class FileUploadResult(BaseModel):
+     id: str
+     name: str
+     type: str
+     word_count: int
+     success: bool
+
+
+ class ExternalDocument(BaseModel):
+     id: str
+     name: str
+     type: str
+     owner_id: str
+
+
+ class ChatDocumentContext(BaseModel):
+     id: str
+     seq_num: int
+     context: str
+
+
+ class ChatResponse(BaseModel):
+     inference_id: str = Field(description="ID of the inference sent to the chat")
+     conversation_id: str = Field(description="ID of the conversation session")
+     timestamp: int = Field(
+         description="Time the inference was made in unix milliseconds",
+     )
+     retrieved_context: List[ChatDocumentContext] = Field(
+         description="Related sections of documents that were most relevant to the inference prompt. "
+         "Formatted as a list of retrieved context chunks which include document name, seq num, and context.",
+     )
+     llm_response: str = Field(
+         description="Response from the LLM for the original user prompt",
+     )
+     prompt_results: List[ExternalRuleResult] = Field(
+         description="List of rule results for the user prompt",
+     )
+     response_results: List[ExternalRuleResult] = Field(
+         description="List of rule results for the LLM response",
+     )
+     model_name: Optional[str] = Field(
+         description="The model name and version used for this chat response (e.g., 'gpt-4', 'gpt-3.5-turbo', 'claude-3-opus', 'gemini-pro').",
+         default=None,
+     )
+
+
+ class TokenUsageCount(BaseModel):
+     inference: int = Field(description="Number of inference tokens sent to Arthur.")
+     eval_prompt: int = Field(
+         description="Number of Prompt tokens incurred by Arthur rules.",
+     )
+     eval_completion: int = Field(
+         description="Number of Completion tokens incurred by Arthur rules.",
+     )
+     user_input: int = Field(
+         description="Number of user input tokens sent to Arthur. This field is deprecated and will be removed in the future. Use inference instead.",
+         json_schema_extra={"deprecated": True},
+     )
+     prompt: int = Field(
+         description="Number of Prompt tokens incurred by Arthur rules. This field is deprecated and will be removed in the future. Use eval_prompt instead.",
+         json_schema_extra={"deprecated": True},
+     )
+     completion: int = Field(
+         description="Number of Completion tokens incurred by Arthur rules. This field is deprecated and will be removed in the future. Use eval_completion instead.",
+         json_schema_extra={"deprecated": True},
+     )
+
+
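The three deprecated fields above mirror their replacements (user_input -> inference, prompt -> eval_prompt, completion -> eval_completion). A hypothetical consumer-side helper, not part of the package, that normalizes a payload so callers only ever read the new names:

DEPRECATED_ALIASES = {
    "user_input": "inference",
    "prompt": "eval_prompt",
    "completion": "eval_completion",
}


def normalized_usage(payload: dict[str, int]) -> TokenUsageCount:
    data = dict(payload)
    for old, new in DEPRECATED_ALIASES.items():
        # The deprecated keys mirror their replacements; fill both directions
        # so the model (which requires all six fields) validates either way.
        if new not in data and old in data:
            data[new] = data[old]
        if old not in data and new in data:
            data[old] = data[new]
    return TokenUsageCount(**data)


usage = normalized_usage({"user_input": 120, "prompt": 45, "completion": 30})
assert usage.inference == 120 and usage.eval_prompt == 45
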
+ class TokenUsageResponse(BaseModel):
+     rule_type: Optional[str] = None
+     task_id: Optional[str] = None
+     count: TokenUsageCount
+
+
+ class ApiKeyResponse(BaseModel):
+     id: str = Field(description="ID of the key")
+     key: Optional[str] = Field(
+         description="The generated GenAI Engine API key. The key is displayed on key creation request only.",
+         default=None,
+     )
+     description: Optional[str] = Field(
+         description="Description of the API key",
+         default=None,
+     )
+     is_active: bool = Field(description="Status of the key.")
+     created_at: datetime = Field(description="Creation time of the key")
+     deactivated_at: Optional[datetime] = Field(
+         description="Deactivation time of the key",
+         default=None,
+     )
+     message: Optional[str] = Field(description="Optional Message", default=None)
+     roles: list[str] = Field(
+         description="Roles of the API key",
+         default=[],
+     )
+
+
+ class UserResponse(BaseModel):
+     id: str
+     email: str
+     first_name: Optional[str] = None
+     last_name: Optional[str] = None
+     roles: list[AuthUserRole]
+
+
+ class ConversationBaseResponse(BaseModel):
+     id: str
+     updated_at: datetime
+
+
+ class ConversationResponse(ConversationBaseResponse):
+     inferences: list[ExternalInference]
+
+
+ class HealthResponse(BaseModel):
+     message: str
+     build_version: Optional[str] = None
+
+
+ class ChatDefaultTaskResponse(BaseModel):
+     task_id: str
+
+
+ class MetricResultResponse(BaseModel):
+     id: str = Field(description="ID of the metric result")
+     metric_type: MetricType = Field(description="Type of the metric")
+     details: Optional[str] = Field(
+         description="JSON-serialized metric details",
+         default=None,
+     )
+     prompt_tokens: int = Field(description="Number of prompt tokens used")
+     completion_tokens: int = Field(description="Number of completion tokens used")
+     latency_ms: int = Field(description="Latency in milliseconds")
+     span_id: str = Field(description="ID of the span this result belongs to")
+     metric_id: str = Field(description="ID of the metric that generated this result")
+     created_at: datetime = Field(description="Time the result was created")
+     updated_at: datetime = Field(description="Time the result was last updated")
+
+
+ class TokenCountCostSchema(BaseModel):
+     """Base schema for responses that include token count and cost information.
+
+     These fields represent LLM token usage and associated costs.
+     None values indicate data is not available.
+     """
+
+     prompt_token_count: Optional[int] = Field(
+         default=None,
+         description="Number of prompt tokens",
+     )
+     completion_token_count: Optional[int] = Field(
+         default=None,
+         description="Number of completion tokens",
+     )
+     total_token_count: Optional[int] = Field(
+         default=None,
+         description="Total number of tokens",
+     )
+     prompt_token_cost: Optional[float] = Field(
+         default=None,
+         description="Cost of prompt tokens in USD",
+     )
+     completion_token_cost: Optional[float] = Field(
+         default=None,
+         description="Cost of completion tokens in USD",
+     )
+     total_token_cost: Optional[float] = Field(
+         default=None,
+         description="Total cost in USD",
+     )
+
+
+ class SpanWithMetricsResponse(TokenCountCostSchema):
+     id: str
+     trace_id: str
+     span_id: str
+     parent_span_id: Optional[str] = None
+     span_kind: Optional[str] = None
+     span_name: Optional[str] = None
+     start_time: datetime
+     end_time: datetime
+     task_id: Optional[str] = None
+     session_id: Optional[str] = None
+     status_code: str = Field(description="Status code for the span (Unset, Error, Ok)")
+     created_at: datetime
+     updated_at: datetime
+     raw_data: dict[str, Any]
+     # OpenInference standard input/output fields (computed on demand from raw_data)
+     input_content: Optional[str] = Field(
+         None,
+         description="Span input value from raw_data.attributes.input.value",
+     )
+     output_content: Optional[str] = Field(
+         None,
+         description="Span output value from raw_data.attributes.output.value",
+     )
+     metric_results: list[MetricResultResponse] = Field(
+         description="List of metric results for this span",
+         default=[],
+     )
+
+
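The input_content/output_content comments describe an on-demand lookup into raw_data. A sketch of that lookup, assuming the nested {"attributes": {"input": {"value": ...}}} layout the field descriptions imply (an assumption; the stored layout could instead use flat dotted OpenInference keys):

def content_from_raw(raw_data: dict, direction: str) -> str | None:
    # direction is "input" or "output"; missing levels simply yield None.
    value = raw_data.get("attributes", {}).get(direction, {}).get("value")
    return str(value) if value is not None else None


raw = {"attributes": {"input": {"value": "How many stars are in the solar system?"}}}
print(content_from_raw(raw, "input"))   # the prompt text
print(content_from_raw(raw, "output"))  # None -- not recorded on this span
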
+ class NestedSpanWithMetricsResponse(TokenCountCostSchema):
+     """Nested span response with children for building span trees"""
+
+     id: str
+     trace_id: str
+     span_id: str
+     parent_span_id: Optional[str] = None
+     span_kind: Optional[str] = None
+     span_name: Optional[str] = None
+     start_time: datetime
+     end_time: datetime
+     task_id: Optional[str] = None
+     session_id: Optional[str] = None
+     status_code: str = Field(description="Status code for the span (Unset, Error, Ok)")
+     created_at: datetime
+     updated_at: datetime
+     raw_data: dict[str, Any]
+     # OpenInference standard input/output fields (computed on demand from raw_data)
+     input_content: Optional[str] = Field(
+         None,
+         description="Span input value from raw_data.attributes.input.value",
+     )
+     output_content: Optional[str] = Field(
+         None,
+         description="Span output value from raw_data.attributes.output.value",
+     )
+     metric_results: list[MetricResultResponse] = Field(
+         description="List of metric results for this span",
+         default=[],
+     )
+     children: list["NestedSpanWithMetricsResponse"] = Field(
+         description="Child spans nested under this span",
+         default=[],
+     )
+
+
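NestedSpanWithMetricsResponse is the recursive shape that QueryTracesWithMetricsResponse (below) returns. One plausible way to fold a flat span list into that tree, keyed on parent_span_id (a hypothetical helper operating on plain dicts, not part of the package):

def build_span_tree(spans: list[dict]) -> list[dict]:
    # Index every span by its span_id and give each an empty children list.
    nodes = {s["span_id"]: {**s, "children": []} for s in spans}
    roots: list[dict] = []
    for node in nodes.values():
        parent = nodes.get(node.get("parent_span_id"))
        if parent is not None:
            parent["children"].append(node)
        else:
            roots.append(node)  # no parent in this trace -> root span
    return roots


flat = [
    {"span_id": "a", "parent_span_id": None, "span_name": "agent"},
    {"span_id": "b", "parent_span_id": "a", "span_name": "llm_call"},
]
assert build_span_tree(flat)[0]["children"][0]["span_name"] == "llm_call"
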
+ class AgenticAnnotationResponse(BaseModel):
+     id: str = Field(description="ID of the annotation")
+     annotation_type: AgenticAnnotationType = Field(description="Type of annotation")
+     trace_id: str = Field(description="ID of the trace this annotation belongs to")
+     continuous_eval_id: Optional[str] = Field(
+         default=None,
+         description="ID of the continuous eval this annotation belongs to",
+     )
+     annotation_score: Optional[int] = Field(
+         default=None,
+         description="Binary score for a positive or negative annotation.",
+     )
+     annotation_description: Optional[str] = Field(
+         default=None,
+         description="Description of the annotation.",
+     )
+     input_variables: Optional[List[VariableTemplateValue]] = Field(
+         default=None,
+         description="Input variables for the continuous eval",
+     )
+     run_status: Optional[ContinuousEvalRunStatus] = Field(
+         default=None,
+         description="Status of the continuous eval run",
+     )
+     cost: Optional[float] = Field(
+         default=None,
+         description="Cost of the continuous eval run",
+     )
+     created_at: datetime = Field(description="Time the annotation was created")
+     updated_at: datetime = Field(description="Time the annotation was last updated")
+
+
+ class ListAgenticAnnotationsResponse(BaseModel):
+     annotations: list[AgenticAnnotationResponse] = Field(
+         description="List of annotations",
+     )
+
+
+ class AgenticAnnotationMetadataResponse(BaseModel):
+     id: str = Field(description="ID of the annotation")
+     annotation_type: AgenticAnnotationType = Field(description="Type of annotation")
+     trace_id: str = Field(description="ID of the trace this annotation belongs to")
+     continuous_eval_id: Optional[str] = Field(
+         default=None,
+         description="ID of the continuous eval this annotation belongs to",
+     )
+     annotation_score: Optional[int] = Field(
+         default=None,
+         description="Binary score for a positive or negative annotation.",
+     )
+     run_status: Optional[str] = Field(
+         default=None,
+         description="Status of the continuous eval run",
+     )
+     cost: Optional[float] = Field(
+         default=None,
+         description="Cost of the continuous eval run",
+     )
+     created_at: datetime = Field(description="Time the annotation was created")
+     updated_at: datetime = Field(description="Time the annotation was last updated")
+
+
+ class ListAgenticAnnotationsMetadataResponse(BaseModel):
+     annotations: list[AgenticAnnotationMetadataResponse] = Field(
+         description="List of annotations",
+     )
+     count: int = Field(description="Total number of annotations")
+
+
+ class TraceResponse(TokenCountCostSchema):
+     """Response model for a single trace containing nested spans"""
+
+     trace_id: str = Field(description="ID of the trace")
+     start_time: datetime = Field(
+         description="Start time of the earliest span in this trace",
+     )
+     end_time: datetime = Field(description="End time of the latest span in this trace")
+     input_content: Optional[str] = Field(
+         None,
+         description="Root span input value from trace metadata",
+     )
+     output_content: Optional[str] = Field(
+         None,
+         description="Root span output value from trace metadata",
+     )
+     root_spans: list[NestedSpanWithMetricsResponse] = Field(
+         description="Root spans (spans with no parent) in this trace, with children nested",
+         default=[],
+     )
+     annotations: Optional[List[AgenticAnnotationMetadataResponse]] = Field(
+         default=None,
+         description="Annotations for this trace.",
+     )
+
+
+ class QueryTracesWithMetricsResponse(BaseModel):
+     """New response format that groups spans into traces with nested structure"""
+
+     count: int = Field(
+         description="The total number of spans matching the query parameters",
+     )
+     traces: list[TraceResponse] = Field(
+         description="List of traces containing nested spans matching the search filters",
+     )
+
+
+ class QuerySpansResponse(BaseModel):
+     count: int = Field(
+         description="The total number of spans matching the query parameters",
+     )
+     spans: list[SpanWithMetricsResponse] = Field(
+         description="List of spans with metrics matching the search filters",
+     )