arthur-common 2.1.68__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arthur-common might be problematic. Click here for more details.
- {arthur_common-2.1.68 → arthur_common-2.2.0}/PKG-INFO +1 -1
- {arthur_common-2.1.68 → arthur_common-2.2.0}/pyproject.toml +1 -1
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/enums.py +11 -3
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/request_schemas.py +263 -8
- {arthur_common-2.1.68 → arthur_common-2.2.0}/README.md +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/aggregator.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/README.md +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/agentic_aggregations.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/categorical_count.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/confusion_matrix.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/inference_count.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/inference_count_by_class.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/inference_null_count.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/mean_absolute_error.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/mean_squared_error.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/multiclass_confusion_matrix.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/numeric_stats.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/numeric_sum.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/py.typed +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/shield_aggregations.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/py.typed +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/config/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/config/config.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/config/settings.yaml +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/common_schemas.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/connectors.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/constants.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/datasets.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/metric_schemas.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/metrics.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/py.typed +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/response_schemas.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/schema_definitions.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/models/task_job_specs.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/py.typed +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/__init__.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/aggregation_analyzer.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/aggregation_loader.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/duckdb_data_loader.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/duckdb_utils.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/functions.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/py.typed +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/schema_inferer.py +0 -0
- {arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/time_utils.py +0 -0
|
@@ -119,9 +119,9 @@ class TokenUsageScope(BaseEnum):
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
class ToolClassEnum(IntEnum):
    """Integer-coded classification of a tool evaluation result.

    Members are plain integers (``IntEnum``), so they compare equal to
    their numeric codes: 0 (INCORRECT), 1 (CORRECT) and 2 (NA).
    """

    INCORRECT = 0
    CORRECT = 1
    NA = 2

    def __str__(self) -> str:
        """Return the member's numeric code as a string, e.g. ``"1"``."""
        numeric_code = self.value
        return str(numeric_code)
|
|
@@ -147,3 +147,11 @@ class UserPermissionResource(BaseEnum):
|
|
|
147
147
|
RESPONSES = "responses"
|
|
148
148
|
RULES = "rules"
|
|
149
149
|
TASKS = "tasks"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class ComparisonOperatorEnum(BaseEnum):
    """Comparison operators, each identified by a short string code."""

    # Equality.
    EQUAL = "eq"

    # Lower-bound comparisons (strict, then inclusive).
    GREATER_THAN = "gt"
    GREATER_THAN_OR_EQUAL = "gte"

    # Upper-bound comparisons (strict, then inclusive).
    LESS_THAN = "lt"
    LESS_THAN_OR_EQUAL = "lte"
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import Any, Dict, List, Optional, Self, Type
|
|
2
|
+
from typing import Any, Dict, List, Optional, Self, Type
|
|
3
3
|
|
|
4
4
|
from fastapi import HTTPException
|
|
5
5
|
from openinference.semconv.trace import OpenInferenceSpanKindValues
|
|
6
|
-
from pydantic import
|
|
6
|
+
from pydantic import (
|
|
7
|
+
BaseModel,
|
|
8
|
+
ConfigDict,
|
|
9
|
+
Field,
|
|
10
|
+
ValidationInfo,
|
|
11
|
+
field_validator,
|
|
12
|
+
model_validator,
|
|
13
|
+
)
|
|
7
14
|
|
|
8
15
|
from arthur_common.models.common_schemas import (
|
|
9
16
|
ExamplesConfig,
|
|
@@ -25,6 +32,7 @@ from arthur_common.models.enums import (
|
|
|
25
32
|
PIIEntityTypes,
|
|
26
33
|
RuleScope,
|
|
27
34
|
RuleType,
|
|
35
|
+
ToolClassEnum,
|
|
28
36
|
)
|
|
29
37
|
from arthur_common.models.metric_schemas import RelevanceMetricConfig
|
|
30
38
|
|
|
@@ -50,12 +58,12 @@ class NewRuleRequest(BaseModel):
|
|
|
50
58
|
examples=[False],
|
|
51
59
|
)
|
|
52
60
|
config: (
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
61
|
+
KeywordsConfig
|
|
62
|
+
| RegexConfig
|
|
63
|
+
| ExamplesConfig
|
|
64
|
+
| ToxicityConfig
|
|
65
|
+
| PIIConfig
|
|
66
|
+
| None
|
|
59
67
|
) = Field(description="Config of the rule", default=None)
|
|
60
68
|
|
|
61
69
|
model_config = ConfigDict(
|
|
@@ -554,3 +562,250 @@ class SpanQueryRequest(BaseModel):
|
|
|
554
562
|
f"Valid values: {', '.join(sorted(valid_span_kinds))}",
|
|
555
563
|
)
|
|
556
564
|
return value
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
class TraceQueryRequest(BaseModel):
    """Request schema for querying traces with comprehensive filtering.

    ``task_ids`` is the only required field. Every other field is an
    optional filter that defaults to ``None`` (not provided).

    The numeric filters come in families that share a prefix
    (``query_relevance``, ``response_relevance``, ``trace_duration``) and
    carry an operator suffix (``_eq``, ``_gt``, ``_gte``, ``_lt``,
    ``_lte``). Within one family:

    * ``_eq`` cannot be combined with any other operator,
    * ``_gt`` cannot be combined with ``_gte``,
    * ``_lt`` cannot be combined with ``_lte``.

    A filter counts as provided whenever it is not ``None``, so a value
    of ``0`` is a real filter and takes part in these checks.
    """

    # Required
    task_ids: list[str] = Field(
        ...,
        description="Task IDs to filter on. At least one is required.",
        min_length=1,
    )

    # Common optional filters
    trace_ids: Optional[list[str]] = Field(
        None,
        description="Trace IDs to filter on. Optional.",
    )
    start_time: Optional[datetime] = Field(
        None,
        description="Inclusive start date in ISO8601 string format. Use local time (not UTC).",
    )
    end_time: Optional[datetime] = Field(
        None,
        description="Exclusive end date in ISO8601 string format. Use local time (not UTC).",
    )

    # New trace-level filters
    tool_name: Optional[str] = Field(
        None,
        description="Return only results with this tool name.",
    )
    span_types: Optional[list[str]] = Field(
        None,
        description="Span types to filter on. Optional.",
    )

    # Query relevance filters
    query_relevance_eq: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Equal to this value.",
    )
    query_relevance_gt: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Greater than this value.",
    )
    query_relevance_gte: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Greater than or equal to this value.",
    )
    query_relevance_lt: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Less than this value.",
    )
    query_relevance_lte: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Less than or equal to this value.",
    )

    # Response relevance filters
    response_relevance_eq: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Equal to this value.",
    )
    response_relevance_gt: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Greater than this value.",
    )
    response_relevance_gte: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Greater than or equal to this value.",
    )
    response_relevance_lt: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Less than this value.",
    )
    response_relevance_lte: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Less than or equal to this value.",
    )

    # Tool classification filters
    tool_selection: Optional[ToolClassEnum] = Field(
        None,
        description="Tool selection evaluation result.",
    )
    tool_usage: Optional[ToolClassEnum] = Field(
        None,
        description="Tool usage evaluation result.",
    )

    # Trace duration filters
    trace_duration_eq: Optional[float] = Field(
        None,
        ge=0,
        description="Duration exactly equal to this value (seconds).",
    )
    trace_duration_gt: Optional[float] = Field(
        None,
        ge=0,
        description="Duration greater than this value (seconds).",
    )
    trace_duration_gte: Optional[float] = Field(
        None,
        ge=0,
        description="Duration greater than or equal to this value (seconds).",
    )
    trace_duration_lt: Optional[float] = Field(
        None,
        ge=0,
        description="Duration less than this value (seconds).",
    )
    trace_duration_lte: Optional[float] = Field(
        None,
        ge=0,
        description="Duration less than or equal to this value (seconds).",
    )

    @field_validator(
        "query_relevance_eq",
        "query_relevance_gt",
        "query_relevance_gte",
        "query_relevance_lt",
        "query_relevance_lte",
        "response_relevance_eq",
        "response_relevance_gt",
        "response_relevance_gte",
        "response_relevance_lt",
        "response_relevance_lte",
        mode="before",
    )
    @classmethod
    def validate_relevance_scores(
        cls,
        value: Any,
        info: ValidationInfo,
    ) -> Any:
        """Validate that relevance scores are between 0 and 1 (inclusive).

        Runs before pydantic's own parsing, so ``value`` is the raw input.

        Args:
            value: Raw input for the field; may be ``None``.
            info: Validation context, used for the field name in errors.

        Returns:
            The input unchanged.

        Raises:
            ValueError: If ``value`` is a number outside ``[0, 1]``.
        """
        # Only real numbers are range-checked here. Comparing other raw
        # inputs (e.g. the string "0.5") with a float raises TypeError,
        # which would escape instead of becoming a validation error.
        # Those inputs are left to pydantic's coercion and the ge/le
        # constraints declared on the field.
        if isinstance(value, (int, float)) and not (0.0 <= value <= 1.0):
            raise ValueError(
                f"{info.field_name} value must be between 0 and 1 (inclusive)",
            )
        return value

    @field_validator(
        "trace_duration_eq",
        "trace_duration_gt",
        "trace_duration_gte",
        "trace_duration_lt",
        "trace_duration_lte",
        mode="before",
    )
    @classmethod
    def validate_trace_duration(
        cls,
        value: Any,
        info: ValidationInfo,
    ) -> Any:
        """Validate that trace duration values are non-negative.

        Runs before pydantic's own parsing, so ``value`` is the raw input.

        Args:
            value: Raw input for the field; may be ``None``.
            info: Validation context, used for the field name in errors.

        Returns:
            The input unchanged.

        Raises:
            ValueError: If ``value`` is a negative number.
        """
        # Same guard as for relevance scores: non-numeric raw input is
        # left to pydantic's coercion and the ge=0 field constraint.
        if isinstance(value, (int, float)) and value < 0:
            raise ValueError(
                f"{info.field_name} value must be non-negative (greater than or equal to 0)",
            )
        return value

    @field_validator("tool_selection", "tool_usage", mode="before")
    @classmethod
    def validate_tool_classification(cls, value: Any) -> Optional[ToolClassEnum]:
        """Validate tool classification enum values.

        Args:
            value: ``None``, a ``ToolClassEnum`` member, or an integer code.

        Returns:
            ``None`` when no value was given, otherwise the matching
            ``ToolClassEnum`` member.

        Raises:
            ValueError: If ``value`` is an integer other than 0, 1 or 2,
                or is not an integer at all.
        """
        if value is None:
            return None
        # ToolClassEnum is an IntEnum, so its members are also ints and
        # must be recognised before the generic integer check.
        if isinstance(value, ToolClassEnum):
            return value
        if not isinstance(value, int):
            raise ValueError(
                "Tool classification must be an integer (0, 1, 2) or ToolClassEnum instance",
            )
        if value not in (0, 1, 2):
            raise ValueError(
                "Tool classification must be 0 (INCORRECT), "
                "1 (CORRECT), or 2 (NA)",
            )
        return ToolClassEnum(value)

    @field_validator("span_types")
    @classmethod
    def validate_span_types(cls, value: Optional[list[str]]) -> Optional[list[str]]:
        """Validate that all span_types are valid OpenInference span kinds.

        Args:
            value: Span type names to check; ``None`` or an empty list
                is passed through untouched.

        Returns:
            The input unchanged.

        Raises:
            ValueError: If any entry is not the value of an
                ``OpenInferenceSpanKindValues`` member.
        """
        if not value:
            return value

        # Get all valid span kind values
        valid_span_kinds = {kind.value for kind in OpenInferenceSpanKindValues}
        invalid_types = [st for st in value if st not in valid_span_kinds]

        if invalid_types:
            raise ValueError(
                f"Invalid span_types received: {invalid_types}. "
                f"Valid values: {', '.join(sorted(valid_span_kinds))}",
            )
        return value

    @model_validator(mode="after")
    def validate_filter_combinations(self) -> Self:
        """Validate that filter combinations are logically valid.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If, within one filter family, ``_eq`` is combined
                with another operator, ``_gt`` with ``_gte``, or ``_lt``
                with ``_lte``.
        """
        for prefix in ("query_relevance", "response_relevance", "trace_duration"):
            # A filter is provided when it is not None. Truthiness must
            # not be used: 0 is a legitimate value for every family.
            provided = {
                op
                for op in ("eq", "gt", "gte", "lt", "lte")
                if getattr(self, f"{prefix}_{op}") is not None
            }

            # Check mutually exclusive filters for each metric type
            if "eq" in provided and len(provided) > 1:
                raise ValueError(
                    f"{prefix}_eq cannot be combined with other {prefix} comparison operators",
                )

            # Check for incompatible operator combinations
            if {"gt", "gte"} <= provided:
                raise ValueError(f"Cannot combine {prefix}_gt with {prefix}_gte")
            if {"lt", "lte"} <= provided:
                raise ValueError(f"Cannot combine {prefix}_lt with {prefix}_lte")

        return self
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/README.md
RENAMED
|
File without changes
|
{arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/numeric_sum.py
RENAMED
|
File without changes
|
{arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/aggregations/functions/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arthur_common-2.1.68 → arthur_common-2.2.0}/src/arthur_common/tools/aggregation_analyzer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|