arthur-common 2.1.53__py3-none-any.whl → 2.1.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -5,9 +5,9 @@ from duckdb import DuckDBPyConnection
 
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -42,7 +42,7 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=CategoricalCountAggregationFunction.METRIC_NAME,
             description=CategoricalCountAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -6,9 +6,9 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -5,9 +5,9 @@ from duckdb import DuckDBPyConnection
 
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -42,7 +42,7 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=InferenceCountAggregationFunction.METRIC_NAME,
             description=InferenceCountAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -6,9 +6,9 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -46,7 +46,7 @@ class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction
         BaseReportedAggregation(
             metric_name=BinaryClassifierCountByClassAggregationFunction._metric_name(),
             description=BinaryClassifierCountByClassAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -172,7 +172,7 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
         BaseReportedAggregation(
             metric_name=BinaryClassifierCountThresholdClassAggregationFunction._metric_name(),
             description=BinaryClassifierCountThresholdClassAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -5,10 +5,10 @@ from duckdb import DuckDBPyConnection
 
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     Dimension,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -43,7 +43,7 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=InferenceNullCountAggregationFunction.METRIC_NAME,
             description=InferenceNullCountAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -6,9 +6,9 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -159,7 +159,8 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
         )
 
         count_metric = self.series_to_metric(
-            self.ABSOLUTE_ERROR_COUNT_METRIC_NAME, count_series
+            self.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
+            count_series,
         )
         absolute_error_metric = self.series_to_metric(
             self.ABSOLUTE_ERROR_SUM_METRIC_NAME,
@@ -6,9 +6,9 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -159,7 +159,8 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
         )
 
         count_metric = self.series_to_metric(
-            self.SQUARED_ERROR_COUNT_METRIC_NAME, count_series
+            self.SQUARED_ERROR_COUNT_METRIC_NAME,
+            count_series,
         )
         absolute_error_metric = self.series_to_metric(
             self.SQUARED_ERROR_SUM_METRIC_NAME,
@@ -6,9 +6,9 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -8,9 +8,9 @@ from arthur_common.aggregations.functions.inference_count_by_class import (
 )
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -57,7 +57,7 @@ class MulticlassClassifierCountByClassAggregationFunction(
         BaseReportedAggregation(
             metric_name=MulticlassClassifierCountByClassAggregationFunction._metric_name(),
             description=MulticlassClassifierCountByClassAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -5,9 +5,9 @@ from duckdb import DuckDBPyConnection
 
 from arthur_common.aggregations.aggregator import SketchAggregationFunction
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     SketchMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -44,7 +44,7 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=NumericSketchAggregationFunction.METRIC_NAME,
             description=NumericSketchAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -5,10 +5,10 @@ from duckdb import DuckDBPyConnection
 
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     Dimension,
     NumericMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
@@ -43,7 +43,7 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=NumericSumAggregationFunction.METRIC_NAME,
             description=NumericSumAggregationFunction.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -11,10 +11,10 @@ from arthur_common.aggregations.aggregator import (
 )
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import (
+    BaseReportedAggregation,
     DatasetReference,
     NumericMetric,
     SketchMetric,
-    BaseReportedAggregation,
 )
 from arthur_common.models.schema_definitions import (
     SHIELD_RESPONSE_SCHEMA,
@@ -44,7 +44,7 @@ class ShieldInferencePassFailCountAggregation(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferencePassFailCountAggregation.METRIC_NAME,
             description=ShieldInferencePassFailCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -113,7 +113,7 @@ class ShieldInferenceRuleCountAggregation(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleCountAggregation.METRIC_NAME,
             description=ShieldInferenceRuleCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -205,7 +205,7 @@ class ShieldInferenceHallucinationCountAggregation(NumericAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceHallucinationCountAggregation.METRIC_NAME,
             description=ShieldInferenceHallucinationCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -269,7 +269,7 @@ class ShieldInferenceRuleToxicityScoreAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleToxicityScoreAggregation.METRIC_NAME,
             description=ShieldInferenceRuleToxicityScoreAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -354,7 +354,7 @@ class ShieldInferenceRulePIIDataScoreAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRulePIIDataScoreAggregation.METRIC_NAME,
             description=ShieldInferenceRulePIIDataScoreAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -445,7 +445,7 @@ class ShieldInferenceRuleClaimCountAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleClaimCountAggregation.METRIC_NAME,
             description=ShieldInferenceRuleClaimCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -518,7 +518,7 @@ class ShieldInferenceRuleClaimPassCountAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleClaimPassCountAggregation.METRIC_NAME,
             description=ShieldInferenceRuleClaimPassCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -591,7 +591,7 @@ class ShieldInferenceRuleClaimFailCountAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleClaimFailCountAggregation.METRIC_NAME,
             description=ShieldInferenceRuleClaimFailCountAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -664,7 +664,7 @@ class ShieldInferenceRuleLatencyAggregation(SketchAggregationFunction):
         BaseReportedAggregation(
             metric_name=ShieldInferenceRuleLatencyAggregation.METRIC_NAME,
             description=ShieldInferenceRuleLatencyAggregation.description(),
-        )
+        ),
     ]
 
     def aggregate(
@@ -768,7 +768,7 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
         return [base_token_count_agg] + [
             BaseReportedAggregation(
                 metric_name=ShieldInferenceTokenCountAggregation._series_name_from_model_name(
-                    model_name
+                    model_name,
                 ),
                 description=f"Metric that reports the estimated cost for the {model_name} model of the tokens in the Shield response and prompt schemas.",
             )
@@ -855,7 +855,8 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
             )
             resp.append(
                 self.series_to_metric(
-                    self._series_name_from_model_name(model), model_series
-                )
+                    self._series_name_from_model_name(model),
+                    model_series,
+                ),
             )
         return resp
@@ -7,6 +7,7 @@ class ModelProblemType(str, Enum):
     ARTHUR_SHIELD = "arthur_shield"
     CUSTOM = "custom"
     MULTICLASS_CLASSIFICATION = "multiclass_classification"
+    AGENTIC_TRACE = "agentic_trace"
 
 
 class DatasetFileType(str, Enum):
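
Since `ModelProblemType` subclasses `str`, the new `AGENTIC_TRACE` member compares equal to its raw string value, so existing string-based checks keep working. A minimal standalone sketch of that pattern (abbreviated enum, not imported from the package):

```python
from enum import Enum


class ModelProblemType(str, Enum):
    # Abbreviated sketch of arthur_common.models.datasets.ModelProblemType
    MULTICLASS_CLASSIFICATION = "multiclass_classification"
    AGENTIC_TRACE = "agentic_trace"  # new in 2.1.54


# str mixin: the member is usable wherever a plain string is expected
assert ModelProblemType.AGENTIC_TRACE == "agentic_trace"
assert ModelProblemType("agentic_trace") is ModelProblemType.AGENTIC_TRACE
```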
@@ -194,7 +194,8 @@ class MetricsColumnParameterSchema(MetricsParameterSchema, BaseColumnParameterSc
 
 
 class MetricsColumnListParameterSchema(
-    MetricsParameterSchema, BaseColumnParameterSchema
+    MetricsParameterSchema,
+    BaseColumnParameterSchema,
 ):
     # list column parameter schema specific to default metrics
     parameter_type: Literal["column_list"] = "column_list"
@@ -249,14 +250,14 @@ class AggregationSpecSchema(BaseModel):
         description="List of parameters to the aggregation's aggregate function.",
     )
     reported_aggregations: list[BaseReportedAggregation] = Field(
-        description="List of aggregations reported by the metric."
+        description="List of aggregations reported by the metric.",
     )
 
     @model_validator(mode="after")
     def at_least_one_reported_agg(self) -> Self:
         if len(self.reported_aggregations) < 1:
             raise ValueError(
                 "Aggregation spec must specify at least one reported aggregation.",
             )
         return self
 
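The `model_validator(mode="after")` hook above runs once field parsing succeeds, so an empty `reported_aggregations` list fails at construction time rather than at query time. A standalone sketch of the same pattern (the model is abbreviated; the real field holds `BaseReportedAggregation` objects):

```python
from typing import Self

from pydantic import BaseModel, Field, ValidationError, model_validator


class AggregationSpecSchema(BaseModel):
    # Abbreviated: the real field is list[BaseReportedAggregation]
    reported_aggregations: list[str] = Field(
        description="List of aggregations reported by the metric.",
    )

    @model_validator(mode="after")
    def at_least_one_reported_agg(self) -> Self:
        if len(self.reported_aggregations) < 1:
            raise ValueError(
                "Aggregation spec must specify at least one reported aggregation.",
            )
        return self


try:
    AggregationSpecSchema(reported_aggregations=[])
except ValidationError as exc:
    print(exc)  # pydantic wraps the ValueError raised by the validator
```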
@@ -283,16 +284,16 @@ class AggregationSpecSchema(BaseModel):
 
 class ReportedCustomAggregation(BaseReportedAggregation):
     value_column: str = Field(
-        description="Name of the column returned from the SQL query holding the metric value."
+        description="Name of the column returned from the SQL query holding the metric value.",
     )
     timestamp_column: str = Field(
-        description="Name of the column returned from the SQL query holding the timestamp buckets."
+        description="Name of the column returned from the SQL query holding the timestamp buckets.",
     )
     metric_kind: AggregationMetricType = Field(
         description="Return type of the reported aggregation metric value.",
     )
     dimension_columns: list[str] = Field(
-        description="Name of any dimension columns returned from the SQL query. Max length is 1."
+        description="Name of any dimension columns returned from the SQL query. Max length is 1.",
     )
 
     @field_validator("dimension_columns")
@@ -367,6 +367,38 @@ def create_shield_inference_feedback_schema() -> DatasetListType:
     )
 
 
+def AGENTIC_TRACE_SCHEMA() -> DatasetSchema:
+    return DatasetSchema(
+        alias_mask={},
+        columns=[
+            DatasetColumn(
+                id=uuid4(),
+                source_name="trace_id",
+                definition=create_dataset_scalar_type(DType.STRING),
+            ),
+            DatasetColumn(
+                id=uuid4(),
+                source_name="start_time",
+                definition=create_dataset_scalar_type(DType.TIMESTAMP),
+            ),
+            DatasetColumn(
+                id=uuid4(),
+                source_name="end_time",
+                definition=create_dataset_scalar_type(DType.TIMESTAMP),
+            ),
+            DatasetColumn(
+                id=uuid4(),
+                source_name="root_spans",
+                definition=create_dataset_list_type(
+                    create_dataset_scalar_type(
+                        DType.JSON,
+                    ),  # JSON blob to preserve hierarchy
+                ),
+            ),
+        ],
+    )
+
+
 def SHIELD_SCHEMA() -> DatasetSchema:
     return DatasetSchema(
         alias_mask={},
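
Read as a dataset contract, the new `AGENTIC_TRACE_SCHEMA` describes one row per trace: a string `trace_id`, `start_time`/`end_time` timestamps, and a `root_spans` list of JSON blobs whose nesting carries the span hierarchy. A hypothetical record shaped like those columns (illustrative field values only):

```python
from datetime import datetime, timezone

# Hypothetical row matching the AGENTIC_TRACE_SCHEMA columns above
trace_row = {
    "trace_id": "trace-001",  # DType.STRING
    "start_time": datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc),  # TIMESTAMP
    "end_time": datetime(2025, 1, 1, 12, 0, 3, tzinfo=timezone.utc),  # TIMESTAMP
    # DType.JSON list: each entry is a serialized root span; children nest inside
    "root_spans": [
        '{"span_id": "a1", "name": "agent", "children": [{"span_id": "b2", "name": "tool_call"}]}',
    ],
}
```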
@@ -423,6 +455,32 @@ def SHIELD_SCHEMA() -> DatasetSchema:
 SHIELD_RESPONSE_SCHEMA = create_shield_response_schema().to_base_type()
 SHIELD_PROMPT_SCHEMA = create_shield_prompt_schema().to_base_type()
 
+
+# Agentic trace schema base type for API responses
+def create_agentic_trace_response_schema() -> DatasetObjectType:
+    return create_dataset_object_type(
+        {
+            "count": create_dataset_scalar_type(DType.INT),
+            "traces": create_dataset_list_type(
+                create_dataset_object_type(
+                    {
+                        "trace_id": create_dataset_scalar_type(DType.STRING),
+                        "start_time": create_dataset_scalar_type(DType.TIMESTAMP),
+                        "end_time": create_dataset_scalar_type(DType.TIMESTAMP),
+                        "root_spans": create_dataset_list_type(
+                            create_dataset_scalar_type(
+                                DType.JSON,
+                            ),  # JSON blob for infinite depth
+                        ),
+                    },
+                ),
+            ),
+        },
+    )
+
+
+AGENTIC_TRACE_RESPONSE_SCHEMA = create_agentic_trace_response_schema().to_base_type()
+
 SEGMENTATION_ALLOWED_DTYPES = [DType.INT, DType.BOOL, DType.STRING, DType.UUID]
 SEGMENTATION_ALLOWED_COLUMN_TYPES = [
     ScalarType(dtype=d_type) for d_type in SEGMENTATION_ALLOWED_DTYPES
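
The response-schema counterpart wraps the same trace shape in an envelope with a top-level `count` plus a `traces` list, which is what `AGENTIC_TRACE_RESPONSE_SCHEMA` describes for API responses. A hypothetical payload (values illustrative):

```python
# Hypothetical API response matching create_agentic_trace_response_schema()
response = {
    "count": 1,  # DType.INT
    "traces": [
        {
            "trace_id": "trace-001",
            "start_time": "2025-01-01T12:00:00Z",
            "end_time": "2025-01-01T12:00:03Z",
            "root_spans": ['{"span_id": "a1", "name": "agent"}'],
        },
    ],
}
```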
@@ -1,3 +1,4 @@
+from datetime import datetime
 from enum import Enum
 from typing import Any, Dict, List, Optional, Self, Type, Union
 
@@ -26,6 +27,15 @@ class RuleScope(str, Enum):
     TASK = "task"
 
 
+class MetricType(str, Enum):
+    QUERY_RELEVANCE = "QueryRelevance"
+    RESPONSE_RELEVANCE = "ResponseRelevance"
+    TOOL_SELECTION = "ToolSelection"
+
+    def __str__(self):
+        return self.value
+
+
 class BaseEnum(str, Enum):
     @classmethod
     def values(cls) -> list[Any]:
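
`MetricType` overrides `__str__` so that both `str()` and f-string interpolation yield the API-facing value rather than the `MetricType.QUERY_RELEVANCE` member name, consistently across Python versions. A standalone sketch:

```python
from enum import Enum


class MetricType(str, Enum):  # as added in the diff
    QUERY_RELEVANCE = "QueryRelevance"
    RESPONSE_RELEVANCE = "ResponseRelevance"
    TOOL_SELECTION = "ToolSelection"

    def __str__(self):
        return self.value


# Without the override, str() would give "MetricType.QUERY_RELEVANCE"
assert str(MetricType.QUERY_RELEVANCE) == "QueryRelevance"
assert f"{MetricType.TOOL_SELECTION}" == "ToolSelection"
```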
@@ -240,6 +250,27 @@ class RuleResponse(BaseModel):
     )
 
 
+class MetricResponse(BaseModel):
+    id: str = Field(description="ID of the Metric")
+    name: str = Field(description="Name of the Metric")
+    type: MetricType = Field(description="Type of the Metric")
+    metric_metadata: str = Field(description="Metadata of the Metric")
+    config: Optional[str] = Field(
+        description="JSON-serialized configuration for the Metric",
+        default=None,
+    )
+    created_at: datetime = Field(
+        description="Time the Metric was created in unix milliseconds",
+    )
+    updated_at: datetime = Field(
+        description="Time the Metric was updated in unix milliseconds",
+    )
+    enabled: Optional[bool] = Field(
+        description="Whether the Metric is enabled",
+        default=None,
+    )
+
+
 class TaskResponse(BaseModel):
     id: str = Field(description=" ID of the task")
     name: str = Field(description="Name of the task")
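
`MetricResponse` mirrors `RuleResponse` for the new metrics API; note that `config` arrives as a JSON-serialized string, while `created_at`/`updated_at` are unix-millisecond timestamps parsed into `datetime`. A hypothetical payload such a model would accept (field names from the diff, values illustrative):

```python
# Hypothetical input for MetricResponse.model_validate(...)
metric_payload = {
    "id": "metric-123",
    "name": "My User Query Relevance",
    "type": "QueryRelevance",  # coerced to MetricType
    "metric_metadata": "example metadata",
    "config": '{"use_llm_judge": true}',  # a JSON string, not a nested object
    "created_at": 1704067200000,  # unix milliseconds per the field description
    "updated_at": 1704067200000,
    "enabled": True,
}
```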
@@ -249,7 +280,12 @@ class TaskResponse(BaseModel):
     updated_at: int = Field(
         description="Time the task was created in unix milliseconds",
     )
+    is_agentic: bool = Field(description="Whether the task is agentic or not")
     rules: List[RuleResponse] = Field(description="List of all the rules for the task.")
+    metrics: Optional[List[MetricResponse]] = Field(
+        description="List of all the metrics for the task.",
+        default=None,
+    )
 
 
 class UpdateRuleRequest(BaseModel):
@@ -484,3 +520,125 @@ class NewRuleRequest(BaseModel):
                 detail="Examples must be provided to onboard a ModelSensitiveDataRule",
             )
         return self
+
+
+class RelevanceMetricConfig(BaseModel):
+    """Configuration for relevance metrics including QueryRelevance and ResponseRelevance"""
+
+    relevance_threshold: Optional[float] = Field(
+        default=None,
+        description="Threshold for determining relevance when not using LLM judge",
+    )
+    use_llm_judge: bool = Field(
+        default=True,
+        description="Whether to use LLM as a judge for relevance scoring",
+    )
+
+
+class NewMetricRequest(BaseModel):
+    type: MetricType = Field(
+        description="Type of the metric. It can only be one of QueryRelevance, ResponseRelevance, ToolSelection",
+        examples=["UserQueryRelevance"],
+    )
+    name: str = Field(
+        description="Name of metric",
+        examples=["My User Query Relevance"],
+    )
+    metric_metadata: str = Field(description="Additional metadata for the metric")
+    config: Optional[RelevanceMetricConfig] = Field(
+        description="Configuration for the metric. Currently only applies to UserQueryRelevance and ResponseRelevance metric types.",
+        default=None,
+    )
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example1": {
+                "type": "QueryRelevance",
+                "name": "My User Query Relevance",
+                "metric_metadata": "This is a test metric metadata",
+            },
+            "example2": {
+                "type": "QueryRelevance",
+                "name": "My User Query Relevance with Config",
+                "metric_metadata": "This is a test metric metadata",
+                "config": {"relevance_threshold": 0.8, "use_llm_judge": False},
+            },
+            "example3": {
+                "type": "ResponseRelevance",
+                "name": "My Response Relevance",
+                "metric_metadata": "This is a test metric metadata",
+                "config": {"use_llm_judge": True},
+            },
+        },
+    )
+
+    @field_validator("type")
+    def validate_metric_type(cls, value):
+        if value not in MetricType:
+            raise ValueError(
+                f"Invalid metric type: {value}. Valid types are: {', '.join([t.value for t in MetricType])}",
+            )
+        return value
+
+    @model_validator(mode="before")
+    def set_config_type(cls, values):
+        if not isinstance(values, dict):
+            return values
+
+        metric_type = values.get("type")
+        config_values = values.get("config")
+
+        # Map metric types to their corresponding config classes
+        metric_type_to_config = {
+            MetricType.QUERY_RELEVANCE: RelevanceMetricConfig,
+            MetricType.RESPONSE_RELEVANCE: RelevanceMetricConfig,
+            # Add new metric types and their configs here as needed
+        }
+
+        config_class = metric_type_to_config.get(metric_type)
+
+        if config_class is not None:
+            if config_values is None:
+                # Default config when none is provided
+                config_values = {"use_llm_judge": True}
+            elif isinstance(config_values, dict):
+                # Handle mutually exclusive parameters
+                if (
+                    "relevance_threshold" in config_values
+                    and "use_llm_judge" in config_values
+                    and config_values["use_llm_judge"]
+                ):
+                    raise HTTPException(
+                        status_code=400,
+                        detail="relevance_threshold and use_llm_judge=true are mutually exclusive. Set use_llm_judge=false when using relevance_threshold.",
+                        headers={"full_stacktrace": "false"},
+                    )
+
+                # If relevance_threshold is set but use_llm_judge isn't, set use_llm_judge to false
+                if (
+                    "relevance_threshold" in config_values
+                    and "use_llm_judge" not in config_values
+                ):
+                    config_values["use_llm_judge"] = False
+
+                # If neither is set, default to use_llm_judge=True
+                if (
+                    "relevance_threshold" not in config_values
+                    and "use_llm_judge" not in config_values
+                ):
+                    config_values["use_llm_judge"] = True
+
+            if isinstance(config_values, BaseModel):
+                config_values = config_values.model_dump()
+
+            values["config"] = config_class(**config_values)
+        elif config_values is not None:
+            # Provide a nice error message listing supported metric types
+            supported_types = [t.value for t in metric_type_to_config.keys()]
+            raise HTTPException(
+                status_code=400,
+                detail=f"Config is only supported for {', '.join(supported_types)} metric types",
+                headers={"full_stacktrace": "false"},
+            )
+
+        return values
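
Taken together, the `mode="before"` validator gives `config` three observable behaviors for the relevance metric types, and rejects `config` outright for types with no config class (currently `ToolSelection`). A usage sketch, assuming `NewMetricRequest` is importable from its module (the diff does not show the path):

```python
# Assumption: NewMetricRequest imported from the module shown in the diff.

# 1. No config -> defaulted to RelevanceMetricConfig(use_llm_judge=True).
req = NewMetricRequest(type="QueryRelevance", name="q", metric_metadata="m")

# 2. relevance_threshold alone -> use_llm_judge is forced to False.
req = NewMetricRequest(
    type="ResponseRelevance",
    name="r",
    metric_metadata="m",
    config={"relevance_threshold": 0.8},
)

# 3. relevance_threshold together with use_llm_judge=True -> HTTPException(400)
#    ("mutually exclusive"), as is any config passed for ToolSelection.
```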