arthur-common 2.1.51__tar.gz → 2.1.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arthur-common might be problematic.
- {arthur_common-2.1.51 → arthur_common-2.1.53}/PKG-INFO +1 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/pyproject.toml +1 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/aggregator.py +6 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/categorical_count.py +14 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/confusion_matrix.py +35 -5
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_count.py +14 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_count_by_class.py +23 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_null_count.py +15 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/mean_absolute_error.py +25 -3
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/mean_squared_error.py +25 -3
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/multiclass_confusion_matrix.py +43 -5
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +14 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/numeric_stats.py +14 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/numeric_sum.py +15 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/shield_aggregations.py +126 -16
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/metrics.py +84 -12
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/shield.py +0 -18
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/aggregation_analyzer.py +2 -1
- {arthur_common-2.1.51 → arthur_common-2.1.53}/README.md +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/README.md +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/py.typed +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/py.typed +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/config/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/config/config.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/config/settings.yaml +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/connectors.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/datasets.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/py.typed +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/schema_definitions.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/task_job_specs.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/py.typed +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/__init__.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/aggregation_loader.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/duckdb_data_loader.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/duckdb_utils.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/functions.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/py.typed +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/schema_inferer.py +0 -0
- {arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/time_utils.py +0 -0
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/aggregator.py
@@ -29,6 +29,12 @@ class AggregationFunction(ABC):
     def aggregation_type(self) -> Type[SketchMetric] | Type[NumericMetric]:
         raise NotImplementedError

+    @staticmethod
+    @abstractmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Returns the list of aggregations reported by the aggregate function."""
+        raise NotImplementedError
+
     @abstractmethod
     def aggregate(
         self,
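The hunk above makes reported_aggregations() a required static hook on every aggregation function. As a minimal, purely illustrative sketch of the pattern the per-function diffs below apply (the class name and metric name here are hypothetical and the other abstract methods such as aggregate() and id() are omitted):

# Hypothetical subclass, not part of the package; shows only the new hook.
from arthur_common.aggregations.aggregator import NumericAggregationFunction
from arthur_common.models.metrics import BaseReportedAggregation


class ExampleCountAggregation(NumericAggregationFunction):
    METRIC_NAME = "example_count"

    @staticmethod
    def description() -> str:
        return "Counts rows per time window."

    @staticmethod
    def reported_aggregations() -> list[BaseReportedAggregation]:
        # one entry per metric series that aggregate() emits
        return [
            BaseReportedAggregation(
                metric_name=ExampleCountAggregation.METRIC_NAME,
                description=ExampleCountAggregation.description(),
            )
        ]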
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/categorical_count.py
@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of discrete values of each category in a string column. Creates a separate dimension for each category and the values are the count of occurrences of that category in the time window."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=CategoricalCountAggregationFunction.METRIC_NAME,
+                description=CategoricalCountAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/confusion_matrix.py
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -20,6 +24,32 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str


 class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
+    TRUE_POSITIVE_METRIC_NAME = "confusion_matrix_true_positive_count"
+    FALSE_POSITIVE_METRIC_NAME = "confusion_matrix_false_positive_count"
+    FALSE_NEGATIVE_METRIC_NAME = "confusion_matrix_false_negative_count"
+    TRUE_NEGATIVE_METRIC_NAME = "confusion_matrix_true_negative_count"
+
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ConfusionMatrixAggregationFunction.TRUE_POSITIVE_METRIC_NAME,
+                description="Confusion matrix true positives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=ConfusionMatrixAggregationFunction.FALSE_POSITIVE_METRIC_NAME,
+                description="Confusion matrix false positives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=ConfusionMatrixAggregationFunction.FALSE_NEGATIVE_METRIC_NAME,
+                description="Confusion matrix false negatives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=ConfusionMatrixAggregationFunction.TRUE_NEGATIVE_METRIC_NAME,
+                description="Confusion matrix true negatives count.",
+            ),
+        ]
+
     def generate_confusion_matrix_metrics(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -129,10 +159,10 @@ class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
             dim_columns=segmentation_cols + extra_dims,
             timestamp_col="ts",
         )
-        tp_metric = self.series_to_metric(
-        fp_metric = self.series_to_metric(
-        fn_metric = self.series_to_metric(
-        tn_metric = self.series_to_metric(
+        tp_metric = self.series_to_metric(self.TRUE_POSITIVE_METRIC_NAME, tp)
+        fp_metric = self.series_to_metric(self.FALSE_POSITIVE_METRIC_NAME, fp)
+        fn_metric = self.series_to_metric(self.FALSE_NEGATIVE_METRIC_NAME, fn)
+        tn_metric = self.series_to_metric(self.TRUE_NEGATIVE_METRIC_NAME, tn)
         return [tp_metric, fp_metric, fn_metric, tn_metric]

{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_count.py
@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of inferences per time window."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=InferenceCountAggregationFunction.METRIC_NAME,
+                description=InferenceCountAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_count_by_class.py
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -36,6 +40,15 @@ class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=BinaryClassifierCountByClassAggregationFunction._metric_name(),
+                description=BinaryClassifierCountByClassAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -153,6 +166,15 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=BinaryClassifierCountThresholdClassAggregationFunction._metric_name(),
+                description=BinaryClassifierCountThresholdClassAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/inference_null_count.py
@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of null values in the column per time window."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=InferenceNullCountAggregationFunction.METRIC_NAME,
+                description=InferenceNullCountAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/mean_absolute_error.py
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier


 class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
+    ABSOLUTE_ERROR_COUNT_METRIC_NAME = "absolute_error_count"
+    ABSOLUTE_ERROR_SUM_METRIC_NAME = "absolute_error_sum"
+
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-00000000000e")
@@ -31,6 +38,19 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the absolute error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
+                description="Sum of the absolute error of a prediction and ground truth column, omitting rows where either column is null.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME,
+                description=f"Count of non-null rows used in the calculation of the {MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME} metric.",
+            ),
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,11 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )

-        count_metric = self.series_to_metric(
+        count_metric = self.series_to_metric(
+            self.ABSOLUTE_ERROR_COUNT_METRIC_NAME, count_series
+        )
         absolute_error_metric = self.series_to_metric(
-
+            self.ABSOLUTE_ERROR_SUM_METRIC_NAME,
             absolute_error_series,
         )

{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/mean_squared_error.py
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier


 class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
+    SQUARED_ERROR_COUNT_METRIC_NAME = "squared_error_count"
+    SQUARED_ERROR_SUM_METRIC_NAME = "squared_error_sum"
+
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-000000000010")
@@ -31,6 +38,19 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the squared error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=MeanSquaredErrorAggregationFunction.SQUARED_ERROR_SUM_METRIC_NAME,
+                description="Sum of the squared error of a prediction and ground truth column, omitting rows where either column is null.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MeanSquaredErrorAggregationFunction.SQUARED_ERROR_COUNT_METRIC_NAME,
+                description=f"Count of non-null rows used in the calculation of the {MeanSquaredErrorAggregationFunction.SQUARED_ERROR_SUM_METRIC_NAME} metric.",
+            ),
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,11 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )

-        count_metric = self.series_to_metric(
+        count_metric = self.series_to_metric(
+            self.SQUARED_ERROR_COUNT_METRIC_NAME, count_series
+        )
         absolute_error_metric = self.series_to_metric(
-
+            self.SQUARED_ERROR_SUM_METRIC_NAME,
             squared_error_series,
         )

{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/multiclass_confusion_matrix.py
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -22,6 +26,19 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
 class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction(
     NumericAggregationFunction,
 ):
+    MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_negative_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_negative_count"
+    )
+
     @staticmethod
     def id() -> UUID:
         return UUID("dc728927-6928-4a3b-b174-8c1ec8b58d62")
@@ -38,6 +55,27 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             "False Negatives, True Negatives) for that class compared to all others."
         )

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
+                description="Confusion matrix true positives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
+                description="Confusion matrix false positives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
+                description="Confusion matrix false negatives count.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
+                description="Confusion matrix true negatives count.",
+            ),
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -238,19 +276,19 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             timestamp_col="ts",
         )
         tp_metric = self.series_to_metric(
-
+            self.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
             tp,
         )
         fp_metric = self.series_to_metric(
-
+            self.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
             fp,
         )
         fn_metric = self.series_to_metric(
-
+            self.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
             fn,
         )
         tn_metric = self.series_to_metric(
-
+            self.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
             tn,
         )
         return [tp_metric, fp_metric, fn_metric, tn_metric]
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/multiclass_inference_count_by_class.py
@@ -7,7 +7,11 @@ from arthur_common.aggregations.functions.inference_count_by_class import (
     BinaryClassifierCountByClassAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -47,6 +51,15 @@ class MulticlassClassifierCountByClassAggregationFunction(
     def _metric_name() -> str:
         return "multiclass_classifier_count_by_class"

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=MulticlassClassifierCountByClassAggregationFunction._metric_name(),
+                description=MulticlassClassifierCountByClassAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/numeric_stats.py
@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import SketchAggregationFunction
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    SketchMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -34,6 +38,15 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
             "Metric that calculates a distribution (data sketch) on a numeric column."
         )

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=NumericSketchAggregationFunction.METRIC_NAME,
+                description=NumericSketchAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/numeric_sum.py
@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection

 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that reports the sum of the numeric column per time window."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=NumericSumAggregationFunction.METRIC_NAME,
+                description=NumericSumAggregationFunction.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/aggregations/functions/shield_aggregations.py
@@ -10,7 +10,12 @@ from arthur_common.aggregations.aggregator import (
     SketchAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    SketchMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SHIELD_RESPONSE_SCHEMA,
     MetricColumnParameterAnnotation,
@@ -33,6 +38,15 @@ class ShieldInferencePassFailCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield inferences grouped by the prompt, response, and overall check results."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferencePassFailCountAggregation.METRIC_NAME,
+                description=ShieldInferencePassFailCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -93,6 +107,15 @@ class ShieldInferenceRuleCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield rule evaluations grouped by whether it was on the prompt or response, the rule type, the rule evaluation result, the rule name, and the rule id."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleCountAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -176,6 +199,15 @@ class ShieldInferenceHallucinationCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield hallucination evaluations that failed."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceHallucinationCountAggregation.METRIC_NAME,
+                description=ShieldInferenceHallucinationCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -231,6 +263,15 @@ class ShieldInferenceRuleToxicityScoreAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on toxicity scores returned by the Shield toxicity rule."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleToxicityScoreAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleToxicityScoreAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -307,6 +348,15 @@ class ShieldInferenceRulePIIDataScoreAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on PII scores returned by the Shield PII rule."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRulePIIDataScoreAggregation.METRIC_NAME,
+                description=ShieldInferenceRulePIIDataScoreAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -389,6 +439,15 @@ class ShieldInferenceRuleClaimCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on over the number of claims identified by the Shield hallucination rule."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleClaimCountAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleClaimCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -453,6 +512,15 @@ class ShieldInferenceRuleClaimPassCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the number of valid claims determined by the Shield hallucination rule."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleClaimPassCountAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleClaimPassCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -517,6 +585,15 @@ class ShieldInferenceRuleClaimFailCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the number of invalid claims determined by the Shield hallucination rule."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleClaimFailCountAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleClaimFailCountAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -581,6 +658,15 @@ class ShieldInferenceRuleLatencyAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the latency of Shield rule evaluations. Dimensions are the rule result, rule type, and whether the rule was applicable to a prompt or response."

+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceRuleLatencyAggregation.METRIC_NAME,
+                description=ShieldInferenceRuleLatencyAggregation.description(),
+            )
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -643,6 +729,18 @@

 class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
     METRIC_NAME = "token_count"
+    SUPPORTED_MODELS = [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-3.5-turbo",
+        "o1-mini",
+        "deepseek-chat",
+        "claude-3-5-sonnet-20241022",
+        "gemini/gemini-1.5-pro",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+    ]

     @staticmethod
     def id() -> UUID:
@@ -656,6 +754,27 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that reports the number of tokens in the Shield response and prompt schemas, and their estimated cost."

+    @staticmethod
+    def _series_name_from_model_name(model_name: str) -> str:
+        """Calculates name of reported series based on the model name considered."""
+        return f"token_cost.{model_name}"
+
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        base_token_count_agg = BaseReportedAggregation(
+            metric_name=ShieldInferenceTokenCountAggregation.METRIC_NAME,
+            description=f"Metric that reports the number of tokens in the Shield response and prompt schemas.",
+        )
+        return [base_token_count_agg] + [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceTokenCountAggregation._series_name_from_model_name(
+                    model_name
+                ),
+                description=f"Metric that reports the estimated cost for the {model_name} model of the tokens in the Shield response and prompt schemas.",
+            )
+            for model_name in ShieldInferenceTokenCountAggregation.SUPPORTED_MODELS
+        ]
+
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -708,25 +827,12 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
         resp = [metric]

         # Compute Cost for each model
-        models = [
-            "gpt-4o",
-            "gpt-4o-mini",
-            "gpt-3.5-turbo",
-            "o1-mini",
-            "deepseek-chat",
-            "claude-3-5-sonnet-20241022",
-            "gemini/gemini-1.5-pro",
-            "meta.llama3-1-8b-instruct-v1:0",
-            "meta.llama3-1-70b-instruct-v1:0",
-            "meta.llama3-2-11b-instruct-v1:0",
-        ]
-
         # Precompute input/output classification to avoid recalculating in loop
         location_type = results["location"].apply(
             lambda x: "input" if x == "prompt" else "output",
         )

-        for model in
+        for model in self.SUPPORTED_MODELS:
             # Efficient list comprehension instead of apply
             cost_values = [
                 calculate_cost_by_tokens(int(tokens), model, loc_type)
@@ -747,5 +853,9 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
                 ["location"],
                 "ts",
             )
-            resp.append(
+            resp.append(
+                self.series_to_metric(
+                    self._series_name_from_model_name(model), model_series
+                )
+            )
         return resp
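The token-count aggregation above now reports one series per supported model in addition to the base token count. As a hedged, illustrative sketch of how those series names are derived (the import path is assumed from the package layout shown in the file list, and _series_name_from_model_name is an internal helper added in this release):

# Illustrative only; values come straight from the diff above.
from arthur_common.aggregations.functions.shield_aggregations import (
    ShieldInferenceTokenCountAggregation,
)

names = [
    ShieldInferenceTokenCountAggregation._series_name_from_model_name(m)
    for m in ShieldInferenceTokenCountAggregation.SUPPORTED_MODELS
]
# e.g. ["token_cost.gpt-4o", "token_cost.gpt-4o-mini", ...]

# reported_aggregations() lists the base token_count entry plus one entry per model
assert len(ShieldInferenceTokenCountAggregation.reported_aggregations()) == 1 + len(
    ShieldInferenceTokenCountAggregation.SUPPORTED_MODELS
)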
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/metrics.py
@@ -4,7 +4,7 @@ from enum import Enum
 from typing import Literal, Optional
 from uuid import UUID

-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Field, field_validator, model_validator
 from typing_extensions import Self

 from arthur_common.models.datasets import ModelProblemType
@@ -112,12 +112,9 @@ class AggregationMetricType(Enum):
     NUMERIC = "numeric"


-class
+class BaseAggregationParameterSchema(BaseModel):
+    # fields for aggregation parameters shared across all parameter types and between default and custom metrics
     parameter_key: str = Field(description="Name of the parameter.")
-    optional: bool = Field(
-        False,
-        description="Boolean denoting if the parameter is optional.",
-    )
     friendly_name: str = Field(
         description="User facing name of the parameter.",
     )
@@ -126,7 +123,16 @@ class MetricsParameterSchema(BaseModel):
     )


-class
+class MetricsParameterSchema(BaseAggregationParameterSchema):
+    # specific to default metrics/Python metrics—not available to custom aggregations
+    optional: bool = Field(
+        False,
+        description="Boolean denoting if the parameter is optional.",
+    )
+
+
+class BaseDatasetParameterSchema(BaseAggregationParameterSchema):
+    # fields specific to dataset parameters shared across default and custom metrics
     parameter_type: Literal["dataset"] = "dataset"
     model_problem_type: Optional[ModelProblemType] = Field(
         default=None,
@@ -134,12 +140,24 @@ class MetricsDatasetParameterSchema(MetricsParameterSchema):
     )


-class
+class MetricsDatasetParameterSchema(MetricsParameterSchema, BaseDatasetParameterSchema):
+    # dataset parameter schema including fields specific to default metrics
+    pass
+
+
+class BaseLiteralParameterSchema(BaseAggregationParameterSchema):
+    # fields specific to literal parameters shared across default and custom metrics
     parameter_type: Literal["literal"] = "literal"
     parameter_dtype: DType = Field(description="Data type of the parameter.")


-class
+class MetricsLiteralParameterSchema(MetricsParameterSchema, BaseLiteralParameterSchema):
+    # literal parameter schema including fields specific to default metrics
+    pass
+
+
+class BaseColumnBaseParameterSchema(BaseAggregationParameterSchema):
+    # fields specific to all single or multiple column parameters shared across default and custom metrics
     tag_hints: list[ScopeSchemaTag] = Field(
         [],
         description="List of tags that are applicable to this parameter. Datasets with columns that have matching tags can be inferred this way.",
@@ -165,12 +183,20 @@ class MetricsColumnBaseParameterSchema(MetricsParameterSchema):
         return self


-class
+class BaseColumnParameterSchema(BaseColumnBaseParameterSchema):
+    # single column parameter schema common across default and custom metrics
     parameter_type: Literal["column"] = "column"


-
-
+class MetricsColumnParameterSchema(MetricsParameterSchema, BaseColumnParameterSchema):
+    # single column parameter schema specific to default metrics
+    parameter_type: Literal["column"] = "column"
+
+
+class MetricsColumnListParameterSchema(
+    MetricsParameterSchema, BaseColumnParameterSchema
+):
+    # list column parameter schema specific to default metrics
     parameter_type: Literal["column_list"] = "column_list"


@@ -186,6 +212,11 @@ MetricsColumnSchemaUnion = (
 )


+CustomAggregationParametersSchemaUnion = (
+    BaseDatasetParameterSchema | BaseLiteralParameterSchema | BaseColumnParameterSchema
+)
+
+
 @dataclass
 class DatasetReference:
     dataset_name: str
@@ -193,6 +224,14 @@ class DatasetReference:
     dataset_id: UUID


+class BaseReportedAggregation(BaseModel):
+    # in future will be used by default metrics
+    metric_name: str = Field(description="Name of the reported aggregation metric.")
+    description: str = Field(
+        description="Description of the reported aggregation metric and what it aggregates.",
+    )
+
+
 class AggregationSpecSchema(BaseModel):
     name: str = Field(description="Name of the aggregation function.")
     id: UUID = Field(description="Unique identifier of the aggregation function.")
@@ -209,6 +248,17 @@ class AggregationSpecSchema(BaseModel):
     aggregate_args: list[MetricsParameterSchemaUnion] = Field(
         description="List of parameters to the aggregation's aggregate function.",
     )
+    reported_aggregations: list[BaseReportedAggregation] = Field(
+        description="List of aggregations reported by the metric."
+    )
+
+    @model_validator(mode="after")
+    def at_least_one_reported_agg(self) -> Self:
+        if len(self.reported_aggregations) < 1:
+            raise ValueError(
+                "Aggregation spec must specify at least one reported aggregation."
+            )
+        return self

     @model_validator(mode="after")
     def column_dataset_references_exist(self) -> Self:
@@ -229,3 +279,25 @@ class AggregationSpecSchema(BaseModel):
                     f"Column parameter '{param.parameter_key}' references dataset parameter '{param.source_dataset_parameter_key}' which does not exist.",
                 )
         return self
+
+
+class ReportedCustomAggregation(BaseReportedAggregation):
+    value_column: str = Field(
+        description="Name of the column returned from the SQL query holding the metric value."
+    )
+    timestamp_column: str = Field(
+        description="Name of the column returned from the SQL query holding the timestamp buckets."
+    )
+    metric_kind: AggregationMetricType = Field(
+        description="Return type of the reported aggregation metric value.",
+    )
+    dimension_columns: list[str] = Field(
+        description="Name of any dimension columns returned from the SQL query. Max length is 1."
+    )
+
+    @field_validator("dimension_columns")
+    @classmethod
+    def validate_dimension_columns_length(cls, v: list[str]) -> str:
+        if len(v) > 1:
+            raise ValueError("Only one dimension column can be specified.")
+        return v
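The metrics.py diff above introduces ReportedCustomAggregation with a validator that caps dimension_columns at one entry. A hedged usage sketch, with made-up field values, of how that validation behaves:

# Illustrative only: field names and the validator come from the diff above;
# the values ("row_count", "bucket_ts", etc.) are invented for the example.
from arthur_common.models.metrics import (
    AggregationMetricType,
    ReportedCustomAggregation,
)

ReportedCustomAggregation(
    metric_name="row_count",
    description="Row count per time bucket.",
    value_column="count",
    timestamp_column="bucket_ts",
    metric_kind=AggregationMetricType.NUMERIC,
    dimension_columns=["segment"],  # at most one dimension column is allowed
)

# dimension_columns=["a", "b"] would raise a pydantic ValidationError
# ("Only one dimension column can be specified.")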
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/models/shield.py
@@ -10,9 +10,7 @@ DEFAULT_PII_RULE_CONFIDENCE_SCORE_THRESHOLD = 0

 class RuleType(str, Enum):
     KEYWORD = "KeywordRule"
-    MODEL_HALLUCINATION = "ModelHallucinationRule"
     MODEL_HALLUCINATION_V2 = "ModelHallucinationRuleV2"
-    MODEL_HALLUCINATION_V3 = "ModelHallucinationRuleV3"
     MODEL_SENSITIVE_DATA = "ModelSensitiveDataRule"
     PII_DATA = "PIIDataRule"
     PROMPT_INJECTION = "PromptInjectionRule"
@@ -456,14 +454,6 @@ class NewRuleRequest(BaseModel):
                 detail="PromptInjectionRule can only be enabled for prompt. Please set the 'apply_to_response' field "
                 "to false.",
             )
-        if (self.type == RuleType.MODEL_HALLUCINATION) and (
-            self.apply_to_prompt is True
-        ):
-            raise HTTPException(
-                status_code=400,
-                detail="ModelHallucinationRule can only be enabled for response. Please set the 'apply_to_prompt' "
-                "field to false.",
-            )
         if (self.type == RuleType.MODEL_HALLUCINATION_V2) and (
             self.apply_to_prompt is True
         ):
@@ -472,14 +462,6 @@ class NewRuleRequest(BaseModel):
                 detail="ModelHallucinationRuleV2 can only be enabled for response. Please set the 'apply_to_prompt' "
                 "field to false.",
             )
-        if (self.type == RuleType.MODEL_HALLUCINATION_V3) and (
-            self.apply_to_prompt is True
-        ):
-            raise HTTPException(
-                status_code=400,
-                detail="ModelHallucinationRuleV3 can only be enabled for response. Please set the "
-                "'apply_to_prompt' field to false.",
-            )
         if (self.apply_to_prompt is False) and (self.apply_to_response is False):
             raise HTTPException(
                 status_code=400,
{arthur_common-2.1.51 → arthur_common-2.1.53}/src/arthur_common/tools/aggregation_analyzer.py
RENAMED
@@ -207,7 +207,7 @@ class FunctionAnalyzer:
         )
         # Check if X implements the required methods
         required_methods = ["aggregate", "id", "description", "display_name"]
-        static_methods = ["description", "id", "display_name"]
+        static_methods = ["description", "id", "display_name", "reported_aggregations"]
         for method in required_methods:
             if not hasattr(agg_func, method) or not callable(getattr(agg_func, method)):
                 raise AttributeError(
@@ -253,6 +253,7 @@ class FunctionAnalyzer:
             metric_type=metric_type,
             init_args=aggregation_init_args,
             aggregate_args=aggregate_args,
+            reported_aggregations=agg_func.reported_aggregations(),
         )
