PyPI - arthur-common - Versions diffs - 2.1.52__py3-none-any.whl → 2.1.53__py3-none-any.whl - Mend

arthur-common 2.1.52__py3-none-any.whl → 2.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arthur-common might be problematic. Click here for more details.

Files changed (18) hide show

arthur_common/aggregations/aggregator.py CHANGED Viewed

@@ -29,6 +29,12 @@ class AggregationFunction(ABC):
     def aggregation_type(self) -> Type[SketchMetric] | Type[NumericMetric]:
         raise NotImplementedError
+    @staticmethod
+    @abstractmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Returns the list of aggregations reported by the aggregate function.
+        Abstract: this base implementation only raises NotImplementedError, so
+        every concrete aggregation function has to provide its own list.
+        """
+        raise NotImplementedError
     @abstractmethod
     def aggregate(
         self,

arthur_common/aggregations/functions/categorical_count.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of discrete values of each category in a string column. Creates a separate dimension for each category and the values are the count of occurrences of that category in the time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = CategoricalCountAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/confusion_matrix.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -20,6 +24,32 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
 class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
+    TRUE_POSITIVE_METRIC_NAME = "confusion_matrix_true_positive_count"
+    FALSE_POSITIVE_METRIC_NAME = "confusion_matrix_false_positive_count"
+    FALSE_NEGATIVE_METRIC_NAME = "confusion_matrix_false_negative_count"
+    TRUE_NEGATIVE_METRIC_NAME = "confusion_matrix_true_negative_count"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the four confusion-matrix count aggregations in TP, FP, FN, TN order."""
+        owner = ConfusionMatrixAggregationFunction
+        # (metric name, description) pairs, kept in the order they are reported.
+        name_and_text = (
+            (owner.TRUE_POSITIVE_METRIC_NAME, "Confusion matrix true positives count."),
+            (owner.FALSE_POSITIVE_METRIC_NAME, "Confusion matrix false positives count."),
+            (owner.FALSE_NEGATIVE_METRIC_NAME, "Confusion matrix false negatives count."),
+            (owner.TRUE_NEGATIVE_METRIC_NAME, "Confusion matrix true negatives count."),
+        )
+        return [
+            BaseReportedAggregation(metric_name=name, description=text)
+            for name, text in name_and_text
+        ]
     def generate_confusion_matrix_metrics(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -129,10 +159,10 @@ class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
             dim_columns=segmentation_cols + extra_dims,
             timestamp_col="ts",
         )
-        tp_metric = self.series_to_metric("confusion_matrix_true_positive_count", tp)
-        fp_metric = self.series_to_metric("confusion_matrix_false_positive_count", fp)
-        fn_metric = self.series_to_metric("confusion_matrix_false_negative_count", fn)
-        tn_metric = self.series_to_metric("confusion_matrix_true_negative_count", tn)
+        tp_metric = self.series_to_metric(self.TRUE_POSITIVE_METRIC_NAME, tp)
+        fp_metric = self.series_to_metric(self.FALSE_POSITIVE_METRIC_NAME, fp)
+        fn_metric = self.series_to_metric(self.FALSE_NEGATIVE_METRIC_NAME, fn)
+        tn_metric = self.series_to_metric(self.TRUE_NEGATIVE_METRIC_NAME, tn)
         return [tp_metric, fp_metric, fn_metric, tn_metric]

arthur_common/aggregations/functions/inference_count.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of inferences per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = InferenceCountAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/inference_count_by_class.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -36,6 +40,15 @@ class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: _metric_name() paired with description()."""
+        owner = BinaryClassifierCountByClassAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner._metric_name(),
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -153,6 +166,15 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: _metric_name() paired with description()."""
+        owner = BinaryClassifierCountThresholdClassAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner._metric_name(),
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/inference_null_count.py CHANGED Viewed

@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of null values in the column per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = InferenceNullCountAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/mean_absolute_error.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
 class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
+    ABSOLUTE_ERROR_COUNT_METRIC_NAME = "absolute_error_count"
+    ABSOLUTE_ERROR_SUM_METRIC_NAME = "absolute_error_sum"
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-00000000000e")
@@ -31,6 +38,19 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the absolute error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the two aggregations reported: the absolute-error sum and the row count behind it."""
+        return [
+            # The "Sum of ..." text belongs to the *_sum metric and the "Count of ..." text to
+            # the *_count metric; the two metric names were previously attached the wrong way round.
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME,
+                description="Sum of the absolute error of a prediction and ground truth column, omitting rows where either column is null.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
+                description=f"Count of non-null rows used in the calculation of the {MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME} metric.",
+            ),
+        ]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,11 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )
-        count_metric = self.series_to_metric("absolute_error_count", count_series)
+        count_metric = self.series_to_metric(
+            self.ABSOLUTE_ERROR_COUNT_METRIC_NAME, count_series
+        )
         absolute_error_metric = self.series_to_metric(
-            "absolute_error_sum",
+            self.ABSOLUTE_ERROR_SUM_METRIC_NAME,
             absolute_error_series,
         )

arthur_common/aggregations/functions/mean_squared_error.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
 class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
+    SQUARED_ERROR_COUNT_METRIC_NAME = "squared_error_count"
+    SQUARED_ERROR_SUM_METRIC_NAME = "squared_error_sum"
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-000000000010")
@@ -31,6 +38,19 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the squared error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the two aggregations reported: the squared-error sum, then the row count behind it."""
+        owner = MeanSquaredErrorAggregationFunction
+        sum_agg = BaseReportedAggregation(
+            metric_name=owner.SQUARED_ERROR_SUM_METRIC_NAME,
+            description="Sum of the squared error of a prediction and ground truth column, omitting rows where either column is null.",
+        )
+        count_agg = BaseReportedAggregation(
+            metric_name=owner.SQUARED_ERROR_COUNT_METRIC_NAME,
+            description=f"Count of non-null rows used in the calculation of the {owner.SQUARED_ERROR_SUM_METRIC_NAME} metric.",
+        )
+        return [sum_agg, count_agg]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,11 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )
-        count_metric = self.series_to_metric("squared_error_count", count_series)
+        count_metric = self.series_to_metric(
+            self.SQUARED_ERROR_COUNT_METRIC_NAME, count_series
+        )
         absolute_error_metric = self.series_to_metric(
-            "squared_error_sum",
+            self.SQUARED_ERROR_SUM_METRIC_NAME,
             squared_error_series,
         )

arthur_common/aggregations/functions/multiclass_confusion_matrix.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -22,6 +26,19 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
 class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction(
     NumericAggregationFunction,
 ):
+    MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_negative_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_negative_count"
+    )
     @staticmethod
     def id() -> UUID:
         return UUID("dc728927-6928-4a3b-b174-8c1ec8b58d62")
@@ -38,6 +55,27 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             "False Negatives, True Negatives) for that class compared to all others."
         )
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the four single-class confusion-matrix count aggregations in TP, FP, FN, TN order."""
+        owner = MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction
+        # (metric name, description) pairs, kept in the order they are reported.
+        name_and_text = (
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
+                "Confusion matrix true positives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
+                "Confusion matrix false positives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
+                "Confusion matrix false negatives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
+                "Confusion matrix true negatives count.",
+            ),
+        )
+        return [
+            BaseReportedAggregation(metric_name=name, description=text)
+            for name, text in name_and_text
+        ]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -238,19 +276,19 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             timestamp_col="ts",
         )
         tp_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_true_positive_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
             tp,
         )
         fp_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_false_positive_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
             fp,
         )
         fn_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_false_negative_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
             fn,
         )
         tn_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_true_negative_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
             tn,
         )
         return [tp_metric, fp_metric, fn_metric, tn_metric]

arthur_common/aggregations/functions/multiclass_inference_count_by_class.py CHANGED Viewed

@@ -7,7 +7,11 @@ from arthur_common.aggregations.functions.inference_count_by_class import (
     BinaryClassifierCountByClassAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -47,6 +51,15 @@ class MulticlassClassifierCountByClassAggregationFunction(
     def _metric_name() -> str:
         return "multiclass_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: _metric_name() paired with description()."""
+        owner = MulticlassClassifierCountByClassAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner._metric_name(),
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/numeric_stats.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import SketchAggregationFunction
-from arthur_common.models.metrics import DatasetReference, SketchMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    SketchMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -34,6 +38,15 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
             "Metric that calculates a distribution (data sketch) on a numeric column."
         )
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = NumericSketchAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/numeric_sum.py CHANGED Viewed

@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that reports the sum of the numeric column per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = NumericSumAggregationFunction
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/shield_aggregations.py CHANGED Viewed

@@ -10,7 +10,12 @@ from arthur_common.aggregations.aggregator import (
     SketchAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric, SketchMetric
+from arthur_common.models.metrics import (
+    DatasetReference,
+    NumericMetric,
+    SketchMetric,
+    BaseReportedAggregation,
+)
 from arthur_common.models.schema_definitions import (
     SHIELD_RESPONSE_SCHEMA,
     MetricColumnParameterAnnotation,
@@ -33,6 +38,15 @@ class ShieldInferencePassFailCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield inferences grouped by the prompt, response, and overall check results."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferencePassFailCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -93,6 +107,15 @@ class ShieldInferenceRuleCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield rule evaluations grouped by whether it was on the prompt or response, the rule type, the rule evaluation result, the rule name, and the rule id."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -176,6 +199,15 @@ class ShieldInferenceHallucinationCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of Shield hallucination evaluations that failed."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceHallucinationCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -231,6 +263,15 @@ class ShieldInferenceRuleToxicityScoreAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on toxicity scores returned by the Shield toxicity rule."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleToxicityScoreAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -307,6 +348,15 @@ class ShieldInferenceRulePIIDataScoreAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on PII scores returned by the Shield PII rule."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRulePIIDataScoreAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -389,6 +439,15 @@ class ShieldInferenceRuleClaimCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on over the number of claims identified by the Shield hallucination rule."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleClaimCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -453,6 +512,15 @@ class ShieldInferenceRuleClaimPassCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the number of valid claims determined by the Shield hallucination rule."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleClaimPassCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -517,6 +585,15 @@ class ShieldInferenceRuleClaimFailCountAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the number of invalid claims determined by the Shield hallucination rule."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleClaimFailCountAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -581,6 +658,15 @@ class ShieldInferenceRuleLatencyAggregation(SketchAggregationFunction):
     def description() -> str:
         return "Metric that reports a distribution (data sketch) on the latency of Shield rule evaluations. Dimensions are the rule result, rule type, and whether the rule was applicable to a prompt or response."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the one aggregation reported here: METRIC_NAME paired with description()."""
+        owner = ShieldInferenceRuleLatencyAggregation
+        reported = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [reported]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -643,6 +729,18 @@ class ShieldInferenceRuleLatencyAggregation(SketchAggregationFunction):
 class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
     METRIC_NAME = "token_count"
+    SUPPORTED_MODELS = [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-3.5-turbo",
+        "o1-mini",
+        "deepseek-chat",
+        "claude-3-5-sonnet-20241022",
+        "gemini/gemini-1.5-pro",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+    ]
     @staticmethod
     def id() -> UUID:
@@ -656,6 +754,27 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
     def description() -> str:
         return "Metric that reports the number of tokens in the Shield response and prompt schemas, and their estimated cost."
+    @staticmethod
+    def _series_name_from_model_name(model_name: str) -> str:
+        """Build the reported series name for a model: "token_cost." followed by the model name."""
+        series_name = f"token_cost.{model_name}"
+        return series_name
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return the token-count aggregation followed by one cost aggregation per SUPPORTED_MODELS entry.
+        Cost entries are named via _series_name_from_model_name() so they match the series
+        names used when the metrics are emitted.
+        """
+        base_token_count_agg = BaseReportedAggregation(
+            metric_name=ShieldInferenceTokenCountAggregation.METRIC_NAME,
+            # Plain literal: nothing is interpolated here, so no f-prefix is needed.
+            description="Metric that reports the number of tokens in the Shield response and prompt schemas.",
+        )
+        return [base_token_count_agg] + [
+            BaseReportedAggregation(
+                metric_name=ShieldInferenceTokenCountAggregation._series_name_from_model_name(
+                    model_name
+                ),
+                description=f"Metric that reports the estimated cost for the {model_name} model of the tokens in the Shield response and prompt schemas.",
+            )
+            for model_name in ShieldInferenceTokenCountAggregation.SUPPORTED_MODELS
+        ]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -708,25 +827,12 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
         resp = [metric]
         # Compute Cost for each model
-        models = [
-            "gpt-4o",
-            "gpt-4o-mini",
-            "gpt-3.5-turbo",
-            "o1-mini",
-            "deepseek-chat",
-            "claude-3-5-sonnet-20241022",
-            "gemini/gemini-1.5-pro",
-            "meta.llama3-1-8b-instruct-v1:0",
-            "meta.llama3-1-70b-instruct-v1:0",
-            "meta.llama3-2-11b-instruct-v1:0",
-        ]
         # Precompute input/output classification to avoid recalculating in loop
         location_type = results["location"].apply(
             lambda x: "input" if x == "prompt" else "output",
         )
-        for model in models:
+        for model in self.SUPPORTED_MODELS:
             # Efficient list comprehension instead of apply
             cost_values = [
                 calculate_cost_by_tokens(int(tokens), model, loc_type)
@@ -747,5 +853,9 @@ class ShieldInferenceTokenCountAggregation(NumericAggregationFunction):
                 ["location"],
                 "ts",
             )
-            resp.append(self.series_to_metric(f"token_cost.{model}", model_series))
+            resp.append(
+                self.series_to_metric(
+                    self._series_name_from_model_name(model), model_series
+                )
+            )
         return resp

arthur_common/models/metrics.py CHANGED Viewed

@@ -193,7 +193,9 @@ class MetricsColumnParameterSchema(MetricsParameterSchema, BaseColumnParameterSc
     parameter_type: Literal["column"] = "column"
-class MetricsColumnListParameterSchema(MetricsParameterSchema, BaseColumnParameterSchema):
+class MetricsColumnListParameterSchema(
+    MetricsParameterSchema, BaseColumnParameterSchema
+):
     # list column parameter schema specific to default metrics
     parameter_type: Literal["column_list"] = "column_list"
@@ -211,9 +213,7 @@ MetricsColumnSchemaUnion = (
 CustomAggregationParametersSchemaUnion = (
-    BaseDatasetParameterSchema
-    | BaseLiteralParameterSchema
-    | BaseColumnParameterSchema
+    BaseDatasetParameterSchema | BaseLiteralParameterSchema | BaseColumnParameterSchema
 )
@@ -224,6 +224,14 @@ class DatasetReference:
     dataset_id: UUID
+class BaseReportedAggregation(BaseModel):
+    # in future will be used by default metrics
+    metric_name: str = Field(description="Name of the reported aggregation metric.")
+    description: str = Field(
+        description="Description of the reported aggregation metric and what it aggregates.",
+    )
 class AggregationSpecSchema(BaseModel):
     name: str = Field(description="Name of the aggregation function.")
     id: UUID = Field(description="Unique identifier of the aggregation function.")
@@ -240,6 +248,17 @@ class AggregationSpecSchema(BaseModel):
     aggregate_args: list[MetricsParameterSchemaUnion] = Field(
         description="List of parameters to the aggregation's aggregate function.",
     )
+    reported_aggregations: list[BaseReportedAggregation] = Field(
+        description="List of aggregations reported by the metric."
+    )
+    @model_validator(mode="after")
+    def at_least_one_reported_agg(self) -> Self:
+        if len(self.reported_aggregations) < 1:
+            raise ValueError(
+                "Aggregation spec must specify at least one reported aggregation."
+            )
+        return self
     @model_validator(mode="after")
     def column_dataset_references_exist(self) -> Self:
@@ -262,26 +281,23 @@ class AggregationSpecSchema(BaseModel):
         return self
-class BaseReportedAggregation(BaseModel):
-    # in future will be used by default metrics
-    metric_name: str = Field(description="Name of the reported aggregation metric.")
-    description: str = Field(
-        description="Description of the reported aggregation metric and what it aggregates.",
-    )
 class ReportedCustomAggregation(BaseReportedAggregation):
-    value_column: str = Field(description="Name of the column returned from the SQL query holding the metric value.")
-    timestamp_column: str = Field(description="Name of the column returned from the SQL query holding the timestamp buckets.")
+    value_column: str = Field(
+        description="Name of the column returned from the SQL query holding the metric value."
+    )
+    timestamp_column: str = Field(
+        description="Name of the column returned from the SQL query holding the timestamp buckets."
+    )
     metric_kind: AggregationMetricType = Field(
         description="Return type of the reported aggregation metric value.",
     )
-    dimension_columns: list[str] = Field(description="Name of any dimension columns returned from the SQL query. Max length is 1.")
+    dimension_columns: list[str] = Field(
+        description="Name of any dimension columns returned from the SQL query. Max length is 1."
+    )
-    @field_validator('dimension_columns')
+    @field_validator("dimension_columns")
     @classmethod
     def validate_dimension_columns_length(cls, v: list[str]) -> str:
         if len(v) > 1:
-            raise ValueError('Only one dimension column can be specified.')
+            raise ValueError("Only one dimension column can be specified.")
         return v

arthur_common/tools/aggregation_analyzer.py CHANGED Viewed

@@ -207,7 +207,7 @@ class FunctionAnalyzer:
             )
         # Check if X implements the required methods
         required_methods = ["aggregate", "id", "description", "display_name"]
-        static_methods = ["description", "id", "display_name"]
+        static_methods = ["description", "id", "display_name", "reported_aggregations"]
         for method in required_methods:
             if not hasattr(agg_func, method) or not callable(getattr(agg_func, method)):
                 raise AttributeError(
@@ -253,6 +253,7 @@ class FunctionAnalyzer:
             metric_type=metric_type,
             init_args=aggregation_init_args,
             aggregate_args=aggregate_args,
+            reported_aggregations=agg_func.reported_aggregations(),
         )

{arthur_common-2.1.52.dist-info → arthur_common-2.1.53.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arthur-common
-Version: 2.1.52
+Version: 2.1.53
 Summary: Utility code common to Arthur platform components.
 License: MIT
 Author: Arthur

{arthur_common-2.1.52.dist-info → arthur_common-2.1.53.dist-info}/RECORD RENAMED Viewed

@@ -1,21 +1,21 @@
 arthur_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/aggregations/__init__.py,sha256=vISWyciQAtksa71OKeHNP-QyFGd1NzBKq_LBsG0QSG8,67
-arthur_common/aggregations/aggregator.py,sha256=lCe0P-ZbzifG-KvmzeodNyv6LtaS56B0qOInhFrTr1U,7714
+arthur_common/aggregations/aggregator.py,sha256=kS9Qru0AhZzZz4Ym20NT7aNrbcQaqg2zgBVYFogFbbg,7936
 arthur_common/aggregations/functions/README.md,sha256=MkZoTAJ94My96R5Z8GAxud7S6vyR0vgVi9gqdt9a4XY,5460
 arthur_common/aggregations/functions/__init__.py,sha256=HqC3UNRURX7ZQHgamTrQvfA8u_FiZGZ4I4eQW7Ooe5o,1299
-arthur_common/aggregations/functions/categorical_count.py,sha256=56C9ELDFfZjv5Kt45U0Tq1SUGdneha4ED5o9BIl98UI,4966
-arthur_common/aggregations/functions/confusion_matrix.py,sha256=4UsCcnToQjrwN_W75TDDYzOcXdoEeu7DZDLeojrUZZs,20845
-arthur_common/aggregations/functions/inference_count.py,sha256=_llDm6rsjISpll0ORaP7Ms1KlZnQtGuOQk99p_IgNv8,3721
-arthur_common/aggregations/functions/inference_count_by_class.py,sha256=BkZEYO8KWmutCYIv8EiP3mEylC4rxaHSE2RxX5oWdGE,10804
-arthur_common/aggregations/functions/inference_null_count.py,sha256=FRNgSw9T9MtZFK6hD-2L6sBPbQ8m2HBmGlzpOp31Lio,4455
-arthur_common/aggregations/functions/mean_absolute_error.py,sha256=vokx77DAYkdrpGtZnSKJRAhuAX2Az0udXOzYVQ2ZgRI,5889
-arthur_common/aggregations/functions/mean_squared_error.py,sha256=83Ygxd2OTYxJQoZfFEFoGUz1acEuvc_dL3a0imcWvHk,5921
-arthur_common/aggregations/functions/multiclass_confusion_matrix.py,sha256=-LO1f6t7E8XShIfeJn1tdCzF4KN90V_9ACbd5G3-HEk,10862
-arthur_common/aggregations/functions/multiclass_inference_count_by_class.py,sha256=x7kwpMFgXal0yQqsV7nil244agxa0wpTsFPb2ws1hqY,3867
-arthur_common/aggregations/functions/numeric_stats.py,sha256=jaHThHXmqhYWAwsdRz4Qtmw2PfdW2CU14OUTZCmh9nw,4565
-arthur_common/aggregations/functions/numeric_sum.py,sha256=2sijXJKvETENsZsyHsP2ztAuKVbS6xfa8xnEMm5d0IE,4674
+arthur_common/aggregations/functions/categorical_count.py,sha256=na22lBhxASMMR0R9Z-3qBvToYN875tJm8u2ULVdrdYQ,5327
+arthur_common/aggregations/functions/confusion_matrix.py,sha256=MbtS_Nge7dgjNutdtzd0hx756qzLQlHS2MQxuwSuwxc,22108
+arthur_common/aggregations/functions/inference_count.py,sha256=lO-IgcmnsfRR1qmHbWjENJUSnQT-dXwZd9rVFOtKYrs,4078
+arthur_common/aggregations/functions/inference_count_by_class.py,sha256=sOgrMyeZh71U9uGvq8w-bYlXNPRI6jtR2jP-oV81hHo,11552
+arthur_common/aggregations/functions/inference_null_count.py,sha256=6dfkumX8NJjTB633Pt-shY5x99TXaqSyLcYVHk_DxHc,4824
+arthur_common/aggregations/functions/mean_absolute_error.py,sha256=-Nihcl_QcwZPn-LrHX6KgG9O-QSfoa6SY3LHt2xDCbg,6821
+arthur_common/aggregations/functions/mean_squared_error.py,sha256=kpADLvsJkg7C07nj5X1drk8ChRXvur_PjkzMB2uLazg,6842
+arthur_common/aggregations/functions/multiclass_confusion_matrix.py,sha256=zfKK5maUy3TXmVEkqXwtXs6NM3fjp0W0yc-zS0uXZT4,12615
+arthur_common/aggregations/functions/multiclass_inference_count_by_class.py,sha256=ZJU_GDMsq4XvqbhCAiH2J-DKrGVjXlz-E2nxrd7pM6c,4263
+arthur_common/aggregations/functions/numeric_stats.py,sha256=4auKDwtTNxqw86gA0q3AqOf0-IM9uYWZ_tMquuug_sE,4920
+arthur_common/aggregations/functions/numeric_sum.py,sha256=LcV2MWL-EOl0JPCozIGIoHkvohu7d2S1PHuAik-cAo4,5027
 arthur_common/aggregations/functions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/aggregations/functions/shield_aggregations.py,sha256=mLSpoYKGLdX8RoYryKLKIBgMjn5Z8ZWHauggXK9cpEY,31512
+arthur_common/aggregations/functions/shield_aggregations.py,sha256=KQzi97ILgn6UQhpQPyerrQ3CXMxs1vpuSUAAvIWf_zg,35630
 arthur_common/aggregations/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/config/config.py,sha256=fcpjOYjPKu4Duk63CuTHrOWKQKAlAhVUR60kF_2_Xog,1247
@@ -23,14 +23,14 @@ arthur_common/config/settings.yaml,sha256=0CrygUwJzC5mGcO5Xnvv2ttp-P7LIsx682jllY
 arthur_common/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/models/connectors.py,sha256=5f5DUgOQ16P3lBPZ0zpUv9kTAqw45Agrl526F-iFJes,1862
 arthur_common/models/datasets.py,sha256=giG_8mv_3ilBf7cIvRV0_TDCDdb4qxRbYZvl7hRb6l8,491
-arthur_common/models/metrics.py,sha256=ibJK2WkTw_yCTanx_4oU3CZFEmMWf9BcZYWeuvXsd7E,10885
+arthur_common/models/metrics.py,sha256=8_7ec0oFIjFGJpgRWS0Y28aaGCSd3j7dqa_QTYCNGus,11343
 arthur_common/models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/models/schema_definitions.py,sha256=0zXZKHKr49d7ATml2Tzw1AIFfM0i0HjIblM-qOwNxk8,14878
 arthur_common/models/shield.py,sha256=62SKLzlsUsuP3u7EnibtI1CrRYg3TummP4Wbwg5ZPUs,18310
 arthur_common/models/task_job_specs.py,sha256=uZo8eiTBHWf2EZGEQrDfJGVyYg_8wd9MHWLxn-5oNUk,2797
 arthur_common/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/tools/aggregation_analyzer.py,sha256=y0W70_19d6PtBxwSe0pQKVisHkfapFnKMebmX2TJCi0,11113
+arthur_common/tools/aggregation_analyzer.py,sha256=UfMtvFWXV2Dqly8S6nneGgomuvEGN-1tBz81tfkMcAE,11206
 arthur_common/tools/aggregation_loader.py,sha256=3CF46bNi-GdJBNOXkjYfCQ1Aung8lf65L532sdWmR_s,2351
 arthur_common/tools/duckdb_data_loader.py,sha256=nscmarfP5FeL8p-9e3uZhpGEV0xFqDJmR3t77HdR26U,11081
 arthur_common/tools/duckdb_utils.py,sha256=1i-kRXu95gh4Sf9Osl2LFUpdb0yZifOjLDtIgSfSmfs,1197
@@ -38,6 +38,6 @@ arthur_common/tools/functions.py,sha256=FWL4eWO5-vLp86WudT-MGUKvf2B8f02IdoXQFKd6
 arthur_common/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/tools/schema_inferer.py,sha256=Ur4CXGAkd6ZMSU0nMNrkOEElsBopHXq0lctTV8X92W8,5188
 arthur_common/tools/time_utils.py,sha256=4gfiu9NXfvPZltiVNLSIQGylX6h2W0viNi9Kv4bKyfw,1410
-arthur_common-2.1.52.dist-info/METADATA,sha256=LfIOLc5zbA8nx7aTwN65q7L3JmGZOdhQGSYmnSn_Mh8,1609
-arthur_common-2.1.52.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-arthur_common-2.1.52.dist-info/RECORD,,
+arthur_common-2.1.53.dist-info/METADATA,sha256=ezzkiB4FHTSRLK3rzvj1mqiRxYqxedgouR0q55zsVLk,1609
+arthur_common-2.1.53.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+arthur_common-2.1.53.dist-info/RECORD,,

{arthur_common-2.1.52.dist-info → arthur_common-2.1.53.dist-info}/WHEEL RENAMED Viewed

File without changes

arthur-common 2.1.52__py3-none-any.whl → 2.1.53__py3-none-any.whl

Potentially problematic release.

arthur-common 2.1.52__py3-none-any.whl → 2.1.53__py3-none-any.whl