PyPI - arthur-common - Versions diffs - 2.1.52__py3-none-any.whl → 2.1.54__py3-none-any.whl - Mend

arthur-common 2.1.52__py3-none-any.whl → 2.1.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arthur-common might be problematic. Click here for more details.

Files changed (23) hide show

arthur_common/aggregations/functions/categorical_count.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of discrete values of each category in a string column. Creates a separate dimension for each category and the values are the count of occurrences of that category in the time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing METRIC_NAME with description()."""
+        owner = CategoricalCountAggregationFunction
+        count_aggregation = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [count_aggregation]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/confusion_matrix.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -20,6 +24,32 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
 class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
+    TRUE_POSITIVE_METRIC_NAME = "confusion_matrix_true_positive_count"
+    FALSE_POSITIVE_METRIC_NAME = "confusion_matrix_false_positive_count"
+    FALSE_NEGATIVE_METRIC_NAME = "confusion_matrix_false_negative_count"
+    TRUE_NEGATIVE_METRIC_NAME = "confusion_matrix_true_negative_count"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Describe the four confusion-matrix count metrics in TP, FP, FN, TN order."""
+        owner = ConfusionMatrixAggregationFunction
+        # (metric name, description) pairs, kept in the reported order.
+        names_and_descriptions = (
+            (
+                owner.TRUE_POSITIVE_METRIC_NAME,
+                "Confusion matrix true positives count.",
+            ),
+            (
+                owner.FALSE_POSITIVE_METRIC_NAME,
+                "Confusion matrix false positives count.",
+            ),
+            (
+                owner.FALSE_NEGATIVE_METRIC_NAME,
+                "Confusion matrix false negatives count.",
+            ),
+            (
+                owner.TRUE_NEGATIVE_METRIC_NAME,
+                "Confusion matrix true negatives count.",
+            ),
+        )
+        return [
+            BaseReportedAggregation(metric_name=name, description=text)
+            for name, text in names_and_descriptions
+        ]
     def generate_confusion_matrix_metrics(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -129,10 +159,10 @@ class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
             dim_columns=segmentation_cols + extra_dims,
             timestamp_col="ts",
         )
-        tp_metric = self.series_to_metric("confusion_matrix_true_positive_count", tp)
-        fp_metric = self.series_to_metric("confusion_matrix_false_positive_count", fp)
-        fn_metric = self.series_to_metric("confusion_matrix_false_negative_count", fn)
-        tn_metric = self.series_to_metric("confusion_matrix_true_negative_count", tn)
+        tp_metric = self.series_to_metric(self.TRUE_POSITIVE_METRIC_NAME, tp)
+        fp_metric = self.series_to_metric(self.FALSE_POSITIVE_METRIC_NAME, fp)
+        fn_metric = self.series_to_metric(self.FALSE_NEGATIVE_METRIC_NAME, fn)
+        tn_metric = self.series_to_metric(self.TRUE_NEGATIVE_METRIC_NAME, tn)
         return [tp_metric, fp_metric, fn_metric, tn_metric]

arthur_common/aggregations/functions/inference_count.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +36,15 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of inferences per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing METRIC_NAME with description()."""
+        owner = InferenceCountAggregationFunction
+        inference_count = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [inference_count]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/inference_count_by_class.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -36,6 +40,15 @@ class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing _metric_name() with description()."""
+        owner = BinaryClassifierCountByClassAggregationFunction
+        # Evaluate the name before the description, as the inline form did.
+        name = owner._metric_name()
+        text = owner.description()
+        return [BaseReportedAggregation(metric_name=name, description=text)]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -153,6 +166,15 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
     def _metric_name() -> str:
         return "binary_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing _metric_name() with description()."""
+        owner = BinaryClassifierCountThresholdClassAggregationFunction
+        # Evaluate the name before the description, as the inline form did.
+        name = owner._metric_name()
+        text = owner.description()
+        return [BaseReportedAggregation(metric_name=name, description=text)]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/inference_null_count.py CHANGED Viewed

@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that counts the number of null values in the column per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing METRIC_NAME with description()."""
+        owner = InferenceNullCountAggregationFunction
+        null_count = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [null_count]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/mean_absolute_error.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
 class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
+    ABSOLUTE_ERROR_COUNT_METRIC_NAME = "absolute_error_count"
+    ABSOLUTE_ERROR_SUM_METRIC_NAME = "absolute_error_sum"
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-00000000000e")
@@ -31,6 +38,19 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the absolute error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Describe the two reported metrics: the absolute-error sum, then its row count.
+
+        Each metric name is paired with the description of that same metric:
+        the "sum" name carries the sum text and the "count" name carries the
+        count text.
+        """
+        return [
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME,
+                description="Sum of the absolute error of a prediction and ground truth column, omitting rows where either column is null.",
+            ),
+            BaseReportedAggregation(
+                metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
+                description=f"Count of non-null rows used in the calculation of the {MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME} metric.",
+            ),
+        ]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,12 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )
-        count_metric = self.series_to_metric("absolute_error_count", count_series)
+        count_metric = self.series_to_metric(
+            self.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
+            count_series,
+        )
         absolute_error_metric = self.series_to_metric(
-            "absolute_error_sum",
+            self.ABSOLUTE_ERROR_SUM_METRIC_NAME,
             absolute_error_series,
         )

arthur_common/aggregations/functions/mean_squared_error.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
 class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
+    SQUARED_ERROR_COUNT_METRIC_NAME = "squared_error_count"
+    SQUARED_ERROR_SUM_METRIC_NAME = "squared_error_sum"
     @staticmethod
     def id() -> UUID:
         return UUID("00000000-0000-0000-0000-000000000010")
@@ -31,6 +38,19 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that sums the squared error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Describe the two reported metrics: the squared-error sum, then its row count."""
+        owner = MeanSquaredErrorAggregationFunction
+        sum_name = owner.SQUARED_ERROR_SUM_METRIC_NAME
+        sum_aggregation = BaseReportedAggregation(
+            metric_name=sum_name,
+            description="Sum of the squared error of a prediction and ground truth column, omitting rows where either column is null.",
+        )
+        count_aggregation = BaseReportedAggregation(
+            metric_name=owner.SQUARED_ERROR_COUNT_METRIC_NAME,
+            description=f"Count of non-null rows used in the calculation of the {sum_name} metric.",
+        )
+        return [sum_aggregation, count_aggregation]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,12 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
             "ts",
         )
-        count_metric = self.series_to_metric("squared_error_count", count_series)
+        count_metric = self.series_to_metric(
+            self.SQUARED_ERROR_COUNT_METRIC_NAME,
+            count_series,
+        )
         absolute_error_metric = self.series_to_metric(
-            "squared_error_sum",
+            self.SQUARED_ERROR_SUM_METRIC_NAME,
             squared_error_series,
         )

arthur_common/aggregations/functions/multiclass_confusion_matrix.py CHANGED Viewed

@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -22,6 +26,19 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
 class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction(
     NumericAggregationFunction,
 ):
+    MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_positive_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_false_negative_count"
+    )
+    MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME = (
+        "multiclass_confusion_matrix_single_class_true_negative_count"
+    )
     @staticmethod
     def id() -> UUID:
         return UUID("dc728927-6928-4a3b-b174-8c1ec8b58d62")
@@ -38,6 +55,27 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             "False Negatives, True Negatives) for that class compared to all others."
         )
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Describe the four single-class confusion-matrix counts in TP, FP, FN, TN order."""
+        owner = MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction
+        # (metric name, description) pairs, kept in the reported order.
+        names_and_descriptions = (
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
+                "Confusion matrix true positives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
+                "Confusion matrix false positives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
+                "Confusion matrix false negatives count.",
+            ),
+            (
+                owner.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
+                "Confusion matrix true negatives count.",
+            ),
+        )
+        return [
+            BaseReportedAggregation(metric_name=name, description=text)
+            for name, text in names_and_descriptions
+        ]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,
@@ -238,19 +276,19 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
             timestamp_col="ts",
         )
         tp_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_true_positive_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
             tp,
         )
         fp_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_false_positive_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
             fp,
         )
         fn_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_false_negative_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
             fn,
         )
         tn_metric = self.series_to_metric(
-            "multiclass_confusion_matrix_single_class_true_negative_count",
+            self.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
             tn,
         )
         return [tp_metric, fp_metric, fn_metric, tn_metric]

arthur_common/aggregations/functions/multiclass_inference_count_by_class.py CHANGED Viewed

@@ -7,7 +7,11 @@ from arthur_common.aggregations.functions.inference_count_by_class import (
     BinaryClassifierCountByClassAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
-from arthur_common.models.metrics import DatasetReference, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -47,6 +51,15 @@ class MulticlassClassifierCountByClassAggregationFunction(
     def _metric_name() -> str:
         return "multiclass_classifier_count_by_class"
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing _metric_name() with description()."""
+        owner = MulticlassClassifierCountByClassAggregationFunction
+        # Evaluate the name before the description, as the inline form did.
+        name = owner._metric_name()
+        text = owner.description()
+        return [BaseReportedAggregation(metric_name=name, description=text)]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/numeric_stats.py CHANGED Viewed

@@ -4,7 +4,11 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import SketchAggregationFunction
-from arthur_common.models.metrics import DatasetReference, SketchMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    SketchMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -34,6 +38,15 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
             "Metric that calculates a distribution (data sketch) on a numeric column."
         )
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing METRIC_NAME with description()."""
+        owner = NumericSketchAggregationFunction
+        sketch_aggregation = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [sketch_aggregation]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur_common/aggregations/functions/numeric_sum.py CHANGED Viewed

@@ -4,7 +4,12 @@ from uuid import UUID
 from duckdb import DuckDBPyConnection
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
-from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
+from arthur_common.models.metrics import (
+    BaseReportedAggregation,
+    DatasetReference,
+    Dimension,
+    NumericMetric,
+)
 from arthur_common.models.schema_definitions import (
     SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
@@ -32,6 +37,15 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
     def description() -> str:
         return "Metric that reports the sum of the numeric column per time window."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        """Return a one-element list pairing METRIC_NAME with description()."""
+        owner = NumericSumAggregationFunction
+        sum_aggregation = BaseReportedAggregation(
+            metric_name=owner.METRIC_NAME,
+            description=owner.description(),
+        )
+        return [sum_aggregation]
     def aggregate(
         self,
         ddb_conn: DuckDBPyConnection,

arthur-common 2.1.52__py3-none-any.whl → 2.1.54__py3-none-any.whl

Potentially problematic release.

arthur-common 2.1.52__py3-none-any.whl → 2.1.54__py3-none-any.whl