arthur_common-1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arthur-common has been flagged as potentially problematic.
- arthur_common/__init__.py +0 -0
- arthur_common/__version__.py +1 -0
- arthur_common/aggregations/__init__.py +2 -0
- arthur_common/aggregations/aggregator.py +214 -0
- arthur_common/aggregations/functions/README.md +26 -0
- arthur_common/aggregations/functions/__init__.py +25 -0
- arthur_common/aggregations/functions/categorical_count.py +89 -0
- arthur_common/aggregations/functions/confusion_matrix.py +412 -0
- arthur_common/aggregations/functions/inference_count.py +69 -0
- arthur_common/aggregations/functions/inference_count_by_class.py +206 -0
- arthur_common/aggregations/functions/inference_null_count.py +82 -0
- arthur_common/aggregations/functions/mean_absolute_error.py +110 -0
- arthur_common/aggregations/functions/mean_squared_error.py +110 -0
- arthur_common/aggregations/functions/multiclass_confusion_matrix.py +205 -0
- arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +90 -0
- arthur_common/aggregations/functions/numeric_stats.py +90 -0
- arthur_common/aggregations/functions/numeric_sum.py +87 -0
- arthur_common/aggregations/functions/py.typed +0 -0
- arthur_common/aggregations/functions/shield_aggregations.py +752 -0
- arthur_common/aggregations/py.typed +0 -0
- arthur_common/models/__init__.py +0 -0
- arthur_common/models/connectors.py +41 -0
- arthur_common/models/datasets.py +22 -0
- arthur_common/models/metrics.py +227 -0
- arthur_common/models/py.typed +0 -0
- arthur_common/models/schema_definitions.py +420 -0
- arthur_common/models/shield.py +504 -0
- arthur_common/models/task_job_specs.py +78 -0
- arthur_common/py.typed +0 -0
- arthur_common/tools/__init__.py +0 -0
- arthur_common/tools/aggregation_analyzer.py +243 -0
- arthur_common/tools/aggregation_loader.py +59 -0
- arthur_common/tools/duckdb_data_loader.py +329 -0
- arthur_common/tools/functions.py +46 -0
- arthur_common/tools/py.typed +0 -0
- arthur_common/tools/schema_inferer.py +104 -0
- arthur_common/tools/time_utils.py +33 -0
- arthur_common-1.0.1.dist-info/METADATA +74 -0
- arthur_common-1.0.1.dist-info/RECORD +40 -0
- arthur_common-1.0.1.dist-info/WHEEL +4 -0
arthur_common/aggregations/functions/confusion_matrix.py

@@ -0,0 +1,412 @@

from typing import Annotated
from uuid import UUID

from arthur_common.aggregations.aggregator import NumericAggregationFunction
from arthur_common.models.datasets import ModelProblemType
from arthur_common.models.metrics import DatasetReference, NumericMetric
from arthur_common.models.schema_definitions import (
    DType,
    MetricColumnParameterAnnotation,
    MetricDatasetParameterAnnotation,
    MetricLiteralParameterAnnotation,
    ScalarType,
    ScopeSchemaTag,
)
from arthur_common.tools.duckdb_data_loader import escape_identifier
from duckdb import DuckDBPyConnection


class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
    def generate_confusion_matrix_metrics(
        self,
        ddb_conn: DuckDBPyConnection,
        timestamp_col: str,
        prediction_col: str,
        gt_values_col: str,
        prediction_normalization_case: str,
        gt_normalization_case: str,
        dataset: DatasetReference,
    ) -> list[NumericMetric]:
        """
        Generate a SQL query to compute confusion matrix metrics over time.

        Args:
            timestamp_col: Column name containing timestamps
            prediction_col: Column name containing predictions
            gt_values_col: Column name containing ground truth values
            prediction_normalization_case: SQL CASE statement for normalizing predictions to 0 / 1 / null using 'value' as the target column name
            gt_normalization_case: SQL CASE statement for normalizing ground truth values to 0 / 1 / null using 'value' as the target column name
            dataset: DatasetReference containing dataset metadata

        Returns:
            str: SQL query that computes confusion matrix metrics
        """
        escaped_timestamp_col = escape_identifier(timestamp_col)
        escaped_prediction_col = escape_identifier(prediction_col)
        escaped_gt_values_col = escape_identifier(gt_values_col)
        confusion_matrix_query = f"""
            WITH normalized_data AS (
                SELECT
                    {escaped_timestamp_col} AS timestamp,
                    {prediction_normalization_case.replace('value', escaped_prediction_col)} AS prediction,
                    {gt_normalization_case.replace('value', escaped_gt_values_col)} AS actual_value
                FROM {dataset.dataset_table_name}
                WHERE {escaped_timestamp_col} IS NOT NULL
            )
            SELECT
                time_bucket(INTERVAL '5 minutes', timestamp) AS ts,
                SUM(CASE WHEN prediction = actual_value AND actual_value = 1 THEN 1 ELSE 0 END) AS true_positive_count,
                SUM(CASE WHEN prediction != actual_value AND actual_value = 0 THEN 1 ELSE 0 END) AS false_positive_count,
                SUM(CASE WHEN prediction != actual_value AND actual_value = 1 THEN 1 ELSE 0 END) AS false_negative_count,
                SUM(CASE WHEN prediction = actual_value AND actual_value = 0 THEN 1 ELSE 0 END) AS true_negative_count
            FROM normalized_data
            GROUP BY ts
            ORDER BY ts
        """

        results = ddb_conn.sql(confusion_matrix_query).df()

        tp = self.dimensionless_query_results_to_numeric_metrics(
            results,
            "true_positive_count",
            timestamp_col="ts",
        )
        fp = self.dimensionless_query_results_to_numeric_metrics(
            results,
            "false_positive_count",
            timestamp_col="ts",
        )
        fn = self.dimensionless_query_results_to_numeric_metrics(
            results,
            "false_negative_count",
            timestamp_col="ts",
        )
        tn = self.dimensionless_query_results_to_numeric_metrics(
            results,
            "true_negative_count",
            timestamp_col="ts",
        )
        tp_metric = self.series_to_metric("confusion_matrix_true_positive_count", [tp])
        fp_metric = self.series_to_metric("confusion_matrix_false_positive_count", [fp])
        fn_metric = self.series_to_metric("confusion_matrix_false_negative_count", [fn])
        tn_metric = self.series_to_metric("confusion_matrix_true_negative_count", [tn])
        return [tp_metric, fp_metric, fn_metric, tn_metric]


class BinaryClassifierIntBoolConfusionMatrixAggregationFunction(
    ConfusionMatrixAggregationFunction,
):
    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-00000000001c")

    @staticmethod
    def display_name() -> str:
        return "Binary Classification Confusion Matrix - Int/Bool Prediction"

    @staticmethod
    def description() -> str:
        return "Aggregation that takes in boolean or integer prediction and ground truth values and calculates the confusion matrix (True Positives, False Positives, False Negatives, True Negatives) for a binary set of predictions and values."

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing the prediction and ground truth values.",
                model_problem_type=ModelProblemType.BINARY_CLASSIFICATION,
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
        prediction_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.BOOL),
                    ScalarType(dtype=DType.INT),
                ],
                tag_hints=[ScopeSchemaTag.PREDICTION],
                friendly_name="Prediction Column",
                description="A column containing boolean or integer prediction values.",
            ),
        ],
        gt_values_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.BOOL),
                    ScalarType(dtype=DType.INT),
                ],
                tag_hints=[ScopeSchemaTag.GROUND_TRUTH],
                friendly_name="Ground Truth Column",
                description="A column containing boolean or integer ground truth values.",
            ),
        ],
    ) -> list[NumericMetric]:
        escaped_prediction_col = escape_identifier(prediction_col)
        # Get the type of prediction column
        type_query = f"SELECT typeof({escaped_prediction_col}) as col_type FROM {dataset.dataset_table_name} LIMIT 1"
        res = ddb_conn.sql(type_query).fetchone()
        # As long as this column exists, we should be able to get the type. This is here to make mypy happy.
        if not res:
            raise ValueError(f"No results found for type query: {type_query}")
        col_type = res[0].lower()

        match col_type:
            case "boolean":
                normalization_case = """
                    CASE
                        WHEN value THEN 1
                        ELSE 0
                    END
                """
            case "integer" | "bigint":
                normalization_case = """
                    CASE
                        WHEN value = 1 THEN 1
                        WHEN value = 0 THEN 0
                        ELSE NULL
                    END
                """
            case _:
                raise ValueError(f"Unsupported column type: {col_type}")

        return self.generate_confusion_matrix_metrics(
            ddb_conn,
            timestamp_col,
            prediction_col,
            gt_values_col,
            normalization_case,
            normalization_case,
            dataset,
        )


class BinaryClassifierStringLabelConfusionMatrixAggregationFunction(
    ConfusionMatrixAggregationFunction,
):
    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-00000000001d")

    @staticmethod
    def display_name() -> str:
        return "Binary Classification Confusion Matrix - String Class Label Prediction"

    @staticmethod
    def description() -> str:
        return "Aggregation that takes in string labelled prediction and ground truth values and calculates the confusion matrix (True Positives, False Positives, False Negatives, True Negatives) for a binary set of predictions and values."

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing the prediction and ground truth values.",
                model_problem_type=ModelProblemType.BINARY_CLASSIFICATION,
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
        prediction_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.STRING),
                ],
                tag_hints=[ScopeSchemaTag.PREDICTION],
                friendly_name="Prediction Column",
                description="A column containing string labelled prediction values.",
            ),
        ],
        gt_values_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.STRING),
                ],
                tag_hints=[ScopeSchemaTag.GROUND_TRUTH],
                friendly_name="Ground Truth Column",
                description="A column containing string labelled ground truth values.",
            ),
        ],
        true_label: Annotated[
            str,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.STRING,
                friendly_name="True Label",
                description="The label indicating a positive classification to normalize to 1.",
            ),
        ],
        false_label: Annotated[
            str,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.STRING,
                friendly_name="False Label",
                description="The label indicating a negative classification to normalize to 0.",
            ),
        ],
    ) -> list[NumericMetric]:
        normalization_case = f"""
            CASE
                WHEN value = '{true_label}' THEN 1
                WHEN value = '{false_label}' THEN 0
                ELSE NULL
            END
        """
        return self.generate_confusion_matrix_metrics(
            ddb_conn,
            timestamp_col,
            prediction_col,
            gt_values_col,
            normalization_case,
            normalization_case,
            dataset,
        )


class BinaryClassifierProbabilityThresholdConfusionMatrixAggregationFunction(
    ConfusionMatrixAggregationFunction,
):
    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-00000000001e")

    @staticmethod
    def display_name() -> str:
        return "Binary Classification Confusion Matrix - Probability Threshold"

    @staticmethod
    def description() -> str:
        return "Aggregation that takes in a float prediction column, a ground truth values column, and a probability threshold and calculates the confusion matrix (True Positives, False Positives, False Negatives, True Negatives) for a binary set of predictions and values where the predictions are calculated using the probability threshold."

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing the prediction and ground truth values.",
                model_problem_type=ModelProblemType.BINARY_CLASSIFICATION,
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
        prediction_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.FLOAT),
                ],
                tag_hints=[ScopeSchemaTag.PREDICTION],
                friendly_name="Prediction Column",
                description="A column containing float prediction values.",
            ),
        ],
        gt_values_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.BOOL),
                    ScalarType(dtype=DType.INT),
                ],
                tag_hints=[ScopeSchemaTag.GROUND_TRUTH],
                friendly_name="Ground Truth Column",
                description="A column containing boolean or integer ground truth values.",
            ),
        ],
        threshold: Annotated[
            float,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.FLOAT,
                friendly_name="Threshold",
                description="The threshold to classify predictions to 0 or 1.",
            ),
        ],
    ) -> list[NumericMetric]:
        escaped_gt_values_col = escape_identifier(gt_values_col)
        prediction_normalization_case = f"""
            CASE
                WHEN value >= {threshold} THEN 1
                WHEN value < {threshold} THEN 0
                ELSE NULL
            END
        """

        type_query = f"SELECT typeof({escaped_gt_values_col}) as col_type FROM {dataset.dataset_table_name} LIMIT 1"
        res = ddb_conn.sql(type_query).fetchone()
        # As long as this column exists, we should be able to get the type. This is here to make mypy happy.
        if not res:
            raise ValueError(f"No results found for type query: {type_query}")
        col_type = res[0].lower()

        match col_type:
            case "boolean":
                gt_normalization_case = """
                    CASE
                        WHEN value THEN 1
                        ELSE 0
                    END
                """
            case "integer" | "bigint":
                gt_normalization_case = """
                    CASE
                        WHEN value = 1 THEN 1
                        WHEN value = 0 THEN 0
                        ELSE NULL
                    END
                """
            case _:
                raise ValueError(f"Unsupported column type: {col_type}")

        return self.generate_confusion_matrix_metrics(
            ddb_conn,
            timestamp_col,
            prediction_col,
            gt_values_col,
            prediction_normalization_case,
            gt_normalization_case,
            dataset,
        )
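All three confusion-matrix aggregations above build the same DuckDB query shape: normalize predictions and ground truth to 0/1 with a CASE expression, bucket rows into 5-minute windows with time_bucket, and sum the four confusion-matrix cells. The following is a minimal standalone sketch of that pattern on toy data; it is not part of the package, and the table and column names (inferences, ts_col, prediction, ground_truth) are invented for illustration.

# Sketch only: mirrors the normalize-then-bucket query used by
# ConfusionMatrixAggregationFunction, run directly against DuckDB.
# .df() requires pandas to be installed.
import duckdb

conn = duckdb.connect()
conn.sql("""
    CREATE TABLE inferences AS
    SELECT * FROM (VALUES
        (TIMESTAMP '2024-01-01 00:01:00', 1, 1),
        (TIMESTAMP '2024-01-01 00:02:00', 1, 0),
        (TIMESTAMP '2024-01-01 00:03:00', 0, 1),
        (TIMESTAMP '2024-01-01 00:07:00', 0, 0)
    ) AS t(ts_col, prediction, ground_truth)
""")
print(conn.sql("""
    WITH normalized_data AS (
        SELECT
            ts_col AS timestamp,
            CASE WHEN prediction = 1 THEN 1 WHEN prediction = 0 THEN 0 ELSE NULL END AS prediction,
            CASE WHEN ground_truth = 1 THEN 1 WHEN ground_truth = 0 THEN 0 ELSE NULL END AS actual_value
        FROM inferences
        WHERE ts_col IS NOT NULL
    )
    SELECT
        time_bucket(INTERVAL '5 minutes', timestamp) AS ts,
        SUM(CASE WHEN prediction = actual_value AND actual_value = 1 THEN 1 ELSE 0 END) AS true_positive_count,
        SUM(CASE WHEN prediction != actual_value AND actual_value = 0 THEN 1 ELSE 0 END) AS false_positive_count,
        SUM(CASE WHEN prediction != actual_value AND actual_value = 1 THEN 1 ELSE 0 END) AS false_negative_count,
        SUM(CASE WHEN prediction = actual_value AND actual_value = 0 THEN 1 ELSE 0 END) AS true_negative_count
    FROM normalized_data
    GROUP BY ts
    ORDER BY ts
""").df())

The subclasses differ only in how the normalization CASE is produced: from the column's DuckDB type for int/bool columns, from the configured true/false labels for string columns, or from a probability threshold for float columns.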
arthur_common/aggregations/functions/inference_count.py

@@ -0,0 +1,69 @@

from typing import Annotated
from uuid import UUID

from arthur_common.aggregations.aggregator import NumericAggregationFunction
from arthur_common.models.metrics import DatasetReference, NumericMetric
from arthur_common.models.schema_definitions import (
    DType,
    MetricColumnParameterAnnotation,
    MetricDatasetParameterAnnotation,
    ScalarType,
    ScopeSchemaTag,
)
from arthur_common.tools.duckdb_data_loader import escape_identifier
from duckdb import DuckDBPyConnection


class InferenceCountAggregationFunction(NumericAggregationFunction):
    METRIC_NAME = "inference_count"

    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-00000000000a")

    @staticmethod
    def display_name() -> str:
        return "Inference Count"

    @staticmethod
    def description() -> str:
        return "Metric that counts the number of inferences per time window."

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing the inference data.",
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
    ) -> list[NumericMetric]:
        escaped_timestamp_col = escape_identifier(timestamp_col)
        count_query = f" \
            select time_bucket(INTERVAL '5 minutes', {escaped_timestamp_col}) as ts, \
            count(*) as count \
            from {dataset.dataset_table_name} \
            group by ts \
            "
        results = ddb_conn.sql(count_query).df()
        series = self.dimensionless_query_results_to_numeric_metrics(
            results,
            "count",
            "ts",
        )
        metric = self.series_to_metric(self.METRIC_NAME, [series])
        return [metric]
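The inference-count aggregation is the simplest instance of the pattern: one count(*) per 5-minute time_bucket. A standalone sketch of that query on generated toy data (not part of the package; names are invented, .df() needs pandas):

# Sketch only: count rows per 5-minute bucket, as InferenceCountAggregationFunction does.
import duckdb

conn = duckdb.connect()
conn.sql("""
    CREATE TABLE inferences AS
    SELECT TIMESTAMP '2024-01-01 00:00:00' + INTERVAL 1 MINUTE * i AS ts_col
    FROM range(12) AS t(i)
""")
print(conn.sql("""
    select time_bucket(INTERVAL '5 minutes', ts_col) as ts,
           count(*) as count
    from inferences
    group by ts
    order by ts
""").df())
# Expected: buckets at 00:00, 00:05, 00:10 with counts 5, 5, 2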
arthur_common/aggregations/functions/inference_count_by_class.py

@@ -0,0 +1,206 @@

from typing import Annotated
from uuid import UUID

from arthur_common.aggregations.aggregator import NumericAggregationFunction
from arthur_common.models.datasets import ModelProblemType
from arthur_common.models.metrics import DatasetReference, NumericMetric
from arthur_common.models.schema_definitions import (
    DType,
    MetricColumnParameterAnnotation,
    MetricDatasetParameterAnnotation,
    MetricLiteralParameterAnnotation,
    ScalarType,
    ScopeSchemaTag,
)
from arthur_common.tools.duckdb_data_loader import escape_identifier
from duckdb import DuckDBPyConnection


class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction):
    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-00000000001f")

    @staticmethod
    def display_name() -> str:
        return "Binary Classification Count by Class - Class Label"

    @staticmethod
    def description() -> str:
        return "Aggregation that counts the number of predictions by class for a binary classifier. Takes boolean, integer, or string prediction values and groups them by time bucket to show prediction distribution over time."

    @staticmethod
    def _metric_name() -> str:
        return "binary_classifier_count_by_class"

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing binary classifier prediction values.",
                model_problem_type=ModelProblemType.BINARY_CLASSIFICATION,
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
        prediction_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.BOOL),
                    ScalarType(dtype=DType.INT),
                    ScalarType(dtype=DType.STRING),
                ],
                tag_hints=[ScopeSchemaTag.PREDICTION],
                friendly_name="Prediction Column",
                description="A column containing boolean, integer, or string labelled prediction values.",
            ),
        ],
    ) -> list[NumericMetric]:
        escaped_timestamp_col = escape_identifier(timestamp_col)
        escaped_pred_col = escape_identifier(prediction_col)
        query = f"""
            SELECT
                time_bucket(INTERVAL '5 minutes', {escaped_timestamp_col}) as ts,
                {escaped_pred_col} as prediction,
                COUNT(*) as count
            FROM {dataset.dataset_table_name}
            GROUP BY
                ts,
                -- group by raw column name instead of alias in select
                -- in case table has a column called 'prediction'
                {escaped_pred_col}
            ORDER BY ts
        """

        result = ddb_conn.sql(query).df()

        series = self.group_query_results_to_numeric_metrics(
            result,
            "count",
            ["prediction"],
            "ts",
        )
        metric = self.series_to_metric(self._metric_name(), series)
        return [metric]


class BinaryClassifierCountThresholdClassAggregationFunction(
    NumericAggregationFunction,
):
    @staticmethod
    def id() -> UUID:
        return UUID("00000000-0000-0000-0000-000000000020")

    @staticmethod
    def display_name() -> str:
        return "Binary Classification Count by Class - Probability Threshold"

    @staticmethod
    def description() -> str:
        return "Aggregation that counts the number of predictions by class for a binary classifier using a probability threshold. Takes float prediction values and a threshold value to classify predictions, then groups them by time bucket to show prediction distribution over time."

    @staticmethod
    def _metric_name() -> str:
        return "binary_classifier_count_by_class"

    def aggregate(
        self,
        ddb_conn: DuckDBPyConnection,
        dataset: Annotated[
            DatasetReference,
            MetricDatasetParameterAnnotation(
                friendly_name="Dataset",
                description="The dataset containing binary classifier prediction values.",
                model_problem_type=ModelProblemType.BINARY_CLASSIFICATION,
            ),
        ],
        timestamp_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                tag_hints=[ScopeSchemaTag.PRIMARY_TIMESTAMP],
                allowed_column_types=[
                    ScalarType(dtype=DType.TIMESTAMP),
                ],
                friendly_name="Timestamp Column",
                description="A column containing timestamp values to bucket by.",
            ),
        ],
        prediction_col: Annotated[
            str,
            MetricColumnParameterAnnotation(
                source_dataset_parameter_key="dataset",
                allowed_column_types=[
                    ScalarType(dtype=DType.FLOAT),
                ],
                tag_hints=[ScopeSchemaTag.PREDICTION],
                friendly_name="Prediction Column",
                description="A column containing float prediction values.",
            ),
        ],
        threshold: Annotated[
            float,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.FLOAT,
                friendly_name="Threshold",
                description="The threshold to classify predictions to 0 or 1. 0 will result in the 'False Label' being assigned and 1 to the 'True Label' being assigned.",
            ),
        ],
        true_label: Annotated[
            str,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.STRING,
                friendly_name="True Label",
                description="The label denoting a positive classification.",
            ),
        ],
        false_label: Annotated[
            str,
            MetricLiteralParameterAnnotation(
                parameter_dtype=DType.STRING,
                friendly_name="False Label",
                description="The label denoting a negative classification.",
            ),
        ],
    ) -> list[NumericMetric]:
        escaped_timestamp_col = escape_identifier(timestamp_col)
        escaped_prediction_col = escape_identifier(prediction_col)
        query = f"""
            SELECT
                time_bucket(INTERVAL '5 minutes', {escaped_timestamp_col}) as ts,
                CASE WHEN {escaped_prediction_col} >= {threshold} THEN '{true_label}' ELSE '{false_label}' END as prediction,
                COUNT(*) as count
            FROM {dataset.dataset_table_name}
            GROUP BY
                ts,
                -- group by raw column name instead of alias in select
                -- in case table has a column called 'prediction'
                {escaped_prediction_col}
            ORDER BY ts
        """

        result = ddb_conn.sql(query).df()

        series = self.group_query_results_to_numeric_metrics(
            result,
            "count",
            ["prediction"],
            "ts",
        )
        metric = self.series_to_metric(self._metric_name(), series)
        return [metric]
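The threshold variant maps a float score to a class label inside the query before counting, grouping by the raw score column (not the 'prediction' alias) so an existing column named 'prediction' cannot shadow the alias. A standalone sketch of that shape on toy data (not part of the package; table, column, and label names are invented, .df() needs pandas):

# Sketch only: threshold a float score into labels, then count per bucket and label.
import duckdb

conn = duckdb.connect()
conn.sql("""
    CREATE TABLE inferences AS
    SELECT * FROM (VALUES
        (TIMESTAMP '2024-01-01 00:01:00', 0.91),
        (TIMESTAMP '2024-01-01 00:02:00', 0.42),
        (TIMESTAMP '2024-01-01 00:06:00', 0.73)
    ) AS t(ts_col, score)
""")
print(conn.sql("""
    SELECT
        time_bucket(INTERVAL '5 minutes', ts_col) AS ts,
        CASE WHEN score >= 0.5 THEN 'positive' ELSE 'negative' END AS prediction,
        COUNT(*) AS count
    FROM inferences
    -- grouping by the raw score column here, as the package query does
    GROUP BY ts, score
    ORDER BY ts
""").df())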