arthur-common 2.1.52__py3-none-any.whl → 2.1.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arthur-common might be problematic. Click here for more details.

@@ -4,7 +4,11 @@ from uuid import UUID
4
4
  from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
- from arthur_common.models.metrics import DatasetReference, NumericMetric
7
+ from arthur_common.models.metrics import (
8
+ BaseReportedAggregation,
9
+ DatasetReference,
10
+ NumericMetric,
11
+ )
8
12
  from arthur_common.models.schema_definitions import (
9
13
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
10
14
  DType,
@@ -32,6 +36,15 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
32
36
  def description() -> str:
33
37
  return "Metric that counts the number of discrete values of each category in a string column. Creates a separate dimension for each category and the values are the count of occurrences of that category in the time window."
34
38
 
39
+ @staticmethod
40
+ def reported_aggregations() -> list[BaseReportedAggregation]:
41
+ return [
42
+ BaseReportedAggregation(
43
+ metric_name=CategoricalCountAggregationFunction.METRIC_NAME,
44
+ description=CategoricalCountAggregationFunction.description(),
45
+ ),
46
+ ]
47
+
35
48
  def aggregate(
36
49
  self,
37
50
  ddb_conn: DuckDBPyConnection,
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
7
  from arthur_common.models.datasets import ModelProblemType
8
- from arthur_common.models.metrics import DatasetReference, NumericMetric
8
+ from arthur_common.models.metrics import (
9
+ BaseReportedAggregation,
10
+ DatasetReference,
11
+ NumericMetric,
12
+ )
9
13
  from arthur_common.models.schema_definitions import (
10
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
11
15
  DType,
@@ -20,6 +24,32 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
20
24
 
21
25
 
22
26
  class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
27
+ TRUE_POSITIVE_METRIC_NAME = "confusion_matrix_true_positive_count"
28
+ FALSE_POSITIVE_METRIC_NAME = "confusion_matrix_false_positive_count"
29
+ FALSE_NEGATIVE_METRIC_NAME = "confusion_matrix_false_negative_count"
30
+ TRUE_NEGATIVE_METRIC_NAME = "confusion_matrix_true_negative_count"
31
+
32
+ @staticmethod
33
+ def reported_aggregations() -> list[BaseReportedAggregation]:
34
+ return [
35
+ BaseReportedAggregation(
36
+ metric_name=ConfusionMatrixAggregationFunction.TRUE_POSITIVE_METRIC_NAME,
37
+ description="Confusion matrix true positives count.",
38
+ ),
39
+ BaseReportedAggregation(
40
+ metric_name=ConfusionMatrixAggregationFunction.FALSE_POSITIVE_METRIC_NAME,
41
+ description="Confusion matrix false positives count.",
42
+ ),
43
+ BaseReportedAggregation(
44
+ metric_name=ConfusionMatrixAggregationFunction.FALSE_NEGATIVE_METRIC_NAME,
45
+ description="Confusion matrix false negatives count.",
46
+ ),
47
+ BaseReportedAggregation(
48
+ metric_name=ConfusionMatrixAggregationFunction.TRUE_NEGATIVE_METRIC_NAME,
49
+ description="Confusion matrix true negatives count.",
50
+ ),
51
+ ]
52
+
23
53
  def generate_confusion_matrix_metrics(
24
54
  self,
25
55
  ddb_conn: DuckDBPyConnection,
@@ -129,10 +159,10 @@ class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
129
159
  dim_columns=segmentation_cols + extra_dims,
130
160
  timestamp_col="ts",
131
161
  )
132
- tp_metric = self.series_to_metric("confusion_matrix_true_positive_count", tp)
133
- fp_metric = self.series_to_metric("confusion_matrix_false_positive_count", fp)
134
- fn_metric = self.series_to_metric("confusion_matrix_false_negative_count", fn)
135
- tn_metric = self.series_to_metric("confusion_matrix_true_negative_count", tn)
162
+ tp_metric = self.series_to_metric(self.TRUE_POSITIVE_METRIC_NAME, tp)
163
+ fp_metric = self.series_to_metric(self.FALSE_POSITIVE_METRIC_NAME, fp)
164
+ fn_metric = self.series_to_metric(self.FALSE_NEGATIVE_METRIC_NAME, fn)
165
+ tn_metric = self.series_to_metric(self.TRUE_NEGATIVE_METRIC_NAME, tn)
136
166
  return [tp_metric, fp_metric, fn_metric, tn_metric]
137
167
 
138
168
 
@@ -4,7 +4,11 @@ from uuid import UUID
4
4
  from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
- from arthur_common.models.metrics import DatasetReference, NumericMetric
7
+ from arthur_common.models.metrics import (
8
+ BaseReportedAggregation,
9
+ DatasetReference,
10
+ NumericMetric,
11
+ )
8
12
  from arthur_common.models.schema_definitions import (
9
13
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
10
14
  DType,
@@ -32,6 +36,15 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
32
36
  def description() -> str:
33
37
  return "Metric that counts the number of inferences per time window."
34
38
 
39
+ @staticmethod
40
+ def reported_aggregations() -> list[BaseReportedAggregation]:
41
+ return [
42
+ BaseReportedAggregation(
43
+ metric_name=InferenceCountAggregationFunction.METRIC_NAME,
44
+ description=InferenceCountAggregationFunction.description(),
45
+ ),
46
+ ]
47
+
35
48
  def aggregate(
36
49
  self,
37
50
  ddb_conn: DuckDBPyConnection,
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
7
  from arthur_common.models.datasets import ModelProblemType
8
- from arthur_common.models.metrics import DatasetReference, NumericMetric
8
+ from arthur_common.models.metrics import (
9
+ BaseReportedAggregation,
10
+ DatasetReference,
11
+ NumericMetric,
12
+ )
9
13
  from arthur_common.models.schema_definitions import (
10
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
11
15
  DType,
@@ -36,6 +40,15 @@ class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction
36
40
  def _metric_name() -> str:
37
41
  return "binary_classifier_count_by_class"
38
42
 
43
+ @staticmethod
44
+ def reported_aggregations() -> list[BaseReportedAggregation]:
45
+ return [
46
+ BaseReportedAggregation(
47
+ metric_name=BinaryClassifierCountByClassAggregationFunction._metric_name(),
48
+ description=BinaryClassifierCountByClassAggregationFunction.description(),
49
+ ),
50
+ ]
51
+
39
52
  def aggregate(
40
53
  self,
41
54
  ddb_conn: DuckDBPyConnection,
@@ -153,6 +166,15 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
153
166
  def _metric_name() -> str:
154
167
  return "binary_classifier_count_by_class"
155
168
 
169
+ @staticmethod
170
+ def reported_aggregations() -> list[BaseReportedAggregation]:
171
+ return [
172
+ BaseReportedAggregation(
173
+ metric_name=BinaryClassifierCountThresholdClassAggregationFunction._metric_name(),
174
+ description=BinaryClassifierCountThresholdClassAggregationFunction.description(),
175
+ ),
176
+ ]
177
+
156
178
  def aggregate(
157
179
  self,
158
180
  ddb_conn: DuckDBPyConnection,
@@ -4,7 +4,12 @@ from uuid import UUID
4
4
  from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
- from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
7
+ from arthur_common.models.metrics import (
8
+ BaseReportedAggregation,
9
+ DatasetReference,
10
+ Dimension,
11
+ NumericMetric,
12
+ )
8
13
  from arthur_common.models.schema_definitions import (
9
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
10
15
  DType,
@@ -32,6 +37,15 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
32
37
  def description() -> str:
33
38
  return "Metric that counts the number of null values in the column per time window."
34
39
 
40
+ @staticmethod
41
+ def reported_aggregations() -> list[BaseReportedAggregation]:
42
+ return [
43
+ BaseReportedAggregation(
44
+ metric_name=InferenceNullCountAggregationFunction.METRIC_NAME,
45
+ description=InferenceNullCountAggregationFunction.description(),
46
+ ),
47
+ ]
48
+
35
49
  def aggregate(
36
50
  self,
37
51
  ddb_conn: DuckDBPyConnection,
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
7
  from arthur_common.models.datasets import ModelProblemType
8
- from arthur_common.models.metrics import DatasetReference, NumericMetric
8
+ from arthur_common.models.metrics import (
9
+ BaseReportedAggregation,
10
+ DatasetReference,
11
+ NumericMetric,
12
+ )
9
13
  from arthur_common.models.schema_definitions import (
10
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
11
15
  DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
19
23
 
20
24
 
21
25
  class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
26
+ ABSOLUTE_ERROR_COUNT_METRIC_NAME = "absolute_error_count"
27
+ ABSOLUTE_ERROR_SUM_METRIC_NAME = "absolute_error_sum"
28
+
22
29
  @staticmethod
23
30
  def id() -> UUID:
24
31
  return UUID("00000000-0000-0000-0000-00000000000e")
@@ -31,6 +38,19 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
31
38
  def description() -> str:
32
39
  return "Metric that sums the absolute error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
33
40
 
41
+ @staticmethod
42
+ def reported_aggregations() -> list[BaseReportedAggregation]:
43
+ return [
44
+ BaseReportedAggregation(
45
+ metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME,
46
+ description="Sum of the absolute error of a prediction and ground truth column, omitting rows where either column is null.",
47
+ ),
48
+ BaseReportedAggregation(
49
+ metric_name=MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
50
+ description=f"Count of non-null rows used in the calculation of the {MeanAbsoluteErrorAggregationFunction.ABSOLUTE_ERROR_SUM_METRIC_NAME} metric.",
51
+ ),
52
+ ]
53
+
34
54
  def aggregate(
35
55
  self,
36
56
  ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,12 @@ class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
138
158
  "ts",
139
159
  )
140
160
 
141
- count_metric = self.series_to_metric("absolute_error_count", count_series)
161
+ count_metric = self.series_to_metric(
162
+ self.ABSOLUTE_ERROR_COUNT_METRIC_NAME,
163
+ count_series,
164
+ )
142
165
  absolute_error_metric = self.series_to_metric(
143
- "absolute_error_sum",
166
+ self.ABSOLUTE_ERROR_SUM_METRIC_NAME,
144
167
  absolute_error_series,
145
168
  )
146
169
 
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
7
  from arthur_common.models.datasets import ModelProblemType
8
- from arthur_common.models.metrics import DatasetReference, NumericMetric
8
+ from arthur_common.models.metrics import (
9
+ BaseReportedAggregation,
10
+ DatasetReference,
11
+ NumericMetric,
12
+ )
9
13
  from arthur_common.models.schema_definitions import (
10
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
11
15
  DType,
@@ -19,6 +23,9 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier
19
23
 
20
24
 
21
25
  class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
26
+ SQUARED_ERROR_COUNT_METRIC_NAME = "squared_error_count"
27
+ SQUARED_ERROR_SUM_METRIC_NAME = "squared_error_sum"
28
+
22
29
  @staticmethod
23
30
  def id() -> UUID:
24
31
  return UUID("00000000-0000-0000-0000-000000000010")
@@ -31,6 +38,19 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
31
38
  def description() -> str:
32
39
  return "Metric that sums the squared error of a prediction and ground truth column. It omits any rows where either the prediction or ground truth are null. It reports the count of non-null rows used in the calculation in a second metric."
33
40
 
41
+ @staticmethod
42
+ def reported_aggregations() -> list[BaseReportedAggregation]:
43
+ return [
44
+ BaseReportedAggregation(
45
+ metric_name=MeanSquaredErrorAggregationFunction.SQUARED_ERROR_SUM_METRIC_NAME,
46
+ description="Sum of the squared error of a prediction and ground truth column, omitting rows where either column is null.",
47
+ ),
48
+ BaseReportedAggregation(
49
+ metric_name=MeanSquaredErrorAggregationFunction.SQUARED_ERROR_COUNT_METRIC_NAME,
50
+ description=f"Count of non-null rows used in the calculation of the {MeanSquaredErrorAggregationFunction.SQUARED_ERROR_SUM_METRIC_NAME} metric.",
51
+ ),
52
+ ]
53
+
34
54
  def aggregate(
35
55
  self,
36
56
  ddb_conn: DuckDBPyConnection,
@@ -138,9 +158,12 @@ class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
138
158
  "ts",
139
159
  )
140
160
 
141
- count_metric = self.series_to_metric("squared_error_count", count_series)
161
+ count_metric = self.series_to_metric(
162
+ self.SQUARED_ERROR_COUNT_METRIC_NAME,
163
+ count_series,
164
+ )
142
165
  absolute_error_metric = self.series_to_metric(
143
- "squared_error_sum",
166
+ self.SQUARED_ERROR_SUM_METRIC_NAME,
144
167
  squared_error_series,
145
168
  )
146
169
 
@@ -5,7 +5,11 @@ from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
7
  from arthur_common.models.datasets import ModelProblemType
8
- from arthur_common.models.metrics import DatasetReference, NumericMetric
8
+ from arthur_common.models.metrics import (
9
+ BaseReportedAggregation,
10
+ DatasetReference,
11
+ NumericMetric,
12
+ )
9
13
  from arthur_common.models.schema_definitions import (
10
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
11
15
  DType,
@@ -22,6 +26,19 @@ from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str
22
26
  class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction(
23
27
  NumericAggregationFunction,
24
28
  ):
29
+ MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME = (
30
+ "multiclass_confusion_matrix_single_class_true_positive_count"
31
+ )
32
+ MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME = (
33
+ "multiclass_confusion_matrix_single_class_false_positive_count"
34
+ )
35
+ MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME = (
36
+ "multiclass_confusion_matrix_single_class_false_negative_count"
37
+ )
38
+ MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME = (
39
+ "multiclass_confusion_matrix_single_class_true_negative_count"
40
+ )
41
+
25
42
  @staticmethod
26
43
  def id() -> UUID:
27
44
  return UUID("dc728927-6928-4a3b-b174-8c1ec8b58d62")
@@ -38,6 +55,27 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
38
55
  "False Negatives, True Negatives) for that class compared to all others."
39
56
  )
40
57
 
58
+ @staticmethod
59
+ def reported_aggregations() -> list[BaseReportedAggregation]:
60
+ return [
61
+ BaseReportedAggregation(
62
+ metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
63
+ description="Confusion matrix true positives count.",
64
+ ),
65
+ BaseReportedAggregation(
66
+ metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
67
+ description="Confusion matrix false positives count.",
68
+ ),
69
+ BaseReportedAggregation(
70
+ metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
71
+ description="Confusion matrix false negatives count.",
72
+ ),
73
+ BaseReportedAggregation(
74
+ metric_name=MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
75
+ description="Confusion matrix true negatives count.",
76
+ ),
77
+ ]
78
+
41
79
  def aggregate(
42
80
  self,
43
81
  ddb_conn: DuckDBPyConnection,
@@ -238,19 +276,19 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
238
276
  timestamp_col="ts",
239
277
  )
240
278
  tp_metric = self.series_to_metric(
241
- "multiclass_confusion_matrix_single_class_true_positive_count",
279
+ self.MULTICLASS_CM_SINGLE_CLASS_TP_COUNT_METRIC_NAME,
242
280
  tp,
243
281
  )
244
282
  fp_metric = self.series_to_metric(
245
- "multiclass_confusion_matrix_single_class_false_positive_count",
283
+ self.MULTICLASS_CM_SINGLE_CLASS_FP_COUNT_METRIC_NAME,
246
284
  fp,
247
285
  )
248
286
  fn_metric = self.series_to_metric(
249
- "multiclass_confusion_matrix_single_class_false_negative_count",
287
+ self.MULTICLASS_CM_SINGLE_CLASS_FN_COUNT_METRIC_NAME,
250
288
  fn,
251
289
  )
252
290
  tn_metric = self.series_to_metric(
253
- "multiclass_confusion_matrix_single_class_true_negative_count",
291
+ self.MULTICLASS_CM_SINGLE_CLASS_TN_COUNT_METRIC_NAME,
254
292
  tn,
255
293
  )
256
294
  return [tp_metric, fp_metric, fn_metric, tn_metric]
@@ -7,7 +7,11 @@ from arthur_common.aggregations.functions.inference_count_by_class import (
7
7
  BinaryClassifierCountByClassAggregationFunction,
8
8
  )
9
9
  from arthur_common.models.datasets import ModelProblemType
10
- from arthur_common.models.metrics import DatasetReference, NumericMetric
10
+ from arthur_common.models.metrics import (
11
+ BaseReportedAggregation,
12
+ DatasetReference,
13
+ NumericMetric,
14
+ )
11
15
  from arthur_common.models.schema_definitions import (
12
16
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
13
17
  DType,
@@ -47,6 +51,15 @@ class MulticlassClassifierCountByClassAggregationFunction(
47
51
  def _metric_name() -> str:
48
52
  return "multiclass_classifier_count_by_class"
49
53
 
54
+ @staticmethod
55
+ def reported_aggregations() -> list[BaseReportedAggregation]:
56
+ return [
57
+ BaseReportedAggregation(
58
+ metric_name=MulticlassClassifierCountByClassAggregationFunction._metric_name(),
59
+ description=MulticlassClassifierCountByClassAggregationFunction.description(),
60
+ ),
61
+ ]
62
+
50
63
  def aggregate(
51
64
  self,
52
65
  ddb_conn: DuckDBPyConnection,
@@ -4,7 +4,11 @@ from uuid import UUID
4
4
  from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import SketchAggregationFunction
7
- from arthur_common.models.metrics import DatasetReference, SketchMetric
7
+ from arthur_common.models.metrics import (
8
+ BaseReportedAggregation,
9
+ DatasetReference,
10
+ SketchMetric,
11
+ )
8
12
  from arthur_common.models.schema_definitions import (
9
13
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
10
14
  DType,
@@ -34,6 +38,15 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
34
38
  "Metric that calculates a distribution (data sketch) on a numeric column."
35
39
  )
36
40
 
41
+ @staticmethod
42
+ def reported_aggregations() -> list[BaseReportedAggregation]:
43
+ return [
44
+ BaseReportedAggregation(
45
+ metric_name=NumericSketchAggregationFunction.METRIC_NAME,
46
+ description=NumericSketchAggregationFunction.description(),
47
+ ),
48
+ ]
49
+
37
50
  def aggregate(
38
51
  self,
39
52
  ddb_conn: DuckDBPyConnection,
@@ -4,7 +4,12 @@ from uuid import UUID
4
4
  from duckdb import DuckDBPyConnection
5
5
 
6
6
  from arthur_common.aggregations.aggregator import NumericAggregationFunction
7
- from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
7
+ from arthur_common.models.metrics import (
8
+ BaseReportedAggregation,
9
+ DatasetReference,
10
+ Dimension,
11
+ NumericMetric,
12
+ )
8
13
  from arthur_common.models.schema_definitions import (
9
14
  SEGMENTATION_ALLOWED_COLUMN_TYPES,
10
15
  DType,
@@ -32,6 +37,15 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
32
37
  def description() -> str:
33
38
  return "Metric that reports the sum of the numeric column per time window."
34
39
 
40
+ @staticmethod
41
+ def reported_aggregations() -> list[BaseReportedAggregation]:
42
+ return [
43
+ BaseReportedAggregation(
44
+ metric_name=NumericSumAggregationFunction.METRIC_NAME,
45
+ description=NumericSumAggregationFunction.description(),
46
+ ),
47
+ ]
48
+
35
49
  def aggregate(
36
50
  self,
37
51
  ddb_conn: DuckDBPyConnection,