arthur-common 2.1.50__py3-none-any.whl → 2.1.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of arthur-common might be problematic.

Files changed (28)
  1. arthur_common/aggregations/aggregator.py +2 -1
  2. arthur_common/aggregations/functions/categorical_count.py +5 -8
  3. arthur_common/aggregations/functions/confusion_matrix.py +18 -31
  4. arthur_common/aggregations/functions/inference_count.py +5 -8
  5. arthur_common/aggregations/functions/inference_count_by_class.py +7 -15
  6. arthur_common/aggregations/functions/inference_null_count.py +5 -8
  7. arthur_common/aggregations/functions/mean_absolute_error.py +5 -8
  8. arthur_common/aggregations/functions/mean_squared_error.py +5 -8
  9. arthur_common/aggregations/functions/multiclass_confusion_matrix.py +5 -8
  10. arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +5 -8
  11. arthur_common/aggregations/functions/numeric_stats.py +5 -8
  12. arthur_common/aggregations/functions/numeric_sum.py +5 -8
  13. arthur_common/aggregations/functions/shield_aggregations.py +3 -2
  14. arthur_common/config/__init__.py +0 -0
  15. arthur_common/config/config.py +42 -0
  16. arthur_common/config/settings.yaml +4 -0
  17. arthur_common/models/connectors.py +8 -7
  18. arthur_common/models/metrics.py +13 -5
  19. arthur_common/models/schema_definitions.py +8 -1
  20. arthur_common/models/task_job_specs.py +2 -1
  21. arthur_common/tools/aggregation_analyzer.py +1 -1
  22. arthur_common/tools/duckdb_data_loader.py +4 -3
  23. arthur_common/tools/duckdb_utils.py +35 -0
  24. arthur_common/tools/schema_inferer.py +23 -2
  25. {arthur_common-2.1.50.dist-info → arthur_common-2.1.51.dist-info}/METADATA +2 -1
  26. arthur_common-2.1.51.dist-info/RECORD +43 -0
  27. arthur_common-2.1.50.dist-info/RECORD +0 -39
  28. {arthur_common-2.1.50.dist-info → arthur_common-2.1.51.dist-info}/WHEEL +0 -0
arthur_common/aggregations/aggregator.py
@@ -3,10 +3,11 @@ from base64 import b64encode
 from typing import Any, Type, Union
 
 import pandas as pd
-from arthur_common.models.metrics import *
 from datasketches import kll_floats_sketch
 from duckdb import DuckDBPyConnection
 
+from arthur_common.models.metrics import *
+
 
 class AggregationFunction(ABC):
     @staticmethod
arthur_common/aggregations/functions/categorical_count.py
@@ -1,9 +1,12 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -12,7 +15,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str_literal
-from duckdb import DuckDBPyConnection
 
 
 class CategoricalCountAggregationFunction(NumericAggregationFunction):
@@ -69,13 +71,8 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
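Each aggregation file in this release makes the same two-line substitution shown above: the inlined allowed_column_types list becomes the shared SEGMENTATION_ALLOWED_COLUMN_TYPES constant (added in schema_definitions.py below), and tag_hints gains the new POSSIBLE_SEGMENTATION tag. Since this parameter metadata rides on typing.Annotated, here is a minimal sketch of how such annotations can be read back at runtime, roughly what the aggregation analyzer has to do; ColumnAnnotation is a hypothetical stand-in, not the arthur_common annotation class:

from dataclasses import dataclass
from typing import Annotated, Optional, get_args, get_type_hints


@dataclass(frozen=True)
class ColumnAnnotation:  # hypothetical stand-in for MetricMultipleColumnParameterAnnotation
    friendly_name: str
    optional: bool


def aggregate(
    segmentation_cols: Annotated[
        Optional[list[str]],
        ColumnAnnotation(friendly_name="Segmentation Columns", optional=True),
    ],
) -> None:
    ...


# include_extras=True preserves the Annotated wrapper instead of stripping it
hints = get_type_hints(aggregate, include_extras=True)
base_type, *metadata = get_args(hints["segmentation_cols"])
print(base_type)  # typing.Optional[list[str]]
print(metadata)   # [ColumnAnnotation(friendly_name='Segmentation Columns', optional=True)]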
arthur_common/aggregations/functions/confusion_matrix.py
@@ -1,10 +1,13 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -14,7 +17,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str_literal
-from duckdb import DuckDBPyConnection
 
 
 class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
@@ -27,7 +29,7 @@ class ConfusionMatrixAggregationFunction(NumericAggregationFunction):
         prediction_normalization_case: str,
         gt_normalization_case: str,
         dataset: DatasetReference,
-        segmentation_cols: list[str],
+        segmentation_cols: Optional[list[str]] = None,
     ) -> list[NumericMetric]:
         """
         Generate a SQL query to compute confusion matrix metrics over time.
@@ -202,13 +204,8 @@ class BinaryClassifierIntBoolConfusionMatrixAggregationFunction(
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
@@ -338,13 +335,8 @@ class BinaryClassifierStringLabelConfusionMatrixAggregationFunction(
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
@@ -446,13 +438,8 @@ class BinaryClassifierProbabilityThresholdConfusionMatrixAggregationFunction(
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
@@ -495,12 +482,12 @@ class BinaryClassifierProbabilityThresholdConfusionMatrixAggregationFunction(
             raise ValueError(f"Unsupported column type: {col_type}")
 
         return self.generate_confusion_matrix_metrics(
-            ddb_conn,
-            timestamp_col,
-            prediction_col,
-            gt_values_col,
-            prediction_normalization_case,
-            gt_normalization_case,
-            dataset,
-            segmentation_cols,
+            ddb_conn=ddb_conn,
+            timestamp_col=timestamp_col,
+            prediction_col=prediction_col,
+            gt_values_col=gt_values_col,
+            prediction_normalization_case=prediction_normalization_case,
+            gt_normalization_case=gt_normalization_case,
+            dataset=dataset,
+            segmentation_cols=segmentation_cols,
         )
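The final hunk also switches the generate_confusion_matrix_metrics call to keyword arguments, the safer call style now that segmentation_cols carries a default. A generic sketch of the hazard keyword calls avoid (names here are illustrative, not arthur_common code):

def summarize(count: int, label: str = "total", segments: list[str] | None = None) -> str:
    # With a keyword call, inserting another defaulted parameter before
    # `segments` in a later release cannot silently re-bind the arguments;
    # a positional call would shift ["region"] onto the wrong parameter.
    return f"{label}={count}, segments={segments or []}"


print(summarize(count=3, segments=["region"]))  # label stays at its default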
arthur_common/aggregations/functions/inference_count.py
@@ -1,9 +1,12 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -12,7 +15,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class InferenceCountAggregationFunction(NumericAggregationFunction):
@@ -56,13 +58,8 @@ class InferenceCountAggregationFunction(NumericAggregationFunction):
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/inference_count_by_class.py
@@ -1,10 +1,13 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -14,7 +17,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class BinaryClassifierCountByClassAggregationFunction(NumericAggregationFunction):
@@ -75,13 +77,8 @@ class BinaryClassifierCountByClassAggregationFunction
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
@@ -219,13 +216,8 @@ class BinaryClassifierCountThresholdClassAggregationFunction(
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/inference_null_count.py
@@ -1,9 +1,12 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -12,7 +15,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class InferenceNullCountAggregationFunction(NumericAggregationFunction):
@@ -65,13 +67,8 @@ class InferenceNullCountAggregationFunction(NumericAggregationFunction):
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/mean_absolute_error.py
@@ -1,10 +1,13 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -13,7 +16,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class MeanAbsoluteErrorAggregationFunction(NumericAggregationFunction):
@@ -80,13 +82,8 @@ class MeanAbsoluteErrorAggregationFunction:
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/mean_squared_error.py
@@ -1,10 +1,13 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -13,7 +16,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class MeanSquaredErrorAggregationFunction(NumericAggregationFunction):
@@ -80,13 +82,8 @@ class MeanSquaredErrorAggregationFunction:
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/multiclass_confusion_matrix.py
@@ -1,10 +1,13 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -14,7 +17,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str_literal
-from duckdb import DuckDBPyConnection
 
 
 class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFunction(
@@ -95,13 +97,8 @@ class MulticlassClassifierStringLabelSingleClassConfusionMatrixAggregationFuncti
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/multiclass_inference_count_by_class.py
@@ -1,12 +1,15 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.functions.inference_count_by_class import (
     BinaryClassifierCountByClassAggregationFunction,
 )
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.metrics import DatasetReference, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -14,7 +17,6 @@ from arthur_common.models.schema_definitions import (
     ScalarType,
     ScopeSchemaTag,
 )
-from duckdb import DuckDBPyConnection
 
 
 class MulticlassClassifierCountByClassAggregationFunction(
@@ -86,13 +88,8 @@ class MulticlassClassifierCountByClassAggregationFunction(
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/numeric_stats.py
@@ -1,9 +1,12 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import SketchAggregationFunction
 from arthur_common.models.metrics import DatasetReference, SketchMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -12,7 +15,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str_literal
-from duckdb import DuckDBPyConnection
 
 
 class NumericSketchAggregationFunction(SketchAggregationFunction):
@@ -71,13 +73,8 @@ class NumericSketchAggregationFunction(SketchAggregationFunction):
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/numeric_sum.py
@@ -1,9 +1,12 @@
 from typing import Annotated, Optional
 from uuid import UUID
 
+from duckdb import DuckDBPyConnection
+
 from arthur_common.aggregations.aggregator import NumericAggregationFunction
 from arthur_common.models.metrics import DatasetReference, Dimension, NumericMetric
 from arthur_common.models.schema_definitions import (
+    SEGMENTATION_ALLOWED_COLUMN_TYPES,
     DType,
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
@@ -12,7 +15,6 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import escape_identifier
-from duckdb import DuckDBPyConnection
 
 
 class NumericSumAggregationFunction(NumericAggregationFunction):
@@ -69,13 +71,8 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
         Optional[list[str]],
         MetricMultipleColumnParameterAnnotation(
             source_dataset_parameter_key="dataset",
-            allowed_column_types=[
-                ScalarType(dtype=DType.INT),
-                ScalarType(dtype=DType.BOOL),
-                ScalarType(dtype=DType.STRING),
-                ScalarType(dtype=DType.UUID),
-            ],
-            tag_hints=[],
+            allowed_column_types=SEGMENTATION_ALLOWED_COLUMN_TYPES,
+            tag_hints=[ScopeSchemaTag.POSSIBLE_SEGMENTATION],
             friendly_name="Segmentation Columns",
             description="All columns to include as dimensions for segmentation.",
             optional=True,
arthur_common/aggregations/functions/shield_aggregations.py
@@ -2,6 +2,9 @@ from typing import Annotated
 from uuid import UUID
 
 import pandas as pd
+from duckdb import DuckDBPyConnection
+from tokencost import calculate_cost_by_tokens
+
 from arthur_common.aggregations.aggregator import (
     NumericAggregationFunction,
     SketchAggregationFunction,
@@ -13,8 +16,6 @@ from arthur_common.models.schema_definitions import (
     MetricColumnParameterAnnotation,
     MetricDatasetParameterAnnotation,
 )
-from duckdb import DuckDBPyConnection
-from tokencost import calculate_cost_by_tokens
 
 
 class ShieldInferencePassFailCountAggregation(NumericAggregationFunction):
arthur_common/config/__init__.py
File without changes (new empty file)
arthur_common/config/config.py
@@ -0,0 +1,42 @@
+# get the current directory of this file
+import logging
+import pathlib
+
+directory = pathlib.Path(__file__).parent.resolve()
+
+# create settings object that reads from settings.yaml and takes overrides from env
+# can also be overwritten via the CLI
+# https://github.com/drgarcia1986/simple-settings
+from simple_settings import LazySettings
+
+settings = LazySettings(f"{directory}/settings.yaml", ".environ")
+
+logger = logging.getLogger()
+
+
+class Config:
+    settings = settings
+
+    @staticmethod
+    def convert_to_int(value: str | int, setting_name: str) -> int:
+        if isinstance(value, int):
+            return value
+        elif value == "":
+            raise ValueError(
+                f"Config setting {setting_name} could not be cast to an int.",
+            )
+
+        # attempt to convert setting to int
+        try:
+            return int(value.strip())
+        except TypeError:
+            raise ValueError(
+                f"Config setting {setting_name} could not be cast to an int.",
+            )
+
+    @staticmethod
+    def segmentation_col_unique_values_limit() -> int:
+        return Config.convert_to_int(
+            settings.SEGMENTATION_COL_UNIQUE_VALUE_LIMIT,
+            "SEGMENTATION_COL_UNIQUE_VALUE_LIMIT",
+        )
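A quick check of the new helper's contract, assuming arthur-common 2.1.51 is installed; ints pass through unchanged while strings are stripped before casting:

from arthur_common.config.config import Config

print(Config.convert_to_int(7, "EXAMPLE"))       # 7: ints are returned as-is
print(Config.convert_to_int(" 42 ", "EXAMPLE"))  # 42: strings are stripped, then cast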
arthur_common/config/settings.yaml
@@ -0,0 +1,4 @@
+# add arthur-common default settings here
+################################################
+# Aggregation Configurations
+SEGMENTATION_COL_UNIQUE_VALUE_LIMIT: 100
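Together, config.py and settings.yaml give arthur-common a small simple-settings backed configuration layer: the YAML supplies defaults, and the ".environ" source lets environment variables override them. A hedged usage sketch, assuming simple-settings' usual later-source-wins merge order:

import os

# set the override before Config is imported, so the lazy settings object
# resolves the environment value instead of the settings.yaml default of 100
os.environ["SEGMENTATION_COL_UNIQUE_VALUE_LIMIT"] = "250"

from arthur_common.config.config import Config

print(Config.segmentation_col_unique_values_limit())  # 250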
arthur_common/models/connectors.py
@@ -29,13 +29,14 @@ GOOGLE_CONNECTOR_PROJECT_ID_FIELD = "project_id"
 GOOGLE_CONNECTOR_LOCATION_FIELD = "location"
 SHIELD_CONNECTOR_API_KEY_FIELD = "api_key"
 SHIELD_CONNECTOR_ENDPOINT_FIELD = "endpoint"
-MSSQL_CONNECTOR_HOST_FIELD = "host"
-MSSQL_CONNECTOR_PORT_FIELD = "port"
-MSSQL_CONNECTOR_DATABASE_FIELD = "database"
-MSSQL_CONNECTOR_USERNAME_FIELD = "username"
-MSSQL_CONNECTOR_PASSWORD_FIELD = "password"
-MSSQL_CONNECTOR_DRIVER_FIELD = "driver"
-MSSQL_CONNECTOR_TABLE_NAME_FIELD = "table_name"
+ODBC_CONNECTOR_HOST_FIELD = "host"
+ODBC_CONNECTOR_PORT_FIELD = "port"
+ODBC_CONNECTOR_DATABASE_FIELD = "database"
+ODBC_CONNECTOR_USERNAME_FIELD = "username"
+ODBC_CONNECTOR_PASSWORD_FIELD = "password"
+ODBC_CONNECTOR_DRIVER_FIELD = "driver"
+ODBC_CONNECTOR_TABLE_NAME_FIELD = "table_name"
+ODBC_CONNECTOR_DIALECT_FIELD = "dialect"
 
 
 # dataset (connector type dependent) constants
arthur_common/models/metrics.py
@@ -4,14 +4,15 @@ from enum import Enum
 from typing import Literal, Optional
 from uuid import UUID
 
+from pydantic import BaseModel, Field, model_validator
+from typing_extensions import Self
+
 from arthur_common.models.datasets import ModelProblemType
 from arthur_common.models.schema_definitions import (
     DType,
     SchemaTypeUnion,
     ScopeSchemaTag,
 )
-from pydantic import BaseModel, Field, model_validator
-from typing_extensions import Self
 
 
 # Temporary limited list, expand this as we grow and make it more in line with custom transformations later on
@@ -138,8 +139,7 @@ class MetricsLiteralParameterSchema(MetricsParameterSchema):
     parameter_dtype: DType = Field(description="Data type of the parameter.")
 
 
-class MetricsColumnParameterSchema(MetricsParameterSchema):
-    parameter_type: Literal["column"] = "column"
+class MetricsColumnBaseParameterSchema(MetricsParameterSchema):
     tag_hints: list[ScopeSchemaTag] = Field(
         [],
         description="List of tags that are applicable to this parameter. Datasets with columns that have matching tags can be inferred this way.",
@@ -165,8 +165,12 @@ class MetricsColumnParameterSchema(MetricsParameterSchema):
         return self
 
 
+class MetricsColumnParameterSchema(MetricsColumnBaseParameterSchema):
+    parameter_type: Literal["column"] = "column"
+
+
 # Not used /implemented yet. Might turn into group by column list
-class MetricsColumnListParameterSchema(MetricsColumnParameterSchema):
+class MetricsColumnListParameterSchema(MetricsColumnBaseParameterSchema):
     parameter_type: Literal["column_list"] = "column_list"
 
 
@@ -177,6 +181,10 @@ MetricsParameterSchemaUnion = (
     | MetricsColumnListParameterSchema
 )
 
+MetricsColumnSchemaUnion = (
+    MetricsColumnParameterSchema | MetricsColumnListParameterSchema
+)
+
 
 @dataclass
 class DatasetReference:
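This refactor hoists the shared column fields into MetricsColumnBaseParameterSchema so the "column" and "column_list" variants become siblings, each owning its parameter_type Literal rather than the list variant overriding an inherited "column" literal. A minimal sketch of the same discriminated-union pattern with generic names, assuming pydantic v2:

from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field


class ColumnBase(BaseModel):
    # shared fields live on the base, which carries no discriminator itself
    tag_hints: list[str] = []


class ColumnParam(ColumnBase):
    parameter_type: Literal["column"] = "column"


class ColumnListParam(ColumnBase):
    parameter_type: Literal["column_list"] = "column_list"


# pydantic dispatches on the Literal field when validating the union
ColumnUnion = Annotated[
    Union[ColumnParam, ColumnListParam],
    Field(discriminator="parameter_type"),
]


class Holder(BaseModel):
    param: ColumnUnion


holder = Holder.model_validate({"param": {"parameter_type": "column_list"}})
print(type(holder.param).__name__)  # ColumnListParam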
arthur_common/models/schema_definitions.py
@@ -4,9 +4,10 @@ from enum import Enum
 from typing import Optional, Self, Union
 from uuid import UUID, uuid4
 
-from arthur_common.models.datasets import ModelProblemType
 from pydantic import BaseModel, ConfigDict, Field, computed_field, model_validator
 
+from arthur_common.models.datasets import ModelProblemType
+
 
 class ScopeSchemaTag(str, Enum):
     LLM_CONTEXT = "llm_context"
@@ -18,6 +19,7 @@ class ScopeSchemaTag(str, Enum):
     PREDICTION = "prediction"
     GROUND_TRUTH = "ground_truth"
     PIN_IN_DEEP_DIVE = "pin_in_deep_dive"
+    POSSIBLE_SEGMENTATION = "possible_segmentation"
 
 
 class DType(str, Enum):
@@ -420,3 +422,8 @@ def SHIELD_SCHEMA() -> DatasetSchema:
 
 SHIELD_RESPONSE_SCHEMA = create_shield_response_schema().to_base_type()
 SHIELD_PROMPT_SCHEMA = create_shield_prompt_schema().to_base_type()
+
+SEGMENTATION_ALLOWED_DTYPES = [DType.INT, DType.BOOL, DType.STRING, DType.UUID]
+SEGMENTATION_ALLOWED_COLUMN_TYPES = [
+    ScalarType(dtype=d_type) for d_type in SEGMENTATION_ALLOWED_DTYPES
+]
arthur_common/models/task_job_specs.py
@@ -1,9 +1,10 @@
 from typing import Literal, Optional
 from uuid import UUID
 
-from arthur_common.models.shield import NewRuleRequest
 from pydantic import BaseModel, Field
 
+from arthur_common.models.shield import NewRuleRequest
+
 onboarding_id_desc = "An identifier to assign to the created model to make it easy to retrieve. Used by the UI during the GenAI model creation flow."
 
 
arthur_common/tools/aggregation_analyzer.py
@@ -84,7 +84,7 @@ class FunctionAnalyzer:
     @staticmethod
     def _get_scope_metric_parameter_from_annotation(
         param_name: str,
-        param_dtype: typing.Optional[DType],
+        param_dtype: DType,
         optional: bool,
         annotation: typing.Annotated,  # type: ignore
     ) -> MetricsParameterSchemaUnion:
arthur_common/tools/duckdb_data_loader.py
@@ -3,6 +3,10 @@ from typing import Any
 
 import duckdb
 import pandas as pd
+from dateutil.parser import parse
+from fsspec import filesystem
+from pydantic import BaseModel
+
 from arthur_common.models.datasets import DatasetJoinKind
 from arthur_common.models.schema_definitions import (
     DatasetListType,
@@ -11,9 +15,6 @@ from arthur_common.models.schema_definitions import (
     DatasetSchema,
     DType,
 )
-from dateutil.parser import parse
-from fsspec import filesystem
-from pydantic import BaseModel
 
 
 class ColumnFormat(BaseModel):
arthur_common/tools/duckdb_utils.py
@@ -0,0 +1,35 @@
+import duckdb
+
+from arthur_common.config.config import Config
+from arthur_common.models.schema_definitions import SEGMENTATION_ALLOWED_DTYPES, DType
+from arthur_common.tools.duckdb_data_loader import escape_identifier
+
+
+def is_column_possible_segmentation(
+    conn: duckdb.DuckDBPyConnection,
+    table: str,
+    column_name: str,
+    column_dtype: DType,
+) -> bool:
+    """Returns whether column fits segmentation criteria:
+    1. Has fewer than SEGMENTATION_COL_UNIQUE_VALUE_LIMIT unique values.
+    2. Has an allowed DType.
+
+    PreReq: Table with column should already be loaded in DuckDB"""
+
+    segmentation_col_unique_val_limit = Config.segmentation_col_unique_values_limit()
+    if column_dtype not in SEGMENTATION_ALLOWED_DTYPES:
+        return False
+
+    # check column for unique value count
+    escaped_column = escape_identifier(column_name)
+
+    # count distinct values in this column
+    distinct_count_query = f"""
+        SELECT COUNT(DISTINCT {escaped_column}) as distinct_count
+        FROM {table}
+    """
+    result = conn.sql(distinct_count_query).fetchone()
+    distinct_count = result[0] if result else 0
+
+    return distinct_count < segmentation_col_unique_val_limit
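A usage sketch for the new helper, assuming arthur-common 2.1.51 and duckdb are installed (table and column names are illustrative). With the shipped default limit of 100, a low-cardinality string column qualifies while a high-cardinality integer column does not:

import duckdb

from arthur_common.models.schema_definitions import DType
from arthur_common.tools.duckdb_utils import is_column_possible_segmentation

conn = duckdb.connect()
# 500 rows: `region` has 3 distinct values, `id` has 500
conn.sql(
    "CREATE TABLE root AS SELECT range AS id, 'u' || (range % 3) AS region FROM range(500)"
)

print(is_column_possible_segmentation(conn, "root", "region", DType.STRING))  # True
print(is_column_possible_segmentation(conn, "root", "id", DType.INT))  # False: 500 distinct values exceed the default limit of 100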
arthur_common/tools/schema_inferer.py
@@ -2,6 +2,7 @@ from typing import Any
 from uuid import uuid4
 
 import pandas as pd
+
 from arthur_common.models.schema_definitions import (
     DatasetColumn,
     DatasetListType,
@@ -12,6 +13,7 @@ from arthur_common.models.schema_definitions import (
     ScopeSchemaTag,
 )
 from arthur_common.tools.duckdb_data_loader import DuckDBOperator, escape_identifier
+from arthur_common.tools.duckdb_utils import is_column_possible_segmentation
 
 
 class SchemaInferer:
@@ -38,14 +40,21 @@ class SchemaInferer:
         self.conn.sql(
             f"CREATE OR REPLACE TEMP TABLE {escaped_col} AS SELECT UNNEST({escaped_col}) as {escaped_col} FROM {table}",
         )
-        return self._infer_schema(escaped_col)
+        return self._infer_schema(escaped_col, is_nested_col=True)
 
-    def _infer_schema(self, table: str = "root") -> DatasetObjectType:
+    def _infer_schema(
+        self,
+        table: str = "root",
+        is_nested_col: bool = False,
+    ) -> DatasetObjectType:
+        """is_nested_col indicates whether the function is being called on an unnested/flattened table that represents
+        a struct column or list column in the root table."""
         ddb_schema: list[tuple[Any, Any, Any]] = self.conn.sql(
             f"DESCRIBE {table}",
         ).fetchall()
 
         obj = DatasetObjectType(id=uuid4(), object={}, nullable=False)
+        # object has a dict of each column
        timestamp_cols = []
 
         for column in ddb_schema:
@@ -94,6 +103,18 @@ class SchemaInferer:
                     timestamp_cols.append(scalar_schema)
                 case _:
                     raise NotImplementedError(f"Type {col_type} not mappable.")
+
+            # tag column as a possible segmentation column if it meets criteria
+            # we only support top-level column aggregations right now (ie you can't aggregate on a nested column)
+            # so we don't want to tag nested columns as possible segmentation columns
+            if not is_nested_col and is_column_possible_segmentation(
+                self.conn,
+                table,
+                col_name,
+                scalar_schema.dtype,
+            ):
+                scalar_schema.tag_hints.append(ScopeSchemaTag.POSSIBLE_SEGMENTATION)
+
             obj.object[col_name] = scalar_schema
 
         # auto assign primary timestamp tag if there's only one timestamp column
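_infer_schema is driven by DuckDB's DESCRIBE output, and with this release each eligible top-level column also flows through is_column_possible_segmentation before being added to the object schema. For reference, a standalone look at the tuples DESCRIBE produces (only the first two fields, column name and type, matter here):

import duckdb

conn = duckdb.connect()
conn.sql("CREATE TABLE root AS SELECT 1 AS id, 'a' AS label, now() AS ts")

# one row per column: (name, type, null, key, default, extra)
for row in conn.sql("DESCRIBE root").fetchall():
    print(row[0], row[1])
# id INTEGER
# label VARCHAR
# ts TIMESTAMP WITH TIME ZONE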
{arthur_common-2.1.50.dist-info → arthur_common-2.1.51.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arthur-common
-Version: 2.1.50
+Version: 2.1.51
 Summary: Utility code common to Arthur platform components.
 License: MIT
 Author: Arthur
@@ -16,6 +16,7 @@ Requires-Dist: fastapi (>=0.115.8)
 Requires-Dist: fsspec (>=2024.10.0)
 Requires-Dist: pandas (>=2.2.2)
 Requires-Dist: pydantic (>=2)
+Requires-Dist: simple-settings (>=1.2.0)
 Requires-Dist: tokencost (==0.1.24)
 Requires-Dist: types-python-dateutil (>=2.9.0)
 Requires-Dist: types-requests (>=2.32.0.20241016)
arthur_common-2.1.51.dist-info/RECORD
@@ -0,0 +1,43 @@
+arthur_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/aggregations/__init__.py,sha256=vISWyciQAtksa71OKeHNP-QyFGd1NzBKq_LBsG0QSG8,67
+arthur_common/aggregations/aggregator.py,sha256=lCe0P-ZbzifG-KvmzeodNyv6LtaS56B0qOInhFrTr1U,7714
+arthur_common/aggregations/functions/README.md,sha256=MkZoTAJ94My96R5Z8GAxud7S6vyR0vgVi9gqdt9a4XY,5460
+arthur_common/aggregations/functions/__init__.py,sha256=HqC3UNRURX7ZQHgamTrQvfA8u_FiZGZ4I4eQW7Ooe5o,1299
+arthur_common/aggregations/functions/categorical_count.py,sha256=56C9ELDFfZjv5Kt45U0Tq1SUGdneha4ED5o9BIl98UI,4966
+arthur_common/aggregations/functions/confusion_matrix.py,sha256=4UsCcnToQjrwN_W75TDDYzOcXdoEeu7DZDLeojrUZZs,20845
+arthur_common/aggregations/functions/inference_count.py,sha256=_llDm6rsjISpll0ORaP7Ms1KlZnQtGuOQk99p_IgNv8,3721
+arthur_common/aggregations/functions/inference_count_by_class.py,sha256=BkZEYO8KWmutCYIv8EiP3mEylC4rxaHSE2RxX5oWdGE,10804
+arthur_common/aggregations/functions/inference_null_count.py,sha256=FRNgSw9T9MtZFK6hD-2L6sBPbQ8m2HBmGlzpOp31Lio,4455
+arthur_common/aggregations/functions/mean_absolute_error.py,sha256=vokx77DAYkdrpGtZnSKJRAhuAX2Az0udXOzYVQ2ZgRI,5889
+arthur_common/aggregations/functions/mean_squared_error.py,sha256=83Ygxd2OTYxJQoZfFEFoGUz1acEuvc_dL3a0imcWvHk,5921
+arthur_common/aggregations/functions/multiclass_confusion_matrix.py,sha256=-LO1f6t7E8XShIfeJn1tdCzF4KN90V_9ACbd5G3-HEk,10862
+arthur_common/aggregations/functions/multiclass_inference_count_by_class.py,sha256=x7kwpMFgXal0yQqsV7nil244agxa0wpTsFPb2ws1hqY,3867
+arthur_common/aggregations/functions/numeric_stats.py,sha256=jaHThHXmqhYWAwsdRz4Qtmw2PfdW2CU14OUTZCmh9nw,4565
+arthur_common/aggregations/functions/numeric_sum.py,sha256=2sijXJKvETENsZsyHsP2ztAuKVbS6xfa8xnEMm5d0IE,4674
+arthur_common/aggregations/functions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/aggregations/functions/shield_aggregations.py,sha256=mLSpoYKGLdX8RoYryKLKIBgMjn5Z8ZWHauggXK9cpEY,31512
+arthur_common/aggregations/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/config/config.py,sha256=fcpjOYjPKu4Duk63CuTHrOWKQKAlAhVUR60kF_2_Xog,1247
+arthur_common/config/settings.yaml,sha256=0CrygUwJzC5mGcO5Xnvv2ttp-P7LIsx682jllYA96NQ,161
+arthur_common/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/models/connectors.py,sha256=5f5DUgOQ16P3lBPZ0zpUv9kTAqw45Agrl526F-iFJes,1862
+arthur_common/models/datasets.py,sha256=giG_8mv_3ilBf7cIvRV0_TDCDdb4qxRbYZvl7hRb6l8,491
+arthur_common/models/metrics.py,sha256=A5mtXPqqHcFNAJ02HKnlvWr-so_Mro7AYKKuGUnD9qg,8398
+arthur_common/models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/models/schema_definitions.py,sha256=0zXZKHKr49d7ATml2Tzw1AIFfM0i0HjIblM-qOwNxk8,14878
+arthur_common/models/shield.py,sha256=1ZblfULKCf5BEvYURO5WScyfmijGwjAmcj0XADlF-XY,19110
+arthur_common/models/task_job_specs.py,sha256=uZo8eiTBHWf2EZGEQrDfJGVyYg_8wd9MHWLxn-5oNUk,2797
+arthur_common/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/tools/aggregation_analyzer.py,sha256=y0W70_19d6PtBxwSe0pQKVisHkfapFnKMebmX2TJCi0,11113
+arthur_common/tools/aggregation_loader.py,sha256=3CF46bNi-GdJBNOXkjYfCQ1Aung8lf65L532sdWmR_s,2351
+arthur_common/tools/duckdb_data_loader.py,sha256=nscmarfP5FeL8p-9e3uZhpGEV0xFqDJmR3t77HdR26U,11081
+arthur_common/tools/duckdb_utils.py,sha256=1i-kRXu95gh4Sf9Osl2LFUpdb0yZifOjLDtIgSfSmfs,1197
+arthur_common/tools/functions.py,sha256=FWL4eWO5-vLp86WudT-MGUKvf2B8f02IdoXQFKd6d8k,1093
+arthur_common/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arthur_common/tools/schema_inferer.py,sha256=Ur4CXGAkd6ZMSU0nMNrkOEElsBopHXq0lctTV8X92W8,5188
+arthur_common/tools/time_utils.py,sha256=4gfiu9NXfvPZltiVNLSIQGylX6h2W0viNi9Kv4bKyfw,1410
+arthur_common-2.1.51.dist-info/METADATA,sha256=Gfan_b1J1PObA_sIeqZXKL_jz9QpA5bx-fHscjczwqw,1609
+arthur_common-2.1.51.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+arthur_common-2.1.51.dist-info/RECORD,,
arthur_common-2.1.50.dist-info/RECORD
@@ -1,39 +0,0 @@
-arthur_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/aggregations/__init__.py,sha256=vISWyciQAtksa71OKeHNP-QyFGd1NzBKq_LBsG0QSG8,67
-arthur_common/aggregations/aggregator.py,sha256=8J0TRYjQdwf0qXBnoZ4ETJVCdo4UIyvQnBTZCypY7Fo,7713
-arthur_common/aggregations/functions/README.md,sha256=MkZoTAJ94My96R5Z8GAxud7S6vyR0vgVi9gqdt9a4XY,5460
-arthur_common/aggregations/functions/__init__.py,sha256=HqC3UNRURX7ZQHgamTrQvfA8u_FiZGZ4I4eQW7Ooe5o,1299
-arthur_common/aggregations/functions/categorical_count.py,sha256=W0xIpqqSElRpGBPpvztoZBpDO5QtiGYpaMVOQ3piS-4,5077
-arthur_common/aggregations/functions/confusion_matrix.py,sha256=fZ5SeFcENjGyoCqecRTF9Y0Ub-RzsA-SlZ8cEHzUvnM,21111
-arthur_common/aggregations/functions/inference_count.py,sha256=jIhuh_NjL82NA84qJD2VI3FVRgZCSFjfcME3Hr_slpg,3832
-arthur_common/aggregations/functions/inference_count_by_class.py,sha256=JKTOhXSP3hBz28u10AvA8OyDi7P68Fv7NP6j_M7kXCQ,11066
-arthur_common/aggregations/functions/inference_null_count.py,sha256=l-yuVX7OJQGk-vvnkuXJPD-mEI1ZzwGjbzeihwSDT_M,4566
-arthur_common/aggregations/functions/mean_absolute_error.py,sha256=pgputW69w0DUT4xbj2nfnPpqliHZAtvh7TQnBfPQ584,6000
-arthur_common/aggregations/functions/mean_squared_error.py,sha256=o9TTdlBb1lX57kWqoMRdTNacXcEa_VH-AkfT6SL1xGs,6032
-arthur_common/aggregations/functions/multiclass_confusion_matrix.py,sha256=47CVl-Wo40jDeKvSKHeWB2DFFTDo_ichXPGyMmlVEmo,10973
-arthur_common/aggregations/functions/multiclass_inference_count_by_class.py,sha256=gLdjXKvQ_WDscc3zksH92LGHhuknkjf6oAF63-cwbJo,3978
-arthur_common/aggregations/functions/numeric_stats.py,sha256=9zyroFSYXXELnz4MKow8v06-tPVL23COVafoLHaHgVc,4676
-arthur_common/aggregations/functions/numeric_sum.py,sha256=f0ZIYqPjBsyCh-V8ktHpDIs1_4wjjPENboC607i5O3c,4785
-arthur_common/aggregations/functions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/aggregations/functions/shield_aggregations.py,sha256=wM6bmuApRw-xsH5P-hRW8a475HDK3Q46K3se_vddqWQ,31511
-arthur_common/aggregations/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/models/connectors.py,sha256=uHSuxpihfNMSbNVSO2MbEyggHzPSWSWOtAIr60JjGvI,1828
-arthur_common/models/datasets.py,sha256=giG_8mv_3ilBf7cIvRV0_TDCDdb4qxRbYZvl7hRb6l8,491
-arthur_common/models/metrics.py,sha256=ZfsDL3aPdhZ1-E1-wlRyZT4lZNzz2A3qL-jDrqJbQdY,8217
-arthur_common/models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/models/schema_definitions.py,sha256=WBJ6JvBCvThlMBGQsbiAwi9QYjhG9KyURyjE8an8-yE,14633
-arthur_common/models/shield.py,sha256=1ZblfULKCf5BEvYURO5WScyfmijGwjAmcj0XADlF-XY,19110
-arthur_common/models/task_job_specs.py,sha256=GLJ7qmrb5eXnl5PiV27nnx_yG4S4sc4NDJ8-6xmNDLM,2796
-arthur_common/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/tools/aggregation_analyzer.py,sha256=IOpGfj-aihjUNo_L5iI47x2EuJWXAW0FKOUMzNca7ek,11130
-arthur_common/tools/aggregation_loader.py,sha256=3CF46bNi-GdJBNOXkjYfCQ1Aung8lf65L532sdWmR_s,2351
-arthur_common/tools/duckdb_data_loader.py,sha256=ywvFpI1qioK5Is-S7XxBqgQmQNY6u21qibKygTAW0Oo,11080
-arthur_common/tools/functions.py,sha256=FWL4eWO5-vLp86WudT-MGUKvf2B8f02IdoXQFKd6d8k,1093
-arthur_common/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arthur_common/tools/schema_inferer.py,sha256=PkAOHZRk_rZ1OZSigYrfzH-jERb9B_Gu7pOMl9WJQA8,4202
-arthur_common/tools/time_utils.py,sha256=4gfiu9NXfvPZltiVNLSIQGylX6h2W0viNi9Kv4bKyfw,1410
-arthur_common-2.1.50.dist-info/METADATA,sha256=lD-bfaNN5l0Ls75aCffNXVOELI4y6pAli_ma29bPWtM,1568
-arthur_common-2.1.50.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-arthur_common-2.1.50.dist-info/RECORD,,