arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. arthur_common/aggregations/aggregator.py +73 -9
  2. arthur_common/aggregations/functions/agentic_aggregations.py +260 -85
  3. arthur_common/aggregations/functions/categorical_count.py +15 -15
  4. arthur_common/aggregations/functions/confusion_matrix.py +24 -26
  5. arthur_common/aggregations/functions/inference_count.py +5 -9
  6. arthur_common/aggregations/functions/inference_count_by_class.py +16 -27
  7. arthur_common/aggregations/functions/inference_null_count.py +10 -13
  8. arthur_common/aggregations/functions/mean_absolute_error.py +12 -18
  9. arthur_common/aggregations/functions/mean_squared_error.py +12 -18
  10. arthur_common/aggregations/functions/multiclass_confusion_matrix.py +13 -20
  11. arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +1 -1
  12. arthur_common/aggregations/functions/numeric_stats.py +13 -15
  13. arthur_common/aggregations/functions/numeric_sum.py +12 -15
  14. arthur_common/aggregations/functions/shield_aggregations.py +457 -215
  15. arthur_common/models/common_schemas.py +214 -0
  16. arthur_common/models/connectors.py +10 -2
  17. arthur_common/models/constants.py +24 -0
  18. arthur_common/models/datasets.py +0 -9
  19. arthur_common/models/enums.py +177 -0
  20. arthur_common/models/metric_schemas.py +63 -0
  21. arthur_common/models/metrics.py +2 -9
  22. arthur_common/models/request_schemas.py +870 -0
  23. arthur_common/models/response_schemas.py +785 -0
  24. arthur_common/models/schema_definitions.py +6 -1
  25. arthur_common/models/task_job_specs.py +3 -12
  26. arthur_common/tools/duckdb_data_loader.py +34 -2
  27. arthur_common/tools/duckdb_utils.py +3 -6
  28. arthur_common/tools/schema_inferer.py +3 -6
  29. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/METADATA +12 -4
  30. arthur_common-2.4.13.dist-info/RECORD +49 -0
  31. arthur_common/models/shield.py +0 -642
  32. arthur_common-2.1.58.dist-info/RECORD +0 -44
  33. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/WHEEL +0 -0
@@ -19,7 +19,7 @@ from arthur_common.models.schema_definitions import (
     ScalarType,
     ScopeSchemaTag,
 )
-from arthur_common.tools.duckdb_data_loader import escape_identifier
+from arthur_common.tools.duckdb_data_loader import unescape_identifier


 class NumericSumAggregationFunction(NumericAggregationFunction):
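The import swap above is the heart of this file's change: numeric_sum.py no longer escapes identifiers itself and only unescapes the column name when reporting it. The helpers live in arthur_common/tools/duckdb_data_loader.py and their implementation is not shown in this diff; as a rough sketch only, DuckDB-style identifier quoting of the kind these helpers presumably perform looks like the following (the function bodies here are assumptions, not the package's actual code):

# Hypothetical sketch of DuckDB-style identifier quoting; the real
# escape_identifier/unescape_identifier live in duckdb_data_loader.py
# and may differ in detail.
def escape_identifier(name: str) -> str:
    # Wrap the name in double quotes and double any embedded quotes,
    # the standard SQL rule DuckDB follows for quoted identifiers.
    return '"' + name.replace('"', '""') + '"'


def unescape_identifier(name: str) -> str:
    # Reverse the quoting so the raw column name can be recorded in
    # metric dimensions (as done in the last hunk below).
    if name.startswith('"') and name.endswith('"'):
        return name[1:-1].replace('""', '"')
    return name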
@@ -94,31 +94,26 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
         ] = None,
     ) -> list[NumericMetric]:
         """Executed SQL with no segmentation columns:
-        select time_bucket(INTERVAL '5 minutes', {escaped_timestamp_col}) as ts, \
-            sum({escaped_numeric_col}) as sum \
+        select time_bucket(INTERVAL '5 minutes', {timestamp_col}) as ts, \
+            sum({numeric_col}) as sum \
         from {dataset.dataset_table_name} \
-        where {escaped_numeric_col} is not null \
+        where {numeric_col} is not null \
         group by ts \
         """
         segmentation_cols = [] if not segmentation_cols else segmentation_cols
-        escaped_timestamp_col = escape_identifier(timestamp_col)
-        escaped_numeric_col = escape_identifier(numeric_col)

         # build query components with segmentation columns
-        escaped_segmentation_cols = [
-            escape_identifier(col) for col in segmentation_cols
-        ]
         all_select_clause_cols = [
-            f"time_bucket(INTERVAL '5 minutes', {escaped_timestamp_col}) as ts",
-            f"sum({escaped_numeric_col}) as sum",
-        ] + escaped_segmentation_cols
-        all_group_by_cols = ["ts"] + escaped_segmentation_cols
+            f"time_bucket(INTERVAL '5 minutes', {timestamp_col}) as ts",
+            f"sum({numeric_col}) as sum",
+        ] + segmentation_cols
+        all_group_by_cols = ["ts"] + segmentation_cols

         # build query
         query = f"""
             select {", ".join(all_select_clause_cols)}
             from {dataset.dataset_table_name}
-            where {escaped_numeric_col} is not null
+            where {numeric_col} is not null
             group by {", ".join(all_group_by_cols)}
         """

@@ -132,7 +127,9 @@ class NumericSumAggregationFunction(NumericAggregationFunction):
         )
         # preserve dimension that identifies the name of the numeric column used for the aggregation
         for point in series:
-            point.dimensions.append(Dimension(name="column_name", value=numeric_col))
+            point.dimensions.append(
+                Dimension(name="column_name", value=unescape_identifier(numeric_col)),
+            )

         metric = self.series_to_metric(self.METRIC_NAME, series)
         return [metric]
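Net effect: the aggregation now assumes timestamp_col, numeric_col, and segmentation_cols arrive already escaped, and the raw (unescaped) column name is what gets recorded as the column_name dimension. A minimal sketch of the query the new code would build, using made-up column and table names purely for illustration:

# Hypothetical inputs, already escaped by the caller as the new code expects.
timestamp_col = '"event_ts"'
numeric_col = '"token_count"'
segmentation_cols = ['"model"']
dataset_table_name = "inferences"

all_select_clause_cols = [
    f"time_bucket(INTERVAL '5 minutes', {timestamp_col}) as ts",
    f"sum({numeric_col}) as sum",
] + segmentation_cols
all_group_by_cols = ["ts"] + segmentation_cols

query = f"""
    select {", ".join(all_select_clause_cols)}
    from {dataset_table_name}
    where {numeric_col} is not null
    group by {", ".join(all_group_by_cols)}
"""
# Roughly the SQL that results:
#   select time_bucket(INTERVAL '5 minutes', "event_ts") as ts,
#          sum("token_count") as sum, "model"
#   from inferences
#   where "token_count" is not null
#   group by ts, "model"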