PyPI - arthur-common - Versions diffs - 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl - Mend

arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

arthur_common/aggregations/aggregator.py +73 -9
arthur_common/aggregations/functions/agentic_aggregations.py +260 -85
arthur_common/aggregations/functions/categorical_count.py +15 -15
arthur_common/aggregations/functions/confusion_matrix.py +24 -26
arthur_common/aggregations/functions/inference_count.py +5 -9
arthur_common/aggregations/functions/inference_count_by_class.py +16 -27
arthur_common/aggregations/functions/inference_null_count.py +10 -13
arthur_common/aggregations/functions/mean_absolute_error.py +12 -18
arthur_common/aggregations/functions/mean_squared_error.py +12 -18
arthur_common/aggregations/functions/multiclass_confusion_matrix.py +13 -20
arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +1 -1
arthur_common/aggregations/functions/numeric_stats.py +13 -15
arthur_common/aggregations/functions/numeric_sum.py +12 -15
arthur_common/aggregations/functions/shield_aggregations.py +457 -215
arthur_common/models/common_schemas.py +214 -0
arthur_common/models/connectors.py +10 -2
arthur_common/models/constants.py +24 -0
arthur_common/models/datasets.py +0 -9
arthur_common/models/enums.py +177 -0
arthur_common/models/metric_schemas.py +63 -0
arthur_common/models/metrics.py +2 -9
arthur_common/models/request_schemas.py +870 -0
arthur_common/models/response_schemas.py +785 -0
arthur_common/models/schema_definitions.py +6 -1
arthur_common/models/task_job_specs.py +3 -12
arthur_common/tools/duckdb_data_loader.py +34 -2
arthur_common/tools/duckdb_utils.py +3 -6
arthur_common/tools/schema_inferer.py +3 -6
{arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/METADATA +12 -4
arthur_common-2.4.13.dist-info/RECORD +49 -0
arthur_common/models/shield.py +0 -642
arthur_common-2.1.58.dist-info/RECORD +0 -44
{arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/WHEEL +0 -0

arthur_common/aggregations/aggregator.py CHANGED Viewed

@@ -1,3 +1,5 @@
+import os
+import re
 from abc import ABC, abstractmethod
 from base64 import b64encode
 from typing import Any, Type, Union
@@ -10,6 +12,8 @@ from arthur_common.models.metrics import *
 class AggregationFunction(ABC):
+    FEATURE_FLAG_NAME: str | None = None
     @staticmethod
     @abstractmethod
     def id() -> UUID:
@@ -35,6 +39,31 @@ class AggregationFunction(ABC):
         """Returns the list of aggregations reported by the aggregate function."""
         raise NotImplementedError
+    @staticmethod
+    def get_innermost_segmentation_columns(segmentation_cols: list[str]) -> list[str]:
+        """
+        Extracts the innermost column name for nested segmentation columns or
+        returns the top-level column name for non-nested segmentation columns.
+        """
+        for i, col in enumerate(segmentation_cols):
+            # extract the innermost column for escaped column names (e.g. '"nested.col"."name"')
+            # otherwise return the name since it's a top-level column
+            if col.startswith('"') and col.endswith('"'):
+                identifier = col[1:-1]
+                identifier_split_in_struct_fields = re.split(r'"\."', identifier)
+                # For nested columns, take just the innermost field name
+                # Otherwise for top-level columns, take the whole name
+                if len(identifier_split_in_struct_fields) > 1:
+                    innermost_field = identifier_split_in_struct_fields[-1]
+                    segmentation_cols[i] = innermost_field.replace('""', '"')
+                else:
+                    segmentation_cols[i] = identifier.replace('""', '"')
+            else:
+                segmentation_cols[i] = col
+        return segmentation_cols
     @abstractmethod
     def aggregate(
         self,
@@ -50,6 +79,13 @@ class AggregationFunction(ABC):
             value = "null"
         return Dimension(name=name, value=str(value))
+    def is_feature_flag_enabled(self, feature_flag_name: str) -> bool:
+        if feature_flag_name is None:
+            value = os.getenv(self.FEATURE_FLAG_NAME, "false")
+        else:
+            value = os.getenv(feature_flag_name, "false")
+        return value.lower() in ("true", "1", "yes")
 class NumericAggregationFunction(AggregationFunction, ABC):
     def aggregation_type(self) -> Type[NumericMetric]:
@@ -89,6 +125,11 @@ class NumericAggregationFunction(AggregationFunction, ABC):
                 ),
             ]
+        # get innermost column name for nested segmentation columns
+        dim_columns = AggregationFunction.get_innermost_segmentation_columns(
+            dim_columns,
+        )
         calculated_metrics: list[NumericTimeSeries] = []
         # make sure dropna is False or rows with "null" as a dimension value will be dropped
         groups = data.groupby(dim_columns, dropna=False)
@@ -168,11 +209,33 @@ class SketchAggregationFunction(AggregationFunction, ABC):
         """
         calculated_metrics: list[SketchTimeSeries] = []
-        # make sure dropna is False or rows with "null" as a dimension value will be dropped
-        groups = data.groupby(dim_columns, dropna=False)
-        for _, group in groups:
+        # get innermost column name for nested segmentation columns
+        dim_columns = AggregationFunction.get_innermost_segmentation_columns(
+            dim_columns,
+        )
+        if dim_columns:
+            # make sure dropna is False or rows with "null" as a dimension value will be dropped
+            # call _group_to_series for each grouped DF
+            groups = data.groupby(dim_columns, dropna=False)
+            for _, group in groups:
+                calculated_metrics.append(
+                    SketchAggregationFunction._group_to_series(
+                        group,
+                        timestamp_col,
+                        dim_columns,
+                        value_col,
+                    ),
+                )
+        else:
             calculated_metrics.append(
-                SketchAggregationFunction._group_to_series(group, timestamp_col, dim_columns, value_col),
+                SketchAggregationFunction._group_to_series(
+                    data,
+                    timestamp_col,
+                    dim_columns,
+                    value_col,
+                ),
             )
         return calculated_metrics
@@ -193,11 +256,12 @@ class SketchAggregationFunction(AggregationFunction, ABC):
             return s
         dimensions: list[Dimension] = []
-        # Get the first row of the group to determine the group level dimensions
-        dims_row = group.iloc[0]
-        for dim in dim_columns:
-            d = AggregationFunction.string_to_dimension(name=dim, value=dims_row[dim])
-            dimensions.append(d)
+        if dim_columns:
+            # Get the first row of the group to determine the group level dimensions
+            dims_row = group.iloc[0]
+            for dim in dim_columns:
+                d = AggregationFunction.string_to_dimension(name=dim, value=dims_row[dim])
+                dimensions.append(d)
         values: list[SketchPoint] = []

arthur_common/aggregations/functions/agentic_aggregations.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
-from typing import Annotated
+from datetime import datetime
+from typing import Annotated, Any
 from uuid import UUID
 import pandas as pd
@@ -10,7 +11,7 @@ from arthur_common.aggregations.aggregator import (
     NumericAggregationFunction,
     SketchAggregationFunction,
 )
-from arthur_common.models.datasets import ModelProblemType
+from arthur_common.models.enums import ModelProblemType
 from arthur_common.models.metrics import (
     BaseReportedAggregation,
     DatasetReference,
@@ -27,7 +28,50 @@ TOOL_SCORE_NO_TOOL_VALUE = 2
 logger = logging.getLogger(__name__)
-def extract_spans_with_metrics_and_agents(root_spans):
+def root_span_in_time_buckets(
+    ddb_conn: DuckDBPyConnection, dataset: DatasetReference
+) -> pd.DataFrame:
+    return ddb_conn.sql(
+        f"""
+            SELECT
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
+                root_spans
+            FROM {dataset.dataset_table_name}
+            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
+            ORDER BY ts DESC;
+            """,
+    ).df()
+def span_parser(span_to_parse: str | dict[str, Any]) -> dict[str, Any]:
+    if isinstance(span_to_parse, str):
+        return json.loads(span_to_parse)  # type: ignore[no-any-return]
+    return span_to_parse
+def extract_agent_name_from_span(span: dict[str, Any]) -> str | None:
+    try:
+        raw_data = span.get("raw_data", {})
+        if isinstance(raw_data, str):
+            raw_data = json.loads(raw_data)
+        # Try to get agent name from the span's name field
+        agent_name = raw_data.get("name", "unknown")
+        if agent_name != "unknown":
+            return str(agent_name)
+    except (json.JSONDecodeError, KeyError, TypeError):
+        logger.error(
+            f"Error parsing attributes from span (span_id: {span.get('span_id')}) in trace {span.get('trace_id')}",
+        )
+    return None
+# TODO: create TypedDict for span
+def extract_spans_with_metrics_and_agents(
+    root_spans: list[str | dict[str, Any]],
+) -> list[tuple[dict[str, Any], str]]:
     """Recursively extract all spans with metrics and their associated agent names from the span tree.
     Returns:
@@ -35,46 +79,42 @@ def extract_spans_with_metrics_and_agents(root_spans):
     """
     spans_with_metrics_and_agents = []
-    def traverse_spans(spans, current_agent_name="unknown"):
-        for span_str in spans:
-            span = json.loads(span_str) if type(span_str) == str else span_str
+    # TODO: Improve function so it won't modify variable outside of its scope
+    def traverse_spans(
+        spans: list[str | dict[str, Any]],
+        current_agent: str = "unknown",
+    ) -> None:
+        for span_to_parse in spans:
+            parsed_span = span_parser(span_to_parse)
             # Update current agent name if this span is an AGENT
-            if span.get("span_kind") == "AGENT":
-                try:
-                    raw_data = span.get("raw_data", {})
-                    if isinstance(raw_data, str):
-                        raw_data = json.loads(raw_data)
-                    # Try to get agent name from the span's name field
-                    agent_name = raw_data.get("name", "unknown")
-                    if agent_name != "unknown":
-                        current_agent_name = agent_name
-                except (json.JSONDecodeError, KeyError, TypeError):
-                    logger.error(
-                        f"Error parsing attributes from span (span_id: {span.get('span_id')}) in trace {span.get('trace_id')}",
-                    )
+            if parsed_span.get("span_kind") == "AGENT":
+                agent_name = extract_agent_name_from_span(parsed_span)
+                if agent_name:
+                    current_agent = agent_name
             # Check if this span has metrics
-            if span.get("metric_results") and len(span.get("metric_results", [])) > 0:
-                spans_with_metrics_and_agents.append((span, current_agent_name))
+            if parsed_span.get("metric_results", []):
+                spans_with_metrics_and_agents.append(
+                    (parsed_span, current_agent),
+                )
             # Recursively traverse children with the current agent name
-            if span.get("children", []):
-                traverse_spans(span["children"], current_agent_name)
+            if children_span := parsed_span.get("children", []):
+                traverse_spans(children_span, current_agent)
     traverse_spans(root_spans)
     return spans_with_metrics_and_agents
-def determine_relevance_pass_fail(score):
+def determine_relevance_pass_fail(score: float | None) -> str | None:
     """Determine pass/fail for relevance scores using global threshold"""
     if score is None:
         return None
     return "pass" if score >= RELEVANCE_SCORE_THRESHOLD else "fail"
-def determine_tool_pass_fail(score):
+def determine_tool_pass_fail(score: int | None) -> str | None:
     """Determine pass/fail for tool scores using global threshold"""
     if score is None:
         return None
@@ -141,16 +181,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
         ],
     ) -> list[SketchMetric]:
         # Query traces by timestamp
-        results = ddb_conn.sql(
-            f"""
-            SELECT
-                time_bucket(INTERVAL '5 minutes', start_time) as ts,
-                root_spans
-            FROM {dataset.dataset_table_name}
-            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
-            ORDER BY ts DESC;
-            """,
-        ).df()
+        results = root_span_in_time_buckets(ddb_conn, dataset)
         # Process traces and extract spans with metrics
         tool_selection_data = []
@@ -177,7 +208,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                 for metric_result in metric_results:
                     metric_type = metric_result.get("metric_type")
-                    details = json.loads(metric_result.get("details", '{}'))
+                    details = json.loads(metric_result.get("details", "{}"))
                     if metric_type == "ToolSelection":
                         tool_selection = details.get("tool_selection", {})
@@ -397,17 +428,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
             ),
         ],
     ) -> list[NumericMetric]:
-        # Query traces by timestamp
-        results = ddb_conn.sql(
-            f"""
-            SELECT
-                time_bucket(INTERVAL '5 minutes', start_time) as ts,
-                root_spans
-            FROM {dataset.dataset_table_name}
-            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
-            ORDER BY ts DESC;
-            """,
-        ).df()
+        results = root_span_in_time_buckets(ddb_conn, dataset)
         # Process traces and extract spans with metrics
         processed_data = []
@@ -430,7 +451,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
                 for metric_result in metric_results:
                     metric_type = metric_result.get("metric_type")
-                    details = json.loads(metric_result.get("details", '{}'))
+                    details = json.loads(metric_result.get("details", "{}"))
                     if metric_type in ["QueryRelevance", "ResponseRelevance"]:
                         relevance_data = details.get(
@@ -522,17 +543,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
             ),
         ],
     ) -> list[NumericMetric]:
-        # Query traces by timestamp
-        results = ddb_conn.sql(
-            f"""
-            SELECT
-                time_bucket(INTERVAL '5 minutes', start_time) as ts,
-                root_spans
-            FROM {dataset.dataset_table_name}
-            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
-            ORDER BY ts DESC;
-            """,
-        ).df()
+        results = root_span_in_time_buckets(ddb_conn, dataset)
         # Process traces and extract spans with metrics
         processed_data = []
@@ -555,7 +566,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
                 for metric_result in metric_results:
                     if metric_result.get("metric_type") == "ToolSelection":
-                        details = json.loads(metric_result.get("details", '{}'))
+                        details = json.loads(metric_result.get("details", "{}"))
                         tool_selection = details.get("tool_selection", {})
                         tool_selection_score = tool_selection.get("tool_selection")
@@ -701,16 +712,7 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
             ),
         ],
     ) -> list[NumericMetric]:
-        results = ddb_conn.sql(
-            f"""
-            SELECT
-                time_bucket(INTERVAL '5 minutes', start_time) as ts,
-                root_spans
-            FROM {dataset.dataset_table_name}
-            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
-            ORDER BY ts DESC;
-            """,
-        ).df()
+        results = root_span_in_time_buckets(ddb_conn, dataset)
         # Process traces and count LLM spans
         llm_call_counts = {}
@@ -723,10 +725,10 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
                 root_spans = json.loads(root_spans)
             # Count LLM spans in the tree
-            def count_llm_spans(spans):
+            def count_llm_spans(spans: list[str | dict[str, Any]]) -> int:
                 count = 0
-                for span_str in spans:
-                    span = json.loads(span_str) if type(span_str) == str else span_str
+                for span_to_parse in spans:
+                    span = span_parser(span_to_parse)
                     # Check if this span is an LLM span
                     if span.get("span_kind") == "LLM":
@@ -798,16 +800,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
         ],
     ) -> list[NumericMetric]:
         # Query traces by timestamp
-        results = ddb_conn.sql(
-            f"""
-            SELECT
-                time_bucket(INTERVAL '5 minutes', start_time) as ts,
-                root_spans
-            FROM {dataset.dataset_table_name}
-            WHERE root_spans IS NOT NULL AND length(root_spans) > 0
-            ORDER BY ts DESC;
-            """,
-        ).df()
+        results = root_span_in_time_buckets(ddb_conn, dataset)
         # Process traces and extract spans with metrics
         processed_data = []
@@ -830,7 +823,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
                 for metric_result in metric_results:
                     if metric_result.get("metric_type") == "ToolSelection":
-                        details = json.loads(metric_result.get("details", '{}'))
+                        details = json.loads(metric_result.get("details", "{}"))
                         tool_selection = details.get("tool_selection", {})
                         tool_selection_score = tool_selection.get("tool_selection")
@@ -884,3 +877,185 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
         )
         metric = self.series_to_metric(self.METRIC_NAME, series)
         return [metric]
+class AgenticTraceLatencyAggregation(SketchAggregationFunction):
+    METRIC_NAME = "trace_latency"
+    @staticmethod
+    def id() -> UUID:
+        return UUID("00000000-0000-0000-0000-000000000039")
+    @staticmethod
+    def display_name() -> str:
+        return "Trace Latency"
+    @staticmethod
+    def description() -> str:
+        return "Aggregation that reports the latency of the agentic trace in ms."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=AgenticTraceLatencyAggregation.METRIC_NAME,
+                description=AgenticTraceLatencyAggregation.description(),
+            ),
+        ]
+    def aggregate(
+        self,
+        ddb_conn: DuckDBPyConnection,
+        dataset: Annotated[
+            DatasetReference,
+            MetricDatasetParameterAnnotation(
+                friendly_name="Dataset",
+                description="The agentic trace dataset containing traces with nested spans.",
+                model_problem_type=ModelProblemType.AGENTIC_TRACE,
+            ),
+        ],
+    ) -> list[SketchMetric]:
+        # Query traces by timestamp and calculate latency directly in SQL
+        results = ddb_conn.sql(
+            f"""
+            SELECT
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
+                CAST(EXTRACT(EPOCH FROM (end_time - start_time)) * 1000 AS INTEGER) as latency_ms
+            FROM {dataset.dataset_table_name}
+            WHERE start_time IS NOT NULL
+                AND end_time IS NOT NULL
+                AND end_time > start_time
+            ORDER BY ts DESC;
+            """,
+        ).df()
+        if results.empty:
+            return []
+        df = results
+        # Create a single time series without grouping dimensions
+        # Since we have no dimensions to group by, we create one time series for all data
+        series = [self._group_to_series(df, "ts", [], "latency_ms")]
+        metric = self.series_to_metric(self.METRIC_NAME, series)
+        return [metric]
+class AgenticSpanLatencyAggregation(SketchAggregationFunction):
+    METRIC_NAME = "span_latency"
+    @staticmethod
+    def id() -> UUID:
+        return UUID("00000000-0000-0000-0000-000000000040")
+    @staticmethod
+    def display_name() -> str:
+        return "Span Latency"
+    @staticmethod
+    def description() -> str:
+        return "Aggregation that reports the latency of the agentic span in ms."
+    @staticmethod
+    def reported_aggregations() -> list[BaseReportedAggregation]:
+        return [
+            BaseReportedAggregation(
+                metric_name=AgenticSpanLatencyAggregation.METRIC_NAME,
+                description=AgenticSpanLatencyAggregation.description(),
+            ),
+        ]
+    def aggregate(
+        self,
+        ddb_conn: DuckDBPyConnection,
+        dataset: Annotated[
+            DatasetReference,
+            MetricDatasetParameterAnnotation(
+                friendly_name="Dataset",
+                description="The agentic trace dataset containing traces with nested spans.",
+                model_problem_type=ModelProblemType.AGENTIC_TRACE,
+            ),
+        ],
+    ) -> list[SketchMetric]:
+        results = root_span_in_time_buckets(ddb_conn, dataset)
+        latency_data = []
+        for _, row in results.iterrows():
+            ts = row["ts"]
+            root_spans = row["root_spans"]
+            # Parse root_spans if it's a string
+            if isinstance(root_spans, str):
+                root_spans = json.loads(root_spans)
+            # Extract all spans with their timing data
+            spans_with_timing = self._extract_spans_with_timing(root_spans)
+            for span_data in spans_with_timing:
+                span, current_agent, latency_ms = span_data
+                span_kind = span.get("span_kind", "unknown")
+                if latency_ms is not None and latency_ms > 0:
+                    latency_data.append(
+                        {
+                            "ts": ts,
+                            "latency_ms": latency_ms,
+                            "span_kind": span_kind,
+                            "agent_name": current_agent,
+                        }
+                    )
+        if not latency_data:
+            return []
+        # Convert to DataFrame and create sketch metrics
+        df = pd.DataFrame(latency_data)
+        series = self.group_query_results_to_sketch_metrics(
+            df,
+            "latency_ms",
+            ["span_kind", "agent_name"],
+            "ts",
+        )
+        metric = self.series_to_metric(self.METRIC_NAME, series)
+        return [metric]
+    def _extract_spans_with_timing(
+        self, spans: list[str | dict[str, Any]], current_agent: str = "unknown"
+    ) -> list[tuple[dict[str, Any], str, int | None]]:
+        """Recursively extract spans with calculated latency in milliseconds"""
+        spans_with_timing = []
+        for span_to_parse in spans:
+            span = span_parser(span_to_parse)
+            # Update current agent name if this span is an AGENT
+            if span.get("span_kind") == "AGENT":
+                agent_name = extract_agent_name_from_span(span)
+                if agent_name:
+                    current_agent = agent_name
+            # Calculate latency if both start_time and end_time exist
+            start_time = span.get("start_time")
+            end_time = span.get("end_time")
+            latency_ms = None
+            if start_time and end_time:
+                try:
+                    # Parse ISO format timestamps and calculate latency in milliseconds
+                    # Assume same timezone for start and end time, specific TZ not important for latency calculation
+                    start_dt = datetime.fromisoformat(start_time)
+                    end_dt = datetime.fromisoformat(end_time)
+                    latency_ms = int((end_dt - start_dt).total_seconds() * 1000)
+                except (ValueError, TypeError) as e:
+                    logger.warning(
+                        f"Error calculating latency for span {span.get('span_id')}: {e}"
+                    )
+            spans_with_timing.append((span, current_agent, latency_ms))
+            # Recursively process children
+            if children := span.get("children", []):
+                spans_with_timing.extend(
+                    self._extract_spans_with_timing(children, current_agent)
+                )
+        return spans_with_timing

arthur_common/aggregations/functions/categorical_count.py CHANGED Viewed

@@ -18,7 +18,10 @@ from arthur_common.models.schema_definitions import (
     ScalarType,
     ScopeSchemaTag,
 )
-from arthur_common.tools.duckdb_data_loader import escape_identifier, escape_str_literal
+from arthur_common.tools.duckdb_data_loader import (
+    escape_str_literal,
+    unescape_identifier,
+)
 class CategoricalCountAggregationFunction(NumericAggregationFunction):
@@ -93,30 +96,27 @@ class CategoricalCountAggregationFunction(NumericAggregationFunction):
         ] = None,
     ) -> list[NumericMetric]:
         """Executed SQL with no segmentation columns:
-            select time_bucket(INTERVAL '5 minutes', {timestamp_col_escaped}) as ts, \
+            select time_bucket(INTERVAL '5 minutes', {timestamp_col}) as ts, \
                 count(*) as count, \
-                {categorical_col_escaped} as category, \
-                {categorical_col_name_escaped} as column_name \
+                {categorical_col} as category, \
+                {categorical_col_name_unescaped} as column_name \
                 from {dataset.dataset_table_name} \
                 where ts is not null \
                 group by ts, category
         """
         segmentation_cols = [] if not segmentation_cols else segmentation_cols
-        timestamp_col_escaped = escape_identifier(timestamp_col)
-        categorical_col_escaped = escape_identifier(categorical_col)
-        categorical_col_name_escaped = escape_str_literal(categorical_col)
+        categorical_col_name_unescaped = escape_str_literal(
+            unescape_identifier(categorical_col),
+        )
         # build query components with segmentation columns
-        escaped_segmentation_cols = [
-            escape_identifier(col) for col in segmentation_cols
-        ]
         all_select_clause_cols = [
-            f"time_bucket(INTERVAL '5 minutes', {timestamp_col_escaped}) as ts",
+            f"time_bucket(INTERVAL '5 minutes', {timestamp_col}) as ts",
             f"count(*) as count",
-            f"{categorical_col_escaped} as category",
-            f"{categorical_col_name_escaped} as column_name",
-        ] + escaped_segmentation_cols
-        all_group_by_cols = ["ts", "category"] + escaped_segmentation_cols
+            f"{categorical_col} as category",
+            f"{categorical_col_name_unescaped} as column_name",
+        ] + segmentation_cols
+        all_group_by_cols = ["ts", "category"] + segmentation_cols
         extra_dims = ["column_name", "category"]
         # build query

arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl

arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl