PyPI - arthur-common - Versions diffs - 2.1.56__py3-none-any.whl → 2.1.57__py3-none-any.whl - Mend

arthur-common 2.1.56__py3-none-any.whl → 2.1.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arthur-common might be problematic. Click here for more details.

Files changed (4)

arthur_common/aggregations/functions/agentic_aggregations.py CHANGED Viewed

@@ -36,7 +36,9 @@ def extract_spans_with_metrics_and_agents(root_spans):
     spans_with_metrics_and_agents = []
     def traverse_spans(spans, current_agent_name="unknown"):
-        for span in spans:
+        for span_str in spans:
+            span = json.loads(span_str) if type(span_str) == str else span_str
             # Update current agent name if this span is an AGENT
             if span.get("span_kind") == "AGENT":
                 try:
@@ -142,7 +144,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 root_spans
             FROM {dataset.dataset_table_name}
             WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -175,7 +177,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                 for metric_result in metric_results:
                     metric_type = metric_result.get("metric_type")
-                    details = metric_result.get("details", {})
+                    details = json.loads(metric_result.get("details", '{}'))
                     if metric_type == "ToolSelection":
                         tool_selection = details.get("tool_selection", {})
@@ -193,6 +195,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "ts": ts,
                                     "tool_selection_score": tool_selection_score,
                                     "tool_selection_reason": tool_selection_reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -209,6 +212,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "ts": ts,
                                     "tool_usage_score": tool_usage_score,
                                     "tool_usage_reason": tool_usage_reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -228,6 +232,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "score_type": "llm_relevance_score",
                                     "score_value": llm_score,
                                     "reason": reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -238,6 +243,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "score_type": "reranker_relevance_score",
                                     "score_value": reranker_score,
                                     "reason": reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -248,6 +254,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "score_type": "bert_f_score",
                                     "score_value": bert_score,
                                     "reason": reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -269,6 +276,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "score_type": "llm_relevance_score",
                                     "score_value": llm_score,
                                     "reason": reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -289,6 +297,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
                                     "score_type": "bert_f_score",
                                     "score_value": bert_score,
                                     "reason": reason,
+                                    "agent_name": agent_name,
                                 },
                             )
@@ -300,7 +309,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
             series = self.group_query_results_to_sketch_metrics(
                 df,
                 "tool_selection_score",
-                ["tool_selection_reason"],
+                ["tool_selection_reason", "agent_name"],
                 "ts",
             )
             metrics.append(
@@ -313,7 +322,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
             series = self.group_query_results_to_sketch_metrics(
                 df,
                 "tool_usage_score",
-                ["tool_usage_reason"],
+                ["tool_usage_reason", "agent_name"],
                 "ts",
             )
             metrics.append(self.series_to_metric(self.TOOL_USAGE_METRIC_NAME, series))
@@ -392,7 +401,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 root_spans
             FROM {dataset.dataset_table_name}
             WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -421,7 +430,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
                 for metric_result in metric_results:
                     metric_type = metric_result.get("metric_type")
-                    details = metric_result.get("details", {})
+                    details = json.loads(metric_result.get("details", '{}'))
                     if metric_type in ["QueryRelevance", "ResponseRelevance"]:
                         relevance_data = details.get(
@@ -517,7 +526,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 root_spans
             FROM {dataset.dataset_table_name}
             WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -546,7 +555,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
                 for metric_result in metric_results:
                     if metric_result.get("metric_type") == "ToolSelection":
-                        details = metric_result.get("details", {})
+                        details = json.loads(metric_result.get("details", '{}'))
                         tool_selection = details.get("tool_selection", {})
                         tool_selection_score = tool_selection.get("tool_selection")
@@ -638,7 +647,7 @@ class AgenticEventCountAggregation(NumericAggregationFunction):
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 COUNT(*) as count
             FROM {dataset.dataset_table_name}
             GROUP BY ts
@@ -695,7 +704,7 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 root_spans
             FROM {dataset.dataset_table_name}
             WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -716,7 +725,9 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
             # Count LLM spans in the tree
             def count_llm_spans(spans):
                 count = 0
-                for span in spans:
+                for span_str in spans:
+                    span = json.loads(span_str) if type(span_str) == str else span_str
                     # Check if this span is an LLM span
                     if span.get("span_kind") == "LLM":
                         count += 1
@@ -790,7 +801,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
         results = ddb_conn.sql(
             f"""
             SELECT
-                time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
+                time_bucket(INTERVAL '5 minutes', start_time) as ts,
                 root_spans
             FROM {dataset.dataset_table_name}
             WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -819,7 +830,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
                 for metric_result in metric_results:
                     if metric_result.get("metric_type") == "ToolSelection":
-                        details = metric_result.get("details", {})
+                        details = json.loads(metric_result.get("details", '{}'))
                         tool_selection = details.get("tool_selection", {})
                         tool_selection_score = tool_selection.get("tool_selection")

{arthur_common-2.1.56.dist-info → arthur_common-2.1.57.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arthur-common
-Version: 2.1.56
+Version: 2.1.57
 Summary: Utility code common to Arthur platform components.
 License: MIT
 Author: Arthur

{arthur_common-2.1.56.dist-info → arthur_common-2.1.57.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ arthur_common/aggregations/__init__.py,sha256=vISWyciQAtksa71OKeHNP-QyFGd1NzBKq_
 arthur_common/aggregations/aggregator.py,sha256=kS9Qru0AhZzZz4Ym20NT7aNrbcQaqg2zgBVYFogFbbg,7936
 arthur_common/aggregations/functions/README.md,sha256=MkZoTAJ94My96R5Z8GAxud7S6vyR0vgVi9gqdt9a4XY,5460
 arthur_common/aggregations/functions/__init__.py,sha256=HqC3UNRURX7ZQHgamTrQvfA8u_FiZGZ4I4eQW7Ooe5o,1299
-arthur_common/aggregations/functions/agentic_aggregations.py,sha256=35jHA2hI-NGmiFJCMON0Vo4sV9om8Acp3YEKSIVDLA8,32851
+arthur_common/aggregations/functions/agentic_aggregations.py,sha256=AXPuIgESf-q2JG4vRc8XYARFbI8R92e7uaR7cgaTMqY,33401
 arthur_common/aggregations/functions/categorical_count.py,sha256=wc1ovL8JoiSeoSTk9h1fgrLj1QuQeYYZmEqgffGc2cw,5328
 arthur_common/aggregations/functions/confusion_matrix.py,sha256=Zac-biMeIVyLRcMXWmENgYq8X4I7Trm8gOE5NRLGKU0,22108
 arthur_common/aggregations/functions/inference_count.py,sha256=SrRfxQVnX-wRTZ1zbqUKupPdACvfKeUpZDidZs45ZUY,4079
@@ -39,6 +39,6 @@ arthur_common/tools/functions.py,sha256=FWL4eWO5-vLp86WudT-MGUKvf2B8f02IdoXQFKd6
 arthur_common/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arthur_common/tools/schema_inferer.py,sha256=Ur4CXGAkd6ZMSU0nMNrkOEElsBopHXq0lctTV8X92W8,5188
 arthur_common/tools/time_utils.py,sha256=4gfiu9NXfvPZltiVNLSIQGylX6h2W0viNi9Kv4bKyfw,1410
-arthur_common-2.1.56.dist-info/METADATA,sha256=RCt5Wkd-AOQGbaADt5-XxHpzaSJPbluk_8NCBdHj0pg,1609
-arthur_common-2.1.56.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-arthur_common-2.1.56.dist-info/RECORD,,
+arthur_common-2.1.57.dist-info/METADATA,sha256=tciX3Iwg2PhiaJkObFd625vI7fcLO8S4JvICHdkzPvw,1609
+arthur_common-2.1.57.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+arthur_common-2.1.57.dist-info/RECORD,,

{arthur_common-2.1.56.dist-info → arthur_common-2.1.57.dist-info}/WHEEL RENAMED Viewed

File without changes

arthur-common 2.1.56__py3-none-any.whl → 2.1.57__py3-none-any.whl

Potentially problematic release.

arthur-common 2.1.56__py3-none-any.whl → 2.1.57__py3-none-any.whl