arthur-common 2.1.55__tar.gz → 2.1.57__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arthur-common might be problematic; see the package registry's advisory page for more details.

Files changed (44)
  1. {arthur_common-2.1.55 → arthur_common-2.1.57}/PKG-INFO +1 -1
  2. {arthur_common-2.1.55 → arthur_common-2.1.57}/pyproject.toml +1 -1
  3. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/agentic_aggregations.py +25 -14
  4. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/shield.py +5 -2
  5. {arthur_common-2.1.55 → arthur_common-2.1.57}/README.md +0 -0
  6. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/__init__.py +0 -0
  7. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/__init__.py +0 -0
  8. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/aggregator.py +0 -0
  9. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/README.md +0 -0
  10. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/__init__.py +0 -0
  11. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/categorical_count.py +0 -0
  12. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/confusion_matrix.py +0 -0
  13. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/inference_count.py +0 -0
  14. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/inference_count_by_class.py +0 -0
  15. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/inference_null_count.py +0 -0
  16. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/mean_absolute_error.py +0 -0
  17. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/mean_squared_error.py +0 -0
  18. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/multiclass_confusion_matrix.py +0 -0
  19. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +0 -0
  20. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/numeric_stats.py +0 -0
  21. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/numeric_sum.py +0 -0
  22. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/py.typed +0 -0
  23. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/functions/shield_aggregations.py +0 -0
  24. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/aggregations/py.typed +0 -0
  25. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/config/__init__.py +0 -0
  26. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/config/config.py +0 -0
  27. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/config/settings.yaml +0 -0
  28. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/__init__.py +0 -0
  29. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/connectors.py +0 -0
  30. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/datasets.py +0 -0
  31. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/metrics.py +0 -0
  32. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/py.typed +0 -0
  33. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/schema_definitions.py +0 -0
  34. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/models/task_job_specs.py +0 -0
  35. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/py.typed +0 -0
  36. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/__init__.py +0 -0
  37. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/aggregation_analyzer.py +0 -0
  38. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/aggregation_loader.py +0 -0
  39. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/duckdb_data_loader.py +0 -0
  40. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/duckdb_utils.py +0 -0
  41. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/functions.py +0 -0
  42. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/py.typed +0 -0
  43. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/schema_inferer.py +0 -0
  44. {arthur_common-2.1.55 → arthur_common-2.1.57}/src/arthur_common/tools/time_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arthur-common
3
- Version: 2.1.55
3
+ Version: 2.1.57
4
4
  Summary: Utility code common to Arthur platform components.
5
5
  License: MIT
6
6
  Author: Arthur
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "arthur-common"
3
- version = "2.1.55"
3
+ version = "2.1.57"
4
4
  description = "Utility code common to Arthur platform components."
5
5
  authors = ["Arthur <engineering@arthur.ai>"]
6
6
  license = "MIT"
@@ -36,7 +36,9 @@ def extract_spans_with_metrics_and_agents(root_spans):
36
36
  spans_with_metrics_and_agents = []
37
37
 
38
38
  def traverse_spans(spans, current_agent_name="unknown"):
39
- for span in spans:
39
+ for span_str in spans:
40
+ span = json.loads(span_str) if type(span_str) == str else span_str
41
+
40
42
  # Update current agent name if this span is an AGENT
41
43
  if span.get("span_kind") == "AGENT":
42
44
  try:
@@ -142,7 +144,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
142
144
  results = ddb_conn.sql(
143
145
  f"""
144
146
  SELECT
145
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
147
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
146
148
  root_spans
147
149
  FROM {dataset.dataset_table_name}
148
150
  WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -175,7 +177,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
175
177
 
176
178
  for metric_result in metric_results:
177
179
  metric_type = metric_result.get("metric_type")
178
- details = metric_result.get("details", {})
180
+ details = json.loads(metric_result.get("details", '{}'))
179
181
 
180
182
  if metric_type == "ToolSelection":
181
183
  tool_selection = details.get("tool_selection", {})
@@ -193,6 +195,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
193
195
  "ts": ts,
194
196
  "tool_selection_score": tool_selection_score,
195
197
  "tool_selection_reason": tool_selection_reason,
198
+ "agent_name": agent_name,
196
199
  },
197
200
  )
198
201
 
@@ -209,6 +212,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
209
212
  "ts": ts,
210
213
  "tool_usage_score": tool_usage_score,
211
214
  "tool_usage_reason": tool_usage_reason,
215
+ "agent_name": agent_name,
212
216
  },
213
217
  )
214
218
 
@@ -228,6 +232,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
228
232
  "score_type": "llm_relevance_score",
229
233
  "score_value": llm_score,
230
234
  "reason": reason,
235
+ "agent_name": agent_name,
231
236
  },
232
237
  )
233
238
 
@@ -238,6 +243,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
238
243
  "score_type": "reranker_relevance_score",
239
244
  "score_value": reranker_score,
240
245
  "reason": reason,
246
+ "agent_name": agent_name,
241
247
  },
242
248
  )
243
249
 
@@ -248,6 +254,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
248
254
  "score_type": "bert_f_score",
249
255
  "score_value": bert_score,
250
256
  "reason": reason,
257
+ "agent_name": agent_name,
251
258
  },
252
259
  )
253
260
 
@@ -269,6 +276,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
269
276
  "score_type": "llm_relevance_score",
270
277
  "score_value": llm_score,
271
278
  "reason": reason,
279
+ "agent_name": agent_name,
272
280
  },
273
281
  )
274
282
 
@@ -289,6 +297,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
289
297
  "score_type": "bert_f_score",
290
298
  "score_value": bert_score,
291
299
  "reason": reason,
300
+ "agent_name": agent_name,
292
301
  },
293
302
  )
294
303
 
@@ -300,7 +309,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
300
309
  series = self.group_query_results_to_sketch_metrics(
301
310
  df,
302
311
  "tool_selection_score",
303
- ["tool_selection_reason"],
312
+ ["tool_selection_reason", "agent_name"],
304
313
  "ts",
305
314
  )
306
315
  metrics.append(
@@ -313,7 +322,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
313
322
  series = self.group_query_results_to_sketch_metrics(
314
323
  df,
315
324
  "tool_usage_score",
316
- ["tool_usage_reason"],
325
+ ["tool_usage_reason", "agent_name"],
317
326
  "ts",
318
327
  )
319
328
  metrics.append(self.series_to_metric(self.TOOL_USAGE_METRIC_NAME, series))
@@ -392,7 +401,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
392
401
  results = ddb_conn.sql(
393
402
  f"""
394
403
  SELECT
395
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
404
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
396
405
  root_spans
397
406
  FROM {dataset.dataset_table_name}
398
407
  WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -421,7 +430,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
421
430
 
422
431
  for metric_result in metric_results:
423
432
  metric_type = metric_result.get("metric_type")
424
- details = metric_result.get("details", {})
433
+ details = json.loads(metric_result.get("details", '{}'))
425
434
 
426
435
  if metric_type in ["QueryRelevance", "ResponseRelevance"]:
427
436
  relevance_data = details.get(
@@ -517,7 +526,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
517
526
  results = ddb_conn.sql(
518
527
  f"""
519
528
  SELECT
520
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
529
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
521
530
  root_spans
522
531
  FROM {dataset.dataset_table_name}
523
532
  WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -546,7 +555,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
546
555
 
547
556
  for metric_result in metric_results:
548
557
  if metric_result.get("metric_type") == "ToolSelection":
549
- details = metric_result.get("details", {})
558
+ details = json.loads(metric_result.get("details", '{}'))
550
559
  tool_selection = details.get("tool_selection", {})
551
560
 
552
561
  tool_selection_score = tool_selection.get("tool_selection")
@@ -638,7 +647,7 @@ class AgenticEventCountAggregation(NumericAggregationFunction):
638
647
  results = ddb_conn.sql(
639
648
  f"""
640
649
  SELECT
641
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
650
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
642
651
  COUNT(*) as count
643
652
  FROM {dataset.dataset_table_name}
644
653
  GROUP BY ts
@@ -695,7 +704,7 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
695
704
  results = ddb_conn.sql(
696
705
  f"""
697
706
  SELECT
698
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
707
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
699
708
  root_spans
700
709
  FROM {dataset.dataset_table_name}
701
710
  WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -716,7 +725,9 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
716
725
  # Count LLM spans in the tree
717
726
  def count_llm_spans(spans):
718
727
  count = 0
719
- for span in spans:
728
+ for span_str in spans:
729
+ span = json.loads(span_str) if type(span_str) == str else span_str
730
+
720
731
  # Check if this span is an LLM span
721
732
  if span.get("span_kind") == "LLM":
722
733
  count += 1
@@ -790,7 +801,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
790
801
  results = ddb_conn.sql(
791
802
  f"""
792
803
  SELECT
793
- time_bucket(INTERVAL '5 minutes', to_timestamp(start_time / 1000000)) as ts,
804
+ time_bucket(INTERVAL '5 minutes', start_time) as ts,
794
805
  root_spans
795
806
  FROM {dataset.dataset_table_name}
796
807
  WHERE root_spans IS NOT NULL AND length(root_spans) > 0
@@ -819,7 +830,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
819
830
 
820
831
  for metric_result in metric_results:
821
832
  if metric_result.get("metric_type") == "ToolSelection":
822
- details = metric_result.get("details", {})
833
+ details = json.loads(metric_result.get("details", '{}'))
823
834
  tool_selection = details.get("tool_selection", {})
824
835
 
825
836
  tool_selection_score = tool_selection.get("tool_selection")
@@ -280,7 +280,10 @@ class TaskResponse(BaseModel):
280
280
  updated_at: int = Field(
281
281
  description="Time the task was created in unix milliseconds",
282
282
  )
283
- is_agentic: bool = Field(description="Whether the task is agentic or not")
283
+ is_agentic: Optional[bool] = Field(
284
+ description="Whether the task is agentic or not",
285
+ default=None,
286
+ )
284
287
  rules: List[RuleResponse] = Field(description="List of all the rules for the task.")
285
288
  metrics: Optional[List[MetricResponse]] = Field(
286
289
  description="List of all the metrics for the task.",
@@ -619,7 +622,7 @@ class NewMetricRequest(BaseModel):
619
622
 
620
623
  # If neither is set, default to use_llm_judge=True
621
624
  if relevance_threshold is None and (
622
- use_llm_judge is None or use_llm_judge == False
625
+ use_llm_judge is None or use_llm_judge == False
623
626
  ):
624
627
  config_values["use_llm_judge"] = True
625
628
 
Files marked +0 -0 above have no content changes between these versions.