ragaai-catalyst 2.1.5b22__py3-none-any.whl → 2.1.5b23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,28 +2,30 @@ import json
 import os
 from datetime import datetime
 from pathlib import Path
-from typing import List, Any, Dict
+from typing import List, Any, Dict, Optional
 import uuid
 import sys
 import tempfile
 import threading
 import time
-from ....ragaai_catalyst import RagaAICatalyst
-from ..data.data_structure import (
+
+from ragaai_catalyst.tracers.agentic_tracing.upload.upload_local_metric import calculate_metric
+from ragaai_catalyst import RagaAICatalyst
+from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
     Trace,
     Metadata,
     SystemInfo,
     Resources,
     Component,
 )
-from ..upload.upload_agentic_traces import UploadAgenticTraces
-from ..upload.upload_code import upload_code
-from ..upload.upload_trace_metric import upload_trace_metric
-from ..utils.file_name_tracker import TrackName
-from ..utils.zip_list_of_unique_files import zip_list_of_unique_files
-from ..utils.span_attributes import SpanAttributes
-from ..utils.create_dataset_schema import create_dataset_schema_with_trace
-from ..utils.system_monitor import SystemMonitor
+from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
+from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
+from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
+from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
+from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
+from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
+from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor

 import logging

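
Note: this hunk replaces the package-relative imports (e.g. "from ....ragaai_catalyst import RagaAICatalyst") with absolute ragaai_catalyst.* paths and pulls in calculate_metric from upload_local_metric, which the new metric-execution code further down depends on. A minimal sketch of why the absolute spelling is more robust, assuming only that the package is installed (the loading scenario is illustrative, not taken from this diff):

    # Relative imports resolve against the importing module's __package__, so a
    # module loaded by file path (importlib machinery, script runners, some
    # notebook setups) has no package context and raises ImportError on
    # "from ....ragaai_catalyst import ...". Absolute imports resolve the same
    # way no matter how the file was loaded.
    import importlib

    span_attrs = importlib.import_module(
        "ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes"
    )
    print(span_attrs.SpanAttributes)
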
@@ -181,40 +183,42 @@ class BaseTracer:
         self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
         self.trace.end_time = datetime.now().astimezone().isoformat()

-        #track memory usage
+        # track memory usage
         self.tracking = False
         self.trace.metadata.resources.memory.values = self.memory_usage_list

-        #track cpu usage
+        # track cpu usage
         self.trace.metadata.resources.cpu.values = self.cpu_usage_list

-        #track network and disk usage
+        # track network and disk usage
         network_uploads, network_downloads = 0, 0
         disk_read, disk_write = 0, 0
-
+
         # Handle cases where lists might have different lengths
         min_len = min(len(self.network_usage_list), len(self.disk_usage_list))
         for i in range(min_len):
             network_usage = self.network_usage_list[i]
             disk_usage = self.disk_usage_list[i]
-
+
             # Safely get network usage values with defaults of 0
             network_uploads += network_usage.get('uploads', 0) or 0
             network_downloads += network_usage.get('downloads', 0) or 0
-
+
             # Safely get disk usage values with defaults of 0
             disk_read += disk_usage.get('disk_read', 0) or 0
             disk_write += disk_usage.get('disk_write', 0) or 0

-        #track disk usage
+        # track disk usage
         disk_list_len = len(self.disk_usage_list)
         self.trace.metadata.resources.disk.read = [disk_read / disk_list_len if disk_list_len > 0 else 0]
         self.trace.metadata.resources.disk.write = [disk_write / disk_list_len if disk_list_len > 0 else 0]

-        #track network usage
+        # track network usage
         network_list_len = len(self.network_usage_list)
-        self.trace.metadata.resources.network.uploads = [network_uploads / network_list_len if network_list_len > 0 else 0]
-        self.trace.metadata.resources.network.downloads = [network_downloads / network_list_len if network_list_len > 0 else 0]
+        self.trace.metadata.resources.network.uploads = [
+            network_uploads / network_list_len if network_list_len > 0 else 0]
+        self.trace.metadata.resources.network.downloads = [
+            network_downloads / network_list_len if network_list_len > 0 else 0]

         # update interval time
         self.trace.metadata.resources.cpu.interval = float(self.interval_time)
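
Note: the resource bookkeeping above averages the sampled network and disk counters rather than summing them, and stores each average as a single-element list on the trace metadata. A minimal sketch of that arithmetic with hypothetical samples (the "or 0" guards samples where a key is present but None):

    network_usage_list = [{"uploads": 100, "downloads": 400},
                          {"uploads": None, "downloads": 200}]

    uploads = sum((s.get("uploads", 0) or 0) for s in network_usage_list)
    n = len(network_usage_list)
    print([uploads / n if n > 0 else 0])  # [50.0]; the conditional avoids ZeroDivisionError
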
@@ -244,7 +248,7 @@ class BaseTracer:
         # Add metrics to trace before saving
         trace_data = self.trace.to_dict()
         trace_data["metrics"] = self.trace_metrics
-
+
         # Clean up trace_data before saving
         cleaned_trace_data = self._clean_trace(trace_data)

@@ -454,15 +458,16 @@ class BaseTracer:
                         else existing_span.__dict__
                     )
                     if (
-                        existing_dict.get("hash_id")
-                        == span_dict.get("hash_id")
-                        and str(existing_dict.get("data", {}).get("input"))
-                        == str(span_dict.get("data", {}).get("input"))
-                        and str(existing_dict.get("data", {}).get("output"))
-                        == str(span_dict.get("data", {}).get("output"))
+                            existing_dict.get("hash_id")
+                            == span_dict.get("hash_id")
+                            and str(existing_dict.get("data", {}).get("input"))
+                            == str(span_dict.get("data", {}).get("input"))
+                            and str(existing_dict.get("data", {}).get("output"))
+                            == str(span_dict.get("data", {}).get("output"))
                     ):
                         unique_spans[i] = span
                         break
+
             else:
                 # For non-LLM spans, process their children if they exist
                 if "data" in span_dict and "children" in span_dict["data"]:
@@ -473,8 +478,44 @@ class BaseTracer:
                         span["data"]["children"] = filtered_children
                     else:
                         span.data["children"] = filtered_children
-            unique_spans.append(span)
+                unique_spans.append(span)

+        # Process spans to update model information for LLM spans with same name
+        llm_spans_by_name = {}
+        for i, span in enumerate(unique_spans):
+            span_dict = span if isinstance(span, dict) else span.__dict__
+
+            if span_dict.get('type') == 'llm':
+                span_name = span_dict.get('name')
+                if span_name:
+                    if span_name not in llm_spans_by_name:
+                        llm_spans_by_name[span_name] = []
+                    llm_spans_by_name[span_name].append((i, span_dict))
+
+        # Update model information for spans with same name
+        for spans_with_same_name in llm_spans_by_name.values():
+            if len(spans_with_same_name) > 1:
+                # Check if any span has non-default model
+                has_custom_model = any(
+                    span[1].get('info', {}).get('model') != 'default'
+                    for span in spans_with_same_name
+                )
+
+                # If we have a custom model, update all default models to 'custom'
+                if has_custom_model:
+                    for idx, span_dict in spans_with_same_name:
+                        if span_dict.get('info', {}).get('model') == 'default':
+                            if isinstance(unique_spans[idx], dict):
+                                if 'info' not in unique_spans[idx]:
+                                    unique_spans[idx]['info'] = {}
+                                # unique_spans[idx]['info']['model'] = 'custom'
+                                unique_spans[idx]['type'] = 'custom'
+                            else:
+                                if not hasattr(unique_spans[idx], 'info'):
+                                    unique_spans[idx].info = {}
+                                # unique_spans[idx].info['model'] = 'custom'
+                                unique_spans[idx].type = 'custom'
+
         return unique_spans

     # Remove any spans without hash ids
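
Note: the block added above groups the deduplicated LLM spans by name and, when several spans share a name and at least one of them reports a real model, retypes the remaining spans whose model is still "default" as type "custom" (the commented-out assignments show that rewriting info['model'] itself was considered and left disabled). A standalone sketch of the same pass over hypothetical span dicts:

    spans = [
        {"type": "llm", "name": "chat", "info": {"model": "gpt-4o-mini"}},
        {"type": "llm", "name": "chat", "info": {"model": "default"}},
    ]
    by_name = {}
    for i, s in enumerate(spans):
        if s.get("type") == "llm" and s.get("name"):
            by_name.setdefault(s["name"], []).append((i, s))
    for group in by_name.values():
        has_custom = any(s.get("info", {}).get("model") != "default" for _, s in group)
        if len(group) > 1 and has_custom:
            for i, s in group:
                if s.get("info", {}).get("model") == "default":
                    spans[i]["type"] = "custom"  # no longer treated as an LLM span
    print(spans[1]["type"])  # custom
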
@@ -501,7 +542,7 @@ class BaseTracer:
             int: Next interaction ID to use
         """
         child_type = child.get("type")
-
+
         if child_type == "tool":
             # Tool call start
             interactions.append(
@@ -614,7 +655,7 @@ class BaseTracer:
                 }
             )
             interaction_id += 1
-
+
             interactions.append(
                 {
                     "id": str(interaction_id),
@@ -795,7 +836,7 @@ class BaseTracer:
                 }
             )
             interaction_id += 1
-
+
             interactions.append(
                 {
                     "id": str(interaction_id),
@@ -857,15 +898,83 @@ class BaseTracer:

         return {"workflow": sorted_interactions}

+    # TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
+
+    def execute_metrics(self,
+                        name: str,
+                        model: str,
+                        provider: str,
+                        prompt: str,
+                        context: str,
+                        response: str
+                        ):
+        if not hasattr(self, 'trace'):
+            logger.warning("Cannot add metrics before trace is initialized. Call start() first.")
+            return
+
+        # Convert individual parameters to metric dict if needed
+        if isinstance(name, str):
+            metrics = [{
+                "name": name
+            }]
+        else:
+            # Handle dict or list input
+            metrics = name if isinstance(name, list) else [name] if isinstance(name, dict) else []
+
+        try:
+            for metric in metrics:
+                if not isinstance(metric, dict):
+                    raise ValueError(f"Expected dict, got {type(metric)}")
+
+                if "name" not in metric:
+                    raise ValueError("Metric must contain 'name'")  # score was written not required here
+
+                # Handle duplicate metric names on executing metric
+                metric_name = metric["name"]
+                if metric_name in self.visited_metrics:
+                    count = sum(1 for m in self.visited_metrics if m.startswith(metric_name))
+                    metric_name = f"{metric_name}_{count + 1}"
+                self.visited_metrics.append(metric_name)
+
+                result = calculate_metric(project_id=self.project_id,
+                                          metric_name=metric_name,
+                                          model=model,
+                                          org_domain="raga",
+                                          provider=provider,
+                                          user_id="1",  # self.user_details['id'],
+                                          prompt=prompt,
+                                          context=context,
+                                          response=response
+                                          )
+
+                result = result['data']
+                formatted_metric = {
+                    "name": metric_name,
+                    "score": result.get("score"),
+                    "reason": result.get("reason", ""),
+                    "source": "user",
+                    "cost": result.get("cost"),
+                    "latency": result.get("latency"),
+                    "mappings": [],
+                    "config": result.get("metric_config", {})
+                }
+
+                logger.debug(f"Executed metric: {formatted_metric}")
+
+        except ValueError as e:
+            logger.error(f"Validation Error: {e}")
+        except Exception as e:
+            logger.error(f"Error adding metric: {e}")
+
     def add_metrics(
-        self,
-        name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
-        score: float | int = None,
-        reasoning: str = "",
-        cost: float = None,
-        latency: float = None,
-        metadata: Dict[str, Any] = None,
-        config: Dict[str, Any] = None,
+            self,
+            name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
+            score: float | int = None,
+            reasoning: str = "",
+            cost: float = None,
+            latency: float = None,
+            metadata: Dict[str, Any] = None,
+            config: Dict[str, Any] = None,
     ):
         """Add metrics at the trace level.

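
Note: execute_metrics is a new entry point that evaluates a metric server-side through the freshly imported calculate_metric helper. As shipped it only logs the formatted result at debug level (it does not append to self.trace_metrics the way add_metrics does), and org_domain and user_id are hard-coded. A hedged usage sketch, assuming a tracer whose trace has been started (metric and model names here are hypothetical):

    # If start() has not been called, the method logs a warning and returns,
    # since self.trace and self.visited_metrics do not exist yet.
    tracer.execute_metrics(
        name="hallucination",   # also accepts a dict or a list of {"name": ...} dicts
        model="gpt-4o-mini",
        provider="openai",
        prompt="What is the capital of France?",
        context="France is a country in Western Europe.",
        response="Paris",
    )
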
@@ -909,7 +1018,7 @@ class BaseTracer:
             for metric in metrics:
                 if not isinstance(metric, dict):
                     raise ValueError(f"Expected dict, got {type(metric)}")
-
+
                 if "name" not in metric or "score" not in metric:
                     raise ValueError("Metric must contain 'name' and 'score' fields")

@@ -921,7 +1030,7 @@ class BaseTracer:
                 self.visited_metrics.append(metric_name)

                 formatted_metric = {
-                    "name": metric_name,
+                    "name": metric_name,
                     "score": metric["score"],
                     "reason": metric.get("reasoning", ""),
                     "source": "user",
@@ -931,7 +1040,7 @@ class BaseTracer:
                     "mappings": [],
                     "config": metric.get("config", {})
                 }
-
+
                 self.trace_metrics.append(formatted_metric)
                 logger.debug(f"Added trace-level metric: {formatted_metric}")

@@ -939,8 +1048,68 @@ class BaseTracer:
             logger.error(f"Validation Error: {e}")
         except Exception as e:
             logger.error(f"Error adding metric: {e}")
-
+
     def span(self, span_name):
         if span_name not in self.span_attributes_dict:
-            self.span_attributes_dict[span_name] = SpanAttributes(span_name)
-        return self.span_attributes_dict[span_name]
+            self.span_attributes_dict[span_name] = SpanAttributes(span_name, self.project_id)
+        return self.span_attributes_dict[span_name]
+
+    @staticmethod
+    def get_formatted_metric(span_attributes_dict, project_id, name, prompt, span_context, response, span_gt):
+        if name in span_attributes_dict:
+            local_metrics = span_attributes_dict[name].local_metrics or []
+            for metric in local_metrics:
+                try:
+                    if metric.get("prompt") is not None:
+                        prompt = metric['prompt']
+                    if metric.get("response") is not None:
+                        response = metric['response']
+                    if metric.get('context') is not None:
+                        span_context = metric['context']
+                    if metric.get('gt') is not None:
+                        span_gt = metric['gt']
+
+                    logger.info("calculating the metric, please wait....")
+                    result = calculate_metric(project_id=project_id,
+                                              metric_name=metric.get("name"),
+                                              model=metric.get("model"),
+                                              provider=metric.get("provider"),
+                                              prompt=prompt,
+                                              context=span_context,
+                                              response=response,
+                                              expected_response=span_gt
+                                              )
+
+                    result = result['data']['data'][0]
+                    config = result['metric_config']
+                    metric_config = {
+                        "job_id": config.get("job_id"),
+                        "metric_name": config.get("displayName"),
+                        "model": config.get("model"),
+                        "org_domain": config.get("orgDomain"),
+                        "provider": config.get("provider"),
+                        "reason": config.get("reason"),
+                        "request_id": config.get("request_id"),
+                        "user_id": config.get("user_id"),
+                        "threshold": {
+                            "is_editable": config.get("threshold").get("isEditable"),
+                            "lte": config.get("threshold").get("lte")
+                        }
+                    }
+                    formatted_metric = {
+                        "name": metric.get("displayName"),
+                        "displayName": metric.get("displayName"),
+                        "score": result.get("score"),
+                        "reason": result.get("reason", ""),
+                        "source": "user",
+                        "cost": result.get("cost"),
+                        "latency": result.get("latency"),
+                        "mappings": [],
+                        "config": metric_config
+                    }
+                    return formatted_metric
+                except ValueError as e:
+                    logger.error(f"Validation Error: {e}")
+                except Exception as e:
+                    logger.error(f"Error executing metric: {e}")
+
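
Note: span() now passes self.project_id into SpanAttributes, which is what allows the new static get_formatted_metric to find a span's locally attached metrics and evaluate them through calculate_metric; per-metric prompt/response/context/gt keys override the span-level arguments when present, and span_gt is forwarded as expected_response. A hedged call sketch (span name and texts are hypothetical; tracer is an initialized BaseTracer):

    formatted = BaseTracer.get_formatted_metric(
        span_attributes_dict=tracer.span_attributes_dict,
        project_id=tracer.project_id,
        name="generate_answer",   # a span registered via tracer.span("generate_answer")
        prompt="What is the capital of France?",
        span_context="France is a country in Western Europe.",
        response="Paris",
        span_gt="Paris",
    )
    # Returns the first successfully formatted metric dict, or None when the
    # span has no local metrics or every evaluation raised an exception.
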