ragaai-catalyst 2.1.5b21__py3-none-any.whl → 2.1.5b23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. ragaai_catalyst/__init__.py +3 -1
  2. ragaai_catalyst/dataset.py +49 -1
  3. ragaai_catalyst/redteaming.py +171 -0
  4. ragaai_catalyst/synthetic_data_generation.py +40 -7
  5. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +57 -46
  6. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +218 -47
  7. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +17 -7
  8. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +327 -62
  9. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +0 -3
  10. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +17 -6
  11. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +72 -0
  12. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +32 -15
  13. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +21 -2
  14. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +33 -11
  15. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1204 -484
  16. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +79 -10
  17. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  18. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  19. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +40 -21
  20. ragaai_catalyst/tracers/distributed.py +7 -3
  21. ragaai_catalyst/tracers/tracer.py +9 -9
  22. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +0 -1
  23. {ragaai_catalyst-2.1.5b21.dist-info → ragaai_catalyst-2.1.5b23.dist-info}/METADATA +37 -2
  24. {ragaai_catalyst-2.1.5b21.dist-info → ragaai_catalyst-2.1.5b23.dist-info}/RECORD +27 -25
  25. {ragaai_catalyst-2.1.5b21.dist-info → ragaai_catalyst-2.1.5b23.dist-info}/LICENSE +0 -0
  26. {ragaai_catalyst-2.1.5b21.dist-info → ragaai_catalyst-2.1.5b23.dist-info}/WHEEL +0 -0
  27. {ragaai_catalyst-2.1.5b21.dist-info → ragaai_catalyst-2.1.5b23.dist-info}/top_level.txt +0 -0
@@ -2,28 +2,30 @@ import json
2
2
  import os
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
- from typing import List, Any, Dict
5
+ from typing import List, Any, Dict, Optional
6
6
  import uuid
7
7
  import sys
8
8
  import tempfile
9
9
  import threading
10
10
  import time
11
- from ....ragaai_catalyst import RagaAICatalyst
12
- from ..data.data_structure import (
11
+
12
+ from ragaai_catalyst.tracers.agentic_tracing.upload.upload_local_metric import calculate_metric
13
+ from ragaai_catalyst import RagaAICatalyst
14
+ from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
13
15
  Trace,
14
16
  Metadata,
15
17
  SystemInfo,
16
18
  Resources,
17
19
  Component,
18
20
  )
19
- from ..upload.upload_agentic_traces import UploadAgenticTraces
20
- from ..upload.upload_code import upload_code
21
- from ..upload.upload_trace_metric import upload_trace_metric
22
- from ..utils.file_name_tracker import TrackName
23
- from ..utils.zip_list_of_unique_files import zip_list_of_unique_files
24
- from ..utils.span_attributes import SpanAttributes
25
- from ..utils.create_dataset_schema import create_dataset_schema_with_trace
26
- from ..utils.system_monitor import SystemMonitor
21
+ from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
22
+ from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
23
+ from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
24
+ from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
25
+ from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
26
+ from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
27
+ from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
28
+ from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
27
29
 
28
30
  import logging
29
31
 
@@ -83,6 +85,7 @@ class BaseTracer:
83
85
  self.tracking_thread = None
84
86
  self.tracking = False
85
87
  self.system_monitor = None
88
+ self.gt = None
86
89
 
87
90
  def _get_system_info(self) -> SystemInfo:
88
91
  return self.system_monitor.get_system_info()
@@ -180,40 +183,42 @@ class BaseTracer:
180
183
  self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
181
184
  self.trace.end_time = datetime.now().astimezone().isoformat()
182
185
 
183
- #track memory usage
186
+ # track memory usage
184
187
  self.tracking = False
185
188
  self.trace.metadata.resources.memory.values = self.memory_usage_list
186
189
 
187
- #track cpu usage
190
+ # track cpu usage
188
191
  self.trace.metadata.resources.cpu.values = self.cpu_usage_list
189
192
 
190
- #track network and disk usage
193
+ # track network and disk usage
191
194
  network_uploads, network_downloads = 0, 0
192
195
  disk_read, disk_write = 0, 0
193
-
196
+
194
197
  # Handle cases where lists might have different lengths
195
198
  min_len = min(len(self.network_usage_list), len(self.disk_usage_list))
196
199
  for i in range(min_len):
197
200
  network_usage = self.network_usage_list[i]
198
201
  disk_usage = self.disk_usage_list[i]
199
-
202
+
200
203
  # Safely get network usage values with defaults of 0
201
204
  network_uploads += network_usage.get('uploads', 0) or 0
202
205
  network_downloads += network_usage.get('downloads', 0) or 0
203
-
206
+
204
207
  # Safely get disk usage values with defaults of 0
205
208
  disk_read += disk_usage.get('disk_read', 0) or 0
206
209
  disk_write += disk_usage.get('disk_write', 0) or 0
207
210
 
208
- #track disk usage
211
+ # track disk usage
209
212
  disk_list_len = len(self.disk_usage_list)
210
213
  self.trace.metadata.resources.disk.read = [disk_read / disk_list_len if disk_list_len > 0 else 0]
211
214
  self.trace.metadata.resources.disk.write = [disk_write / disk_list_len if disk_list_len > 0 else 0]
212
215
 
213
- #track network usage
216
+ # track network usage
214
217
  network_list_len = len(self.network_usage_list)
215
- self.trace.metadata.resources.network.uploads = [network_uploads / network_list_len if network_list_len > 0 else 0]
216
- self.trace.metadata.resources.network.downloads = [network_downloads / network_list_len if network_list_len > 0 else 0]
218
+ self.trace.metadata.resources.network.uploads = [
219
+ network_uploads / network_list_len if network_list_len > 0 else 0]
220
+ self.trace.metadata.resources.network.downloads = [
221
+ network_downloads / network_list_len if network_list_len > 0 else 0]
217
222
 
218
223
  # update interval time
219
224
  self.trace.metadata.resources.cpu.interval = float(self.interval_time)
@@ -243,13 +248,14 @@ class BaseTracer:
243
248
  # Add metrics to trace before saving
244
249
  trace_data = self.trace.to_dict()
245
250
  trace_data["metrics"] = self.trace_metrics
246
-
251
+
247
252
  # Clean up trace_data before saving
248
253
  cleaned_trace_data = self._clean_trace(trace_data)
249
254
 
250
255
  # Format interactions and add to trace
251
256
  interactions = self.format_interactions()
252
- trace_data["workflow"] = interactions["workflow"]
257
+ # trace_data["workflow"] = interactions["workflow"]
258
+ cleaned_trace_data["workflow"] = interactions["workflow"]
253
259
 
254
260
  with open(filepath, "w") as f:
255
261
  json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)
@@ -452,15 +458,16 @@ class BaseTracer:
452
458
  else existing_span.__dict__
453
459
  )
454
460
  if (
455
- existing_dict.get("hash_id")
456
- == span_dict.get("hash_id")
457
- and str(existing_dict.get("data", {}).get("input"))
458
- == str(span_dict.get("data", {}).get("input"))
459
- and str(existing_dict.get("data", {}).get("output"))
460
- == str(span_dict.get("data", {}).get("output"))
461
+ existing_dict.get("hash_id")
462
+ == span_dict.get("hash_id")
463
+ and str(existing_dict.get("data", {}).get("input"))
464
+ == str(span_dict.get("data", {}).get("input"))
465
+ and str(existing_dict.get("data", {}).get("output"))
466
+ == str(span_dict.get("data", {}).get("output"))
461
467
  ):
462
468
  unique_spans[i] = span
463
469
  break
470
+
464
471
  else:
465
472
  # For non-LLM spans, process their children if they exist
466
473
  if "data" in span_dict and "children" in span_dict["data"]:
@@ -471,8 +478,44 @@ class BaseTracer:
471
478
  span["data"]["children"] = filtered_children
472
479
  else:
473
480
  span.data["children"] = filtered_children
474
- unique_spans.append(span)
481
+ unique_spans.append(span)
475
482
 
483
+ # Process spans to update model information for LLM spans with same name
484
+ llm_spans_by_name = {}
485
+ for i, span in enumerate(unique_spans):
486
+ span_dict = span if isinstance(span, dict) else span.__dict__
487
+
488
+ if span_dict.get('type') == 'llm':
489
+ span_name = span_dict.get('name')
490
+ if span_name:
491
+ if span_name not in llm_spans_by_name:
492
+ llm_spans_by_name[span_name] = []
493
+ llm_spans_by_name[span_name].append((i, span_dict))
494
+
495
+ # Update model information for spans with same name
496
+ for spans_with_same_name in llm_spans_by_name.values():
497
+ if len(spans_with_same_name) > 1:
498
+ # Check if any span has non-default model
499
+ has_custom_model = any(
500
+ span[1].get('info', {}).get('model') != 'default'
501
+ for span in spans_with_same_name
502
+ )
503
+
504
+ # If we have a custom model, update all default models to 'custom'
505
+ if has_custom_model:
506
+ for idx, span_dict in spans_with_same_name:
507
+ if span_dict.get('info', {}).get('model') == 'default':
508
+ if isinstance(unique_spans[idx], dict):
509
+ if 'info' not in unique_spans[idx]:
510
+ unique_spans[idx]['info'] = {}
511
+ # unique_spans[idx]['info']['model'] = 'custom'
512
+ unique_spans[idx]['type'] = 'custom'
513
+ else:
514
+ if not hasattr(unique_spans[idx], 'info'):
515
+ unique_spans[idx].info = {}
516
+ # unique_spans[idx].info['model'] = 'custom'
517
+ unique_spans[idx].type = 'custom'
518
+
476
519
  return unique_spans
477
520
 
478
521
  # Remove any spans without hash ids
@@ -499,7 +542,7 @@ class BaseTracer:
499
542
  int: Next interaction ID to use
500
543
  """
501
544
  child_type = child.get("type")
502
-
545
+
503
546
  if child_type == "tool":
504
547
  # Tool call start
505
548
  interactions.append(
@@ -612,7 +655,7 @@ class BaseTracer:
612
655
  }
613
656
  )
614
657
  interaction_id += 1
615
-
658
+
616
659
  interactions.append(
617
660
  {
618
661
  "id": str(interaction_id),
@@ -793,7 +836,7 @@ class BaseTracer:
793
836
  }
794
837
  )
795
838
  interaction_id += 1
796
-
839
+
797
840
  interactions.append(
798
841
  {
799
842
  "id": str(interaction_id),
@@ -855,15 +898,83 @@ class BaseTracer:
855
898
 
856
899
  return {"workflow": sorted_interactions}
857
900
 
901
+ # TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
902
+
903
+ def execute_metrics(self,
904
+ name: str,
905
+ model: str,
906
+ provider: str,
907
+ prompt: str,
908
+ context: str,
909
+ response: str
910
+ ):
911
+ if not hasattr(self, 'trace'):
912
+ logger.warning("Cannot add metrics before trace is initialized. Call start() first.")
913
+ return
914
+
915
+ # Convert individual parameters to metric dict if needed
916
+ if isinstance(name, str):
917
+ metrics = [{
918
+ "name": name
919
+ }]
920
+ else:
921
+ # Handle dict or list input
922
+ metrics = name if isinstance(name, list) else [name] if isinstance(name, dict) else []
923
+
924
+ try:
925
+ for metric in metrics:
926
+ if not isinstance(metric, dict):
927
+ raise ValueError(f"Expected dict, got {type(metric)}")
928
+
929
+ if "name" not in metric :
930
+ raise ValueError("Metric must contain 'name'") #score was written not required here
931
+
932
+ # Handle duplicate metric names on executing metric
933
+ metric_name = metric["name"]
934
+ if metric_name in self.visited_metrics:
935
+ count = sum(1 for m in self.visited_metrics if m.startswith(metric_name))
936
+ metric_name = f"{metric_name}_{count + 1}"
937
+ self.visited_metrics.append(metric_name)
938
+
939
+ result = calculate_metric(project_id=self.project_id,
940
+ metric_name=metric_name,
941
+ model=model,
942
+ org_domain="raga",
943
+ provider=provider,
944
+ user_id="1", # self.user_details['id'],
945
+ prompt=prompt,
946
+ context=context,
947
+ response=response
948
+ )
949
+
950
+ result = result['data']
951
+ formatted_metric = {
952
+ "name": metric_name,
953
+ "score": result.get("score"),
954
+ "reason": result.get("reason", ""),
955
+ "source": "user",
956
+ "cost": result.get("cost"),
957
+ "latency": result.get("latency"),
958
+ "mappings": [],
959
+ "config": result.get("metric_config", {})
960
+ }
961
+
962
+ logger.debug(f"Executed metric: {formatted_metric}")
963
+
964
+ except ValueError as e:
965
+ logger.error(f"Validation Error: {e}")
966
+ except Exception as e:
967
+ logger.error(f"Error adding metric: {e}")
968
+
858
969
  def add_metrics(
859
- self,
860
- name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
861
- score: float | int = None,
862
- reasoning: str = "",
863
- cost: float = None,
864
- latency: float = None,
865
- metadata: Dict[str, Any] = None,
866
- config: Dict[str, Any] = None,
970
+ self,
971
+ name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
972
+ score: float | int = None,
973
+ reasoning: str = "",
974
+ cost: float = None,
975
+ latency: float = None,
976
+ metadata: Dict[str, Any] = None,
977
+ config: Dict[str, Any] = None,
867
978
  ):
868
979
  """Add metrics at the trace level.
869
980
 
@@ -907,7 +1018,7 @@ class BaseTracer:
907
1018
  for metric in metrics:
908
1019
  if not isinstance(metric, dict):
909
1020
  raise ValueError(f"Expected dict, got {type(metric)}")
910
-
1021
+
911
1022
  if "name" not in metric or "score" not in metric:
912
1023
  raise ValueError("Metric must contain 'name' and 'score' fields")
913
1024
 
@@ -919,7 +1030,7 @@ class BaseTracer:
919
1030
  self.visited_metrics.append(metric_name)
920
1031
 
921
1032
  formatted_metric = {
922
- "name": metric_name,
1033
+ "name": metric_name,
923
1034
  "score": metric["score"],
924
1035
  "reason": metric.get("reasoning", ""),
925
1036
  "source": "user",
@@ -929,7 +1040,7 @@ class BaseTracer:
929
1040
  "mappings": [],
930
1041
  "config": metric.get("config", {})
931
1042
  }
932
-
1043
+
933
1044
  self.trace_metrics.append(formatted_metric)
934
1045
  logger.debug(f"Added trace-level metric: {formatted_metric}")
935
1046
 
@@ -937,8 +1048,68 @@ class BaseTracer:
937
1048
  logger.error(f"Validation Error: {e}")
938
1049
  except Exception as e:
939
1050
  logger.error(f"Error adding metric: {e}")
940
-
1051
+
941
1052
  def span(self, span_name):
942
1053
  if span_name not in self.span_attributes_dict:
943
- self.span_attributes_dict[span_name] = SpanAttributes(span_name)
944
- return self.span_attributes_dict[span_name]
1054
+ self.span_attributes_dict[span_name] = SpanAttributes(span_name, self.project_id)
1055
+ return self.span_attributes_dict[span_name]
1056
+
1057
+ @staticmethod
1058
+ def get_formatted_metric(span_attributes_dict, project_id, name, prompt, span_context, response, span_gt):
1059
+ if name in span_attributes_dict:
1060
+ local_metrics = span_attributes_dict[name].local_metrics or []
1061
+ for metric in local_metrics:
1062
+ try:
1063
+ if metric.get("prompt") is not None:
1064
+ prompt = metric['prompt']
1065
+ if metric.get("response") is not None:
1066
+ response = metric['response']
1067
+ if metric.get('context') is not None:
1068
+ span_context = metric['context']
1069
+ if metric.get('gt') is not None:
1070
+ span_gt = metric['gt']
1071
+
1072
+ logger.info("calculating the metric, please wait....")
1073
+ result = calculate_metric(project_id=project_id,
1074
+ metric_name=metric.get("name"),
1075
+ model=metric.get("model"),
1076
+ provider=metric.get("provider"),
1077
+ prompt=prompt,
1078
+ context=span_context,
1079
+ response=response,
1080
+ expected_response=span_gt
1081
+ )
1082
+
1083
+ result = result['data']['data'][0]
1084
+ config = result['metric_config']
1085
+ metric_config = {
1086
+ "job_id": config.get("job_id"),
1087
+ "metric_name": config.get("displayName"),
1088
+ "model": config.get("model"),
1089
+ "org_domain": config.get("orgDomain"),
1090
+ "provider": config.get("provider"),
1091
+ "reason": config.get("reason"),
1092
+ "request_id": config.get("request_id"),
1093
+ "user_id": config.get("user_id"),
1094
+ "threshold": {
1095
+ "is_editable": config.get("threshold").get("isEditable"),
1096
+ "lte": config.get("threshold").get("lte")
1097
+ }
1098
+ }
1099
+ formatted_metric = {
1100
+ "name": metric.get("displayName"),
1101
+ "displayName": metric.get("displayName"),
1102
+ "score": result.get("score"),
1103
+ "reason": result.get("reason", ""),
1104
+ "source": "user",
1105
+ "cost": result.get("cost"),
1106
+ "latency": result.get("latency"),
1107
+ "mappings": [],
1108
+ "config": metric_config
1109
+ }
1110
+ return formatted_metric
1111
+ except ValueError as e:
1112
+ logger.error(f"Validation Error: {e}")
1113
+ except Exception as e:
1114
+ logger.error(f"Error executing metric: {e}")
1115
+
@@ -45,7 +45,10 @@ class CustomTracerMixin:
45
45
  @functools.wraps(func)
46
46
  async def async_wrapper(*args, **kwargs):
47
47
  async_wrapper.metadata = metadata
48
- self.gt = kwargs.get('gt', None) if kwargs else None
48
+ gt = kwargs.get('gt') if kwargs else None
49
+ if gt is not None:
50
+ span = self.span(name)
51
+ span.add_gt(gt)
49
52
  return await self._trace_custom_execution(
50
53
  func, name or func.__name__, custom_type, version, trace_variables, *args, **kwargs
51
54
  )
@@ -54,7 +57,10 @@ class CustomTracerMixin:
54
57
  @functools.wraps(func)
55
58
  def sync_wrapper(*args, **kwargs):
56
59
  sync_wrapper.metadata = metadata
57
- self.gt = kwargs.get('gt', None) if kwargs else None
60
+ gt = kwargs.get('gt') if kwargs else None
61
+ if gt is not None:
62
+ span = self.span(name)
63
+ span.add_gt(gt)
58
64
  return self._trace_sync_custom_execution(
59
65
  func, name or func.__name__, custom_type, version, trace_variables, *args, **kwargs
60
66
  )
@@ -98,7 +104,7 @@ class CustomTracerMixin:
98
104
 
99
105
  try:
100
106
  # Execute the function
101
- result = func(*args, **kwargs)
107
+ result = self.file_tracker.trace_wrapper(func)(*args, **kwargs)
102
108
 
103
109
  # Calculate resource usage
104
110
  end_time = datetime.now().astimezone().isoformat()
@@ -186,7 +192,7 @@ class CustomTracerMixin:
186
192
 
187
193
  try:
188
194
  # Execute the function
189
- result = await func(*args, **kwargs)
195
+ result = await self.file_tracker.trace_wrapper(func)(*args, **kwargs)
190
196
 
191
197
  # Calculate resource usage
192
198
  end_time = datetime.now().astimezone().isoformat()
@@ -284,9 +290,13 @@ class CustomTracerMixin:
284
290
  "interactions": interactions
285
291
  }
286
292
 
287
- if self.gt:
288
- component["data"]["gt"] = self.gt
289
-
293
+ if kwargs["name"] in self.span_attributes_dict:
294
+ span_gt = self.span_attributes_dict[kwargs["name"]].gt
295
+ if span_gt is not None:
296
+ component["data"]["gt"] = span_gt
297
+ span_context = self.span_attributes_dict[kwargs["name"]].context
298
+ if span_context:
299
+ component["data"]["context"] = span_context
290
300
  return component
291
301
 
292
302
  def start_component(self, component_id):