ragaai-catalyst 2.1.5b22__py3-none-any.whl → 2.1.5b24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +3 -1
- ragaai_catalyst/dataset.py +49 -1
- ragaai_catalyst/redteaming.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +39 -33
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +208 -46
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +235 -62
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +1 -4
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +5 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +72 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +27 -11
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +64 -28
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +40 -21
- ragaai_catalyst/tracers/tracer.py +6 -3
- {ragaai_catalyst-2.1.5b22.dist-info → ragaai_catalyst-2.1.5b24.dist-info}/METADATA +37 -2
- {ragaai_catalyst-2.1.5b22.dist-info → ragaai_catalyst-2.1.5b24.dist-info}/RECORD +21 -19
- {ragaai_catalyst-2.1.5b22.dist-info → ragaai_catalyst-2.1.5b24.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.5b22.dist-info → ragaai_catalyst-2.1.5b24.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.5b22.dist-info → ragaai_catalyst-2.1.5b24.dist-info}/top_level.txt +0 -0
@@ -2,28 +2,30 @@ import json
|
|
2
2
|
import os
|
3
3
|
from datetime import datetime
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import List, Any, Dict
|
5
|
+
from typing import List, Any, Dict, Optional
|
6
6
|
import uuid
|
7
7
|
import sys
|
8
8
|
import tempfile
|
9
9
|
import threading
|
10
10
|
import time
|
11
|
-
|
12
|
-
from
|
11
|
+
|
12
|
+
from ragaai_catalyst.tracers.agentic_tracing.upload.upload_local_metric import calculate_metric
|
13
|
+
from ragaai_catalyst import RagaAICatalyst
|
14
|
+
from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
|
13
15
|
Trace,
|
14
16
|
Metadata,
|
15
17
|
SystemInfo,
|
16
18
|
Resources,
|
17
19
|
Component,
|
18
20
|
)
|
19
|
-
from
|
20
|
-
from
|
21
|
-
from
|
22
|
-
from
|
23
|
-
from
|
24
|
-
from
|
25
|
-
from
|
26
|
-
from
|
21
|
+
from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
|
22
|
+
from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
|
23
|
+
from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
|
24
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
|
25
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
|
26
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
|
27
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
|
28
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
|
27
29
|
|
28
30
|
import logging
|
29
31
|
|
@@ -181,40 +183,42 @@ class BaseTracer:
|
|
181
183
|
self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
|
182
184
|
self.trace.end_time = datetime.now().astimezone().isoformat()
|
183
185
|
|
184
|
-
#track memory usage
|
186
|
+
# track memory usage
|
185
187
|
self.tracking = False
|
186
188
|
self.trace.metadata.resources.memory.values = self.memory_usage_list
|
187
189
|
|
188
|
-
#track cpu usage
|
190
|
+
# track cpu usage
|
189
191
|
self.trace.metadata.resources.cpu.values = self.cpu_usage_list
|
190
192
|
|
191
|
-
#track network and disk usage
|
193
|
+
# track network and disk usage
|
192
194
|
network_uploads, network_downloads = 0, 0
|
193
195
|
disk_read, disk_write = 0, 0
|
194
|
-
|
196
|
+
|
195
197
|
# Handle cases where lists might have different lengths
|
196
198
|
min_len = min(len(self.network_usage_list), len(self.disk_usage_list))
|
197
199
|
for i in range(min_len):
|
198
200
|
network_usage = self.network_usage_list[i]
|
199
201
|
disk_usage = self.disk_usage_list[i]
|
200
|
-
|
202
|
+
|
201
203
|
# Safely get network usage values with defaults of 0
|
202
204
|
network_uploads += network_usage.get('uploads', 0) or 0
|
203
205
|
network_downloads += network_usage.get('downloads', 0) or 0
|
204
|
-
|
206
|
+
|
205
207
|
# Safely get disk usage values with defaults of 0
|
206
208
|
disk_read += disk_usage.get('disk_read', 0) or 0
|
207
209
|
disk_write += disk_usage.get('disk_write', 0) or 0
|
208
210
|
|
209
|
-
#track disk usage
|
211
|
+
# track disk usage
|
210
212
|
disk_list_len = len(self.disk_usage_list)
|
211
213
|
self.trace.metadata.resources.disk.read = [disk_read / disk_list_len if disk_list_len > 0 else 0]
|
212
214
|
self.trace.metadata.resources.disk.write = [disk_write / disk_list_len if disk_list_len > 0 else 0]
|
213
215
|
|
214
|
-
#track network usage
|
216
|
+
# track network usage
|
215
217
|
network_list_len = len(self.network_usage_list)
|
216
|
-
self.trace.metadata.resources.network.uploads = [
|
217
|
-
|
218
|
+
self.trace.metadata.resources.network.uploads = [
|
219
|
+
network_uploads / network_list_len if network_list_len > 0 else 0]
|
220
|
+
self.trace.metadata.resources.network.downloads = [
|
221
|
+
network_downloads / network_list_len if network_list_len > 0 else 0]
|
218
222
|
|
219
223
|
# update interval time
|
220
224
|
self.trace.metadata.resources.cpu.interval = float(self.interval_time)
|
@@ -244,7 +248,7 @@ class BaseTracer:
|
|
244
248
|
# Add metrics to trace before saving
|
245
249
|
trace_data = self.trace.to_dict()
|
246
250
|
trace_data["metrics"] = self.trace_metrics
|
247
|
-
|
251
|
+
|
248
252
|
# Clean up trace_data before saving
|
249
253
|
cleaned_trace_data = self._clean_trace(trace_data)
|
250
254
|
|
@@ -454,15 +458,16 @@ class BaseTracer:
|
|
454
458
|
else existing_span.__dict__
|
455
459
|
)
|
456
460
|
if (
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
461
|
+
existing_dict.get("hash_id")
|
462
|
+
== span_dict.get("hash_id")
|
463
|
+
and str(existing_dict.get("data", {}).get("input"))
|
464
|
+
== str(span_dict.get("data", {}).get("input"))
|
465
|
+
and str(existing_dict.get("data", {}).get("output"))
|
466
|
+
== str(span_dict.get("data", {}).get("output"))
|
463
467
|
):
|
464
468
|
unique_spans[i] = span
|
465
469
|
break
|
470
|
+
|
466
471
|
else:
|
467
472
|
# For non-LLM spans, process their children if they exist
|
468
473
|
if "data" in span_dict and "children" in span_dict["data"]:
|
@@ -473,8 +478,44 @@ class BaseTracer:
|
|
473
478
|
span["data"]["children"] = filtered_children
|
474
479
|
else:
|
475
480
|
span.data["children"] = filtered_children
|
476
|
-
unique_spans.append(span)
|
481
|
+
unique_spans.append(span)
|
477
482
|
|
483
|
+
# Process spans to update model information for LLM spans with same name
|
484
|
+
llm_spans_by_name = {}
|
485
|
+
for i, span in enumerate(unique_spans):
|
486
|
+
span_dict = span if isinstance(span, dict) else span.__dict__
|
487
|
+
|
488
|
+
if span_dict.get('type') == 'llm':
|
489
|
+
span_name = span_dict.get('name')
|
490
|
+
if span_name:
|
491
|
+
if span_name not in llm_spans_by_name:
|
492
|
+
llm_spans_by_name[span_name] = []
|
493
|
+
llm_spans_by_name[span_name].append((i, span_dict))
|
494
|
+
|
495
|
+
# Update model information for spans with same name
|
496
|
+
for spans_with_same_name in llm_spans_by_name.values():
|
497
|
+
if len(spans_with_same_name) > 1:
|
498
|
+
# Check if any span has non-default model
|
499
|
+
has_custom_model = any(
|
500
|
+
span[1].get('info', {}).get('model') != 'default'
|
501
|
+
for span in spans_with_same_name
|
502
|
+
)
|
503
|
+
|
504
|
+
# If we have a custom model, update all default models to 'custom'
|
505
|
+
if has_custom_model:
|
506
|
+
for idx, span_dict in spans_with_same_name:
|
507
|
+
if span_dict.get('info', {}).get('model') == 'default':
|
508
|
+
if isinstance(unique_spans[idx], dict):
|
509
|
+
if 'info' not in unique_spans[idx]:
|
510
|
+
unique_spans[idx]['info'] = {}
|
511
|
+
# unique_spans[idx]['info']['model'] = 'custom'
|
512
|
+
unique_spans[idx]['type'] = 'custom'
|
513
|
+
else:
|
514
|
+
if not hasattr(unique_spans[idx], 'info'):
|
515
|
+
unique_spans[idx].info = {}
|
516
|
+
# unique_spans[idx].info['model'] = 'custom'
|
517
|
+
unique_spans[idx].type = 'custom'
|
518
|
+
|
478
519
|
return unique_spans
|
479
520
|
|
480
521
|
# Remove any spans without hash ids
|
@@ -501,7 +542,7 @@ class BaseTracer:
|
|
501
542
|
int: Next interaction ID to use
|
502
543
|
"""
|
503
544
|
child_type = child.get("type")
|
504
|
-
|
545
|
+
|
505
546
|
if child_type == "tool":
|
506
547
|
# Tool call start
|
507
548
|
interactions.append(
|
@@ -614,7 +655,7 @@ class BaseTracer:
|
|
614
655
|
}
|
615
656
|
)
|
616
657
|
interaction_id += 1
|
617
|
-
|
658
|
+
|
618
659
|
interactions.append(
|
619
660
|
{
|
620
661
|
"id": str(interaction_id),
|
@@ -795,7 +836,7 @@ class BaseTracer:
|
|
795
836
|
}
|
796
837
|
)
|
797
838
|
interaction_id += 1
|
798
|
-
|
839
|
+
|
799
840
|
interactions.append(
|
800
841
|
{
|
801
842
|
"id": str(interaction_id),
|
@@ -857,15 +898,83 @@ class BaseTracer:
|
|
857
898
|
|
858
899
|
return {"workflow": sorted_interactions}
|
859
900
|
|
901
|
+
# TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
|
902
|
+
|
903
|
+
def execute_metrics(
    self,
    name: str | List[Dict[str, Any]] | Dict[str, Any],
    model: str,
    provider: str,
    prompt: str,
    context: str,
    response: str,
):
    """Evaluate one or more metrics via ``calculate_metric`` and log each result.

    Args:
        name: A metric name, a metric dict containing a ``"name"`` key, or a
            list of such dicts. Any other type results in no metric being run.
        model: Forwarded to ``calculate_metric`` as ``model``.
        provider: Forwarded to ``calculate_metric`` as ``provider``.
        prompt: Forwarded to ``calculate_metric`` as ``prompt``.
        context: Forwarded to ``calculate_metric`` as ``context``.
        response: Forwarded to ``calculate_metric`` as ``response``.

    Returns:
        None. Validation and execution errors are logged, not raised.
    """
    if not hasattr(self, 'trace'):
        logger.warning("Cannot add metrics before trace is initialized. Call start() first.")
        return

    # Normalise the accepted input shapes to a list of metric dicts.
    if isinstance(name, str):
        metrics = [{"name": name}]
    elif isinstance(name, list):
        metrics = name
    elif isinstance(name, dict):
        metrics = [name]
    else:
        metrics = []

    try:
        for metric in metrics:
            if not isinstance(metric, dict):
                raise ValueError(f"Expected dict, got {type(metric)}")

            # Unlike add_metrics, no 'score' is required: it is computed here.
            if "name" not in metric:
                raise ValueError("Metric must contain 'name'")

            # Keep the name the caller asked for separate from the name used
            # for bookkeeping: only the latter gets a numeric suffix when the
            # same metric is executed more than once.
            requested_name = metric["name"]
            recorded_name = requested_name
            if recorded_name in self.visited_metrics:
                count = sum(1 for m in self.visited_metrics if m.startswith(requested_name))
                recorded_name = f"{requested_name}_{count + 1}"
            self.visited_metrics.append(recorded_name)

            # The evaluation is requested under the original metric name;
            # passing the suffixed bookkeeping name would ask for a metric
            # the caller never specified.
            result = calculate_metric(project_id=self.project_id,
                                      metric_name=requested_name,
                                      model=model,
                                      org_domain="raga",
                                      provider=provider,
                                      user_id="1",  # self.user_details['id'],
                                      prompt=prompt,
                                      context=context,
                                      response=response
                                      )

            result = result['data']
            formatted_metric = {
                "name": recorded_name,
                "score": result.get("score"),
                "reason": result.get("reason", ""),
                "source": "user",
                "cost": result.get("cost"),
                "latency": result.get("latency"),
                "mappings": [],
                "config": result.get("metric_config", {})
            }

            # NOTE(review): the formatted metric is only logged here; it is
            # neither stored on the trace nor returned - confirm whether it
            # should be appended to the trace-level metrics.
            logger.debug(f"Executed metric: {formatted_metric}")

    except ValueError as e:
        logger.error(f"Validation Error: {e}")
    except Exception as e:
        logger.error(f"Error adding metric: {e}")
|
968
|
+
|
860
969
|
def add_metrics(
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
970
|
+
self,
|
971
|
+
name: str | List[Dict[str, Any]] | Dict[str, Any] = None,
|
972
|
+
score: float | int = None,
|
973
|
+
reasoning: str = "",
|
974
|
+
cost: float = None,
|
975
|
+
latency: float = None,
|
976
|
+
metadata: Dict[str, Any] = None,
|
977
|
+
config: Dict[str, Any] = None,
|
869
978
|
):
|
870
979
|
"""Add metrics at the trace level.
|
871
980
|
|
@@ -909,7 +1018,7 @@ class BaseTracer:
|
|
909
1018
|
for metric in metrics:
|
910
1019
|
if not isinstance(metric, dict):
|
911
1020
|
raise ValueError(f"Expected dict, got {type(metric)}")
|
912
|
-
|
1021
|
+
|
913
1022
|
if "name" not in metric or "score" not in metric:
|
914
1023
|
raise ValueError("Metric must contain 'name' and 'score' fields")
|
915
1024
|
|
@@ -921,7 +1030,7 @@ class BaseTracer:
|
|
921
1030
|
self.visited_metrics.append(metric_name)
|
922
1031
|
|
923
1032
|
formatted_metric = {
|
924
|
-
"name": metric_name,
|
1033
|
+
"name": metric_name,
|
925
1034
|
"score": metric["score"],
|
926
1035
|
"reason": metric.get("reasoning", ""),
|
927
1036
|
"source": "user",
|
@@ -931,7 +1040,7 @@ class BaseTracer:
|
|
931
1040
|
"mappings": [],
|
932
1041
|
"config": metric.get("config", {})
|
933
1042
|
}
|
934
|
-
|
1043
|
+
|
935
1044
|
self.trace_metrics.append(formatted_metric)
|
936
1045
|
logger.debug(f"Added trace-level metric: {formatted_metric}")
|
937
1046
|
|
@@ -939,8 +1048,61 @@ class BaseTracer:
|
|
939
1048
|
logger.error(f"Validation Error: {e}")
|
940
1049
|
except Exception as e:
|
941
1050
|
logger.error(f"Error adding metric: {e}")
|
942
|
-
|
1051
|
+
|
943
1052
|
def span(self, span_name):
    """Return the ``SpanAttributes`` registered for ``span_name``.

    The entry is created on first access, built from the span name and this
    tracer's ``project_id``, and cached in ``self.span_attributes_dict``.
    """
    registry = self.span_attributes_dict
    already_registered = span_name in registry
    if not already_registered:
        registry[span_name] = SpanAttributes(span_name, self.project_id)
    return registry[span_name]
|
1056
|
+
|
1057
|
+
@staticmethod
|
1058
|
+
def get_formatted_metric(span_attributes_dict, project_id, name):
|
1059
|
+
if name in span_attributes_dict:
|
1060
|
+
local_metrics = span_attributes_dict[name].local_metrics or []
|
1061
|
+
local_metrics_results = []
|
1062
|
+
for metric in local_metrics:
|
1063
|
+
try:
|
1064
|
+
logger.info("calculating the metric, please wait....")
|
1065
|
+
|
1066
|
+
mapping = metric.get("mapping", {})
|
1067
|
+
result = calculate_metric(project_id=project_id,
|
1068
|
+
metric_name=metric.get("name"),
|
1069
|
+
model=metric.get("model"),
|
1070
|
+
provider=metric.get("provider"),
|
1071
|
+
**mapping
|
1072
|
+
)
|
1073
|
+
|
1074
|
+
result = result['data']['data'][0]
|
1075
|
+
config = result['metric_config']
|
1076
|
+
metric_config = {
|
1077
|
+
"job_id": config.get("job_id"),
|
1078
|
+
"metric_name": config.get("displayName"),
|
1079
|
+
"model": config.get("model"),
|
1080
|
+
"org_domain": config.get("orgDomain"),
|
1081
|
+
"provider": config.get("provider"),
|
1082
|
+
"reason": config.get("reason"),
|
1083
|
+
"request_id": config.get("request_id"),
|
1084
|
+
"user_id": config.get("user_id"),
|
1085
|
+
"threshold": {
|
1086
|
+
"is_editable": config.get("threshold").get("isEditable"),
|
1087
|
+
"lte": config.get("threshold").get("lte")
|
1088
|
+
}
|
1089
|
+
}
|
1090
|
+
formatted_metric = {
|
1091
|
+
"name": metric.get("displayName"),
|
1092
|
+
"displayName": metric.get("displayName"),
|
1093
|
+
"score": result.get("score"),
|
1094
|
+
"reason": result.get("reason", ""),
|
1095
|
+
"source": "user",
|
1096
|
+
"cost": result.get("cost"),
|
1097
|
+
"latency": result.get("latency"),
|
1098
|
+
"mappings": [],
|
1099
|
+
"config": metric_config
|
1100
|
+
}
|
1101
|
+
local_metrics_results.append(formatted_metric)
|
1102
|
+
except ValueError as e:
|
1103
|
+
logger.error(f"Validation Error: {e}")
|
1104
|
+
except Exception as e:
|
1105
|
+
logger.error(f"Error executing metric: {e}")
|
1106
|
+
|
1107
|
+
return local_metrics_results
|
1108
|
+
|