ragaai_catalyst-2.1.5b30-py3-none-any.whl → ragaai_catalyst-2.1.5b33-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- ragaai_catalyst/ragaai_catalyst.py +37 -6
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +2 -2
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +2 -2
- ragaai_catalyst/redteaming/evaluator.py +2 -2
- ragaai_catalyst/redteaming/llm_generator.py +78 -25
- ragaai_catalyst/redteaming/{llm_generator_litellm.py → llm_generator_old.py} +30 -13
- ragaai_catalyst/redteaming/red_teaming.py +6 -4
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/synthetic_data_generation.py +23 -13
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +283 -95
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +675 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +73 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +53 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +9 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +10 -1
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- {ragaai_catalyst-2.1.5b30.dist-info → ragaai_catalyst-2.1.5b33.dist-info}/METADATA +92 -17
- {ragaai_catalyst-2.1.5b30.dist-info → ragaai_catalyst-2.1.5b33.dist-info}/RECORD +23 -20
- {ragaai_catalyst-2.1.5b30.dist-info → ragaai_catalyst-2.1.5b33.dist-info}/WHEEL +1 -1
- {ragaai_catalyst-2.1.5b30.dist-info → ragaai_catalyst-2.1.5b33.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.5b30.dist-info → ragaai_catalyst-2.1.5b33.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py

```diff
@@ -1,7 +1,13 @@
 import requests
 import json
 import os
+import time
+import logging
 from datetime import datetime
+from urllib.parse import urlparse, urlunparse
+import re
+
+logger = logging.getLogger(__name__)


 class UploadAgenticTraces:
```
```diff
@@ -33,17 +39,39 @@ class UploadAgenticTraces:
         }

         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/presigned-url"
             response = requests.request("GET",
-                                        f"{self.base_url}/v1/llm/presigned-url",
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
+
             if response.status_code == 200:
-                presignedUrls = response.json()["data"]["presignedUrls"][0]
-                return presignedUrls
+                presignedURLs = response.json()["data"]["presignedUrls"][0]
+                presignedurl = self.update_presigned_url(presignedURLs,self.base_url)
+                return presignedurl
+
         except requests.exceptions.RequestException as e:
             print(f"Error while getting presigned url: {e}")
             return None
+
+    def update_presigned_url(self, presigned_url, base_url):
+        """Replaces the domain (and port, if applicable) of the presigned URL
+        with that of the base URL only if the base URL contains 'localhost' or an IP address."""
+        presigned_parts = urlparse(presigned_url)
+        base_parts = urlparse(base_url)
+        # Check if base_url contains localhost or an IP address
+        if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+            new_netloc = base_parts.hostname  # Extract domain from base_url
+            if base_parts.port:  # Add port if present in base_url
+                new_netloc += f":{base_parts.port}"
+            updated_parts = presigned_parts._replace(netloc=new_netloc)
+            return urlunparse(updated_parts)
+        return presigned_url

     def _put_presigned_url(self, presignedUrl, filename):
         headers = {
```
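The new `update_presigned_url` helper only rewrites the host when the configured base URL points at `localhost` or a raw IPv4 address, which matters for self-hosted deployments that proxy the object store. Below is a minimal standalone sketch of the same rewrite logic; the URLs are hypothetical, and the `base_parts.hostname and` guard is an addition of this sketch (the packaged method assumes a non-empty hostname):

```python
import re
from urllib.parse import urlparse, urlunparse

def update_presigned_url(presigned_url: str, base_url: str) -> str:
    """Swap the presigned URL's host/port for the base URL's, but only
    when the base URL targets localhost or a raw IPv4 address."""
    presigned_parts = urlparse(presigned_url)
    base_parts = urlparse(base_url)
    if base_parts.hostname and re.match(
            r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
        new_netloc = base_parts.hostname
        if base_parts.port:
            new_netloc += f":{base_parts.port}"
        return urlunparse(presigned_parts._replace(netloc=new_netloc))
    return presigned_url

# Hypothetical URLs: a local deployment proxies the blob store, so the
# cloud hostname in the presigned URL gets replaced.
print(update_presigned_url(
    "https://storage.example.com/bucket/trace.json?sig=abc",
    "http://localhost:8080"))
# -> https://localhost:8080/bucket/trace.json?sig=abc
# (only the netloc changes; the presigned URL's scheme, path and query are kept)
```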
```diff
@@ -60,11 +88,15 @@ class UploadAgenticTraces:
             print(f"Error while reading file: {e}")
             return None
         try:
+            start_time = time.time()
             response = requests.request("PUT",
                                         presignedUrl,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200 or response.status_code != 201:
                 return response, response.status_code
         except requests.exceptions.RequestException as e:
```
```diff
@@ -83,11 +115,16 @@ class UploadAgenticTraces:
             "datasetSpans": self._get_dataset_spans(), #Extra key for agentic traces
         })
         try:
+            start_time = time.time()
+            endpoint = f"{self.base_url}/v1/llm/insert/trace"
             response = requests.request("POST",
-                                        f"{self.base_url}/v1/llm/insert/trace",
+                                        endpoint,
                                         headers=headers,
                                         data=payload,
                                         timeout=self.timeout)
+            elapsed_ms = (time.time() - start_time) * 1000
+            logger.debug(
+                f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
             if response.status_code != 200:
                 print(f"Error inserting traces: {response.json()['message']}")
                 return None
```
```diff
@@ -116,27 +153,43 @@ class UploadAgenticTraces:
                         "spanType": span["type"],
                     })
                 else:
-                    datasetSpans.append({
+                    datasetSpans.extend(self._get_agent_dataset_spans(span, datasetSpans))
+            datasetSpans = [dict(t) for t in set(tuple(sorted(d.items())) for d in datasetSpans)]
+
+            return datasetSpans
+        except Exception as e:
+            print(f"Error while reading dataset spans: {e}")
+            return None
+
+    def _get_agent_dataset_spans(self, span, datasetSpans):
+        datasetSpans.append({
             "spanId": span["id"],
             "spanName": span["name"],
             "spanHash": span["hash_id"],
             "spanType": span["type"],
         })
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        children = span["data"]["children"]
+        for child in children:
+            if child["type"] != "agent":
+                existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                if existing_span is None:
+                    datasetSpans.append({
+                        "spanId": child["id"],
+                        "spanName": child["name"],
+                        "spanHash": child["hash_id"],
+                        "spanType": child["type"],
+                    })
+            else:
+                datasetSpans.append({
+                    "spanId": child["id"],
+                    "spanName": child["name"],
+                    "spanHash": child["hash_id"],
+                    "spanType": child["type"],
+                })
+                self._get_agent_dataset_spans(child, datasetSpans)
+        return datasetSpans
+
+
     def upload_agentic_traces(self):
         try:
             presignedUrl = self._get_presigned_url()
```
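The recursive `_get_agent_dataset_spans` both mutates the shared list and returns it, so spans can end up appended more than once; the comprehension at the top of `_get_dataset_spans` deduplicates them. A quick illustration of that one-liner, using hypothetical span dicts:

```python
# Each span dict is frozen into a sorted tuple of items so it becomes
# hashable, the set drops exact duplicates, and dicts are rebuilt.
# Note: the original ordering of the list is not preserved.
spans = [
    {"spanId": "1", "spanHash": "a"},
    {"spanId": "1", "spanHash": "a"},   # exact duplicate
    {"spanId": "2", "spanHash": "b"},
]
unique = [dict(t) for t in set(tuple(sorted(d.items())) for d in spans)]
print(len(unique))  # 2
```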
ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py

```diff
@@ -2,23 +2,26 @@ from aiohttp import payload
 import requests
 import json
 import os
+import time
 import logging
 from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
 logger = logging.getLogger(__name__)
+from urllib.parse import urlparse, urlunparse
+import re

-def upload_code(hash_id, zip_path, project_name, dataset_name):
-    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+def upload_code(hash_id, zip_path, project_name, dataset_name, base_url=None):
+    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name, base_url)

     if hash_id not in code_hashes_list:
-        presigned_url = _fetch_presigned_url(project_name, dataset_name)
+        presigned_url = _fetch_presigned_url(project_name, dataset_name, base_url)
         _put_zip_presigned_url(project_name, presigned_url, zip_path)

-        response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+        response = _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url)
         return response
     else:
         return "Code already exists"

-def _fetch_dataset_code_hashes(project_name, dataset_name):
+def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None):
     payload = {}
     headers = {
         "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
```
```diff
@@ -26,11 +29,17 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
     }

     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code?datasetName={dataset_name}"
         response = requests.request("GET",
-                                    f"{RagaAICatalyst.BASE_URL}/v2/llm/dataset/code?datasetName={dataset_name}",
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")

         if response.status_code == 200:
             return response.json()["data"]["codeHashes"]
```
```diff
@@ -40,7 +49,22 @@ def _fetch_dataset_code_hashes(project_name, dataset_name):
         logger.error(f"Failed to list datasets: {e}")
         raise

-def _fetch_presigned_url(project_name, dataset_name):
+
+def update_presigned_url(presigned_url, base_url):
+    """Replaces the domain (and port, if applicable) of the presigned URL with that of the base URL."""
+    presigned_parts = urlparse(presigned_url)
+    base_parts = urlparse(base_url)
+    # Check if base_url contains localhost or an IP address
+    if re.match(r'^(localhost|\d{1,3}(\.\d{1,3}){3})$', base_parts.hostname):
+        new_netloc = base_parts.hostname  # Extract domain from base_url
+        if base_parts.port:  # Add port if present in base_url
+            new_netloc += f":{base_parts.port}"
+        updated_parts = presigned_parts._replace(netloc=new_netloc)
+        return urlunparse(updated_parts)
+    return presigned_url
+
+
+def _fetch_presigned_url(project_name, dataset_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "numFiles": 1,
```
```diff
@@ -54,14 +78,22 @@ def _fetch_presigned_url(project_name, dataset_name):
     }

     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v1/llm/presigned-url"
         response = requests.request("GET",
-                                    f"{RagaAICatalyst.BASE_URL}/v1/llm/presigned-url",
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")

         if response.status_code == 200:
-            return response.json()["data"]["presignedUrls"][0]
+            presigned_url = response.json()["data"]["presignedUrls"][0]
+            presigned_url = update_presigned_url(presigned_url,RagaAICatalyst.BASE_URL)
+            return presigned_url
         else:
             raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
     except requests.exceptions.RequestException as e:
```
```diff
@@ -80,15 +112,19 @@ def _put_zip_presigned_url(project_name, presignedUrl, filename):
     with open(filename, 'rb') as f:
         payload = f.read()

+    start_time = time.time()
     response = requests.request("PUT",
                                 presignedUrl,
                                 headers=headers,
                                 data=payload,
                                 timeout=99999)
+    elapsed_ms = (time.time() - start_time) * 1000
+    logger.debug(
+        f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
     if response.status_code != 200 or response.status_code != 201:
         return response, response.status_code

-def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+def _insert_code(dataset_name, hash_id, presigned_url, project_name, base_url=None):
     payload = json.dumps({
         "datasetName": dataset_name,
         "codeHash": hash_id,
```
```diff
@@ -102,11 +138,17 @@ def _insert_code(dataset_name, hash_id, presigned_url, project_name):
     }

     try:
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v2/llm/dataset/code"
         response = requests.request("POST",
-                                    f"{RagaAICatalyst.BASE_URL}/v2/llm/dataset/code",
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=99999)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
         if response.status_code == 200:
             return response.json()["message"]
         else:
```
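Every request in this file now follows the same instrumentation pattern: record `start_time`, issue the request, compute `elapsed_ms`, and emit a `logger.debug` line. A small helper like the following (hypothetical, not part of the package) captures the same behaviour in one place:

```python
import logging
import time

import requests

logger = logging.getLogger(__name__)

def timed_request(method: str, url: str, **kwargs) -> requests.Response:
    """Issue an HTTP request and log method, URL, status and wall time."""
    start_time = time.time()
    response = requests.request(method, url, **kwargs)
    elapsed_ms = (time.time() - start_time) * 1000
    logger.debug(
        f"API Call: [{method}] {url} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
    return response
```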
ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py

```diff
@@ -3,6 +3,7 @@ import logging
 import requests
 import os
 import json
+import time
 from ....ragaai_catalyst import RagaAICatalyst
 from ..utils.get_user_trace_metrics import get_user_trace_metrics

```
```diff
@@ -14,7 +15,7 @@ logging_level = (
 )


-def upload_trace_metric(json_file_path, dataset_name, project_name):
+def upload_trace_metric(json_file_path, dataset_name, project_name, base_url=None):
     try:
         with open(json_file_path, "r") as f:
             traces = json.load(f)
```
```diff
@@ -43,11 +44,17 @@ def upload_trace_metric(json_file_path, dataset_name, project_name):
             "datasetName": dataset_name,
             "metrics": metrics
         })
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
+        start_time = time.time()
+        endpoint = f"{url_base}/v1/llm/trace/metrics"
         response = requests.request("POST",
-                                    f"{RagaAICatalyst.BASE_URL}/v1/llm/trace/metrics",
+                                    endpoint,
                                     headers=headers,
                                     data=payload,
                                     timeout=10)
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
         if response.status_code != 200:
             raise ValueError(f"Error inserting agentic trace metrics")
     except requests.exceptions.RequestException as e:
```
ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py

```diff
@@ -4,7 +4,7 @@ import re
 import requests
 from ragaai_catalyst.tracers.agentic_tracing.tracers.base import RagaAICatalyst

-def create_dataset_schema_with_trace(project_name, dataset_name):
+def create_dataset_schema_with_trace(project_name, dataset_name, base_url=None):
     def make_request():
         headers = {
             "Content-Type": "application/json",
```
```diff
@@ -15,8 +15,10 @@ def create_dataset_schema_with_trace(project_name, dataset_name):
             "datasetName": dataset_name,
             "traceFolderUrl": None,
         })
+        # Use provided base_url or fall back to default
+        url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
         response = requests.request("POST",
-            f"{RagaAICatalyst.BASE_URL}/v1/llm/dataset/logs",
+            f"{url_base}/v1/llm/dataset/logs",
             headers=headers,
             data=payload,
             timeout=10
```
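With the optional `base_url` parameter now threaded through all of these helpers, a call can target a self-hosted Catalyst instance without touching the global `RagaAICatalyst.BASE_URL`; when the argument is `None`, each function falls back to that global. A hedged usage sketch, with placeholder project, dataset, and URL values:

```python
from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import (
    create_dataset_schema_with_trace,
)

# All three arguments below are placeholders; passing base_url=None (or
# omitting it) falls back to RagaAICatalyst.BASE_URL.
response = create_dataset_schema_with_trace(
    project_name="my-project",              # hypothetical project
    dataset_name="my-dataset",              # hypothetical dataset
    base_url="http://localhost:8080/api",   # hypothetical self-hosted endpoint
)
```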
ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py

```diff
@@ -4,7 +4,7 @@ from .trace_utils import (
     convert_usage_to_dict,
 )
 from importlib import resources
-from litellm import model_cost
+#from litellm import model_cost
 import json
 import os
 import asyncio
```
```diff
@@ -14,6 +14,15 @@ import logging

 logger = logging.getLogger(__name__)

+def get_model_cost():
+    file="model_prices_and_context_window_backup.json"
+    d={}
+    with resources.open_text("ragaai_catalyst.tracers.utils", file) as f:
+        d= json.load(f)
+    return d
+
+model_cost = get_model_cost()
+
 def extract_model_name(args, kwargs, result):
     """Extract model name from kwargs or result"""
     # First try direct model parameter
```
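Replacing the `litellm` import with a bundled pricing table removes that dependency from the import path; the 9365-line `model_prices_and_context_window_backup.json` added in this release (see the file list above) is now read once at module load. A standalone sketch of the same lookup follows; it uses `importlib.resources.files()` instead of the `open_text()` call in the diff, since `open_text()` is deprecated as of Python 3.11, and the example key names are assumptions about the JSON's schema:

```python
import json
from importlib import resources

def get_model_cost() -> dict:
    """Load the pricing/context-window table bundled with the package."""
    # resources.files() is the non-deprecated equivalent of open_text().
    path = resources.files("ragaai_catalyst.tracers.utils").joinpath(
        "model_prices_and_context_window_backup.json")
    with path.open(encoding="utf-8") as f:
        return json.load(f)

model_cost = get_model_cost()
# Hypothetical lookup; key names assumed from litellm's pricing schema:
# model_cost.get("gpt-4o", {}).get("input_cost_per_token")
print(len(model_cost), "models in the pricing table")
```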
|