ragaai-catalyst 1.0.8.1__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +2 -1
- ragaai_catalyst/dataset.py +186 -126
- ragaai_catalyst/evaluation.py +369 -0
- ragaai_catalyst/experiment.py +1 -1
- ragaai_catalyst/prompt_manager.py +112 -54
- ragaai_catalyst/ragaai_catalyst.py +44 -17
- ragaai_catalyst/tracers/exporters/file_span_exporter.py +16 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +50 -27
- ragaai_catalyst/tracers/tracer.py +33 -26
- {ragaai_catalyst-1.0.8.1.dist-info → ragaai_catalyst-2.0.dist-info}/METADATA +13 -14
- ragaai_catalyst-2.0.dist-info/RECORD +23 -0
- ragaai_catalyst-1.0.8.1.dist-info/RECORD +0 -22
- {ragaai_catalyst-1.0.8.1.dist-info → ragaai_catalyst-2.0.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-1.0.8.1.dist-info → ragaai_catalyst-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,369 @@
|
|
1
|
+
import os
|
2
|
+
import requests
|
3
|
+
import pandas as pd
|
4
|
+
import io
|
5
|
+
from .ragaai_catalyst import RagaAICatalyst
|
6
|
+
import logging
|
7
|
+
import pdb
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
class Evaluation:
|
12
|
+
|
13
|
+
def __init__(self, project_name, dataset_name):
    """Resolve *project_name* and *dataset_name* against the RagaAI backend.

    Stores ``self.project_id`` and ``self.dataset_id`` for later calls.

    Raises:
        ValueError: when the project or dataset name is unknown.
        requests.exceptions.RequestException: re-raised after logging on
            any network failure.
    """
    self.project_name = project_name
    self.dataset_name = dataset_name
    self.base_url = f"{RagaAICatalyst.BASE_URL}"
    self.timeout = 10
    # Populated by add_metrics(); polled by get_status().
    self.jobId = None
    self.num_projects = 100

    try:
        response = requests.get(
            f"{self.base_url}/v2/llm/projects?size={self.num_projects}",
            headers={
                "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        logger.debug("Projects list retrieved successfully")

        # Parse the payload once instead of calling response.json() twice.
        projects = response.json()["data"]["content"]
        project_list = [project["name"] for project in projects]
        if project_name not in project_list:
            raise ValueError("Project not found. Please enter a valid project name")

        self.project_id = [
            project["id"] for project in projects if project["name"] == project_name
        ][0]

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to retrieve projects list: {e}")
        raise

    try:
        headers = {
            'Content-Type': 'application/json',
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Id": str(self.project_id),
        }
        # NOTE(review): page size is fixed at 12 — datasets beyond the first
        # page are not seen here; confirm against backend paging behaviour.
        json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
        response = requests.post(
            f"{self.base_url}/v2/llm/dataset",
            headers=headers,
            json=json_data,
            timeout=self.timeout,
        )
        response.raise_for_status()
        datasets_content = response.json()["data"]["content"]
        dataset_list = [dataset["name"] for dataset in datasets_content]

        if dataset_name not in dataset_list:
            raise ValueError("Dataset not found. Please enter a valid dataset name")

        self.dataset_id = [dataset["id"] for dataset in datasets_content if dataset["name"] == dataset_name][0]

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to retrieve dataset list: {e}")
        raise
|
73
|
+
|
74
|
+
|
75
|
+
def list_metrics(self):
    """Return the names of all LLM metrics available to this project.

    Returns:
        list[str]: metric names, or an empty list on any request failure
        (errors are logged, never raised).
    """
    headers = {
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'X-Project-Id': str(self.project_id),
    }
    try:
        # fix: timeout added so a stalled backend cannot hang the caller forever
        response = requests.get(
            f'{self.base_url}/v1/llm/llm-metrics',
            headers=headers,
            timeout=self.timeout)
        response.raise_for_status()
        metric_names = [metric["name"] for metric in response.json()["data"]["metrics"]]
        return metric_names
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    return []
|
98
|
+
|
99
|
+
def _get_dataset_schema(self):
    """Fetch the dataset's column schema from the backend.

    Returns:
        The list of column descriptors (each with ``columnName`` and
        ``displayName``), or ``{}`` on failure (errors are logged).
    """
    headers = {
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'Content-Type': 'application/json',
        'X-Project-Id': str(self.project_id),
    }
    data = {
        "datasetId": str(self.dataset_id),
        "fields": [],
        "rowFilterList": []
    }
    try:
        # fix: timeout added — this request previously had none and could hang
        response = requests.post(
            f'{self.base_url}/v1/llm/docs',
            headers=headers,
            json=data,
            timeout=self.timeout)
        response.raise_for_status()
        if response.status_code == 200:
            return response.json()["data"]["columns"]
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    return {}
|
129
|
+
|
130
|
+
def _get_variablename_from_dataset_schema(self, schemaName, metric_name):
|
131
|
+
dataset_schema = self._get_dataset_schema()
|
132
|
+
variableName = None
|
133
|
+
for column in dataset_schema:
|
134
|
+
columnName = column["columnName"].split('_')[0]
|
135
|
+
displayName = column["displayName"]
|
136
|
+
if columnName==schemaName.lower():
|
137
|
+
variableName = displayName
|
138
|
+
if variableName:
|
139
|
+
return variableName
|
140
|
+
else:
|
141
|
+
raise ValueError(f"'{schemaName.lower()}' column is required for {metric_name} metric evaluation, but not found in dataset")
|
142
|
+
|
143
|
+
|
144
|
+
def _get_mapping(self, metric_name, metrics_schema):
|
145
|
+
mapping = []
|
146
|
+
for schema in metrics_schema:
|
147
|
+
if schema["name"]==metric_name:
|
148
|
+
requiredFields = schema["config"]["requiredFields"]
|
149
|
+
for field in requiredFields:
|
150
|
+
schemaName = field["name"]
|
151
|
+
variableName = self._get_variablename_from_dataset_schema(schemaName, metric_name)
|
152
|
+
mapping.append({"schemaName": schemaName, "variableName": variableName})
|
153
|
+
return mapping
|
154
|
+
|
155
|
+
def _get_metricParams(self):
|
156
|
+
return {
|
157
|
+
"metricSpec": {
|
158
|
+
"name": "metric_to_evaluate",
|
159
|
+
"config": {
|
160
|
+
"model": "null",
|
161
|
+
"params": {
|
162
|
+
"model": {
|
163
|
+
"value": "gpt-4o"
|
164
|
+
},
|
165
|
+
"threshold": {
|
166
|
+
"gte": 0.5
|
167
|
+
}
|
168
|
+
},
|
169
|
+
"mappings": "mappings"
|
170
|
+
},
|
171
|
+
"displayName": "displayName"
|
172
|
+
},
|
173
|
+
"rowFilterList": []
|
174
|
+
}
|
175
|
+
|
176
|
+
def _get_metrics_schema_response(self):
    """Return the raw metric schema objects for this project.

    Returns:
        list[dict]: one dict per metric (including ``config.requiredFields``),
        or an empty list on any request failure (errors are logged).
    """
    headers = {
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'X-Project-Id': str(self.project_id),
    }
    try:
        # fix: timeout added — this request previously had none and could hang
        response = requests.get(
            f'{self.base_url}/v1/llm/llm-metrics',
            headers=headers,
            timeout=self.timeout)
        response.raise_for_status()
        # fix: the payload is already a list; no need to copy it element by element
        return response.json()["data"]["metrics"]
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    return []
|
199
|
+
|
200
|
+
def _update_base_json(self, metrics):
|
201
|
+
metric_schema_mapping = {"datasetId":self.dataset_id}
|
202
|
+
metrics_schema_response = self._get_metrics_schema_response()
|
203
|
+
metricParams = []
|
204
|
+
for metric in metrics:
|
205
|
+
base_json = self._get_metricParams()
|
206
|
+
base_json["metricSpec"]["name"] = metric["name"]
|
207
|
+
if metric["config"]["model"]:
|
208
|
+
base_json["metricSpec"]["config"]["params"]["model"]["value"] = metric["config"]["model"]
|
209
|
+
base_json["metricSpec"]["displayName"] = metric["column_name"]
|
210
|
+
mappings = self._get_mapping(metric["name"], metrics_schema_response)
|
211
|
+
base_json["metricSpec"]["config"]["mappings"] = mappings
|
212
|
+
metricParams.append(base_json)
|
213
|
+
metric_schema_mapping["metricParams"] = metricParams
|
214
|
+
return metric_schema_mapping
|
215
|
+
|
216
|
+
def _get_executed_metrics_list(self):
    """Return display names of metrics already executed on this dataset.

    Used by add_metrics() to reject duplicate column names. Returns an
    empty list on any request failure (errors are logged).
    """
    headers = {
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'X-Project-Id': str(self.project_id),
    }
    try:
        # fix: timeout added — this request previously had none and could hang
        response = requests.get(
            f'{self.base_url}/v1/llm/filter?datasetId={str(self.dataset_id)}',
            headers=headers,
            timeout=self.timeout
        )
        response.raise_for_status()
        executed_metric_response = response.json()["data"]["filter"]
        executed_metric_list = [item["displayName"] for item in executed_metric_response]
        return executed_metric_list
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    return []
|
241
|
+
|
242
|
+
def add_metrics(self, metrics):
    """Schedule evaluation of *metrics* on the dataset.

    Each metric dict needs "name", "config" (with "model") and
    "column_name" keys. On success the backend message is printed and the
    job id is stored on ``self.jobId`` for later get_status() polling.

    Raises:
        ValueError: when a requested column name already exists on the
            dataset, or the backend rejects the request with HTTP 400.
    """
    executed_metric_list = self._get_executed_metrics_list()
    column_names = [metric["column_name"] for metric in metrics]
    for column_name in column_names:
        if column_name in executed_metric_list:
            raise ValueError(f"Column name '{column_name}' already exists.")

    headers = {
        'Content-Type': 'application/json',
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'X-Project-Id': str(self.project_id),
    }
    metric_schema_mapping = self._update_base_json(metrics)
    # fix: removed leftover debug print of the full request payload
    try:
        # fix: timeout added — this request previously had none and could hang
        response = requests.post(
            f'{self.base_url}/playground/metric-evaluation',
            headers=headers,
            json=metric_schema_mapping,
            timeout=self.timeout
        )
        if response.status_code == 400:
            raise ValueError(response.json()["message"])
        response.raise_for_status()
        # fix: parse the payload once instead of calling response.json() thrice
        payload = response.json()
        if payload["success"]:
            print(payload["message"])
            self.jobId = payload["data"]["jobId"]

    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except ValueError:
        # fix: the 400-path ValueError was previously swallowed by the
        # generic Exception handler below; propagate it to the caller as
        # the explicit raise above clearly intends.
        raise
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
|
279
|
+
|
280
|
+
def get_status(self):
    """Print the current status of the evaluation job started by add_metrics().

    NOTE(review): ``self.jobId`` is None until add_metrics() succeeds; the
    backend response for a null job id is printed/logged as-is — confirm
    whether a guard is wanted here.
    """
    headers = {
        'Content-Type': 'application/json',
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        'X-Project-Id': str(self.project_id),
    }
    data = {"jobId": self.jobId}
    try:
        # fix: timeout added — this request previously had none and could hang
        response = requests.post(
            f'{self.base_url}/job/status',
            headers=headers,
            json=data,
            timeout=self.timeout)
        response.raise_for_status()
        print(response.json()["data"]["status"])
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        logger.error(f"Timeout error occurred: {timeout_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"An error occurred: {req_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
|
304
|
+
|
305
|
+
def get_results(self):
    """Download the dataset (including metric results) as a DataFrame.

    Internal columns (names starting with '_' or containing '.') are
    dropped. Returns an empty DataFrame when the export fails.
    """

    def get_presignedUrl():
        # Ask the backend to export the full dataset and hand back a
        # pre-signed download URL. Returns {} on failure.
        headers = {
            'Content-Type': 'application/json',
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            'X-Project-Id': str(self.project_id),
        }

        data = {
            "fields": [
                "*"
            ],
            "datasetId": str(self.dataset_id),
            "rowFilterList": [],
            "export": True
        }
        try:
            # fix: timeout added — this request previously had none
            response = requests.post(
                f'{self.base_url}/v1/llm/docs',
                headers=headers,
                json=data,
                timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as http_err:
            logger.error(f"HTTP error occurred: {http_err}")
        except requests.exceptions.ConnectionError as conn_err:
            logger.error(f"Connection error occurred: {conn_err}")
        except requests.exceptions.Timeout as timeout_err:
            logger.error(f"Timeout error occurred: {timeout_err}")
        except requests.exceptions.RequestException as req_err:
            logger.error(f"An error occurred: {req_err}")
        except Exception as e:
            logger.error(f"An unexpected error occurred: {e}")
        return {}

    def parse_response():
        # Follow the pre-signed URL and return the CSV text ("" on failure,
        # including when get_presignedUrl() returned {}).
        try:
            response = get_presignedUrl()
            preSignedURL = response["data"]["preSignedURL"]
            # fix: timeout added — this download previously had none
            response = requests.get(preSignedURL, timeout=self.timeout)
            response.raise_for_status()
            return response.text
        except requests.exceptions.HTTPError as http_err:
            logger.error(f"HTTP error occurred: {http_err}")
        except requests.exceptions.ConnectionError as conn_err:
            logger.error(f"Connection error occurred: {conn_err}")
        except requests.exceptions.Timeout as timeout_err:
            logger.error(f"Timeout error occurred: {timeout_err}")
        except requests.exceptions.RequestException as req_err:
            logger.error(f"An error occurred: {req_err}")
        except Exception as e:
            logger.error(f"An unexpected error occurred: {e}")
        return ""

    response_text = parse_response()
    if response_text:
        df = pd.read_csv(io.StringIO(response_text))

        column_list = df.columns.to_list()
        column_list = [col for col in column_list if not col.startswith('_')]
        column_list = [col for col in column_list if '.' not in col]
        return df[column_list]
    else:
        return pd.DataFrame()
|
ragaai_catalyst/experiment.py
CHANGED
@@ -476,7 +476,7 @@ class Experiment:
|
|
476
476
|
|
477
477
|
columns_list = x.columns.tolist()
|
478
478
|
#remove trace_uri from columns_list if it exists
|
479
|
-
columns_list = columns_list -
|
479
|
+
columns_list = list(set(columns_list) - {"trace_uri"})
|
480
480
|
x = x[columns_list]
|
481
481
|
|
482
482
|
return True, x
|