ragaai-catalyst 1.0.8.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- ragaai_catalyst/__init__.py +2 -1
- ragaai_catalyst/dataset.py +186 -126
- ragaai_catalyst/evaluation.py +369 -0
- ragaai_catalyst/experiment.py +1 -1
- ragaai_catalyst/prompt_manager.py +112 -54
- ragaai_catalyst/ragaai_catalyst.py +45 -20
- ragaai_catalyst/tracers/exporters/file_span_exporter.py +3 -2
- ragaai_catalyst/tracers/exporters/raga_exporter.py +50 -27
- ragaai_catalyst/tracers/tracer.py +33 -26
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/METADATA +3 -4
- ragaai_catalyst-2.0.1.dist-info/RECORD +23 -0
- ragaai_catalyst-1.0.8.2.dist-info/RECORD +0 -22
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/__init__.py
CHANGED
```diff
@@ -4,5 +4,6 @@ from .tracers import Tracer
 from .utils import response_checker
 from .dataset import Dataset
 from .prompt_manager import PromptManager
+from .evaluation import Evaluation
 
-__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager"]
+__all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation"]
```
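For readers skimming the diff: the only user-visible change here is that `Evaluation` is now exported at the top level, backed by the new `ragaai_catalyst/evaluation.py` (+369 lines). A minimal sketch; the `Evaluation` constructor arguments are not visible in this diff, so the commented call is hypothetical:

```python
# New top-level export in 2.0.1.
from ragaai_catalyst import Evaluation

# Hypothetical construction -- the Evaluation signature is not shown in this diff:
# evaluation = Evaluation(project_name="my-project", dataset_name="my-dataset")
```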
ragaai_catalyst/dataset.py
CHANGED
```diff
@@ -16,11 +16,38 @@ class Dataset:
 
     def __init__(self, project_name):
         self.project_name = project_name
+        self.num_projects = 100
         Dataset.BASE_URL = (
             os.getenv("RAGAAI_CATALYST_BASE_URL")
             if os.getenv("RAGAAI_CATALYST_BASE_URL")
-            else "https://
+            else "https://catalyst.raga.ai/api"
         )
+        headers = {
+            "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
+        }
+        try:
+            response = requests.get(
+                f"{Dataset.BASE_URL}/v2/llm/projects?size={self.num_projects}",
+                headers=headers,
+                timeout=self.TIMEOUT,
+            )
+            response.raise_for_status()
+            logger.debug("Projects list retrieved successfully")
+
+            project_list = [
+                project["name"] for project in response.json()["data"]["content"]
+            ]
+
+            if project_name not in project_list:
+                raise ValueError("Project not found. Please enter a valid project name")
+
+            self.project_id = [
+                project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
+            ][0]
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Failed to retrieve projects list: {e}")
+            raise
 
     def list_datasets(self):
         """
```
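The practical effect of the new `__init__` is that constructing a `Dataset` now makes a network call: it fetches up to `num_projects` (100) projects from `GET /v2/llm/projects`, validates `project_name`, and caches `project_id` for the `X-Project-Id` headers used by the other methods. A minimal sketch of the new construction path, assuming a valid token in the environment:

```python
import os
from ragaai_catalyst import Dataset

# __init__ reads both variables; BASE_URL falls back to
# "https://catalyst.raga.ai/api" when RAGAAI_CATALYST_BASE_URL is unset.
os.environ["RAGAAI_CATALYST_TOKEN"] = "<your-token>"

# Raises ValueError("Project not found...") if the name is not among the
# first 100 projects returned by GET /v2/llm/projects.
dataset = Dataset(project_name="my-project")
print(dataset.project_id)  # resolved once here, reused as X-Project-Id later
```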
```diff
@@ -35,34 +62,44 @@ class Dataset:
 
         def make_request():
             headers = {
-
-                "
-                "X-Project-
-            }
-            params = {
-                "projectName": self.project_name,
-            }
-            response = requests.get(
-                f"{Dataset.BASE_URL}/v1/llm/sub-datasets",
-                headers=headers,
-                params=params,
-                timeout=Dataset.TIMEOUT,
-            )
-            return response
-
-        response = make_request()
-        response_checker(response, "Dataset.list_datasets")
-        if response.status_code == 401:
-            get_token()  # Fetch a new token and set it in the environment
-            response = make_request()  # Retry the request
-        if response.status_code != 200:
-            return {
-                "status_code": response.status_code,
-                "message": response.json(),
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
             }
-
-
-
+            json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to list datasets: {e}")
+                raise
+
+        try:
+            response = make_request()
+            response_checker(response, "Dataset.list_datasets")
+            if response.status_code == 401:
+                get_token()  # Fetch a new token and set it in the environment
+                response = make_request()  # Retry the request
+            if response.status_code != 200:
+                return {
+                    "status_code": response.status_code,
+                    "message": response.json(),
+                }
+            datasets = response.json()["data"]["content"]
+            dataset_list = [dataset["name"] for dataset in datasets]
+            return dataset_list
+        except Exception as e:
+            logger.error(f"Error in list_datasets: {e}")
+            raise
+
+    def get_schema_mapping(self):
+        return ["traceid", "prompt", "context", "response", "expected_response", "expected_context", "timestamp", "metadata", "pipeline", "cost", "feedBack", "latency", "sanitized_response", "system_prompt", "traceUri"]
 
     def create_from_trace(self, dataset_name, filter_list):
         """
```
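`list_datasets` now returns a plain list of dataset names, and the new `get_schema_mapping` exposes the column types accepted as values in `create_from_csv`'s `schema_mapping`. A short sketch, continuing from the `dataset` object above:

```python
# POST /v2/llm/dataset with {"size": 12, "page": "0", ...}: the hard-coded
# page size appears to cap the listing at the first 12 datasets.
names = dataset.list_datasets()
print(names)  # e.g. ["my-first-dataset", ...]

# Static list of valid schema elements ("prompt", "context", "response", ...)
print(dataset.get_schema_mapping())
```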
```diff
@@ -91,85 +128,88 @@ class Dataset:
             "subDatasetName": dataset_name,
             "filterList": filter_list,
         }
-
-
-
-
-
-
-
-
-
-
-
-
-            response = request_trace_creation()  # Retry the request
-            if response.status_code != 200:
-                return response.json()["message"]
-            message = response.json()["message"]
-            return f"{message} {dataset_name}"
-
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v1/llm/sub-dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to create dataset from trace: {e}")
+                raise
 
+        try:
+            response = request_trace_creation()
+            response_checker(response, "Dataset.create_dataset")
+            if response.status_code == 401:
+                get_token()  # Fetch a new token and set it in the environment
+                response = request_trace_creation()  # Retry the request
+            if response.status_code != 200:
+                return response.json()["message"]
+            message = response.json()["message"]
+            return f"{message} {dataset_name}"
+        except Exception as e:
+            logger.error(f"Error in create_from_trace: {e}")
+            raise
 
-    ###################### CSV Upload APIs ###################
+    ###################### CSV Upload APIs ###################
 
     def get_csv_schema(self):
         headers = {
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
             "X-Project-Name": self.project_name,
         }
-
+        try:
+            response = requests.get(
                 f"{Dataset.BASE_URL}/v1/llm/schema-elements",
                 headers=headers,
                 timeout=Dataset.TIMEOUT,
             )
-
-
-
-
-
-
-
-
+            response.raise_for_status()
+            response_data = response.json()
+            if not response_data['success']:
+                raise ValueError('Unable to fetch Schema Elements for the CSV')
+            return response_data
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Failed to get CSV schema: {e}")
+            raise
 
     def create_from_csv(self, csv_path, dataset_name, schema_mapping):
-
-
-
-        keys = list(df.columns)
-        values = self.get_csv_schema()['data']['schemaElements']
-        print(type(values), values)
-        for k in schema_mapping.keys():
-            if k not in keys:
-                raise ValueError(f'--{k}-- column is not present in csv column but present in schema_mapping. Plase provide the right schema_mapping.')
-        for k in schema_mapping.values():
-            if k not in values:
-                raise ValueError(f'--{k}-- is not present in the schema_elements but present in schema_mapping. Plase provide the right schema_mapping.')
-
+        list_dataset = self.list_datasets()
+        if dataset_name in list_dataset:
+            raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")
 
         #### get presigned URL
         def get_presignedUrl():
             headers = {
                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-                "X-Project-
+                "X-Project-Id": str(self.project_id),
             }
-
-
-
-
-
-
-
-
-
-
-
-            print('-- PresignedUrl fetched Succussfuly --')
-            print('filename: ', filename)
-        else:
-            raise ValueError('Unable to fetch presignedUrl')
-
+            try:
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get presigned URL: {e}")
+                raise
 
+        try:
+            presignedUrl = get_presignedUrl()
+            if presignedUrl['success']:
+                url = presignedUrl['data']['presignedUrl']
+                filename = presignedUrl['data']['fileName']
+            else:
+                raise ValueError('Unable to fetch presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in get_presignedUrl: {e}")
+            raise
 
         #### put csv to presigned URL
         def put_csv_to_presignedUrl(url):
```
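`create_from_trace` keeps its retry-on-401 shape but now wraps both the request and the surrounding flow in try/except blocks that log and re-raise. The expected structure of `filter_list` is not visible in this diff, so the filter below is purely hypothetical:

```python
# Hypothetical filter_list -- its expected shape is not shown in this diff.
result = dataset.create_from_trace(
    dataset_name="traces-subset",
    filter_list=[{"name": "prompt", "value": "weather"}],  # assumed shape
)
print(result)  # "<server message> traces-subset" on success
```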
```diff
@@ -177,51 +217,71 @@ class Dataset:
                 'Content-Type': 'text/csv',
                 'x-ms-blob-type': 'BlockBlob',
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
-                raise ValueError('Unable to put csv to the presignedUrl')
-            else:
-                print('-- csv put to presignedUrl Succussfuly --')
-
+            try:
+                with open(csv_path, 'rb') as file:
+                    response = requests.put(
+                        url,
+                        headers=headers,
+                        data=file,
+                        timeout=Dataset.TIMEOUT,
+                    )
+                    response.raise_for_status()
+                    return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to put CSV to presigned URL: {e}")
+                raise
 
+        try:
+            put_csv_response = put_csv_to_presignedUrl(url)
+            if put_csv_response.status_code != 200:
+                raise ValueError('Unable to put csv to the presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in put_csv_to_presignedUrl: {e}")
+            raise
 
         ## Upload csv to elastic
         def upload_csv_to_elastic(data):
             header = {
+                'Content-Type': 'application/json',
                 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-
+                "X-Project-Id": str(self.project_id)
             }
-
-
-
-
-
-
+            try:
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
+                    headers=header,
+                    json=data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                if response.status_code==400:
+                    raise ValueError(response.json()["message"])
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to upload CSV to elastic: {e}")
+                raise
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        def generate_schema(mapping):
+            result = {}
+            for column, schema_element in mapping.items():
+                result[column] = {"columnType": schema_element}
+            return result
+
+        try:
+            schema_mapping = generate_schema(schema_mapping)
+            data = {
+                "projectId": str(self.project_id),
+                "datasetName": dataset_name,
+                "fileName": filename,
+                "schemaMapping": schema_mapping,
+                "opType": "insert",
+                "description": ""
+            }
+            upload_csv_response = upload_csv_to_elastic(data)
+            if not upload_csv_response['success']:
+                raise ValueError('Unable to upload csv')
+            else:
+                print(upload_csv_response['message'])
+        except Exception as e:
+            logger.error(f"Error in create_from_csv: {e}")
+            raise
```