ragaai-catalyst 1.0.8.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +2 -1
- ragaai_catalyst/dataset.py +186 -126
- ragaai_catalyst/evaluation.py +369 -0
- ragaai_catalyst/experiment.py +1 -1
- ragaai_catalyst/prompt_manager.py +112 -54
- ragaai_catalyst/ragaai_catalyst.py +45 -20
- ragaai_catalyst/tracers/exporters/file_span_exporter.py +3 -2
- ragaai_catalyst/tracers/exporters/raga_exporter.py +50 -27
- ragaai_catalyst/tracers/tracer.py +33 -26
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/METADATA +3 -4
- ragaai_catalyst-2.0.1.dist-info/RECORD +23 -0
- ragaai_catalyst-1.0.8.2.dist-info/RECORD +0 -22
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-1.0.8.2.dist-info → ragaai_catalyst-2.0.1.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ import requests
|
|
3
3
|
import json
|
4
4
|
import re
|
5
5
|
from .ragaai_catalyst import RagaAICatalyst
|
6
|
-
import
|
6
|
+
import copy
|
7
7
|
|
8
8
|
class PromptManager:
|
9
9
|
NUM_PROJECTS = 100
|
@@ -21,36 +21,28 @@ class PromptManager:
|
|
21
21
|
ValueError: If the project is not found.
|
22
22
|
"""
|
23
23
|
self.project_name = project_name
|
24
|
-
self.headers = {
|
25
|
-
"Content-Type": "application/json",
|
26
|
-
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
27
|
-
"X-Project-Name": self.project_name
|
28
|
-
}
|
29
24
|
self.base_url = f"{RagaAICatalyst.BASE_URL}/playground/prompt"
|
30
25
|
self.timeout = 10
|
26
|
+
self.size = 100 #Number of projects to fetch
|
31
27
|
|
32
28
|
try:
|
33
29
|
response = requests.get(
|
34
|
-
f"{RagaAICatalyst.BASE_URL}/projects",
|
35
|
-
params={
|
36
|
-
"size": str(self.NUM_PROJECTS),
|
37
|
-
"page": "0",
|
38
|
-
"type": "llm",
|
39
|
-
},
|
30
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/projects?size={self.size}",
|
40
31
|
headers={
|
41
|
-
"Content-Type": "application/json",
|
42
32
|
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
43
33
|
},
|
44
|
-
timeout=self.
|
34
|
+
timeout=self.timeout,
|
45
35
|
)
|
46
36
|
response.raise_for_status()
|
47
|
-
|
48
|
-
raise requests.RequestException(f"Error fetching projects: {str(e)}")
|
37
|
+
# logger.debug("Projects list retrieved successfully")
|
49
38
|
|
50
|
-
try:
|
51
39
|
project_list = [
|
52
40
|
project["name"] for project in response.json()["data"]["content"]
|
53
41
|
]
|
42
|
+
self.project_id = [
|
43
|
+
project["id"] for project in response.json()["data"]["content"] if project["name"]==project_name
|
44
|
+
][0]
|
45
|
+
|
54
46
|
except (KeyError, json.JSONDecodeError) as e:
|
55
47
|
raise ValueError(f"Error parsing project list: {str(e)}")
|
56
48
|
|
@@ -58,6 +50,12 @@ class PromptManager:
|
|
58
50
|
raise ValueError("Project not found. Please enter a valid project name")
|
59
51
|
|
60
52
|
|
53
|
+
self.headers = {
|
54
|
+
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
55
|
+
"X-Project-Id": str(self.project_id)
|
56
|
+
}
|
57
|
+
|
58
|
+
|
61
59
|
def list_prompts(self):
|
62
60
|
"""
|
63
61
|
List all available prompts.
|
@@ -164,6 +162,7 @@ class Prompt:
|
|
164
162
|
|
165
163
|
Raises:
|
166
164
|
requests.RequestException: If there's an error with the API request.
|
165
|
+
ValueError: If there's an error parsing the prompt list.
|
167
166
|
"""
|
168
167
|
try:
|
169
168
|
response = requests.get(url, headers=headers, timeout=timeout)
|
@@ -175,7 +174,7 @@ class Prompt:
|
|
175
174
|
except (KeyError, json.JSONDecodeError) as e:
|
176
175
|
raise ValueError(f"Error parsing prompt list: {str(e)}")
|
177
176
|
|
178
|
-
def
|
177
|
+
def _get_response_by_version(self, base_url, headers, timeout, prompt_name, version):
|
179
178
|
"""
|
180
179
|
Get a specific version of a prompt.
|
181
180
|
|
@@ -184,7 +183,15 @@ class Prompt:
|
|
184
183
|
headers (dict): The headers to be used in the request.
|
185
184
|
timeout (int): The timeout for the request.
|
186
185
|
prompt_name (str): The name of the prompt.
|
187
|
-
version (str): The version of the prompt.
|
186
|
+
version (str): The version of the prompt.
|
187
|
+
|
188
|
+
Returns:
|
189
|
+
response: The response object containing the prompt version data.
|
190
|
+
|
191
|
+
Raises:
|
192
|
+
requests.RequestException: If there's an error with the API request.
|
193
|
+
ValueError: If there's an error parsing the prompt version.
|
194
|
+
"""
|
188
195
|
try:
|
189
196
|
response = requests.get(f"{base_url}/version/{prompt_name}?version={version}",
|
190
197
|
headers=headers, timeout=timeout)
|
@@ -195,7 +202,23 @@ class Prompt:
|
|
195
202
|
raise ValueError(f"Error parsing prompt version: {str(e)}")
|
196
203
|
return response
|
197
204
|
|
198
|
-
def
|
205
|
+
def _get_response(self, base_url, headers, timeout, prompt_name):
|
206
|
+
"""
|
207
|
+
Get the latest version of a prompt.
|
208
|
+
|
209
|
+
Args:
|
210
|
+
base_url (str): The base URL for the API.
|
211
|
+
headers (dict): The headers to be used in the request.
|
212
|
+
timeout (int): The timeout for the request.
|
213
|
+
prompt_name (str): The name of the prompt.
|
214
|
+
|
215
|
+
Returns:
|
216
|
+
response: The response object containing the latest prompt version data.
|
217
|
+
|
218
|
+
Raises:
|
219
|
+
requests.RequestException: If there's an error with the API request.
|
220
|
+
ValueError: If there's an error parsing the prompt version.
|
221
|
+
"""
|
199
222
|
try:
|
200
223
|
response = requests.get(f"{base_url}/version/{prompt_name}",
|
201
224
|
headers=headers, timeout=timeout)
|
@@ -206,7 +229,7 @@ class Prompt:
|
|
206
229
|
raise ValueError(f"Error parsing prompt version: {str(e)}")
|
207
230
|
return response
|
208
231
|
|
209
|
-
def
|
232
|
+
def _get_prompt_by_version(self, base_url, headers, timeout, prompt_name, version):
|
210
233
|
"""
|
211
234
|
Get a specific version of a prompt.
|
212
235
|
|
@@ -223,7 +246,7 @@ class Prompt:
|
|
223
246
|
Raises:
|
224
247
|
requests.RequestException: If there's an error with the API request.
|
225
248
|
"""
|
226
|
-
response = self.
|
249
|
+
response = self._get_response_by_version(base_url, headers, timeout, prompt_name, version)
|
227
250
|
prompt_text = response.json()["data"]["docs"][0]["textFields"]
|
228
251
|
return prompt_text
|
229
252
|
|
@@ -245,12 +268,12 @@ class Prompt:
|
|
245
268
|
requests.RequestException: If there's an error with the API request.
|
246
269
|
"""
|
247
270
|
if version:
|
248
|
-
response = self.
|
271
|
+
response = self._get_response_by_version(base_url, headers, timeout, prompt_name, version)
|
249
272
|
prompt_text = response.json()["data"]["docs"][0]["textFields"]
|
250
273
|
prompt_parameters = response.json()["data"]["docs"][0]["modelSpecs"]["parameters"]
|
251
274
|
model = response.json()["data"]["docs"][0]["modelSpecs"]["model"]
|
252
275
|
else:
|
253
|
-
response = self.
|
276
|
+
response = self._get_response(base_url, headers, timeout, prompt_name)
|
254
277
|
prompt_text = response.json()["data"]["docs"][0]["textFields"]
|
255
278
|
prompt_parameters = response.json()["data"]["docs"][0]["modelSpecs"]["parameters"]
|
256
279
|
model = response.json()["data"]["docs"][0]["modelSpecs"]["model"]
|
@@ -272,6 +295,7 @@ class Prompt:
|
|
272
295
|
|
273
296
|
Raises:
|
274
297
|
requests.RequestException: If there's an error with the API request.
|
298
|
+
ValueError: If there's an error parsing the prompt versions.
|
275
299
|
"""
|
276
300
|
try:
|
277
301
|
response = requests.get(f"{base_url}/{prompt_name}/version",
|
@@ -280,7 +304,7 @@ class Prompt:
|
|
280
304
|
version_names = [version["name"] for version in response.json()["data"]]
|
281
305
|
prompt_versions = {}
|
282
306
|
for version in version_names:
|
283
|
-
prompt_versions[version] = self.
|
307
|
+
prompt_versions[version] = self._get_prompt_by_version(base_url, headers, timeout, prompt_name, version)
|
284
308
|
return prompt_versions
|
285
309
|
except requests.RequestException as e:
|
286
310
|
raise requests.RequestException(f"Error listing prompt versions: {str(e)}")
|
@@ -299,20 +323,43 @@ class PromptObject:
|
|
299
323
|
model (str): The model of the prompt.
|
300
324
|
"""
|
301
325
|
self.text = text
|
302
|
-
self.variables = self._extract_variables()
|
303
326
|
self.parameters = parameters
|
304
327
|
self.model = model
|
328
|
+
|
329
|
+
def _extract_variable_from_content(self, content):
|
330
|
+
"""
|
331
|
+
Extract variables from the content.
|
332
|
+
|
333
|
+
Args:
|
334
|
+
content (str): The content containing variables.
|
305
335
|
|
306
|
-
|
336
|
+
Returns:
|
337
|
+
list: A list of variable names found in the content.
|
307
338
|
"""
|
308
|
-
|
339
|
+
pattern = r'\{\{(.*?)\}\}'
|
340
|
+
matches = re.findall(pattern, content)
|
341
|
+
variables = [match.strip() for match in matches if '"' not in match]
|
342
|
+
return variables
|
343
|
+
|
344
|
+
def _add_variable_value_to_content(self, content, user_variables):
|
345
|
+
"""
|
346
|
+
Add variable values to the content.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
content (str): The content containing variables.
|
350
|
+
user_variables (dict): A dictionary of user-provided variable values.
|
309
351
|
|
310
352
|
Returns:
|
311
|
-
|
353
|
+
str: The content with variables replaced by their values.
|
312
354
|
"""
|
313
|
-
|
314
|
-
|
315
|
-
|
355
|
+
variables = self._extract_variable_from_content(content)
|
356
|
+
for key, value in user_variables.items():
|
357
|
+
if not isinstance(value, str):
|
358
|
+
raise ValueError(f"Value for variable '{key}' must be a string, not {type(value).__name__}")
|
359
|
+
if key in variables:
|
360
|
+
content = content.replace(f"{{{{{key}}}}}", value)
|
361
|
+
return content
|
362
|
+
|
316
363
|
def compile(self, **kwargs):
|
317
364
|
"""
|
318
365
|
Compile the prompt by replacing variables with provided values.
|
@@ -326,27 +373,23 @@ class PromptObject:
|
|
326
373
|
Raises:
|
327
374
|
ValueError: If there are missing or extra variables, or if a value is not a string.
|
328
375
|
"""
|
329
|
-
required_variables =
|
376
|
+
required_variables = self.get_variables()
|
330
377
|
provided_variables = set(kwargs.keys())
|
331
378
|
|
332
|
-
missing_variables = required_variables
|
333
|
-
extra_variables = provided_variables
|
379
|
+
missing_variables = [item for item in required_variables if item not in provided_variables]
|
380
|
+
extra_variables = [item for item in provided_variables if item not in required_variables]
|
334
381
|
|
335
382
|
if missing_variables:
|
336
383
|
raise ValueError(f"Missing variable(s): {', '.join(missing_variables)}")
|
337
384
|
if extra_variables:
|
338
385
|
raise ValueError(f"Extra variable(s) provided: {', '.join(extra_variables)}")
|
339
|
-
# pdb.set_trace()
|
340
386
|
|
341
|
-
|
342
|
-
user_content = next(item["content"] for item in self.text if item["role"] == "user")
|
387
|
+
updated_text = copy.deepcopy(self.text)
|
343
388
|
|
344
|
-
for
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
compiled_prompt = [{"content": user_content if item["role"] == "user" else item["content"], "role": item["role"]} for item in self.text]
|
349
|
-
return compiled_prompt
|
389
|
+
for item in updated_text:
|
390
|
+
item["content"] = self._add_variable_value_to_content(item["content"], kwargs)
|
391
|
+
|
392
|
+
return updated_text
|
350
393
|
|
351
394
|
def get_variables(self):
|
352
395
|
"""
|
@@ -355,28 +398,43 @@ class PromptObject:
|
|
355
398
|
Returns:
|
356
399
|
list: A list of variable names found in the prompt text.
|
357
400
|
"""
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
401
|
+
variables = set()
|
402
|
+
for item in self.text:
|
403
|
+
content = item["content"]
|
404
|
+
for var in self._extract_variable_from_content(content):
|
405
|
+
variables.add(var)
|
406
|
+
if variables:
|
407
|
+
return list(variables)
|
408
|
+
else:
|
409
|
+
return []
|
362
410
|
|
363
|
-
|
364
|
-
|
411
|
+
def _convert_value(self, value, type_):
|
412
|
+
"""
|
413
|
+
Convert value based on type.
|
414
|
+
|
415
|
+
Args:
|
416
|
+
value: The value to be converted.
|
417
|
+
type_ (str): The type to convert the value to.
|
418
|
+
|
419
|
+
Returns:
|
420
|
+
The converted value.
|
421
|
+
"""
|
365
422
|
if type_ == "float":
|
366
423
|
return float(value)
|
367
424
|
elif type_ == "int":
|
368
425
|
return int(value)
|
369
426
|
return value # Default case, return as is
|
370
427
|
|
371
|
-
def
|
428
|
+
def get_model_parameters(self):
|
372
429
|
"""
|
373
430
|
Get all parameters in the prompt text.
|
374
431
|
|
375
432
|
Returns:
|
376
433
|
dict: A dictionary of parameters found in the prompt text.
|
377
434
|
"""
|
378
|
-
parameters = {param["name"]: self.
|
435
|
+
parameters = {param["name"]: self._convert_value(param["value"], param["type"]) for param in self.parameters}
|
379
436
|
parameters["model"] = self.model
|
380
|
-
return parameters
|
437
|
+
return parameters
|
381
438
|
|
382
|
-
|
439
|
+
def get_prompt_content(self):
|
440
|
+
return self.text
|
@@ -57,14 +57,15 @@ class RagaAICatalyst:
|
|
57
57
|
if base_url:
|
58
58
|
RagaAICatalyst.BASE_URL = base_url
|
59
59
|
try:
|
60
|
+
self.get_token()
|
60
61
|
os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
|
61
62
|
except requests.exceptions.RequestException:
|
62
63
|
raise ConnectionError(
|
63
64
|
"The provided base_url is not accessible. Please re-check the base_url."
|
64
65
|
)
|
65
|
-
|
66
|
-
|
67
|
-
|
66
|
+
else:
|
67
|
+
# Get the token from the server
|
68
|
+
self.get_token()
|
68
69
|
|
69
70
|
# Set the API keys, if available
|
70
71
|
if self.api_keys:
|
@@ -187,7 +188,24 @@ class RagaAICatalyst:
|
|
187
188
|
logger.error("Token(s) not set")
|
188
189
|
return None
|
189
190
|
|
190
|
-
def
|
191
|
+
def project_use_cases(self):
|
192
|
+
try:
|
193
|
+
headers = {
|
194
|
+
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
195
|
+
}
|
196
|
+
response = requests.get(
|
197
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/usecase",
|
198
|
+
headers=headers,
|
199
|
+
timeout=self.TIMEOUT
|
200
|
+
)
|
201
|
+
response.raise_for_status() # Use raise_for_status to handle HTTP errors
|
202
|
+
usecase = response.json()["data"]["usecase"]
|
203
|
+
return usecase
|
204
|
+
except requests.exceptions.RequestException as e:
|
205
|
+
logger.error(f"Failed to retrieve project use cases: {e}")
|
206
|
+
return []
|
207
|
+
|
208
|
+
def create_project(self, project_name, usecase="Q/A", type="llm"):
|
191
209
|
"""
|
192
210
|
Creates a project with the given project_name, type, and description.
|
193
211
|
|
@@ -199,23 +217,32 @@ class RagaAICatalyst:
|
|
199
217
|
Returns:
|
200
218
|
str: A message indicating the success or failure of the project creation.
|
201
219
|
"""
|
202
|
-
|
220
|
+
# Check if the project already exists
|
221
|
+
existing_projects = self.list_projects()
|
222
|
+
if project_name in existing_projects:
|
223
|
+
raise ValueError(f"Project name '{project_name}' already exists. Please choose a different name.")
|
224
|
+
|
225
|
+
usecase_list = self.project_use_cases()
|
226
|
+
if usecase not in usecase_list:
|
227
|
+
raise ValueError(f"Select a valid usecase from {usecase_list}")
|
228
|
+
|
229
|
+
json_data = {"name": project_name, "type": type, "usecase": usecase}
|
203
230
|
headers = {
|
204
231
|
"Content-Type": "application/json",
|
205
232
|
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
206
233
|
}
|
207
234
|
try:
|
208
235
|
response = requests.post(
|
209
|
-
f"{RagaAICatalyst.BASE_URL}/
|
236
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/project",
|
210
237
|
headers=headers,
|
211
238
|
json=json_data,
|
212
239
|
timeout=self.TIMEOUT,
|
213
240
|
)
|
214
241
|
response.raise_for_status()
|
215
242
|
print(
|
216
|
-
f"Project Created Successfully with name {response.json()['data']['name']}"
|
243
|
+
f"Project Created Successfully with name {response.json()['data']['name']} & usecase {usecase}"
|
217
244
|
)
|
218
|
-
return f'Project Created Successfully with name {response.json()["data"]["name"]}'
|
245
|
+
return f'Project Created Successfully with name {response.json()["data"]["name"]} & usecase {usecase}'
|
219
246
|
|
220
247
|
except requests.exceptions.HTTPError as http_err:
|
221
248
|
if response.status_code == 401:
|
@@ -226,7 +253,7 @@ class RagaAICatalyst:
|
|
226
253
|
)
|
227
254
|
try:
|
228
255
|
response = requests.post(
|
229
|
-
f"{RagaAICatalyst.BASE_URL}/
|
256
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/project",
|
230
257
|
headers=headers,
|
231
258
|
json=json_data,
|
232
259
|
timeout=self.TIMEOUT,
|
@@ -257,6 +284,9 @@ class RagaAICatalyst:
|
|
257
284
|
)
|
258
285
|
return "An unexpected error occurred while creating the project"
|
259
286
|
|
287
|
+
def get_project_id(self, project_name):
|
288
|
+
pass
|
289
|
+
|
260
290
|
def list_projects(self, num_projects=100):
|
261
291
|
"""
|
262
292
|
Retrieves a list of projects with the specified number of projects.
|
@@ -267,25 +297,18 @@ class RagaAICatalyst:
|
|
267
297
|
Returns:
|
268
298
|
list: A list of project names retrieved successfully.
|
269
299
|
"""
|
270
|
-
params = {
|
271
|
-
"size": str(num_projects),
|
272
|
-
"page": "0",
|
273
|
-
"type": "llm",
|
274
|
-
}
|
275
300
|
headers = {
|
276
|
-
"Content-Type": "application/json",
|
277
301
|
"Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
|
278
302
|
}
|
279
303
|
try:
|
280
304
|
response = requests.get(
|
281
|
-
f"{RagaAICatalyst.BASE_URL}/projects",
|
282
|
-
params=params,
|
305
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/projects?size={num_projects}",
|
283
306
|
headers=headers,
|
284
307
|
timeout=self.TIMEOUT,
|
285
308
|
)
|
286
309
|
response.raise_for_status()
|
287
310
|
logger.debug("Projects list retrieved successfully")
|
288
|
-
|
311
|
+
|
289
312
|
project_list = [
|
290
313
|
project["name"] for project in response.json()["data"]["content"]
|
291
314
|
]
|
@@ -300,8 +323,7 @@ class RagaAICatalyst:
|
|
300
323
|
)
|
301
324
|
try:
|
302
325
|
response = requests.get(
|
303
|
-
f"{RagaAICatalyst.BASE_URL}/projects",
|
304
|
-
params=params,
|
326
|
+
f"{RagaAICatalyst.BASE_URL}/v2/llm/projects",
|
305
327
|
headers=headers,
|
306
328
|
timeout=self.TIMEOUT,
|
307
329
|
)
|
@@ -400,3 +422,6 @@ class RagaAICatalyst:
|
|
400
422
|
else:
|
401
423
|
logger.error("Failed to list metrics: %s", str(http_err))
|
402
424
|
return f"Failed to list metrics: {response.json().get('message', 'Unknown error')}"
|
425
|
+
except requests.exceptions.RequestException as e:
|
426
|
+
logger.error(f"Failed to list metrics: {e}")
|
427
|
+
return []
|
@@ -86,13 +86,15 @@ class FileSpanExporter(SpanExporter):
|
|
86
86
|
logger.debug(f"Writing jsonl file: {self.filename}")
|
87
87
|
f.write(json.dumps(export_data) + "\n")
|
88
88
|
|
89
|
+
|
89
90
|
if os.path.exists(json_file_path):
|
90
91
|
with open(json_file_path, "r") as f:
|
91
92
|
data = json.load(f)
|
92
93
|
data.append(export_data)
|
93
94
|
with open(json_file_path, "w") as f:
|
94
95
|
logger.debug(f"Appending to json file: {json_file_path}")
|
95
|
-
json.dump(data, f)
|
96
|
+
json.dump(data, f)
|
97
|
+
|
96
98
|
else:
|
97
99
|
with open(json_file_path, "w") as f:
|
98
100
|
logger.debug(f"Writing json file: {json_file_path}")
|
@@ -102,7 +104,6 @@ class FileSpanExporter(SpanExporter):
|
|
102
104
|
# self._upload_task = self._run_async(self._upload_traces(json_file_path= self.sync_file))
|
103
105
|
self._run_async(self._upload_traces(json_file_path=self.sync_file))
|
104
106
|
self.sync_file = json_file_path
|
105
|
-
|
106
107
|
# asyncio.run(self.server_upload(json_file_path))
|
107
108
|
|
108
109
|
def _run_async(self, coroutine):
|
@@ -7,6 +7,7 @@ from tqdm import tqdm
|
|
7
7
|
import requests
|
8
8
|
from ...ragaai_catalyst import RagaAICatalyst
|
9
9
|
import shutil
|
10
|
+
import pdb
|
10
11
|
|
11
12
|
logger = logging.getLogger(__name__)
|
12
13
|
|
@@ -27,9 +28,22 @@ class RagaExporter:
|
|
27
28
|
"log_source": "metadata",
|
28
29
|
"vector_store": "pipeline",
|
29
30
|
}
|
31
|
+
SCHEMA_MAPPING_NEW = {
|
32
|
+
"trace_id": {"columnType": "traceId"},
|
33
|
+
"trace_uri": {"columnType": "traceUri"},
|
34
|
+
"prompt": {"columnType": "prompt"},
|
35
|
+
"response":{"columnType": "response"},
|
36
|
+
"context": {"columnType": "context"},
|
37
|
+
"llm_model": {"columnType":"pipeline"},
|
38
|
+
"recorded_on": {"columnType": "metadata"},
|
39
|
+
"embed_model": {"columnType":"pipeline"},
|
40
|
+
"log_source": {"columnType": "metadata"},
|
41
|
+
"vector_store":{"columnType":"pipeline"},
|
42
|
+
"feedback": {"columnType":"feedBack"}
|
43
|
+
}
|
30
44
|
TIMEOUT = 10
|
31
45
|
|
32
|
-
def __init__(self, project_name):
|
46
|
+
def __init__(self, project_name, dataset_name):
|
33
47
|
"""
|
34
48
|
Initializes a new instance of the RagaExporter class.
|
35
49
|
|
@@ -41,6 +55,7 @@ class RagaExporter:
|
|
41
55
|
Exception: If the schema check fails or the schema creation fails.
|
42
56
|
"""
|
43
57
|
self.project_name = project_name
|
58
|
+
self.dataset_name = dataset_name
|
44
59
|
RagaExporter.BASE_URL = (
|
45
60
|
os.getenv("RAGAAI_CATALYST_BASE_URL")
|
46
61
|
if os.getenv("RAGAAI_CATALYST_BASE_URL")
|
@@ -55,15 +70,14 @@ class RagaExporter:
|
|
55
70
|
)
|
56
71
|
if not os.getenv("RAGAAI_CATALYST_TOKEN"):
|
57
72
|
get_token()
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
raise Exception("Failed to check schema. Please consider raising an issue.")
|
73
|
+
|
74
|
+
create_status_code = self._create_schema()
|
75
|
+
if create_status_code != 200:
|
76
|
+
raise Exception(
|
77
|
+
"Failed to create schema. Please consider raising an issue."
|
78
|
+
)
|
79
|
+
# elif status_code != 200:
|
80
|
+
# raise Exception("Failed to check schema. Please consider raising an issue.")
|
67
81
|
|
68
82
|
def _check_schema(self):
|
69
83
|
"""
|
@@ -95,6 +109,7 @@ class RagaExporter:
|
|
95
109
|
|
96
110
|
|
97
111
|
def compare_schemas(base_schema, project_schema):
|
112
|
+
|
98
113
|
differences = []
|
99
114
|
for key, base_value in base_schema.items():
|
100
115
|
if key not in project_schema:
|
@@ -117,12 +132,13 @@ class RagaExporter:
|
|
117
132
|
if response.status_code != 200:
|
118
133
|
return response.status_code
|
119
134
|
if response.status_code == 200:
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
135
|
+
pass
|
136
|
+
# project_schema = response.json()["data"]
|
137
|
+
# base_schema = RagaExporter.SCHEMA_MAPPING
|
138
|
+
# is_same, _ = compare_schemas(base_schema, project_schema)
|
139
|
+
# if not is_same:
|
140
|
+
# raise Exception(f"Trace cannot be logged to this Project because of schema difference. Create a new project to log trace")
|
141
|
+
# return response.status_code
|
126
142
|
return response.status_code
|
127
143
|
|
128
144
|
def _create_schema(self):
|
@@ -152,19 +168,21 @@ class RagaExporter:
|
|
152
168
|
"X-Project-Name": self.project_name,
|
153
169
|
}
|
154
170
|
json_data = {
|
155
|
-
"
|
156
|
-
"schemaMapping": RagaExporter.
|
171
|
+
"datasetName": self.dataset_name,
|
172
|
+
"schemaMapping": RagaExporter.SCHEMA_MAPPING_NEW,
|
157
173
|
"traceFolderUrl": None,
|
158
174
|
}
|
159
175
|
response = requests.post(
|
160
|
-
f"{RagaExporter.BASE_URL}/v1/llm/
|
176
|
+
f"{RagaExporter.BASE_URL}/v1/llm/dataset/logs",
|
161
177
|
headers=headers,
|
162
178
|
json=json_data,
|
163
179
|
timeout=RagaExporter.TIMEOUT,
|
164
180
|
)
|
181
|
+
|
165
182
|
return response
|
166
183
|
|
167
184
|
response = make_request()
|
185
|
+
|
168
186
|
if response.status_code == 401:
|
169
187
|
get_token() # Fetch a new token and set it in the environment
|
170
188
|
response = make_request() # Retry the request
|
@@ -178,6 +196,7 @@ class RagaExporter:
|
|
178
196
|
return status_code
|
179
197
|
|
180
198
|
async def get_presigned_url(self, session, num_files):
|
199
|
+
# pdb.set_trace()
|
181
200
|
"""
|
182
201
|
Asynchronously retrieves a presigned URL from the RagaExporter API.
|
183
202
|
|
@@ -194,8 +213,10 @@ class RagaExporter:
|
|
194
213
|
"""
|
195
214
|
|
196
215
|
async def make_request():
|
216
|
+
# pdb.set_trace()
|
217
|
+
|
197
218
|
json_data = {
|
198
|
-
"
|
219
|
+
"datasetName": self.dataset_name,
|
199
220
|
"numFiles": num_files,
|
200
221
|
}
|
201
222
|
headers = {
|
@@ -210,10 +231,9 @@ class RagaExporter:
|
|
210
231
|
timeout=RagaExporter.TIMEOUT,
|
211
232
|
) as response:
|
212
233
|
|
213
|
-
# print(json_response)
|
214
234
|
json_data = await response.json()
|
215
|
-
return response, json_data
|
216
235
|
|
236
|
+
return response, json_data
|
217
237
|
response, json_data = await make_request()
|
218
238
|
await self.response_checker_async(response, "RagaExporter.get_presigned_url")
|
219
239
|
if response.status == 401:
|
@@ -250,8 +270,8 @@ class RagaExporter:
|
|
250
270
|
}
|
251
271
|
|
252
272
|
json_data = {
|
253
|
-
"
|
254
|
-
"
|
273
|
+
"datasetName": self.dataset_name,
|
274
|
+
"presignedUrl": trace_uri,
|
255
275
|
}
|
256
276
|
|
257
277
|
async with session.post(
|
@@ -276,6 +296,8 @@ class RagaExporter:
|
|
276
296
|
return response.status
|
277
297
|
|
278
298
|
async def upload_file(self, session, url, file_path):
|
299
|
+
# pdb.set_trace()
|
300
|
+
# print('url', url)
|
279
301
|
"""
|
280
302
|
Asynchronously uploads a file using the given session, url, and file path.
|
281
303
|
Supports both regular and Azure blob storage URLs.
|
@@ -295,9 +317,8 @@ class RagaExporter:
|
|
295
317
|
"Content-Type": "application/json",
|
296
318
|
}
|
297
319
|
|
298
|
-
if "blob.core.windows.net" in url: # Azure
|
299
|
-
|
300
|
-
|
320
|
+
# if "blob.core.windows.net" in url: # Azure
|
321
|
+
# headers["x-ms-blob-type"] = "BlockBlob"
|
301
322
|
print(f"Uploading traces...")
|
302
323
|
logger.debug(f"Uploading file:{file_path} with url {url}")
|
303
324
|
|
@@ -324,6 +345,8 @@ class RagaExporter:
|
|
324
345
|
return response.status
|
325
346
|
|
326
347
|
async def check_and_upload_files(self, session, file_paths):
|
348
|
+
# print(file_paths)
|
349
|
+
# pdb.set_trace()
|
327
350
|
"""
|
328
351
|
Checks if there are files to upload, gets presigned URLs, uploads files, and streams them if successful.
|
329
352
|
|