ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/__init__.py
CHANGED
```diff
@@ -1,13 +1,34 @@
 from .experiment import Experiment
 from .ragaai_catalyst import RagaAICatalyst
-from .tracers import Tracer
 from .utils import response_checker
 from .dataset import Dataset
 from .prompt_manager import PromptManager
 from .evaluation import Evaluation
 from .synthetic_data_generation import SyntheticDataGeneration
+from .redteaming import RedTeaming
 from .guardrails_manager import GuardrailsManager
 from .guard_executor import GuardExecutor
+from .tracers import Tracer, init_tracing, trace_agent, trace_llm, trace_tool, current_span, trace_custom
+from .redteaming import RedTeaming
 
 
-
+
+
+__all__ = [
+    "Experiment",
+    "RagaAICatalyst",
+    "Tracer",
+    "PromptManager",
+    "Evaluation",
+    "SyntheticDataGeneration",
+    "RedTeaming",
+    "GuardrailsManager",
+    "GuardExecutor",
+    "init_tracing",
+    "trace_agent",
+    "trace_llm",
+    "trace_tool",
+    "current_span",
+    "trace_custom"
+    "RedTeaming"
+]
```
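The new top-level exports make the distributed tracing helpers importable directly from the package root. The sketch below only uses names that appear in the `__all__` list above; every constructor and decorator argument shown is an assumption for illustration, not a signature confirmed by this diff.

```python
# Hypothetical usage of the newly exported tracing helpers; only the imported
# names are confirmed by the diff above, all arguments shown are assumed.
from ragaai_catalyst import RagaAICatalyst, Tracer, init_tracing, trace_tool

catalyst = RagaAICatalyst(access_key="...", secret_key="...")        # assumed kwargs
tracer = Tracer(project_name="demo-project", dataset_name="traces")  # assumed kwargs
init_tracing(catalyst=catalyst, tracer=tracer)                       # assumed kwargs

@trace_tool("normalize_query")  # decorator call form assumed
def normalize_query(query: str) -> str:
    return query.strip().lower()
```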
ragaai_catalyst/dataset.py
CHANGED
```diff
@@ -1,4 +1,7 @@
 import os
+import csv
+import json
+import tempfile
 import requests
 from .utils import response_checker
 from typing import Union
@@ -8,6 +11,10 @@ import pandas as pd
 logger = logging.getLogger(__name__)
 get_token = RagaAICatalyst.get_token
 
+# Job status constants
+JOB_STATUS_FAILED = "failed"
+JOB_STATUS_IN_PROGRESS = "in_progress"
+JOB_STATUS_COMPLETED = "success"
 
 class Dataset:
     BASE_URL = None
@@ -17,6 +24,7 @@ class Dataset:
         self.project_name = project_name
         self.num_projects = 99999
         Dataset.BASE_URL = RagaAICatalyst.BASE_URL
+        self.jobId = None
         headers = {
             "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
         }
@@ -218,7 +226,6 @@ class Dataset:
         try:
 
             put_csv_response = put_csv_to_presignedUrl(url)
-            print(put_csv_response)
             if put_csv_response.status_code not in (200, 201):
                 raise ValueError('Unable to put csv to the presignedUrl')
         except Exception as e:
@@ -268,6 +275,460 @@ class Dataset:
                 raise ValueError('Unable to upload csv')
             else:
                 print(upload_csv_response['message'])
+                self.jobId = upload_csv_response['data']['jobId']
         except Exception as e:
             logger.error(f"Error in create_from_csv: {e}")
             raise
+
+    def add_rows(self, csv_path, dataset_name):
+        """
+        Add rows to an existing dataset from a CSV file.
+
+        Args:
+            csv_path (str): Path to the CSV file to be added
+            dataset_name (str): Name of the existing dataset to add rows to
+
+        Raises:
+            ValueError: If dataset does not exist or columns are incompatible
+        """
+        # Get existing dataset columns
+        existing_columns = self.get_dataset_columns(dataset_name)
+
+        # Read the CSV file to check columns
+        try:
+            import pandas as pd
+            df = pd.read_csv(csv_path)
+            csv_columns = df.columns.tolist()
+        except Exception as e:
+            logger.error(f"Failed to read CSV file: {e}")
+            raise ValueError(f"Unable to read CSV file: {e}")
+
+        # Check column compatibility
+        for column in existing_columns:
+            if column not in csv_columns:
+                df[column] = None
+
+        # Get presigned URL for the CSV
+        def get_presignedUrl():
+            headers = {
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+            try:
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get presigned URL: {e}")
+                raise
+
+        try:
+            presignedUrl = get_presignedUrl()
+            if presignedUrl['success']:
+                url = presignedUrl['data']['presignedUrl']
+                filename = presignedUrl['data']['fileName']
+            else:
+                raise ValueError('Unable to fetch presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in get_presignedUrl: {e}")
+            raise
+
+        # Upload CSV to presigned URL
+        def put_csv_to_presignedUrl(url):
+            headers = {
+                'Content-Type': 'text/csv',
+                'x-ms-blob-type': 'BlockBlob',
+            }
+            try:
+                with open(csv_path, 'rb') as file:
+                    response = requests.put(
+                        url,
+                        headers=headers,
+                        data=file,
+                        timeout=Dataset.TIMEOUT,
+                    )
+                    response.raise_for_status()
+                    return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to put CSV to presigned URL: {e}")
+                raise
+
+        try:
+            put_csv_response = put_csv_to_presignedUrl(url)
+            if put_csv_response.status_code not in (200, 201):
+                raise ValueError('Unable to put csv to the presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in put_csv_to_presignedUrl: {e}")
+            raise
+
+        # Prepare schema mapping (assuming same mapping as original dataset)
+        def generate_schema_mapping(dataset_name):
+            headers = {
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+            json_data = {
+                "size": 12,
+                "page": "0",
+                "projectId": str(self.project_id),
+                "search": ""
+            }
+            try:
+                # First get dataset details
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                datasets = response.json()["data"]["content"]
+                dataset_id = [dataset["id"] for dataset in datasets if dataset["name"]==dataset_name][0]
+
+                # Get dataset details to extract schema mapping
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+
+                # Extract schema mapping
+                schema_mapping = {}
+                for col in response.json()["data"]["datasetColumnsResponses"]:
+                    schema_mapping[col["displayName"]] = {"columnType": col["columnType"]}
+
+                return schema_mapping
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get schema mapping: {e}")
+                raise
+
+        # Upload CSV to elastic
+        try:
+            schema_mapping = generate_schema_mapping(dataset_name)
+
+            data = {
+                "projectId": str(self.project_id),
+                "datasetName": dataset_name,
+                "fileName": filename,
+                "schemaMapping": schema_mapping,
+                "opType": "update",  # Use update for adding rows
+                "description": "Adding new rows to dataset"
+            }
+
+            headers = {
+                'Content-Type': 'application/json',
+                'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id)
+            }
+
+            response = requests.post(
+                f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
+                headers=headers,
+                json=data,
+                timeout=Dataset.TIMEOUT,
+            )
+
+            if response.status_code == 400:
+                raise ValueError(response.json().get("message", "Failed to add rows"))
+
+            response.raise_for_status()
+
+            # Check response
+            response_data = response.json()
+            if response_data.get('success', False):
+                print(f"{response_data['message']}")
+                self.jobId = response_data['data']['jobId']
+            else:
+                raise ValueError(response_data.get('message', 'Failed to add rows'))
+
+        except Exception as e:
+            logger.error(f"Error in add_rows_to_dataset: {e}")
+            raise
+
+    def add_columns(self, text_fields, dataset_name, column_name, provider, model, variables={}):
+        """
+        Add a column to a dataset with dynamically fetched model parameters
+
+        Args:
+            project_id (int): Project ID
+            dataset_id (int): Dataset ID
+            column_name (str): Name of the new column
+            provider (str): Name of the model provider
+            model (str): Name of the model
+        """
+        # First, get model parameters
+
+        # Validate text_fields input
+        if not isinstance(text_fields, list):
+            raise ValueError("text_fields must be a list of dictionaries")
+
+        for field in text_fields:
+            if not isinstance(field, dict) or 'role' not in field or 'content' not in field:
+                raise ValueError("Each text field must be a dictionary with 'role' and 'content' keys")
+
+        # First, get the dataset ID
+        headers = {
+            'Content-Type': 'application/json',
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Id": str(self.project_id),
+        }
+        json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+
+        try:
+            # Get dataset list
+            response = requests.post(
+                f"{Dataset.BASE_URL}/v2/llm/dataset",
+                headers=headers,
+                json=json_data,
+                timeout=Dataset.TIMEOUT,
+            )
+            response.raise_for_status()
+            datasets = response.json()["data"]["content"]
+
+            # Find dataset ID
+            dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+            if dataset_id is None:
+                raise ValueError(f"Dataset {dataset_name} not found")
+
+
+
+            parameters_url= f"{Dataset.BASE_URL}/playground/providers/models/parameters/list"
+
+            headers = {
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+
+            # Fetch model parameters
+            parameters_payload = {
+                "providerName": provider,
+                "modelName": model
+            }
+
+            # Get model parameters
+            params_response = requests.post(
+                parameters_url,
+                headers=headers,
+                json=parameters_payload,
+                timeout=30
+            )
+            params_response.raise_for_status()
+
+            # Extract parameters
+            all_parameters = params_response.json().get('data', [])
+
+            # Filter and transform parameters for add-column API
+            formatted_parameters = []
+            for param in all_parameters:
+                value = param.get('value')
+                param_type = param.get('type')
+
+                if value is None:
+                    formatted_param = {
+                        "name": param.get('name'),
+                        "value": None,  # Pass None if the value is null
+                        "type": param.get('type')
+                    }
+                else:
+                    # Improved type handling
+                    if param_type == "float":
+                        value = float(value)  # Ensure value is converted to float
+                    elif param_type == "int":
+                        value = int(value)  # Ensure value is converted to int
+                    elif param_type == "bool":
+                        value = bool(value)  # Ensure value is converted to bool
+                    elif param_type == "string":
+                        value = str(value)  # Ensure value is converted to string
+                    else:
+                        raise ValueError(f"Unsupported parameter type: {param_type}")  # Handle unsupported types
+
+                    formatted_param = {
+                        "name": param.get('name'),
+                        "value": value,
+                        "type": param.get('type')
+                    }
+                formatted_parameters.append(formatted_param)
+            dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+            # Prepare payload for add column API
+            add_column_payload = {
+                "rowFilterList": [],
+                "columnName": column_name,
+                "datasetId": dataset_id,
+                "variables": variables,
+                "promptTemplate": {
+                    "textFields": text_fields,
+                    "modelSpecs": {
+                        "model": f"{provider}/{model}",
+                        "parameters": formatted_parameters
+                    }
+                }
+            }
+            if variables:
+                variable_specs = []
+                for key, values in variables.items():
+                    variable_specs.append({
+                        "name": key,
+                        "type": "string",
+                        "schema": "query"
+                    })
+                add_column_payload["promptTemplate"]["variableSpecs"] = variable_specs
+
+            # Make API call to add column
+            add_column_url = f"{Dataset.BASE_URL}/v2/llm/dataset/add-column"
+
+            response = requests.post(
+                add_column_url,
+                headers={
+                    'Content-Type': 'application/json',
+                    'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                    "X-Project-Id": str(self.project_id)
+                },
+                json=add_column_payload,
+                timeout=30
+            )
+
+            # Check response
+            response.raise_for_status()
+            response_data = response.json()
+
+            if response_data.get('success', False):
+                print(f"Column '{column_name}' added successfully to dataset '{dataset_name}'")
+                self.jobId = response_data['data']['jobId']
+            else:
+                raise ValueError(response_data.get('message', 'Failed to add column'))
+
+        except requests.exceptions.RequestException as e:
+            print(f"Error adding column: {e}")
+            raise
+
+    def get_status(self):
+        headers = {
+            'Content-Type': 'application/json',
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            'X-Project-Id': str(self.project_id),
+        }
+        try:
+            response = requests.get(
+                f'{Dataset.BASE_URL}/job/status',
+                headers=headers,
+                timeout=30)
+            response.raise_for_status()
+            if response.json()["success"]:
+
+                status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId]
+                status_json = status_json[0]
+                if status_json == "Failed":
+                    print("Job failed. No results to fetch.")
+                    return JOB_STATUS_FAILED
+                elif status_json == "In Progress":
+                    print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+                    return JOB_STATUS_IN_PROGRESS
+                elif status_json == "Completed":
+                    print(f"Job completed. Fetching results.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                    return JOB_STATUS_COMPLETED
+                else:
+                    logger.error(f"Unknown status received: {status_json}")
+                    return JOB_STATUS_FAILED
+            else:
+                logger.error("Request was not successful")
+                return JOB_STATUS_FAILED
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.ConnectionError as conn_err:
+            logger.error(f"Connection error occurred: {conn_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.Timeout as timeout_err:
+            logger.error(f"Timeout error occurred: {timeout_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.RequestException as req_err:
+            logger.error(f"An error occurred: {req_err}")
+            return JOB_STATUS_FAILED
+        except Exception as e:
+            logger.error(f"An unexpected error occurred: {e}")
+            return JOB_STATUS_FAILED
+
+    def _jsonl_to_csv(self, jsonl_file, csv_file):
+        """Convert a JSONL file to a CSV file."""
+        with open(jsonl_file, 'r', encoding='utf-8') as infile:
+            data = [json.loads(line) for line in infile]
+
+        if not data:
+            print("Empty JSONL file.")
+            return
+
+        with open(csv_file, 'w', newline='', encoding='utf-8') as outfile:
+            writer = csv.DictWriter(outfile, fieldnames=data[0].keys())
+            writer.writeheader()
+            writer.writerows(data)
+
+        print(f"Converted {jsonl_file} to {csv_file}")
+
+    def create_from_jsonl(self, jsonl_path, dataset_name, schema_mapping):
+        tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+        try:
+            self._jsonl_to_csv(jsonl_path, tmp_csv_path)
+            self.create_from_csv(tmp_csv_path, dataset_name, schema_mapping)
+        except (IOError, UnicodeError) as e:
+            logger.error(f"Error converting JSONL to CSV: {e}")
+            raise
+        finally:
+            if os.path.exists(tmp_csv_path):
+                try:
+                    os.remove(tmp_csv_path)
+                except Exception as e:
+                    logger.error(f"Error removing temporary CSV file: {e}")
+
+    def add_rows_from_jsonl(self, jsonl_path, dataset_name):
+        tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+        try:
+            self._jsonl_to_csv(jsonl_path, tmp_csv_path)
+            self.add_rows(tmp_csv_path, dataset_name)
+        except (IOError, UnicodeError) as e:
+            logger.error(f"Error converting JSONL to CSV: {e}")
+            raise
+        finally:
+            if os.path.exists(tmp_csv_path):
+                try:
+                    os.remove(tmp_csv_path)
+                except Exception as e:
+                    logger.error(f"Error removing temporary CSV file: {e}")
+
+    def create_from_df(self, df, dataset_name, schema_mapping):
+        tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+        try:
+            df.to_csv(tmp_csv_path, index=False)
+            self.create_from_csv(tmp_csv_path, dataset_name, schema_mapping)
+        except (IOError, UnicodeError) as e:
+            logger.error(f"Error converting DataFrame to CSV: {e}")
+            raise
+        finally:
+            if os.path.exists(tmp_csv_path):
+                try:
+                    os.remove(tmp_csv_path)
+                except Exception as e:
+                    logger.error(f"Error removing temporary CSV file: {e}")
+
+    def add_rows_from_df(self, df, dataset_name):
+        tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+        try:
+            df.to_csv(tmp_csv_path, index=False)
+            self.add_rows(tmp_csv_path, dataset_name)
+        except (IOError, UnicodeError) as e:
+            logger.error(f"Error converting DataFrame to CSV: {e}")
+            raise
+        finally:
+            if os.path.exists(tmp_csv_path):
+                try:
+                    os.remove(tmp_csv_path)
+                except Exception as e:
+                    logger.error(f"Error removing temporary CSV file: {e}")
```
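The new `Dataset` helpers above follow one pattern: convert the input (DataFrame or JSONL) to a temporary CSV, reuse the existing CSV upload path, store the returned `jobId`, and let `get_status()` map the job state onto the `JOB_STATUS_*` constants. A minimal usage sketch, with method names and parameters taken from the diff and the constructor argument and `schema_mapping` values assumed for illustration:

```python
# Sketch of the new Dataset helpers in 2.1.5; method signatures come from the
# diff above, while the constructor argument and schema_mapping values are
# illustrative assumptions.
import pandas as pd
from ragaai_catalyst import Dataset

dataset = Dataset(project_name="demo-project")  # constructor argument assumed

df = pd.DataFrame({"prompt": ["What is RAG?"], "response": ["Retrieval-augmented generation."]})
schema_mapping = {"prompt": "prompt", "response": "response"}  # assumed mapping shape

# DataFrame and JSONL entry points write a temporary CSV and delegate to the CSV path.
dataset.create_from_df(df, dataset_name="demo", schema_mapping=schema_mapping)
dataset.add_rows_from_df(df, dataset_name="demo")

# get_status() polls the job created by the last upload and returns
# "failed", "in_progress", or "success" (the JOB_STATUS_* constants).
print(dataset.get_status())
```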
ragaai_catalyst/evaluation.py
CHANGED
```diff
@@ -4,16 +4,22 @@ import pandas as pd
 import io
 from .ragaai_catalyst import RagaAICatalyst
 import logging
+import json
 
 logger = logging.getLogger(__name__)
 
+# Job status constants
+JOB_STATUS_FAILED = "failed"
+JOB_STATUS_IN_PROGRESS = "in_progress"
+JOB_STATUS_COMPLETED = "success"
+
 class Evaluation:
 
     def __init__(self, project_name, dataset_name):
         self.project_name = project_name
         self.dataset_name = dataset_name
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
-        self.timeout =
+        self.timeout = 20
         self.jobId = None
         self.num_projects=99999
 
@@ -352,6 +358,52 @@ class Evaluation:
         except Exception as e:
             logger.error(f"An unexpected error occurred: {e}")
 
+    def append_metrics(self, display_name):
+        if not isinstance(display_name, str):
+            raise ValueError("display_name should be a string")
+
+        headers = {
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            'X-Project-Id': str(self.project_id),
+            'Content-Type': 'application/json',
+        }
+
+        payload = json.dumps({
+            "datasetId": self.dataset_id,
+            "metricParams": [
+                {
+                    "metricSpec": {
+                        "displayName": display_name
+                    }
+                }
+            ]
+        })
+
+        try:
+            response = requests.request(
+                "POST",
+                f'{self.base_url}/v2/llm/metric-evaluation-rerun',
+                headers=headers,
+                data=payload,
+                timeout=self.timeout)
+            if response.status_code == 400:
+                raise ValueError(response.json()["message"])
+            response.raise_for_status()
+            if response.json()["success"]:
+                print(response.json()["message"])
+                self.jobId = response.json()["data"]["jobId"]
+
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+        except requests.exceptions.ConnectionError as conn_err:
+            logger.error(f"Connection error occurred: {conn_err}")
+        except requests.exceptions.Timeout as timeout_err:
+            logger.error(f"Timeout error occurred: {timeout_err}")
+        except requests.exceptions.RequestException as req_err:
+            logger.error(f"An error occurred: {req_err}")
+        except Exception as e:
+            logger.error(f"An unexpected error occurred: {e}")
+
     def get_status(self):
         headers = {
             'Content-Type': 'application/json',
@@ -366,22 +418,36 @@ class Evaluation:
             response.raise_for_status()
             if response.json()["success"]:
                 status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId][0]
-
-
-
-
-
-
+                if status_json == "Failed":
+                    print("Job failed. No results to fetch.")
+                    return JOB_STATUS_FAILED
+                elif status_json == "In Progress":
+                    print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+                    return JOB_STATUS_IN_PROGRESS
+                elif status_json == "Completed":
+                    print(f"Job completed. Fetching results.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                    return JOB_STATUS_COMPLETED
+                else:
+                    logger.error(f"Unknown status received: {status_json}")
+                    return JOB_STATUS_FAILED
+            else:
+                logger.error("Request was not successful")
+                return JOB_STATUS_FAILED
         except requests.exceptions.HTTPError as http_err:
             logger.error(f"HTTP error occurred: {http_err}")
+            return JOB_STATUS_FAILED
         except requests.exceptions.ConnectionError as conn_err:
             logger.error(f"Connection error occurred: {conn_err}")
+            return JOB_STATUS_FAILED
         except requests.exceptions.Timeout as timeout_err:
             logger.error(f"Timeout error occurred: {timeout_err}")
+            return JOB_STATUS_FAILED
         except requests.exceptions.RequestException as req_err:
             logger.error(f"An error occurred: {req_err}")
+            return JOB_STATUS_FAILED
         except Exception as e:
             logger.error(f"An unexpected error occurred: {e}")
+            return JOB_STATUS_FAILED
 
     def get_results(self):
 
@@ -444,8 +510,11 @@ class Evaluation:
             df = pd.read_csv(io.StringIO(response_text))
 
             column_list = df.columns.to_list()
+            # Remove unwanted columns
             column_list = [col for col in column_list if not col.startswith('_')]
             column_list = [col for col in column_list if '.' not in col]
+            # Remove _claims_ columns
+            column_list = [col for col in column_list if '_claims_' not in col]
             return df[column_list]
         else:
             return pd.DataFrame()
```
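`Evaluation.get_status()` now returns one of the same `JOB_STATUS_*` constants instead of only printing, and the new `append_metrics()` re-runs an existing metric column by display name. A short sketch, with the constructor and method signatures taken from the diff and the metric display name as a placeholder:

```python
# Sketch of the new Evaluation flow in 2.1.5; constructor and method names come
# from the diff above, the metric display name is a placeholder.
from ragaai_catalyst import Evaluation

evaluation = Evaluation(project_name="demo-project", dataset_name="demo")
evaluation.append_metrics(display_name="Hallucination")  # placeholder metric name

# get_status() returns "failed", "in_progress", or "success".
if evaluation.get_status() == "success":
    # get_results() drops columns starting with '_', containing '.', or containing '_claims_'.
    results_df = evaluation.get_results()
```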