ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/__init__.py
@@ -1,13 +1,34 @@
  from .experiment import Experiment
  from .ragaai_catalyst import RagaAICatalyst
- from .tracers import Tracer
  from .utils import response_checker
  from .dataset import Dataset
  from .prompt_manager import PromptManager
  from .evaluation import Evaluation
  from .synthetic_data_generation import SyntheticDataGeneration
+ from .redteaming import RedTeaming
  from .guardrails_manager import GuardrailsManager
  from .guard_executor import GuardExecutor
+ from .tracers import Tracer, init_tracing, trace_agent, trace_llm, trace_tool, current_span, trace_custom
+ from .redteaming import RedTeaming


- __all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation","SyntheticDataGeneration", "GuardrailsManager"]
+
+
+ __all__ = [
+     "Experiment",
+     "RagaAICatalyst",
+     "Tracer",
+     "PromptManager",
+     "Evaluation",
+     "SyntheticDataGeneration",
+     "RedTeaming",
+     "GuardrailsManager",
+     "GuardExecutor",
+     "init_tracing",
+     "trace_agent",
+     "trace_llm",
+     "trace_tool",
+     "current_span",
+     "trace_custom"
+     "RedTeaming"
+ ]
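
Version 2.1.5 re-exports the distributed tracing helpers (init_tracing, trace_agent, trace_llm, trace_tool, trace_custom, current_span) and the new RedTeaming class at the package root. A minimal sketch of how these exports could be wired together follows; the constructor and decorator keyword arguments shown here are assumptions inferred from the export names, not API confirmed by this diff.

# Hedged sketch of the new 2.1.5 top-level exports.
# Keyword arguments (access_key, project_name, catalyst=..., name=...) are assumptions.
from ragaai_catalyst import RagaAICatalyst, Tracer, init_tracing, trace_llm, current_span

catalyst = RagaAICatalyst(access_key="...", secret_key="...")       # placeholder credentials
tracer = Tracer(project_name="my_project", dataset_name="traces")   # placeholder names
init_tracing(catalyst=catalyst, tracer=tracer)                      # assumed to register the tracer globally

@trace_llm(name="generate_answer")
def generate_answer(question: str) -> str:
    span = current_span()   # assumed handle to the active span, e.g. for attaching metadata
    return "stub answer"
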
ragaai_catalyst/dataset.py
@@ -1,4 +1,7 @@
  import os
+ import csv
+ import json
+ import tempfile
  import requests
  from .utils import response_checker
  from typing import Union
@@ -8,6 +11,10 @@ import pandas as pd
  logger = logging.getLogger(__name__)
  get_token = RagaAICatalyst.get_token

+ # Job status constants
+ JOB_STATUS_FAILED = "failed"
+ JOB_STATUS_IN_PROGRESS = "in_progress"
+ JOB_STATUS_COMPLETED = "success"

  class Dataset:
      BASE_URL = None
@@ -17,6 +24,7 @@ class Dataset:
          self.project_name = project_name
          self.num_projects = 99999
          Dataset.BASE_URL = RagaAICatalyst.BASE_URL
+         self.jobId = None
          headers = {
              "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
          }
@@ -218,7 +226,6 @@ class Dataset:
          try:

              put_csv_response = put_csv_to_presignedUrl(url)
-             print(put_csv_response)
              if put_csv_response.status_code not in (200, 201):
                  raise ValueError('Unable to put csv to the presignedUrl')
          except Exception as e:
@@ -268,6 +275,460 @@ class Dataset:
                  raise ValueError('Unable to upload csv')
              else:
                  print(upload_csv_response['message'])
+                 self.jobId = upload_csv_response['data']['jobId']
          except Exception as e:
              logger.error(f"Error in create_from_csv: {e}")
              raise
+
+     def add_rows(self, csv_path, dataset_name):
+         """
+         Add rows to an existing dataset from a CSV file.
+
+         Args:
+             csv_path (str): Path to the CSV file to be added
+             dataset_name (str): Name of the existing dataset to add rows to
+
+         Raises:
+             ValueError: If dataset does not exist or columns are incompatible
+         """
+         # Get existing dataset columns
+         existing_columns = self.get_dataset_columns(dataset_name)
+
+         # Read the CSV file to check columns
+         try:
+             import pandas as pd
+             df = pd.read_csv(csv_path)
+             csv_columns = df.columns.tolist()
+         except Exception as e:
+             logger.error(f"Failed to read CSV file: {e}")
+             raise ValueError(f"Unable to read CSV file: {e}")
+
+         # Check column compatibility
+         for column in existing_columns:
+             if column not in csv_columns:
+                 df[column] = None
+
+         # Get presigned URL for the CSV
+         def get_presignedUrl():
+             headers = {
+                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                 "X-Project-Id": str(self.project_id),
+             }
+             try:
+                 response = requests.get(
+                     f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
+                     headers=headers,
+                     timeout=Dataset.TIMEOUT,
+                 )
+                 response.raise_for_status()
+                 return response.json()
+             except requests.exceptions.RequestException as e:
+                 logger.error(f"Failed to get presigned URL: {e}")
+                 raise
+
+         try:
+             presignedUrl = get_presignedUrl()
+             if presignedUrl['success']:
+                 url = presignedUrl['data']['presignedUrl']
+                 filename = presignedUrl['data']['fileName']
+             else:
+                 raise ValueError('Unable to fetch presignedUrl')
+         except Exception as e:
+             logger.error(f"Error in get_presignedUrl: {e}")
+             raise
+
+         # Upload CSV to presigned URL
+         def put_csv_to_presignedUrl(url):
+             headers = {
+                 'Content-Type': 'text/csv',
+                 'x-ms-blob-type': 'BlockBlob',
+             }
+             try:
+                 with open(csv_path, 'rb') as file:
+                     response = requests.put(
+                         url,
+                         headers=headers,
+                         data=file,
+                         timeout=Dataset.TIMEOUT,
+                     )
+                     response.raise_for_status()
+                     return response
+             except requests.exceptions.RequestException as e:
+                 logger.error(f"Failed to put CSV to presigned URL: {e}")
+                 raise
+
+         try:
+             put_csv_response = put_csv_to_presignedUrl(url)
+             if put_csv_response.status_code not in (200, 201):
+                 raise ValueError('Unable to put csv to the presignedUrl')
+         except Exception as e:
+             logger.error(f"Error in put_csv_to_presignedUrl: {e}")
+             raise
+
+         # Prepare schema mapping (assuming same mapping as original dataset)
+         def generate_schema_mapping(dataset_name):
+             headers = {
+                 'Content-Type': 'application/json',
+                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                 "X-Project-Id": str(self.project_id),
+             }
+             json_data = {
+                 "size": 12,
+                 "page": "0",
+                 "projectId": str(self.project_id),
+                 "search": ""
+             }
+             try:
+                 # First get dataset details
+                 response = requests.post(
+                     f"{Dataset.BASE_URL}/v2/llm/dataset",
+                     headers=headers,
+                     json=json_data,
+                     timeout=Dataset.TIMEOUT,
+                 )
+                 response.raise_for_status()
+                 datasets = response.json()["data"]["content"]
+                 dataset_id = [dataset["id"] for dataset in datasets if dataset["name"]==dataset_name][0]
+
+                 # Get dataset details to extract schema mapping
+                 response = requests.get(
+                     f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
+                     headers=headers,
+                     timeout=Dataset.TIMEOUT,
+                 )
+                 response.raise_for_status()
+
+                 # Extract schema mapping
+                 schema_mapping = {}
+                 for col in response.json()["data"]["datasetColumnsResponses"]:
+                     schema_mapping[col["displayName"]] = {"columnType": col["columnType"]}
+
+                 return schema_mapping
+             except requests.exceptions.RequestException as e:
+                 logger.error(f"Failed to get schema mapping: {e}")
+                 raise
+
+         # Upload CSV to elastic
+         try:
+             schema_mapping = generate_schema_mapping(dataset_name)
+
+             data = {
+                 "projectId": str(self.project_id),
+                 "datasetName": dataset_name,
+                 "fileName": filename,
+                 "schemaMapping": schema_mapping,
+                 "opType": "update", # Use update for adding rows
+                 "description": "Adding new rows to dataset"
+             }
+
+             headers = {
+                 'Content-Type': 'application/json',
+                 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                 "X-Project-Id": str(self.project_id)
+             }
+
+             response = requests.post(
+                 f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
+                 headers=headers,
+                 json=data,
+                 timeout=Dataset.TIMEOUT,
+             )
+
+             if response.status_code == 400:
+                 raise ValueError(response.json().get("message", "Failed to add rows"))
+
+             response.raise_for_status()
+
+             # Check response
+             response_data = response.json()
+             if response_data.get('success', False):
+                 print(f"{response_data['message']}")
+                 self.jobId = response_data['data']['jobId']
+             else:
+                 raise ValueError(response_data.get('message', 'Failed to add rows'))
+
+         except Exception as e:
+             logger.error(f"Error in add_rows_to_dataset: {e}")
+             raise
+
+     def add_columns(self, text_fields, dataset_name, column_name, provider, model, variables={}):
+         """
+         Add a column to a dataset with dynamically fetched model parameters
+
+         Args:
+             project_id (int): Project ID
+             dataset_id (int): Dataset ID
+             column_name (str): Name of the new column
+             provider (str): Name of the model provider
+             model (str): Name of the model
+         """
+         # First, get model parameters
+
+         # Validate text_fields input
+         if not isinstance(text_fields, list):
+             raise ValueError("text_fields must be a list of dictionaries")
+
+         for field in text_fields:
+             if not isinstance(field, dict) or 'role' not in field or 'content' not in field:
+                 raise ValueError("Each text field must be a dictionary with 'role' and 'content' keys")
+
+         # First, get the dataset ID
+         headers = {
+             'Content-Type': 'application/json',
+             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+             "X-Project-Id": str(self.project_id),
+         }
+         json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+
+         try:
+             # Get dataset list
+             response = requests.post(
+                 f"{Dataset.BASE_URL}/v2/llm/dataset",
+                 headers=headers,
+                 json=json_data,
+                 timeout=Dataset.TIMEOUT,
+             )
+             response.raise_for_status()
+             datasets = response.json()["data"]["content"]
+
+             # Find dataset ID
+             dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+             if dataset_id is None:
+                 raise ValueError(f"Dataset {dataset_name} not found")
+
+
+
+             parameters_url= f"{Dataset.BASE_URL}/playground/providers/models/parameters/list"
+
+             headers = {
+                 'Content-Type': 'application/json',
+                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                 "X-Project-Id": str(self.project_id),
+             }
+
+             # Fetch model parameters
+             parameters_payload = {
+                 "providerName": provider,
+                 "modelName": model
+             }
+
+             # Get model parameters
+             params_response = requests.post(
+                 parameters_url,
+                 headers=headers,
+                 json=parameters_payload,
+                 timeout=30
+             )
+             params_response.raise_for_status()
+
+             # Extract parameters
+             all_parameters = params_response.json().get('data', [])
+
+             # Filter and transform parameters for add-column API
+             formatted_parameters = []
+             for param in all_parameters:
+                 value = param.get('value')
+                 param_type = param.get('type')
+
+                 if value is None:
+                     formatted_param = {
+                         "name": param.get('name'),
+                         "value": None, # Pass None if the value is null
+                         "type": param.get('type')
+                     }
+                 else:
+                     # Improved type handling
+                     if param_type == "float":
+                         value = float(value) # Ensure value is converted to float
+                     elif param_type == "int":
+                         value = int(value) # Ensure value is converted to int
+                     elif param_type == "bool":
+                         value = bool(value) # Ensure value is converted to bool
+                     elif param_type == "string":
+                         value = str(value) # Ensure value is converted to string
+                     else:
+                         raise ValueError(f"Unsupported parameter type: {param_type}") # Handle unsupported types
+
+                     formatted_param = {
+                         "name": param.get('name'),
+                         "value": value,
+                         "type": param.get('type')
+                     }
+                 formatted_parameters.append(formatted_param)
+             dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+             # Prepare payload for add column API
+             add_column_payload = {
+                 "rowFilterList": [],
+                 "columnName": column_name,
+                 "datasetId": dataset_id,
+                 "variables": variables,
+                 "promptTemplate": {
+                     "textFields": text_fields,
+                     "modelSpecs": {
+                         "model": f"{provider}/{model}",
+                         "parameters": formatted_parameters
+                     }
+                 }
+             }
+             if variables:
+                 variable_specs = []
+                 for key, values in variables.items():
+                     variable_specs.append({
+                         "name": key,
+                         "type": "string",
+                         "schema": "query"
+                     })
+                 add_column_payload["promptTemplate"]["variableSpecs"] = variable_specs
+
+             # Make API call to add column
+             add_column_url = f"{Dataset.BASE_URL}/v2/llm/dataset/add-column"
+
+             response = requests.post(
+                 add_column_url,
+                 headers={
+                     'Content-Type': 'application/json',
+                     'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                     "X-Project-Id": str(self.project_id)
+                 },
+                 json=add_column_payload,
+                 timeout=30
+             )
+
+             # Check response
+             response.raise_for_status()
+             response_data = response.json()
+
+             if response_data.get('success', False):
+                 print(f"Column '{column_name}' added successfully to dataset '{dataset_name}'")
+                 self.jobId = response_data['data']['jobId']
+             else:
+                 raise ValueError(response_data.get('message', 'Failed to add column'))
+
+         except requests.exceptions.RequestException as e:
+             print(f"Error adding column: {e}")
+             raise
+
+     def get_status(self):
+         headers = {
+             'Content-Type': 'application/json',
+             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+             'X-Project-Id': str(self.project_id),
+         }
+         try:
+             response = requests.get(
+                 f'{Dataset.BASE_URL}/job/status',
+                 headers=headers,
+                 timeout=30)
+             response.raise_for_status()
+             if response.json()["success"]:
+
+                 status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId]
+                 status_json = status_json[0]
+                 if status_json == "Failed":
+                     print("Job failed. No results to fetch.")
+                     return JOB_STATUS_FAILED
+                 elif status_json == "In Progress":
+                     print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+                     return JOB_STATUS_IN_PROGRESS
+                 elif status_json == "Completed":
+                     print(f"Job completed. Fetching results.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                     return JOB_STATUS_COMPLETED
+                 else:
+                     logger.error(f"Unknown status received: {status_json}")
+                     return JOB_STATUS_FAILED
+             else:
+                 logger.error("Request was not successful")
+                 return JOB_STATUS_FAILED
+         except requests.exceptions.HTTPError as http_err:
+             logger.error(f"HTTP error occurred: {http_err}")
+             return JOB_STATUS_FAILED
+         except requests.exceptions.ConnectionError as conn_err:
+             logger.error(f"Connection error occurred: {conn_err}")
+             return JOB_STATUS_FAILED
+         except requests.exceptions.Timeout as timeout_err:
+             logger.error(f"Timeout error occurred: {timeout_err}")
+             return JOB_STATUS_FAILED
+         except requests.exceptions.RequestException as req_err:
+             logger.error(f"An error occurred: {req_err}")
+             return JOB_STATUS_FAILED
+         except Exception as e:
+             logger.error(f"An unexpected error occurred: {e}")
+             return JOB_STATUS_FAILED
+
+     def _jsonl_to_csv(self, jsonl_file, csv_file):
+         """Convert a JSONL file to a CSV file."""
+         with open(jsonl_file, 'r', encoding='utf-8') as infile:
+             data = [json.loads(line) for line in infile]
+
+         if not data:
+             print("Empty JSONL file.")
+             return
+
+         with open(csv_file, 'w', newline='', encoding='utf-8') as outfile:
+             writer = csv.DictWriter(outfile, fieldnames=data[0].keys())
+             writer.writeheader()
+             writer.writerows(data)
+
+         print(f"Converted {jsonl_file} to {csv_file}")
+
+     def create_from_jsonl(self, jsonl_path, dataset_name, schema_mapping):
+         tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+         try:
+             self._jsonl_to_csv(jsonl_path, tmp_csv_path)
+             self.create_from_csv(tmp_csv_path, dataset_name, schema_mapping)
+         except (IOError, UnicodeError) as e:
+             logger.error(f"Error converting JSONL to CSV: {e}")
+             raise
+         finally:
+             if os.path.exists(tmp_csv_path):
+                 try:
+                     os.remove(tmp_csv_path)
+                 except Exception as e:
+                     logger.error(f"Error removing temporary CSV file: {e}")
+
+     def add_rows_from_jsonl(self, jsonl_path, dataset_name):
+         tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+         try:
+             self._jsonl_to_csv(jsonl_path, tmp_csv_path)
+             self.add_rows(tmp_csv_path, dataset_name)
+         except (IOError, UnicodeError) as e:
+             logger.error(f"Error converting JSONL to CSV: {e}")
+             raise
+         finally:
+             if os.path.exists(tmp_csv_path):
+                 try:
+                     os.remove(tmp_csv_path)
+                 except Exception as e:
+                     logger.error(f"Error removing temporary CSV file: {e}")
+
+     def create_from_df(self, df, dataset_name, schema_mapping):
+         tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+         try:
+             df.to_csv(tmp_csv_path, index=False)
+             self.create_from_csv(tmp_csv_path, dataset_name, schema_mapping)
+         except (IOError, UnicodeError) as e:
+             logger.error(f"Error converting DataFrame to CSV: {e}")
+             raise
+         finally:
+             if os.path.exists(tmp_csv_path):
+                 try:
+                     os.remove(tmp_csv_path)
+                 except Exception as e:
+                     logger.error(f"Error removing temporary CSV file: {e}")
+
+     def add_rows_from_df(self, df, dataset_name):
+         tmp_csv_path = os.path.join(tempfile.gettempdir(), f"{dataset_name}.csv")
+         try:
+             df.to_csv(tmp_csv_path, index=False)
+             self.add_rows(tmp_csv_path, dataset_name)
+         except (IOError, UnicodeError) as e:
+             logger.error(f"Error converting DataFrame to CSV: {e}")
+             raise
+         finally:
+             if os.path.exists(tmp_csv_path):
+                 try:
+                     os.remove(tmp_csv_path)
+                 except Exception as e:
+                     logger.error(f"Error removing temporary CSV file: {e}")
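
Together, the new Dataset methods cover a full ingestion workflow: create a dataset from a DataFrame, JSONL file, or CSV, append rows to it, add a model-generated column, and poll the resulting asynchronous job. A hedged usage sketch follows; the project name, dataset name, and schema_mapping values are placeholders.

# Hedged usage sketch for the Dataset helpers added in 2.1.5.
import pandas as pd
from ragaai_catalyst import Dataset

dataset_manager = Dataset(project_name="my_project")   # placeholder project

# Create a dataset from an in-memory DataFrame (written to a temporary CSV internally)
df = pd.DataFrame({"prompt": ["What is RAG?"], "response": ["Retrieval-augmented generation."]})
dataset_manager.create_from_df(
    df=df,
    dataset_name="qa_dataset",
    schema_mapping={"prompt": "prompt", "response": "response"},   # assumed mapping format
)

# Append rows later from a CSV (or JSONL via add_rows_from_jsonl) with compatible columns
dataset_manager.add_rows(csv_path="more_rows.csv", dataset_name="qa_dataset")

# Poll the ingestion job; returns JOB_STATUS_FAILED / JOB_STATUS_IN_PROGRESS / JOB_STATUS_COMPLETED
# ("failed" / "in_progress" / "success")
status = dataset_manager.get_status()
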
ragaai_catalyst/evaluation.py
@@ -4,16 +4,22 @@ import pandas as pd
  import io
  from .ragaai_catalyst import RagaAICatalyst
  import logging
+ import json

  logger = logging.getLogger(__name__)

+ # Job status constants
+ JOB_STATUS_FAILED = "failed"
+ JOB_STATUS_IN_PROGRESS = "in_progress"
+ JOB_STATUS_COMPLETED = "success"
+
  class Evaluation:

      def __init__(self, project_name, dataset_name):
          self.project_name = project_name
          self.dataset_name = dataset_name
          self.base_url = f"{RagaAICatalyst.BASE_URL}"
-         self.timeout = 10
+         self.timeout = 20
          self.jobId = None
          self.num_projects=99999

@@ -352,6 +358,52 @@ class Evaluation:
          except Exception as e:
              logger.error(f"An unexpected error occurred: {e}")

+     def append_metrics(self, display_name):
+         if not isinstance(display_name, str):
+             raise ValueError("display_name should be a string")
+
+         headers = {
+             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+             'X-Project-Id': str(self.project_id),
+             'Content-Type': 'application/json',
+         }
+
+         payload = json.dumps({
+             "datasetId": self.dataset_id,
+             "metricParams": [
+                 {
+                     "metricSpec": {
+                         "displayName": display_name
+                     }
+                 }
+             ]
+         })
+
+         try:
+             response = requests.request(
+                 "POST",
+                 f'{self.base_url}/v2/llm/metric-evaluation-rerun',
+                 headers=headers,
+                 data=payload,
+                 timeout=self.timeout)
+             if response.status_code == 400:
+                 raise ValueError(response.json()["message"])
+             response.raise_for_status()
+             if response.json()["success"]:
+                 print(response.json()["message"])
+                 self.jobId = response.json()["data"]["jobId"]
+
+         except requests.exceptions.HTTPError as http_err:
+             logger.error(f"HTTP error occurred: {http_err}")
+         except requests.exceptions.ConnectionError as conn_err:
+             logger.error(f"Connection error occurred: {conn_err}")
+         except requests.exceptions.Timeout as timeout_err:
+             logger.error(f"Timeout error occurred: {timeout_err}")
+         except requests.exceptions.RequestException as req_err:
+             logger.error(f"An error occurred: {req_err}")
+         except Exception as e:
+             logger.error(f"An unexpected error occurred: {e}")
+
      def get_status(self):
          headers = {
              'Content-Type': 'application/json',
@@ -366,22 +418,36 @@
              response.raise_for_status()
              if response.json()["success"]:
                  status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId][0]
-                 if status_json == "Failed":
-                     return print("Job failed. No results to fetch.")
-                 elif status_json == "In Progress":
-                     return print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
-                 elif status_json == "Completed":
-                     print(f"Job completed. Fetching results.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                 if status_json == "Failed":
+                     print("Job failed. No results to fetch.")
+                     return JOB_STATUS_FAILED
+                 elif status_json == "In Progress":
+                     print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+                     return JOB_STATUS_IN_PROGRESS
+                 elif status_json == "Completed":
+                     print(f"Job completed. Fetching results.\nVisit Job Status: {self.base_url.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                     return JOB_STATUS_COMPLETED
+                 else:
+                     logger.error(f"Unknown status received: {status_json}")
+                     return JOB_STATUS_FAILED
+             else:
+                 logger.error("Request was not successful")
+                 return JOB_STATUS_FAILED
          except requests.exceptions.HTTPError as http_err:
              logger.error(f"HTTP error occurred: {http_err}")
+             return JOB_STATUS_FAILED
          except requests.exceptions.ConnectionError as conn_err:
              logger.error(f"Connection error occurred: {conn_err}")
+             return JOB_STATUS_FAILED
          except requests.exceptions.Timeout as timeout_err:
              logger.error(f"Timeout error occurred: {timeout_err}")
+             return JOB_STATUS_FAILED
          except requests.exceptions.RequestException as req_err:
              logger.error(f"An error occurred: {req_err}")
+             return JOB_STATUS_FAILED
          except Exception as e:
              logger.error(f"An unexpected error occurred: {e}")
+             return JOB_STATUS_FAILED

      def get_results(self):

@@ -444,8 +510,11 @@
              df = pd.read_csv(io.StringIO(response_text))

              column_list = df.columns.to_list()
+             # Remove unwanted columns
              column_list = [col for col in column_list if not col.startswith('_')]
              column_list = [col for col in column_list if '.' not in col]
+             # Remove _claims_ columns
+             column_list = [col for col in column_list if '_claims_' not in col]
              return df[column_list]
          else:
              return pd.DataFrame()
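
With these changes Evaluation.get_status() returns the same machine-readable job-status constants as Dataset.get_status(), and the new append_metrics() re-runs an already configured metric by its display name. A short sketch of the resulting polling loop; the metric display name and the project and dataset names are placeholders.

# Hedged sketch: re-running a configured metric and polling until the job finishes.
import time
from ragaai_catalyst import Evaluation

evaluation = Evaluation(project_name="my_project", dataset_name="qa_dataset")   # placeholders

evaluation.append_metrics(display_name="Hallucination")   # placeholder metric name, new in 2.1.5

# get_status() now returns "failed" / "in_progress" / "success" instead of only printing
while evaluation.get_status() == "in_progress":
    time.sleep(10)

results_df = evaluation.get_results()   # pandas DataFrame with internal and _claims_ columns filtered out
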