ragaai-catalyst 2.1.5b29__py3-none-any.whl → 2.1.5b30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. ragaai_catalyst/__init__.py +2 -0
  2. ragaai_catalyst/redteaming/__init__.py +7 -0
  3. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  4. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  5. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  6. ragaai_catalyst/redteaming/evaluator.py +125 -0
  7. ragaai_catalyst/redteaming/llm_generator.py +83 -0
  8. ragaai_catalyst/redteaming/llm_generator_litellm.py +66 -0
  9. ragaai_catalyst/redteaming/red_teaming.py +329 -0
  10. ragaai_catalyst/redteaming/requirements.txt +4 -0
  11. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  12. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  13. ragaai_catalyst/redteaming/upload_result.py +38 -0
  14. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  15. ragaai_catalyst/redteaming_old.py +171 -0
  16. ragaai_catalyst/synthetic_data_generation.py +344 -13
  17. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +2 -6
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +22 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +0 -13
  20. ragaai_catalyst/tracers/tracer.py +33 -2
  21. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/METADATA +19 -2
  22. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/RECORD +25 -12
  23. ragaai_catalyst/redteaming.py +0 -171
  24. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/LICENSE +0 -0
  25. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/WHEEL +0 -0
  26. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/top_level.txt +0 -0
ragaai_catalyst/synthetic_data_generation.py
@@ -1,25 +1,28 @@
  import os
- from groq import Groq
- import google.generativeai as genai
- import openai
- import PyPDF2
+ import ast
  import csv
+ import json
+ import random
+ import PyPDF2
  import markdown
  import pandas as pd
- import json
- from litellm import completion
- import litellm
  from tqdm import tqdm
+
+ import openai
  import tiktoken
- # import internal_api_completion
- # import proxy_call
+ import litellm
+ import google.generativeai as genai
+ from groq import Groq
+ from litellm import completion
+
  from .internal_api_completion import api_completion as internal_api_completion
  from .proxy_call import api_completion as proxy_api_completion
- # from ragaai_catalyst import internal_api_completion
- # from ragaai_catalyst import proxy_call
- import ast

- # dotenv.load_dotenv()
+ from typing import Optional, List, Dict, Any
+
+ import logging
+
+ logger = logging.getLogger(__name__)


  class SyntheticDataGeneration:
      """
@@ -329,6 +332,59 @@ class SyntheticDataGeneration:

          json_data = json.loads(content)
          return pd.DataFrame(json_data)
+
+     def _generate_raw_llm_response(self, text, system_message: Optional[str] = None, model_config: Dict[str, Any] = dict(), api_key=None):
+         """
+         Generate questions using LiteLLM, which supports multiple providers (OpenAI, Groq, Gemini, etc.).
+
+         Args:
+             text (str): The input text to generate questions from.
+             system_message (str): The system message for the AI model.
+             model_config (dict): Configuration dictionary containing model details.
+                 Required keys:
+                 - model: The model identifier (e.g., "gpt-4", "gemini-pro", "mixtral-8x7b-32768")
+                 Optional keys:
+                 - api_base: Custom API base URL if needed
+                 - max_tokens: Maximum tokens in response
+                 - temperature: Temperature for response generation
+             api_key (str, optional): The API key for the model provider.
+
+         Returns:
+             str: The raw text content of the LLM response.
+
+         Raises:
+             Exception: If there's an error in generating the response.
+         """
+         messages = [
+             {"role": "system", "content": system_message},
+             {"role": "user", "content": text}
+         ]
+
+         completion_params = {
+             "model": model_config.get("model", 'gpt-4o'),
+             "messages": messages,
+             "api_key": api_key
+         }
+
+         if "api_base" in model_config:
+             completion_params["api_base"] = model_config["api_base"]
+         if "api_version" in model_config:
+             completion_params["api_version"] = model_config["api_version"]
+         if "max_tokens" in model_config:
+             completion_params["max_tokens"] = model_config["max_tokens"]
+         if "temperature" in model_config:
+             completion_params["temperature"] = model_config["temperature"]
+         if 'provider' in model_config:
+             completion_params['model'] = f'{model_config["provider"]}/{model_config["model"]}'
+
+         try:
+             response = completion(**completion_params)
+         except Exception as e:
+             if any(error in str(e).lower() for error in ["invalid api key", "incorrect api key", "unauthorized", "authentication"]):
+                 raise ValueError(f"Invalid API key provided for {model_config.get('provider', 'the specified')} provider")
+             raise Exception(f"Error calling LLM API: {str(e)}")
+
+         return response.choices[0].message.content

      def _parse_response(self, response, provider):
          """
@@ -478,6 +534,281 @@ class SyntheticDataGeneration:
          list: A list of supported AI providers.
          """
          return ['gemini', 'openai', 'azure']
+
+     def _get_init_ex_gen_prompt(self):
+         prompt = '''
+         You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+         **Inputs**
+         User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+         User Examples[Optional]: The user may supply examples.
+         User Context[Optional]: The user may supply context to generate the examples from.
+         No of Examples: The total number of examples to produce.
+
+         **Steps to follow**
+         1. Carefully analyze the user's instruction.
+         2. If user examples are provided, check whether the user's instructions refer to them specifically.
+         3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+         4. Comply with the system's guidelines to generate examples, incorporating any user examples or user context as needed.
+
+         **Output Format**:
+         - Present examples in a multiline string with each line a separate example.
+         - Avoid markdown or special formatting.
+         - Omit any boilerplate text.
+
+         **Instructions for Diversity**:
+         - Vary the examples by context, tone, and (if applicable) technical complexity.
+         - Include edge cases or unconventional scenarios.
+         - Ensure no two examples are conceptually identical.
+
+         **Final Notes**:
+         - Focus on both originality and practical relevance.
+         - Avoid repetitiveness in the examples.
+         '''
+         return prompt
+
+     def _get_iter_ex_gen_prompt(self):
+         prompt = '''
+         You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+         **Inputs**
+         User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+         User Examples[Optional]: The user may supply examples.
+         User Context[Optional]: The user may supply context to generate the examples from.
+         No of Examples: The total number of examples to produce.
+         Relevant Examples: Any examples that are relevant to the user's instruction.
+         Irrelevant Examples: Any examples that are not relevant to the user's instruction.
+
+         **Steps to follow**
+         1. Carefully analyze the user's instruction.
+         2. If user examples are provided, check whether the user's instructions refer to them specifically.
+         3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+         4. Review the relevant and irrelevant examples present, understanding the differences between them.
+         5. Comply with the user's instruction to generate examples, similar to the relevant examples and dissimilar to the irrelevant ones.
+
+         **Output Format**:
+         - Present examples in a multiline string with each line a separate example.
+         - Avoid markdown or special formatting.
+         - Omit any boilerplate text.
+
+         **Instructions for Diversity**:
+         - Vary the examples by context, tone, and (if applicable) technical complexity.
+         - Include edge cases or unconventional scenarios.
+         - Ensure no two examples are conceptually identical.
+
+         **Final Notes**:
+         - Focus on both originality and practical relevance.
+         - Avoid repetitiveness in the examples.
+         '''
+         return prompt
+
+     def _generate_examples_iter(
+             self,
+             user_instruction: str,
+             user_examples: Optional[List[str] | str] = None,
+             user_context: Optional[str] = None,
+             relevant_examples: List[str] = [], irrelevant_examples: List[str] = [],
+             no_examples: Optional[int] = None,
+             model_config: Dict[str, Any] = dict(),
+             api_key: Optional[str] = None
+     ):
+         if not no_examples:
+             no_examples = 5
+         relevant_examples_str = '\n'.join(relevant_examples)
+         irrelevant_examples_str = '\n'.join(irrelevant_examples)
+         user_message = f'**User Instruction:** {user_instruction}'
+         user_message += f'\n\n**No of Examples:** {no_examples}'
+         if user_examples:
+             if isinstance(user_examples, str):
+                 user_examples_str = user_examples
+             elif isinstance(user_examples, list):
+                 user_examples_str = "\n".join(user_examples)
+             else:
+                 raise ValueError(f'Expected string or list of strings as user_examples, got {type(user_examples)}')
+             user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+         if relevant_examples:
+             user_message += f'\n\n**Relevant Examples:** \n{relevant_examples_str}'
+         if irrelevant_examples:
+             user_message += f'\n\n**Irrelevant Examples:** \n{irrelevant_examples_str}'
+         if user_context:
+             user_message += f'\n\n**User Context:** \n{user_context}'
+         system_prompt = self._get_iter_ex_gen_prompt()
+         return self._generate_raw_llm_response(user_message, system_prompt, model_config=model_config, api_key=api_key)
+
+     def _generate_examples(
+             self,
+             user_instruction: str,
+             user_examples: Optional[List[str] | str] = None,
+             user_context: Optional[str] = None,
+             no_examples: Optional[int] = None,
+             model_config: Dict[str, Any] = dict(),
+             api_key: Optional[str] = None
+     ):
+         if not no_examples:
+             no_examples = 5
+         user_message = f"**User Instruction:** {user_instruction}"
+         if user_examples:
+             if isinstance(user_examples, str):
+                 user_examples_str = user_examples
+             elif isinstance(user_examples, list):
+                 user_examples_str = "\n".join(user_examples)
+             else:
+                 raise ValueError(f'Expected string or list of strings as user_examples, got {type(user_examples)}')
+             user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+         if user_context:
+             user_message += f'\n\n**User Context:** \n{user_context}'
+         user_message += f'\n\n**No of Examples:** {no_examples}'
+         init_system_prompt = self._get_init_ex_gen_prompt()
+         return self._generate_raw_llm_response(user_message, init_system_prompt, model_config=model_config, api_key=api_key)
+
+     def _get_valid_examples(self, user_indices_str: str, examples: List[str]):
+         valid_examples = []
+         try:
+             user_indices = user_indices_str.strip().split(',')
+             for index_str in user_indices:
+                 try:
+                     index = int(index_str)
+                     if index <= 0 or index > len(examples):
+                         continue
+                 except ValueError:
+                     continue
+                 valid_examples.append(examples[index - 1])
+         except Exception as e:
+             print(f'Error: {e}')
+         return valid_examples
+
+     def generate_examples(
+             self,
+             user_instruction: str,
+             user_examples: Optional[List[str] | str] = None,
+             no_examples: Optional[int] = None,
+             model_config: Optional[Dict[str, Any]] = None,
+             api_key: Optional[str] = None,
+             max_iter: int = 0,
+             **kwargs
+     ):
+         if not model_config:
+             model_config = {}
+         provider = model_config.get("provider")
+         api_base = model_config.get("api_base")
+         api_version = model_config.get("api_version")
+         self._initialize_client(provider, api_key, api_base, api_version, internal_llm_proxy=kwargs.get("internal_llm_proxy", None))
+
+         if not no_examples:
+             no_examples = 5
+         relevant_examples = []
+         irrelevant_examples = []
+         max_relevant_examples = 5
+         max_irrelevant_examples = 10
+         while len(relevant_examples) <= max_relevant_examples or len(irrelevant_examples) <= max_irrelevant_examples:
+             if max_iter <= 0:
+                 break
+             if len(relevant_examples) > max_relevant_examples:
+                 relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+             if len(irrelevant_examples) > max_irrelevant_examples:
+                 irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+             if relevant_examples or irrelevant_examples:
+                 examples_str = self._generate_examples_iter(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     relevant_examples=relevant_examples,
+                     irrelevant_examples=irrelevant_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+             else:
+                 examples_str = self._generate_examples(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+             examples = [example for example in examples_str.split('\n') if example.strip()]
+             print('Generated Examples:')
+             for i, example in enumerate(examples):
+                 print(f'{i + 1}. {example}')
+             relevant_indices = input('Enter the indices of relevant examples (comma-separated): ').strip()
+             if relevant_indices:
+                 relevant_examples.extend(self._get_valid_examples(relevant_indices, examples))
+             irrelevant_indices = input('Enter the indices of irrelevant examples (comma-separated): ').strip()
+             if irrelevant_indices:
+                 irrelevant_examples.extend(self._get_valid_examples(irrelevant_indices, examples))
+             max_iter -= 1
+         if len(relevant_examples) > max_relevant_examples:
+             fin_relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+         else:
+             fin_relevant_examples = relevant_examples
+         if len(irrelevant_examples) > max_irrelevant_examples:
+             fin_irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+         else:
+             fin_irrelevant_examples = irrelevant_examples
+         if relevant_examples or irrelevant_examples:
+             if len(relevant_examples) < no_examples:
+                 more_no_examples = no_examples - len(relevant_examples)
+                 final_examples_str = self._generate_examples_iter(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     relevant_examples=fin_relevant_examples,
+                     irrelevant_examples=fin_irrelevant_examples,
+                     no_examples=more_no_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+                 final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+                 final_examples.extend(relevant_examples)
+             else:
+                 final_examples = random.sample(relevant_examples, no_examples)
+         else:
+             final_examples_str = self._generate_examples(
+                 user_instruction=user_instruction,
+                 user_examples=user_examples,
+                 no_examples=no_examples,
+                 model_config=model_config,
+                 api_key=api_key
+             )
+             final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+         return final_examples
+
+
+     def generate_examples_from_csv(
+             self,
+             csv_path: str,
+             dst_csv_path: Optional[str] = None,
+             no_examples: Optional[int] = None,
+             model_config: Optional[Dict[str, Any]] = None,
+             api_key: Optional[str] = None,
+             **kwargs
+     ):
+         if not no_examples:
+             no_examples = 5
+         df = pd.read_csv(csv_path)
+         assert 'user_instruction' in df.columns, 'The csv must have a column named user_instruction'
+         fin_df_list = []
+         for i, row in df.iterrows():
+             user_instruction = row['user_instruction']
+             user_examples = row.get('user_examples')
+             user_context = row.get('user_context')
+             row_dict = row.to_dict()
+             examples = self.generate_examples(
+                 user_instruction=user_instruction,
+                 user_examples=user_examples,
+                 user_context=user_context,
+                 no_examples=no_examples,
+                 model_config=model_config,
+                 api_key=api_key
+             )
+             row_dict['generated_examples'] = examples
+             fin_df_list.append(row_dict)
+         fin_df = pd.DataFrame(fin_df_list)
+         csv_file, csv_ext = os.path.splitext(csv_path)
+         if not dst_csv_path:
+             dst_csv_path = csv_file + '_with_examples' + csv_ext
+         dst_dir = os.path.dirname(dst_csv_path)
+         if dst_dir:
+             os.makedirs(dst_dir, exist_ok=True)
+         fin_df.to_csv(dst_csv_path)
+         logger.info(f'CSV with generated examples saved at {dst_csv_path}')
+

  # Usage:
  # from synthetic_data_generation import SyntheticDataGeneration
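The new `generate_examples` flow is interactive: relevance feedback is collected via `input()` on each round, and with `max_iter` at its default of 0 the loop breaks immediately, so a single non-interactive generation runs. A minimal sketch of both paths, assuming `SyntheticDataGeneration` is still exported from the package root (instruction, model, and key are illustrative):

```python
from ragaai_catalyst import SyntheticDataGeneration

sdg = SyntheticDataGeneration()

# Non-interactive path: max_iter defaults to 0, so the feedback loop is skipped
examples = sdg.generate_examples(
    user_instruction='Generate short customer-support queries.',
    no_examples=5,
    model_config={'provider': 'openai', 'model': 'gpt-4o-mini'},
    api_key='YOUR_API_KEY',  # placeholder
)

# Interactive path: up to two rounds of marking generated examples as
# relevant/irrelevant at the input() prompts before the final set is produced
refined = sdg.generate_examples(
    user_instruction='Generate short customer-support queries.',
    no_examples=5,
    max_iter=2,
    model_config={'provider': 'openai', 'model': 'gpt-4o-mini'},
    api_key='YOUR_API_KEY',  # placeholder
)
```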
ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py
@@ -626,12 +626,8 @@ class LLMTracerMixin:
          # TODO TO check i/p and o/p is according or not
          input = input_data["args"] if hasattr(input_data, "args") else input_data
          output = output_data.output_response if output_data else None
-         #print("Prompt input:",input)
          prompt = self.convert_to_content(input)
-         #print("Prompt Output: ",prompt)
-         #print("Response input: ",output)
          response = self.convert_to_content(output)
-         #print("Response output: ",response)

          # TODO: Execute & Add the User requested metrics here
          formatted_metrics = BaseTracer.get_formatted_metric(self.span_attributes_dict, self.project_id, name)
@@ -778,7 +774,7 @@ class LLMTracerMixin:
              token_usage = extract_token_usage(result)
          else:
              token_usage = extract_token_usage(result)
-         cost = calculate_llm_cost(token_usage, model_name, self.model_costs)
+         cost = calculate_llm_cost(token_usage, model_name, self.model_costs, self.model_custom_cost)
          parameters = extract_parameters(kwargs)
          input_data = extract_input_data(args, kwargs, result)

@@ -887,7 +883,7 @@ class LLMTracerMixin:
              token_usage = extract_token_usage(result)
          else:
              token_usage = extract_token_usage(result)
-         cost = calculate_llm_cost(token_usage, model_name, self.model_costs)
+         cost = calculate_llm_cost(token_usage, model_name, self.model_costs, self.model_custom_cost)
          parameters = extract_parameters(kwargs)
          input_data = extract_input_data(args, kwargs, result)

ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py
@@ -45,6 +45,11 @@ def extract_model_name(args, kwargs, result):
          result = result.to_dict()
          if 'model_version' in result:
              model = result['model_version']
+         try:
+             if not model:
+                 model = result.raw.model
+         except Exception:
+             pass


      # Normalize Google model names
@@ -150,6 +155,15 @@ def extract_token_usage(result):
              "total_tokens": getattr(metadata, "total_token_count", 0)
          }

+     # Handle ChatResponse format with raw usage
+     if hasattr(result, "raw") and hasattr(result.raw, "usage"):
+         usage = result.raw.usage
+         return {
+             "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+             "completion_tokens": getattr(usage, "completion_tokens", 0),
+             "total_tokens": getattr(usage, "total_tokens", 0)
+         }
+
      # Handle ChatResult format with generations
      if hasattr(result, "generations") and result.generations:
          # Get the first generation
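The new branch duck-types LlamaIndex-style `ChatResponse` objects, which keep the provider's raw completion (with an OpenAI-style `usage` object) on a `.raw` attribute. A self-contained sketch of an object shape that would take this path (the stand-in namespaces are illustrative, not the library's types):

```python
from types import SimpleNamespace

# Stand-in for a ChatResponse whose .raw holds an OpenAI-style completion
usage = SimpleNamespace(prompt_tokens=12, completion_tokens=34, total_tokens=46)
result = SimpleNamespace(raw=SimpleNamespace(usage=usage))

if hasattr(result, "raw") and hasattr(result.raw, "usage"):
    u = result.raw.usage
    print({
        "prompt_tokens": getattr(u, "prompt_tokens", 0),
        "completion_tokens": getattr(u, "completion_tokens", 0),
        "total_tokens": getattr(u, "total_tokens", 0),
    })  # {'prompt_tokens': 12, 'completion_tokens': 34, 'total_tokens': 46}
```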
@@ -195,6 +209,7 @@ def num_tokens_from_messages(model="gpt-4o-mini-2024-07-18", prompt_messages=None
          - completion_tokens: Number of tokens in the completion
          - total_tokens: Total number of tokens
      """
+     #import pdb; pdb.set_trace()
      try:
          encoding = tiktoken.encoding_for_model(model)
      except KeyError:
@@ -207,8 +222,8 @@ def num_tokens_from_messages(model="gpt-4o-mini-2024-07-18", prompt_messages=Non
          "gpt-4-32k-0314",
          "gpt-4-0613",
          "gpt-4-32k-0613",
-         "gpt-4o-mini-2024-07-18",
-         "gpt-4o-2024-08-06"
+         "gpt-4o-2024-08-06",
+         "gpt-4o-mini-2024-07-18"
      }:
          tokens_per_message = 3
          tokens_per_name = 1
@@ -290,15 +305,18 @@ def extract_input_data(args, kwargs, result):
      }


- def calculate_llm_cost(token_usage, model_name, model_costs):
+ def calculate_llm_cost(token_usage, model_name, model_costs, model_custom_cost=None):
      """Calculate cost based on token usage and model"""
+     if model_custom_cost is None:
+         model_custom_cost = {}
+     model_costs.update(model_custom_cost)
      if not isinstance(token_usage, dict):
          token_usage = {
              "prompt_tokens": 0,
              "completion_tokens": 0,
              "total_tokens": token_usage if isinstance(token_usage, (int, float)) else 0
          }
-
+

      # Get model costs, defaulting to default costs if unknown
      model_cost = model_costs.get(model_name, {
          "input_cost_per_token": 0.0,
ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py
@@ -129,25 +129,12 @@ class JupyterNotebookHandler:
          # Check if running in Colab
          if JupyterNotebookHandler.is_running_in_colab():
              try:
-                 from google.colab import drive
-                 if not os.path.exists('/content/drive'):
-                     drive.mount('/content/drive')
-                     # logger.info("Google Drive mounted successfully")
-
                  # Look for notebooks in /content first
                  ipynb_files = list(Path('/content').glob('*.ipynb'))
                  if ipynb_files:
                      current_nb = max(ipynb_files, key=os.path.getmtime)
                      # logger.info(f"Found current Colab notebook: {current_nb}")
                      return str(current_nb)
-
-                 # Then check Drive if mounted
-                 if os.path.exists('/content/drive'):
-                     drive_ipynb_files = list(Path('/content/drive').rglob('*.ipynb'))
-                     if drive_ipynb_files:
-                         current_nb = max(drive_ipynb_files, key=os.path.getmtime)
-                         # logger.info(f"Found Colab notebook in Drive: {current_nb}")
-                         return str(current_nb)
              except Exception as e:
                  logger.warning(f"Error in Colab notebook detection: {str(e)}")

ragaai_catalyst/tracers/tracer.py
@@ -113,7 +113,7 @@ class Tracer(AgenticTracing):
          for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
              if key not in auto_instrumentation:
                  auto_instrumentation[key] = True
-
+         self.model_custom_cost = {}
          super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)

          self.project_name = project_name
@@ -176,7 +176,38 @@ class Tracer(AgenticTracing):
          self._upload_task = None
          # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")

-
+     def set_model_cost(self, cost_config):
+         """
+         Set custom cost values for a specific model.
+
+         Args:
+             cost_config (dict): Dictionary containing model cost configuration with keys:
+                 - model_name (str): Name of the model
+                 - input_cost_per_million_token (float): Cost per million input tokens
+                 - output_cost_per_million_token (float): Cost per million output tokens
+
+         Example:
+             tracer.set_model_cost({
+                 "model_name": "gpt-4",
+                 "input_cost_per_million_token": 6,
+                 "output_cost_per_million_token": 2.40
+             })
+         """
+         if not isinstance(cost_config, dict):
+             raise TypeError("cost_config must be a dictionary")
+
+         required_keys = {"model_name", "input_cost_per_million_token", "output_cost_per_million_token"}
+         if not all(key in cost_config for key in required_keys):
+             raise ValueError(f"cost_config must contain all required keys: {required_keys}")
+
+         model_name = cost_config["model_name"]
+         self.model_custom_cost[model_name] = {
+             "input_cost_per_token": float(cost_config["input_cost_per_million_token"]) / 1000000,
+             "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) / 1000000
+         }
+
+
+
      def set_dataset_name(self, dataset_name):
          """
          Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
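Combined with the `calculate_llm_cost` change above, rates registered here are merged over the packaged cost table for matching model names at trace time. A hedged end-to-end sketch, assuming `Tracer` is still exported from the package root (constructor arguments are abbreviated placeholders):

```python
from ragaai_catalyst import Tracer  # assumed public import path

tracer = Tracer(project_name="my-project")  # remaining constructor args omitted
tracer.set_model_cost({
    "model_name": "gpt-4",
    "input_cost_per_million_token": 6,
    "output_cost_per_million_token": 2.40,
})
# Traced gpt-4 calls would now be priced at 6e-06 per input token and
# 2.4e-06 per output token, via the merged model_custom_cost table.
```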
{ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b30.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ragaai_catalyst
- Version: 2.1.5b29
+ Version: 2.1.5b30
  Summary: RAGA AI CATALYST
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>, Tushar Kumar <tushar.kumar@raga.ai>
  Requires-Python: <3.13,>=3.9
@@ -36,7 +36,8 @@ Requires-Dist: requests~=2.32.3
  Requires-Dist: GPUtil~=1.4.0
  Requires-Dist: ipynbname
  Requires-Dist: tiktoken>=0.7.0
- Requires-Dist: giskard~=2.16.0
+ Requires-Dist: tomli>=2.0.0
+ Requires-Dist: rich>=13.9.4
  Provides-Extra: dev
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: pytest-cov; extra == "dev"
@@ -534,6 +535,22 @@ sdg.get_supported_qna()

  # Get supported providers
  sdg.get_supported_providers()
+
+ # Generate examples
+ examples = sdg.generate_examples(
+     user_instruction = 'Generate query like this.',
+     user_examples = 'How to do it?', # Can be a string or a list of strings.
+     user_context = 'Context to generate examples',
+     no_examples = 10,
+     model_config = {"provider": "openai", "model": "gpt-4o-mini"}
+ )
+
+ # Generate examples from a csv
+ sdg.generate_examples_from_csv(
+     csv_path = 'path/to/csv',
+     no_examples = 5,
+     model_config = {'provider': 'openai', 'model': 'gpt-4o-mini'}
+ )
  ```

