ragaai-catalyst 2.1.5b29__py3-none-any.whl → 2.1.5b31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. ragaai_catalyst/__init__.py +2 -0
  2. ragaai_catalyst/ragaai_catalyst.py +23 -0
  3. ragaai_catalyst/redteaming/__init__.py +7 -0
  4. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  5. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  6. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  7. ragaai_catalyst/redteaming/evaluator.py +125 -0
  8. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  9. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  10. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  11. ragaai_catalyst/redteaming/requirements.txt +4 -0
  12. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  13. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  14. ragaai_catalyst/redteaming/upload_result.py +38 -0
  15. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  16. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  17. ragaai_catalyst/redteaming_old.py +171 -0
  18. ragaai_catalyst/synthetic_data_generation.py +354 -13
  19. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +19 -42
  20. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +5 -13
  21. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +73 -11
  22. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +3 -1
  23. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -0
  24. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +28 -16
  25. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +0 -13
  26. ragaai_catalyst/tracers/tracer.py +31 -4
  27. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/METADATA +110 -18
  28. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/RECORD +31 -17
  29. ragaai_catalyst/redteaming.py +0 -171
  30. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/LICENSE +0 -0
  31. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/WHEEL +0 -0
  32. {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,28 @@
  import os
- from groq import Groq
- import google.generativeai as genai
- import openai
- import PyPDF2
+ import ast
  import csv
+ import json
+ import random
+ import PyPDF2
  import markdown
  import pandas as pd
- import json
- from litellm import completion
- import litellm
  from tqdm import tqdm
+
+ import openai
  import tiktoken
- # import internal_api_completion
- # import proxy_call
+ import litellm
+ import google.generativeai as genai
+ from groq import Groq
+ from litellm import completion
+
  from .internal_api_completion import api_completion as internal_api_completion
  from .proxy_call import api_completion as proxy_api_completion
- # from ragaai_catalyst import internal_api_completion
- # from ragaai_catalyst import proxy_call
- import ast
 
- # dotenv.load_dotenv()
+ from typing import Optional, List, Dict, Any
+
+ import logging
+
+ logger = logging.getLogger(__name__)
 
 
  class SyntheticDataGeneration:
      """
@@ -329,6 +332,59 @@ class SyntheticDataGeneration:
 
          json_data = json.loads(content)
          return pd.DataFrame(json_data)
+
+     def _generate_raw_llm_response(self, text, system_message: Optional[str] = None, model_config: Dict[str, Any] = dict(), api_key=None):
+         """
+         Generate questions using LiteLLM, which supports multiple providers (OpenAI, Groq, Gemini, etc.).
+
+         Args:
+             text (str): The input text to generate questions from.
+             system_message (str): The system message for the AI model.
+             model_config (dict): Configuration dictionary containing model details.
+                 Required keys:
+                 - model: The model identifier (e.g., "gpt-4", "gemini-pro", "mixtral-8x7b-32768")
+                 Optional keys:
+                 - api_base: Custom API base URL if needed
+                 - max_tokens: Maximum tokens in response
+                 - temperature: Temperature for response generation
+             api_key (str, optional): The API key for the model provider.
+
+         Returns:
+             str: The raw text content of the LLM response.
+
+         Raises:
+             Exception: If there's an error in generating the response.
+         """
+         messages = [
+             {"role": "system", "content": system_message},
+             {"role": "user", "content": text}
+         ]
+
+         completion_params = {
+             "model": model_config.get("model", 'gpt-4o'),
+             "messages": messages,
+             "api_key": api_key
+         }
+
+         if "api_base" in model_config:
+             completion_params["api_base"] = model_config["api_base"]
+         if "api_version" in model_config:
+             completion_params["api_version"] = model_config["api_version"]
+         if "max_tokens" in model_config:
+             completion_params["max_tokens"] = model_config["max_tokens"]
+         if "temperature" in model_config:
+             completion_params["temperature"] = model_config["temperature"]
+         if 'provider' in model_config:
+             completion_params['model'] = f'{model_config["provider"]}/{model_config["model"]}'
+
+         try:
+             response = completion(**completion_params)
+         except Exception as e:
+             if any(error in str(e).lower() for error in ["invalid api key", "incorrect api key", "unauthorized", "authentication"]):
+                 raise ValueError(f"Invalid API key provided for {model_config.get('provider', 'the specified')} provider")
+             raise Exception(f"Error calling LLM API: {str(e)}")
+
+         return response.choices[0].message.content
 
      def _parse_response(self, response, provider):
          """
@@ -478,6 +534,291 @@ class SyntheticDataGeneration:
              list: A list of supported AI providers.
          """
          return ['gemini', 'openai','azure']
+
+     def _get_init_ex_gen_prompt(self):
+         prompt = '''
+         You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+         **Inputs**
+         User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+         User Examples[Optional]: The user may supply examples.
+         User Context[Optional]: The user may supply context to generate the examples from.
+         No of Examples: The total number of examples to produce.
+
+         **Steps to follow**
+         1. Carefully analyze the user's instruction.
+         2. If user examples are provided, check whether the user's instructions refer to them specifically.
+         3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+         4. Comply with the system's guidelines to generate examples, incorporating any user examples or user context as needed.
+
+         **Output Format**:
+         - Present examples in a multiline string with each line a separate example.
+         - Avoid markdown or special formatting.
+         - Omit any boilerplate texts.
+
+         **Instructions for Diversity**:
+         - Vary the examples by context, tone, and (if applicable) technical complexity.
+         - Include edge cases or unconventional scenarios.
+         - Ensure no two examples are conceptually identical.
+
+         **Final Notes**:
+         - Focus on both originality and practical relevance.
+         - Avoid repetitiveness in the examples.
+         '''
+         return prompt
+
+     def _get_iter_ex_gen_prompt(self):
+         prompt = '''
+         You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+         **Inputs**
+         User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+         User Examples[Optional]: The user may supply examples.
+         User Context[Optional]: The user may supply context to generate the examples from.
+         No of Examples: The total number of examples to produce.
+         Relevant Examples: Any examples that are relevant to the user's instruction.
+         Irrelevant Examples: Any examples that are not relevant to the user's instruction.
+
+         **Steps to follow**
+         1. Carefully analyze the user's instruction.
+         2. If user examples are provided, check whether the user's instructions refer to them specifically.
+         3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+         4. Review the relevant and irrelevant examples present, understanding the differences in them.
+         5. Comply with the user's instruction to generate examples, similar to relevant examples and dissimilar to irrelevant ones.
+
+         **Output Format**:
+         - Present examples in a multiline string with each line a separate example.
+         - Avoid markdown or special formatting.
+         - Omit any boilerplate texts.
+
+         **Instructions for Diversity**:
+         - Vary the examples by context, tone, and (if applicable) technical complexity.
+         - Include edge cases or unconventional scenarios.
+         - Ensure no two examples are conceptually identical.
+
+         **Final Notes**:
+         - Focus on both originality and practical relevance.
+         - Avoid repetitiveness in the examples.
+         '''
+         return prompt
+
+     def _generate_examples_iter(
+         self,
+         user_instruction: str,
+         user_examples: Optional[List[str] | str] = None,
+         user_context: Optional[str] = None,
+         relevant_examples: List[str] = [],
+         irrelevant_examples: List[str] = [],
+         no_examples: Optional[int] = None,
+         model_config: Dict[str, Any] = dict(),
+         api_key: Optional[str] = None
+     ):
+         if no_examples is None:
+             no_examples = 5
+         relevant_examples_str = '\n'.join(relevant_examples)
+         irrelevant_examples_str = '\n'.join(irrelevant_examples)
+         user_message = f'**User Instruction:** {user_instruction}'
+         user_message += f'\n\n**No of Examples:** {no_examples}'
+         if user_examples:
+             if isinstance(user_examples, str):
+                 user_examples_str = user_examples
+             elif isinstance(user_examples, list):
+                 user_examples_str = "\n".join(user_examples)
+             else:
+                 raise ValueError(f'Expected string or list of strings as user_examples got {type(user_examples)}')
+             user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+         if relevant_examples:
+             user_message += f'\n\n**Relevant Examples:** \n{relevant_examples_str}'
+         if irrelevant_examples:
+             user_message += f'\n\n**Irrelevant Examples:** \n{irrelevant_examples_str}'
+         if user_context:
+             user_message += f'\n\n**User Context:** \n{user_context}'
+         system_prompt = self._get_iter_ex_gen_prompt()
+         return self._generate_raw_llm_response(user_message, system_prompt, model_config=model_config, api_key=api_key)
+
+     def _generate_examples(
+         self,
+         user_instruction: str,
+         user_examples: Optional[List[str] | str] = None,
+         user_context: Optional[str] = None,
+         no_examples: Optional[int] = None,
+         model_config: Dict[str, Any] = dict(),
+         api_key: Optional[str] = None
+     ):
+         if no_examples is None:
+             no_examples = 5
+         user_message = f"**User Instruction:** {user_instruction}"
+         if user_examples:
+             if isinstance(user_examples, str):
+                 user_examples_str = user_examples
+             elif isinstance(user_examples, list):
+                 user_examples_str = "\n".join(user_examples)
+             else:
+                 raise ValueError(f'Expected string or list of strings as user_examples got {type(user_examples)}')
+             user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+         if user_context:
+             user_message += f'\n\n**User Context:** \n{user_context}'
+         user_message += f'\n\n**No of Examples:** {no_examples}'
+         init_system_prompt = self._get_init_ex_gen_prompt()
+         return self._generate_raw_llm_response(user_message, init_system_prompt, model_config=model_config, api_key=api_key)
+
+     def _get_valid_examples(self, user_indices_str: str, examples: List[str]):
+         valid_examples = []
+         try:
+             user_indices = user_indices_str.strip().split(',')
+             for index_str in user_indices:
+                 try:
+                     index = int(index_str)
+                     if index <= 0 or index > len(examples):
+                         continue
+                 except ValueError as e:
+                     continue
+                 valid_examples.append(examples[index-1])
+         except Exception as e:
+             print(f'Error: {e}')
+         return valid_examples
+
+     def generate_examples(
+         self,
+         user_instruction: str,
+         user_examples: Optional[List[str] | str] = None,
+         user_context: Optional[str] = None,
+         no_examples: Optional[int] = None,
+         model_config: Optional[Dict[str, Any]] = None,
+         api_key: Optional[str] = None,
+         max_iter: int = 0,
+         **kwargs
+     ):
+         if not model_config:
+             model_config = {}
+         provider = model_config.get("provider")
+         api_base = model_config.get("api_base")
+         api_version = model_config.get("api_version")
+         self._initialize_client(provider, api_key, api_base, api_version, internal_llm_proxy=kwargs.get("internal_llm_proxy", None))
+
+         if no_examples is None:
+             no_examples = 5
+         assert no_examples >= 0, 'The number of examples cannot be less than 0'
+         relevant_examples = []
+         irrelevant_examples = []
+         max_relevant_examples = 5
+         max_irrelevant_examples = 10
+         while len(relevant_examples) <= max_relevant_examples or len(irrelevant_examples) <= max_irrelevant_examples:
+             if max_iter <= 0:
+                 break
+             if len(relevant_examples) > max_relevant_examples:
+                 relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+             if len(irrelevant_examples) > max_irrelevant_examples:
+                 irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+             if relevant_examples or irrelevant_examples:
+                 examples_str = self._generate_examples_iter(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     relevant_examples=relevant_examples,
+                     irrelevant_examples=irrelevant_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+             else:
+                 examples_str = self._generate_examples(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     user_context=user_context,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+             examples = [example for example in examples_str.split('\n') if example.strip()]
+             print('Generated Examples:')
+             for i, example in enumerate(examples):
+                 print(f'{i+1}. {example}')
+             relevant_indices = input('Enter the indices of relevant examples (comma-separated): ').strip()
+             if relevant_indices:
+                 relevant_examples.extend(self._get_valid_examples(relevant_indices, examples))
+             irrelevant_indices = input('Enter the indices of irrelevant examples (comma-separated): ').strip()
+             if irrelevant_indices:
+                 irrelevant_examples.extend(self._get_valid_examples(irrelevant_indices, examples))
+             max_iter -= 1
+         if len(relevant_examples) > max_relevant_examples:
+             fin_relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+         else:
+             fin_relevant_examples = relevant_examples
+         if len(irrelevant_examples) > max_irrelevant_examples:
+             fin_irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+         else:
+             fin_irrelevant_examples = irrelevant_examples
+         if relevant_examples or irrelevant_examples:
+             if len(relevant_examples) < no_examples:
+                 more_no_examples = no_examples - len(relevant_examples)
+                 final_examples_str = self._generate_examples_iter(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     user_context=user_context,
+                     relevant_examples=fin_relevant_examples,
+                     irrelevant_examples=fin_irrelevant_examples,
+                     no_examples=more_no_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+                 final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+                 final_examples.extend(relevant_examples)
+             else:
+                 final_examples = random.sample(relevant_examples, no_examples)
+         else:
+             final_examples_str = self._generate_examples(
+                 user_instruction=user_instruction,
+                 user_examples=user_examples,
+                 user_context=user_context,
+                 no_examples=no_examples,
+                 model_config=model_config,
+                 api_key=api_key
+             )
+             final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+         return final_examples
+
+
+     def generate_examples_from_csv(
+         self,
+         csv_path: str,
+         dst_csv_path: Optional[str] = None,
+         no_examples: Optional[int] = None,
+         model_config: Optional[Dict[str, Any]] = None,
+         api_key: Optional[str] = None,
+         **kwargs
+     ):
+         if no_examples is None:
+             no_examples = 5
+         assert no_examples >= 0, 'The number of examples cannot be less than 0'
+         df = pd.read_csv(csv_path)
+         assert 'user_instruction' in df.columns, 'The csv must have a column named user_instruction'
+         fin_df_list = []
+         for i, row in df.iterrows():
+             user_instruction = row['user_instruction']
+             user_examples = row.get('user_examples')
+             user_context = row.get('user_context')
+             row_dict = row.to_dict()
+             try:
+                 examples = self.generate_examples(
+                     user_instruction=user_instruction,
+                     user_examples=user_examples,
+                     user_context=user_context,
+                     no_examples=no_examples,
+                     model_config=model_config,
+                     api_key=api_key
+                 )
+             except Exception as e:
+                 continue
+             row_dict['generated_examples'] = examples
+             fin_df_list.append(row_dict)
+         fin_df = pd.DataFrame(fin_df_list)
+         csv_file, csv_ext = os.path.splitext(csv_path)
+         if not dst_csv_path:
+             dst_csv_path = csv_file + '_with_examples' + csv_ext
+         dst_dir = os.path.dirname(dst_csv_path)
+         if dst_dir:
+             os.makedirs(dst_dir, exist_ok=True)
+         fin_df.to_csv(dst_csv_path)
+         logger.info(f'CSV with generated examples saved at {dst_csv_path}')
+
 
  # Usage:
  # from synthetic_data_generation import SyntheticDataGeneration
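
The two public entry points added above are generate_examples, which is interactive when max_iter > 0 (it prints candidates and reads relevant/irrelevant indices from stdin), and generate_examples_from_csv, which batches over a CSV with a user_instruction column. A non-interactive usage sketch with illustrative instruction text, model choice, and file names:

    from ragaai_catalyst import SyntheticDataGeneration

    sdg = SyntheticDataGeneration()

    # max_iter defaults to 0, so no stdin prompts are issued
    examples = sdg.generate_examples(
        user_instruction="Write short customer-support queries about delayed orders.",
        no_examples=5,
        model_config={"provider": "openai", "model": "gpt-4o-mini"},
        api_key="sk-...",  # illustrative placeholder
    )

    # Batch mode: output lands next to the input as instructions_with_examples.csv
    # unless dst_csv_path is given
    sdg.generate_examples_from_csv(
        csv_path="instructions.csv",  # illustrative path
        no_examples=5,
        model_config={"provider": "openai", "model": "gpt-4o-mini"},
        api_key="sk-...",
    )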
@@ -18,13 +18,9 @@ from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
      Resources,
      Component,
  )
- from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
- from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
- from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
  from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
  from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
  from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
- from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
  from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
 
  import logging
@@ -179,7 +175,10 @@ class BaseTracer:
          )
 
      def stop(self):
-         """Stop the trace and save to JSON file"""
+         """Stop the trace and save to JSON file.
+         Trace upload will happen in a separate process and continue even if the main program exits.
+         """
+         from ..upload.trace_upload_manager import TraceUploadManager, TraceUploadTask
          if hasattr(self, "trace"):
              self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
              self.trace.end_time = datetime.now().astimezone().isoformat()
@@ -263,45 +262,25 @@ class BaseTracer:
 
          logger.info(" Traces saved successfully.")
          logger.debug(f"Trace saved to {filepath}")
-         # Upload traces
-
-         json_file_path = str(filepath)
-         project_name = self.project_name
-         project_id = self.project_id
-         dataset_name = self.dataset_name
-         user_detail = self.user_details
-         base_url = RagaAICatalyst.BASE_URL
-
-         ## create dataset schema
-         response = create_dataset_schema_with_trace(
-             dataset_name=dataset_name, project_name=project_name
-         )
-
-         ##Upload trace metrics
-         response = upload_trace_metric(
-             json_file_path=json_file_path,
-             dataset_name=self.dataset_name,
+         # Submit trace upload task to the manager
+         upload_task = TraceUploadTask(
+             json_file_path=str(filepath),
              project_name=self.project_name,
-         )
-
-         upload_traces = UploadAgenticTraces(
-             json_file_path=json_file_path,
-             project_name=project_name,
-             project_id=project_id,
-             dataset_name=dataset_name,
-             user_detail=user_detail,
-             base_url=base_url,
-         )
-         upload_traces.upload_agentic_traces()
-
-         # Upload Codehash
-         response = upload_code(
+             project_id=self.project_id,
+             dataset_name=self.dataset_name,
+             user_detail=self.user_details,
+             base_url=RagaAICatalyst.BASE_URL,
              hash_id=hash_id,
              zip_path=zip_path,
-             project_name=project_name,
-             dataset_name=dataset_name,
+             max_retries=2,  # Allow 2 retries
+             retry_delay=1.0  # 1 second between retries
          )
-         print(response)
+
+         # Get upload manager singleton and submit task
+         upload_manager = TraceUploadManager()
+         upload_manager.submit_upload(upload_task)
+
+         logger.info(f"Trace upload task submitted and will continue in background")
 
          # Cleanup
          self.components = []
@@ -899,8 +878,6 @@ class BaseTracer:
 
          return {"workflow": sorted_interactions}
 
-     # TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
-
      def execute_metrics(self,
          name: str,
          model: str,
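
Note that trace_upload_manager itself is not among the files changed in this diff, so only its call-side contract is visible here: a TraceUploadTask built from the keyword arguments above, and a TraceUploadManager whose submit_upload() hands the task off for background processing. A minimal sketch of an object shape that would satisfy that contract — an assumption for illustration only, not the package's actual implementation (the docstring says upload runs in a separate process; a worker thread is used here just to keep the sketch short):

    from dataclasses import dataclass
    from concurrent.futures import ThreadPoolExecutor

    @dataclass
    class TraceUploadTask:
        # Field names mirror the keyword arguments used in BaseTracer.stop()
        json_file_path: str
        project_name: str
        project_id: str
        dataset_name: str
        user_detail: dict
        base_url: str
        hash_id: str
        zip_path: str
        max_retries: int = 2
        retry_delay: float = 1.0

    class TraceUploadManager:
        _instance = None  # naive singleton, implied by the bare TraceUploadManager() call

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._executor = ThreadPoolExecutor(max_workers=1)
            return cls._instance

        def submit_upload(self, task: TraceUploadTask):
            # Queue the upload so stop() returns immediately
            return self._executor.submit(self._run, task)

        def _run(self, task: TraceUploadTask):
            # Placeholder: schema creation, then metric/trace/code uploads with retries
            ...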
@@ -12,7 +12,6 @@ import contextvars
  import traceback
  import importlib
  import sys
- from litellm import model_cost
  import logging
 
  try:
@@ -48,12 +47,9 @@ class LLMTracerMixin:
          super().__init__(*args, **kwargs)
          self.file_tracker = TrackName()
          self.patches = []
-         try:
-             self.model_costs = model_cost
-         except Exception as e:
-             self.model_costs = {
-                 "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0}
-             }
+         # Get model costs from manager
+         from ..utils.cost_manager import cost_manager
+         self.cost_manager = cost_manager  # Store reference to cost manager
          self.MAX_PARAMETERS_TO_DISPLAY = 10
          self.current_llm_call_name = contextvars.ContextVar(
              "llm_call_name", default=None
@@ -626,12 +622,8 @@ class LLMTracerMixin:
          # TODO TO check i/p and o/p is according or not
          input = input_data["args"] if hasattr(input_data, "args") else input_data
          output = output_data.output_response if output_data else None
-         #print("Prompt input:",input)
          prompt = self.convert_to_content(input)
-         #print("Prompt Output: ",prompt)
-         #print("Response input: ",output)
          response = self.convert_to_content(output)
-         #print("Response output: ",response)
 
          # TODO: Execute & Add the User requested metrics here
          formatted_metrics = BaseTracer.get_formatted_metric(self.span_attributes_dict, self.project_id, name)
@@ -778,7 +770,7 @@ class LLMTracerMixin:
              token_usage = extract_token_usage(result)
          else:
              token_usage = extract_token_usage(result)
-         cost = calculate_llm_cost(token_usage, model_name, self.model_costs)
+         cost = calculate_llm_cost(token_usage, model_name)
          parameters = extract_parameters(kwargs)
          input_data = extract_input_data(args, kwargs, result)
 
@@ -887,7 +879,7 @@ class LLMTracerMixin:
              token_usage = extract_token_usage(result)
          else:
              token_usage = extract_token_usage(result)
-         cost = calculate_llm_cost(token_usage, model_name, self.model_costs)
+         cost = calculate_llm_cost(token_usage, model_name)
          parameters = extract_parameters(kwargs)
          input_data = extract_input_data(args, kwargs, result)
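
calculate_llm_cost now takes only token_usage and model_name, resolving per-model rates through the new cost_manager instead of the model_costs dict that used to be threaded through the tracer. The arithmetic itself is the usual per-token pricing; a sketch assuming the rate table keeps the same shape as the old fallback shown above ({"input_cost_per_token": ..., "output_cost_per_token": ...}), with illustrative rates:

    MODEL_COSTS = {
        "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0},
        "gpt-4o-mini": {"input_cost_per_token": 1.5e-07, "output_cost_per_token": 6.0e-07},  # illustrative
    }

    def calculate_llm_cost_sketch(token_usage: dict, model_name: str) -> float:
        rates = MODEL_COSTS.get(model_name, MODEL_COSTS["default"])
        return (token_usage.get("prompt_tokens", 0) * rates["input_cost_per_token"]
                + token_usage.get("completion_tokens", 0) * rates["output_cost_per_token"])

    # 1000 prompt + 200 completion tokens at the illustrative rates:
    # 1000 * 1.5e-07 + 200 * 6.0e-07 = 0.00027
    cost = calculate_llm_cost_sketch({"prompt_tokens": 1000, "completion_tokens": 200}, "gpt-4o-mini")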
@@ -2,6 +2,7 @@ import requests
  import json
  import os
  from datetime import datetime
+ from urllib.parse import urlparse, urlunparse
 
 
  class UploadAgenticTraces:
@@ -20,12 +21,71 @@ class UploadAgenticTraces:
          self.base_url = base_url
          self.timeout = 30
 
-
+     @staticmethod
+     def _normalize_url_core(url):
+         """Normalize the core domain of a URL by removing common prefixes and handling ports.
+
+         Args:
+             url (str): The URL to normalize
+
+         Returns:
+             str: The normalized core domain
+         """
+         parsed = urlparse(url.rstrip('/'))
+         netloc = parsed.netloc.lower()
+
+         # Split host and port
+         host = netloc.split(':')[0]
+
+         # Remove common prefixes
+         if host.startswith('www.'):
+             host = host[4:]
+
+         return host
+
+     def _reconcile_urls(self, presigned_url, base_url):
+         """Reconcile two URLs by using the base URL's core if they differ.
+
+         Args:
+             presigned_url (str): The presigned URL from the server
+             base_url (str): The base URL to compare against
+
+         Returns:
+             str: The reconciled URL
+         """
+         # Get normalized core domains
+         presigned_core = self._normalize_url_core(presigned_url)
+         base_core = self._normalize_url_core(base_url)
+
+         # If cores are the same, return the original presigned URL
+         if presigned_core == base_core:
+             return presigned_url
+
+         # Parse URLs
+         parsed_base = urlparse(base_url.rstrip('/'))
+         parsed_presigned = urlparse(presigned_url)
+
+         # Remove API version paths from base_url if present
+         base_path = parsed_base.path
+         for suffix in ['/api', '/v1']:
+             if base_path.endswith(suffix):
+                 base_path = base_path[:-len(suffix)]
+
+         # Construct new URL using components
+         return urlunparse((
+             parsed_base.scheme,
+             parsed_base.netloc,
+             parsed_presigned.path,  # Use presigned path
+             parsed_presigned.params,
+             parsed_presigned.query,
+             parsed_presigned.fragment
+         ))
+
      def _get_presigned_url(self):
          payload = json.dumps({
-                 "datasetName": self.dataset_name,
-                 "numFiles": 1,
-             })
+             "datasetName": self.dataset_name,
+             "numFiles": 1,
+         })
          headers = {
              "Content-Type": "application/json",
              "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -33,14 +93,16 @@ class UploadAgenticTraces:
          }
 
          try:
-             response = requests.request("GET",
-                 f"{self.base_url}/v1/llm/presigned-url",
-                 headers=headers,
-                 data=payload,
-                 timeout=self.timeout)
+             response = requests.request(
+                 "GET",
+                 f"{self.base_url}/v1/llm/presigned-url",
+                 headers=headers,
+                 data=payload,
+                 timeout=self.timeout
+             )
              if response.status_code == 200:
-                 presignedUrls = response.json()["data"]["presignedUrls"][0]
-                 return presignedUrls
+                 presigned_url = response.json()["data"]["presignedUrls"][0]
+                 return self._reconcile_urls(presigned_url, self.base_url)
          except requests.exceptions.RequestException as e:
              print(f"Error while getting presigned url: {e}")
              return None
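
The reconciliation is easiest to follow on concrete values (hostnames below are illustrative). _normalize_url_core drops the port and a leading www.; when the normalized hosts differ, _reconcile_urls keeps the presigned URL's path and query but swaps in the base URL's scheme and netloc, forcing uploads through the configured Catalyst host:

    UploadAgenticTraces._normalize_url_core("https://www.catalyst.example.com:443/api")
    # -> "catalyst.example.com"

    # Given an UploadAgenticTraces instance `uploader`:
    # Same core, so the presigned URL is returned untouched
    uploader._reconcile_urls(
        "https://catalyst.example.com/bucket/trace.json?sig=abc",
        "https://www.catalyst.example.com/api",
    )
    # -> "https://catalyst.example.com/bucket/trace.json?sig=abc"

    # Different core (e.g. an internal object store): base netloc wins, presigned path/query kept
    uploader._reconcile_urls(
        "https://s3.internal:9000/bucket/trace.json?sig=abc",
        "https://catalyst.example.com/api",
    )
    # -> "https://catalyst.example.com/bucket/trace.json?sig=abc"

Note that the base_path stripped of /api or /v1 is computed but never fed into the final urlunparse, so the reconciled URL always takes the presigned path verbatim.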
@@ -4,6 +4,7 @@ import json
  import os
  import logging
  from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
+ from .upload_agentic_traces import UploadAgenticTraces
  logger = logging.getLogger(__name__)
 
  def upload_code(hash_id, zip_path, project_name, dataset_name):
@@ -61,7 +62,8 @@ def _fetch_presigned_url(project_name, dataset_name):
                  timeout=99999)
 
              if response.status_code == 200:
-                 return response.json()["data"]["presignedUrls"][0]
+                 presigned_url = response.json()["data"]["presignedUrls"][0]
+                 return UploadAgenticTraces._reconcile_urls(presigned_url, RagaAICatalyst.BASE_URL)
              else:
                  raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
          except requests.exceptions.RequestException as e:
@@ -6,6 +6,7 @@ from ragaai_catalyst.tracers.agentic_tracing.tracers.base import RagaAICatalyst
 
  def create_dataset_schema_with_trace(project_name, dataset_name):
      def make_request():
+         import pdb; pdb.set_trace()
          headers = {
              "Content-Type": "application/json",
              "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",