ragaai-catalyst 2.1.5b29__py3-none-any.whl → 2.1.5b31__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- ragaai_catalyst/__init__.py +2 -0
- ragaai_catalyst/ragaai_catalyst.py +23 -0
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +354 -13
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +19 -42
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +5 -13
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +73 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +28 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +0 -13
- ragaai_catalyst/tracers/tracer.py +31 -4
- {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/METADATA +110 -18
- {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/RECORD +31 -17
- ragaai_catalyst/redteaming.py +0 -171
- {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.5b29.dist-info → ragaai_catalyst-2.1.5b31.dist-info}/top_level.txt +0 -0
ragaai_catalyst/synthetic_data_generation.py

```diff
@@ -1,25 +1,28 @@
 import os
-
-import google.generativeai as genai
-import openai
-import PyPDF2
+import ast
 import csv
+import json
+import random
+import PyPDF2
 import markdown
 import pandas as pd
-import json
-from litellm import completion
-import litellm
 from tqdm import tqdm
+
+import openai
 import tiktoken
-
-
+import litellm
+import google.generativeai as genai
+from groq import Groq
+from litellm import completion
+
 from .internal_api_completion import api_completion as internal_api_completion
 from .proxy_call import api_completion as proxy_api_completion
-# from ragaai_catalyst import internal_api_completion
-# from ragaai_catalyst import proxy_call
-import ast
 
-
+from typing import Optional, List, Dict, Any
+
+import logging
+
+logger = logging.getLogger(__name__)
 
 class SyntheticDataGeneration:
     """
```
```diff
@@ -329,6 +332,59 @@ class SyntheticDataGeneration:
 
         json_data = json.loads(content)
         return pd.DataFrame(json_data)
+
+    def _generate_raw_llm_response(self, text, system_message: Optional[str] = None, model_config: Dict[str, Any] = dict(), api_key=None):
+        """
+        Generate questions using LiteLLM which supports multiple providers (OpenAI, Groq, Gemini, etc.).
+
+        Args:
+            text (str): The input text to generate questions from.
+            system_message (str): The system message for the AI model.
+            model_config (dict): Configuration dictionary containing model details.
+                Required keys:
+                - model: The model identifier (e.g., "gpt-4", "gemini-pro", "mixtral-8x7b-32768")
+                Optional keys:
+                - api_base: Custom API base URL if needed
+                - max_tokens: Maximum tokens in response
+                - temperature: Temperature for response generation
+            api_key (str, optional): The API key for the model provider.
+
+        Returns:
+            pandas.DataFrame: A DataFrame containing the generated questions and answers.
+
+        Raises:
+            Exception: If there's an error in generating the response.
+        """
+        messages = [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": text}
+        ]
+
+        completion_params = {
+            "model": model_config.get("model", 'gpt-4o'),
+            "messages": messages,
+            "api_key": api_key
+        }
+
+        if "api_base" in model_config:
+            completion_params["api_base"] = model_config["api_base"]
+        if "api_version" in model_config:
+            completion_params["api_version"] = model_config["api_version"]
+        if "max_tokens" in model_config:
+            completion_params["max_tokens"] = model_config["max_tokens"]
+        if "temperature" in model_config:
+            completion_params["temperature"] = model_config["temperature"]
+        if 'provider' in model_config:
+            completion_params['model'] = f'{model_config["provider"]}/{model_config["model"]}'
+
+        try:
+            response = completion(**completion_params)
+        except Exception as e:
+            if any(error in str(e).lower() for error in ["invalid api key", "incorrect api key", "unauthorized", "authentication"]):
+                raise ValueError(f"Invalid API key provided for {model_config.get('provider', 'the specified')} provider")
+            raise Exception(f"Error calling LLM API: {str(e)}")
+
+        return response.choices[0].message.content
 
     def _parse_response(self, response, provider):
         """
```
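The new `_generate_raw_llm_response` turns `model_config` keys into a LiteLLM `completion()` call, prefixing the model identifier with the provider when one is given. A minimal sketch of that mapping, runnable without network access; the config values below are illustrative, not package defaults (the caller still supplies `messages` separately):

```python
from typing import Any, Dict

def build_completion_params(model_config: Dict[str, Any], api_key: str) -> Dict[str, Any]:
    # Mirrors the key-by-key mapping in _generate_raw_llm_response above.
    params: Dict[str, Any] = {"model": model_config.get("model", "gpt-4o"),
                              "api_key": api_key}
    for key in ("api_base", "api_version", "max_tokens", "temperature"):
        if key in model_config:
            params[key] = model_config[key]
    # LiteLLM routes "provider/model" identifiers to the matching backend.
    if "provider" in model_config:
        params["model"] = f'{model_config["provider"]}/{model_config["model"]}'
    return params

params = build_completion_params({"provider": "groq", "model": "mixtral-8x7b-32768"},
                                 api_key="gsk-placeholder")  # placeholder key
assert params["model"] == "groq/mixtral-8x7b-32768"
```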
```diff
@@ -478,6 +534,291 @@ class SyntheticDataGeneration:
 
         list: A list of supported AI providers.
         """
         return ['gemini', 'openai','azure']
+
+    def _get_init_ex_gen_prompt(self):
+        prompt = '''
+        You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+        **Inputs**
+        User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+        User Examples[Optional]: The user may supply examples.
+        User Context[Optional]: The user may supply context to generate the examples from.
+        No of Examples: The total number of examples to produce.
+
+        **Steps to follow**
+        1. Carefully analyze the user's instruction
+        2. If user examples are provided, check whether the user's instructions refer to them specifically.
+        3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+        4. Comply with the system's guidelines to generate examples, incorporating any user examples or user context as needed.
+
+        **Output Format**:
+        - Present examples in a multiline string with each line a separate example.
+        - Avoid markdown or special formatting.
+        - Omit any boilerplate texts.
+
+        **Instructions for Diversity**:
+        - Vary the examples by context, tone, and (if applicable) technical complexity.
+        - Include edge cases or unconventional scenarios.
+        - Ensure no two examples are conceptually identical.
+
+        **Final Notes**:
+        - Focus on both originality and practical relevance.
+        - Avoid repetitiveness in the examples.
+        '''
+        return prompt
+
+    def _get_iter_ex_gen_prompt(self):
+        prompt = '''
+        You are an expert example generator. Your task is to produce creative, relevant and varied examples according to the user instructions.
+
+        **Inputs**
+        User Instruction: The user will provide guidance on how to generate examples, possibly accompanied by their own examples.
+        User Examples[Optional]: The user may supply examples.
+        User Context[Optional]: The user may supply context to generate the examples from.
+        No of Examples: The total number of examples to produce.
+        Relevant Examples: Any examples that are relevant to the user's instruction.
+        Irrelevant Examples: Any examples that are not relevant to the user's instruction.
+
+        **Steps to follow**
+        1. Carefully analyze the user's instruction
+        2. If user examples are provided, check whether the user's instructions refer to them specifically.
+        3. If user context is provided, understand it thoroughly and identify relevant parts to generate examples.
+        4. Review the relevant and irrelevant examples present, understanding the differences in them.
+        5. Comply with the user's instruction to generate examples, similar to relevant examples and dissimilar to irrelevant ones.
+
+        **Output Format**:
+        - Present examples in a multiline string with each line a separate example.
+        - Avoid markdown or special formatting.
+        - Omit any boilerplate texts.
+
+        **Instructions for Diversity**:
+        - Vary the examples by context, tone, and (if applicable) technical complexity.
+        - Include edge cases or unconventional scenarios.
+        - Ensure no two examples are conceptually identical.
+
+        **Final Notes**:
+        - Focus on both originality and practical relevance.
+        - Avoid repetitiveness in the examples.
+        '''
+        return prompt
+
+    def _generate_examples_iter(
+        self,
+        user_instruction: str,
+        user_examples: Optional[List[str] | str] = None,
+        user_context: Optional[str] = None,
+        relevant_examples: List[str]=[],
+        irrelevant_examples: List[str]=[],
+        no_examples: Optional[int] = None,
+        model_config: Dict[str, Any] = dict(),
+        api_key: Optional[str] = None
+    ):
+        if no_examples is None:
+            no_examples = 5
+        relevant_examples_str = '\n'.join(relevant_examples)
+        irrelevant_examples_str = '\n'.join(irrelevant_examples)
+        user_message = f'**User Instruction:** {user_instruction}'
+        user_message += f'\n\n**No of Examples:** {no_examples}'
+        if user_examples:
+            if isinstance(user_examples, str):
+                user_examples_str = user_examples
+            elif isinstance(user_examples, list):
+                user_examples_str = "\n".join(user_examples)
+            else:
+                raise ValueError(f'Expected string or list of strings as user_examples got {type(user_examples)}')
+            user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+        if relevant_examples:
+            user_message += f'\n\n**Relevant Examples:** \n{relevant_examples_str}'
+        if irrelevant_examples:
+            user_message += f'\n\n**Irrelevant Examples:** \n{irrelevant_examples_str}'
+        if user_context:
+            user_message += f'\n\n**User Context:** \n{user_context}'
+        system_prompt = self._get_iter_ex_gen_prompt()
+        return self._generate_raw_llm_response(user_message, system_prompt, model_config=model_config, api_key=api_key)
+
+    def _generate_examples(
+        self,
+        user_instruction:str,
+        user_examples:Optional[List[str]|str]=None,
+        user_context: Optional[str] = None,
+        no_examples:Optional[int]=None,
+        model_config: Dict[str, Any] = dict(),
+        api_key: Optional[str] = None
+    ):
+        if no_examples is None:
+            no_examples = 5
+        user_message = f"**User Instruction:** {user_instruction}"
+        if user_examples:
+            if isinstance(user_examples, str):
+                user_examples_str = user_examples
+            elif isinstance(user_examples, list):
+                user_examples_str = "\n".join(user_examples)
+            else:
+                raise ValueError(f'Expected string or list of strings as user_examples got {type(user_examples)}')
+            user_message += f"\n\n**User Examples:** \n{user_examples_str}"
+        if user_context:
+            user_message += f'\n\n**User Context:** \n{user_context}'
+        user_message += f'\n\n**No of Examples:** {no_examples}'
+        init_system_prompt = self._get_init_ex_gen_prompt()
+        return self._generate_raw_llm_response(user_message, init_system_prompt, model_config=model_config, api_key=api_key)
+
+    def _get_valid_examples(self, user_indices_str: str, examples: List[str]):
+        valid_examples = []
+        try:
+            user_indices = user_indices_str.strip().split(',')
+            for index_str in user_indices:
+                try:
+                    index = int(index_str)
+                    if index <= 0 or index > len(examples):
+                        continue
+                except ValueError as e:
+                    continue
+                valid_examples.append(examples[index-1])
+        except Exception as e:
+            print(f'Error: {e}')
+        return valid_examples
+
+    def generate_examples(
+        self,
+        user_instruction: str,
+        user_examples:Optional[List[str] | str] = None,
+        user_context: Optional[str] = None,
+        no_examples: Optional[int] = None,
+        model_config: Optional[Dict[str, Any]] = None,
+        api_key: Optional[str] = None,
+        max_iter: int = 0,
+        **kwargs
+    ):
+        if not model_config:
+            model_config = {}
+        provider = model_config.get("provider")
+        api_base = model_config.get("api_base")
+        api_version = model_config.get("api_version")
+        self._initialize_client(provider, api_key, api_base, api_version, internal_llm_proxy=kwargs.get("internal_llm_proxy", None))
+
+        if no_examples is None:
+            no_examples = 5
+        assert no_examples >= 0, 'The number of examples cannot be less than 0'
+        relevant_examples = []
+        irrelevant_examples = []
+        max_relevant_examples = 5
+        max_irrelevant_examples = 10
+        while len(relevant_examples) <= max_relevant_examples or len(irrelevant_examples) <= max_irrelevant_examples:
+            if max_iter <= 0:
+                break
+            if len(relevant_examples) > max_relevant_examples:
+                relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+            if len(irrelevant_examples) > max_irrelevant_examples:
+                irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+            if relevant_examples or irrelevant_examples:
+                examples_str = self._generate_examples_iter(
+                    user_instruction = user_instruction,
+                    user_examples = user_examples,
+                    relevant_examples = relevant_examples,
+                    irrelevant_examples = irrelevant_examples,
+                    model_config = model_config,
+                    api_key = api_key
+                )
+            else:
+                examples_str = self._generate_examples(
+                    user_instruction = user_instruction,
+                    user_examples = user_examples,
+                    user_context = user_context,
+                    model_config = model_config,
+                    api_key = api_key
+                )
+            examples = [example for example in examples_str.split('\n') if example.strip()]
+            print('Generated Examples:')
+            for i, example in enumerate(examples):
+                print(f'{i+1}. {example}')
+            relevant_indices = input('Enter the indices of relevant examples (comma-separated): ').strip()
+            if relevant_indices:
+                relevant_examples.extend(self._get_valid_examples(relevant_indices, examples))
+            irrelevant_indices = input('Enter the indices of irrelevant examples (comma-separated): ').strip()
+            if irrelevant_indices:
+                irrelevant_examples.extend(self._get_valid_examples(irrelevant_indices, examples))
+            max_iter -= 1
+        if len(relevant_examples) > max_relevant_examples:
+            fin_relevant_examples = random.sample(relevant_examples, max_relevant_examples)
+        else:
+            fin_relevant_examples = relevant_examples
+        if len(irrelevant_examples) > max_irrelevant_examples:
+            fin_irrelevant_examples = random.sample(irrelevant_examples, max_irrelevant_examples)
+        else:
+            fin_irrelevant_examples = irrelevant_examples
+        if relevant_examples or irrelevant_examples:
+            if len(relevant_examples) < no_examples:
+                more_no_examples = no_examples - len(relevant_examples)
+                final_examples_str = self._generate_examples_iter(
+                    user_instruction = user_instruction,
+                    user_examples = user_examples,
+                    user_context = user_context,
+                    relevant_examples = fin_relevant_examples,
+                    irrelevant_examples = fin_irrelevant_examples,
+                    no_examples = more_no_examples,
+                    model_config = model_config,
+                    api_key = api_key
+                )
+                final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+                final_examples.extend(relevant_examples)
+            else:
+                final_examples = random.sample(relevant_examples, no_examples)
+        else:
+            final_examples_str = self._generate_examples(
+                user_instruction = user_instruction,
+                user_examples = user_examples,
+                user_context = user_context,
+                no_examples = no_examples,
+                model_config = model_config,
+                api_key = api_key
+            )
+            final_examples = [example for example in final_examples_str.split('\n') if example.strip()]
+        return final_examples
+
+
+    def generate_examples_from_csv(
+        self,
+        csv_path: str,
+        dst_csv_path: Optional[str] = None,
+        no_examples: Optional[int] = None,
+        model_config: Optional[Dict[str, Any]] = None,
+        api_key: Optional[str] = None,
+        **kwargs
+    ):
+        if no_examples is None:
+            no_examples = 5
+        assert no_examples >= 0, 'The number of examples cannot be less than 0'
+        df = pd.read_csv(csv_path)
+        assert 'user_instruction' in df.columns, 'The csv must have a column named user_instruction'
+        fin_df_list = []
+        for i, row in df.iterrows():
+            user_instruction = row['user_instruction']
+            user_examples = row.get('user_examples')
+            user_context = row.get('user_context')
+            row_dict = row.to_dict()
+            try:
+                examples = self.generate_examples(
+                    user_instruction = user_instruction,
+                    user_examples = user_examples,
+                    user_context = user_context,
+                    no_examples = no_examples,
+                    model_config = model_config,
+                    api_key = api_key
+                )
+            except Exception as e:
+                continue
+            row_dict['generated_examples'] = examples
+            fin_df_list.append(row_dict)
+        fin_df = pd.DataFrame(fin_df_list)
+        csv_file, csv_ext = os.path.splitext(csv_path)
+        if not dst_csv_path:
+            dst_csv_path = csv_file + '_with_examples' + csv_ext
+        dst_dir = os.path.dirname(dst_csv_path)
+        if dst_dir:
+            os.makedirs(dst_dir, exist_ok=True)
+        fin_df.to_csv(dst_csv_path)
+        logger.info(f'CSV with generated examples saved at {dst_csv_path}')
+
 
 # Usage:
 # from synthetic_data_generation import SyntheticDataGeneration
```
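A hypothetical invocation of the new public API, assuming the class remains importable from its module path and that an OpenAI key is set; the instruction text and model name are placeholders:

```python
import os
from ragaai_catalyst.synthetic_data_generation import SyntheticDataGeneration

sdg = SyntheticDataGeneration()

# max_iter=0 (the default) skips the interactive loop that collects
# relevant/irrelevant indices via input(), so this runs unattended.
examples = sdg.generate_examples(
    user_instruction="Generate short customer-support queries about password resets",
    no_examples=5,
    model_config={"provider": "openai", "model": "gpt-4o-mini"},  # placeholder model
    api_key=os.environ["OPENAI_API_KEY"],
)
print(examples)
```

`generate_examples_from_csv` applies the same call per row of a CSV that has a `user_instruction` column (optionally `user_examples` and `user_context`) and writes the results next to the input as `<name>_with_examples.csv` unless `dst_csv_path` is given.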
ragaai_catalyst/tracers/agentic_tracing/tracers/base.py

```diff
@@ -18,13 +18,9 @@ from ragaai_catalyst.tracers.agentic_tracing.data.data_structure import (
     Resources,
     Component,
 )
-from ragaai_catalyst.tracers.agentic_tracing.upload.upload_agentic_traces import UploadAgenticTraces
-from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
-from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
 from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
 from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
 from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes
-from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
 from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
 
 import logging
```
```diff
@@ -179,7 +175,10 @@ class BaseTracer:
         )
 
     def stop(self):
-        """Stop the trace and save to JSON file
+        """Stop the trace and save to JSON file.
+        Trace upload will happen in a separate process and continue even if the main program exits.
+        """
+        from ..upload.trace_upload_manager import TraceUploadManager, TraceUploadTask
         if hasattr(self, "trace"):
            self.trace.data[0]["end_time"] = datetime.now().astimezone().isoformat()
            self.trace.end_time = datetime.now().astimezone().isoformat()
```
```diff
@@ -263,45 +262,25 @@ class BaseTracer:
 
             logger.info(" Traces saved successfully.")
             logger.debug(f"Trace saved to {filepath}")
-            #
-
-            project_name = self.project_name
-            project_id = self.project_id
-            dataset_name = self.dataset_name
-            user_detail = self.user_details
-            base_url = RagaAICatalyst.BASE_URL
-
-            ## create dataset schema
-            response = create_dataset_schema_with_trace(
-                dataset_name=dataset_name, project_name=project_name
-            )
-
-            ##Upload trace metrics
-            response = upload_trace_metric(
-                json_file_path=json_file_path,
-                dataset_name=self.dataset_name,
+
+            # Submit trace upload task to the manager
+            upload_task = TraceUploadTask(
+                json_file_path=str(filepath),
                 project_name=self.project_name,
-            )
-
-            upload_traces = UploadAgenticTraces(
-                json_file_path=json_file_path,
-                project_name=project_name,
-                project_id=project_id,
-                dataset_name=dataset_name,
-                user_detail=user_detail,
-                base_url=base_url,
-            )
-            upload_traces.upload_agentic_traces()
-
-            # Upload Codehash
-            response = upload_code(
+                project_id=self.project_id,
+                dataset_name=self.dataset_name,
+                user_detail=self.user_details,
+                base_url=RagaAICatalyst.BASE_URL,
                 hash_id=hash_id,
                 zip_path=zip_path,
-                project_name=project_name,
-                dataset_name=dataset_name,
+                max_retries=2,  # Allow 2 retries
+                retry_delay=1.0  # 1 second between retries
             )
-
+
+            # Get upload manager singleton and submit task
+            upload_manager = TraceUploadManager()
+            upload_manager.submit_upload(upload_task)
+
+            logger.info(f"Trace upload task submitted and will continue in background")
 
             # Cleanup
             self.components = []
```
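The `trace_upload_manager` module referenced above is not included in this diff, so the following is only a sketch of the shape the call sites imply: a task object carrying the upload parameters, and a singleton manager that runs uploads on a background worker with retries. Everything beyond the field names used above is an assumption, including whether the real manager is process- or thread-based (the docstring says process; a thread pool is used here for brevity):

```python
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Any, Dict

def _do_upload(task: "TraceUploadTask") -> None:
    """Stub standing in for the real pipeline (schema creation, trace
    upload, code-zip upload); not part of the package."""

@dataclass
class TraceUploadTask:
    json_file_path: str
    project_name: str
    project_id: str
    dataset_name: str
    user_detail: Dict[str, Any]
    base_url: str
    hash_id: str
    zip_path: str
    max_retries: int = 2
    retry_delay: float = 1.0

class TraceUploadManager:
    _instance = None  # "Get upload manager singleton" implies one shared instance

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._pool = ThreadPoolExecutor(max_workers=1)
        return cls._instance

    def submit_upload(self, task: TraceUploadTask):
        # Returns immediately; the worker retries the upload on failure.
        return self._pool.submit(self._run, task)

    def _run(self, task: TraceUploadTask) -> None:
        for attempt in range(task.max_retries + 1):
            try:
                _do_upload(task)
                return
            except Exception:
                if attempt == task.max_retries:
                    raise
                time.sleep(task.retry_delay)
```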
```diff
@@ -899,8 +878,6 @@ class BaseTracer:
 
         return {"workflow": sorted_interactions}
 
-    # TODO: Add support for execute metrics. Maintain list of all metrics to be added for this span
-
     def execute_metrics(self,
                         name: str,
                         model: str,
```
ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py

```diff
@@ -12,7 +12,6 @@ import contextvars
 import traceback
 import importlib
 import sys
-from litellm import model_cost
 import logging
 
 try:
```
```diff
@@ -48,12 +47,9 @@ class LLMTracerMixin:
         super().__init__(*args, **kwargs)
         self.file_tracker = TrackName()
         self.patches = []
-
-
-
-        self.model_costs = {
-            "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0}
-        }
+        # Get model costs from manager
+        from ..utils.cost_manager import cost_manager
+        self.cost_manager = cost_manager  # Store reference to cost manager
         self.MAX_PARAMETERS_TO_DISPLAY = 10
         self.current_llm_call_name = contextvars.ContextVar(
             "llm_call_name", default=None
```
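The `utils/cost_manager` module is likewise absent from this diff; the inline default it replaces suggests a lookup of per-token rates keyed by model name. A minimal sketch under that assumption, with the old hardcoded zero-cost entry as the fallback:

```python
from typing import Dict

class CostManager:
    """Hypothetical stand-in for utils/cost_manager: per-token rates keyed
    by model name, falling back to the old zero-cost default."""
    def __init__(self):
        self._costs: Dict[str, Dict[str, float]] = {
            "default": {"input_cost_per_token": 0.0, "output_cost_per_token": 0.0},
        }

    def get_cost(self, model_name: str) -> Dict[str, float]:
        return self._costs.get(model_name, self._costs["default"])

cost_manager = CostManager()  # module-level singleton, as the import suggests
```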
```diff
@@ -626,12 +622,8 @@ class LLMTracerMixin:
         # TODO TO check i/p and o/p is according or not
         input = input_data["args"] if hasattr(input_data, "args") else input_data
         output = output_data.output_response if output_data else None
-        #print("Prompt input:",input)
         prompt = self.convert_to_content(input)
-        #print("Prompt Output: ",prompt)
-        #print("Response input: ",output)
         response = self.convert_to_content(output)
-        #print("Response output: ",response)
 
         # TODO: Execute & Add the User requested metrics here
         formatted_metrics = BaseTracer.get_formatted_metric(self.span_attributes_dict, self.project_id, name)
```
```diff
@@ -778,7 +770,7 @@ class LLMTracerMixin:
             token_usage = extract_token_usage(result)
         else:
             token_usage = extract_token_usage(result)
-        cost = calculate_llm_cost(token_usage, model_name
+        cost = calculate_llm_cost(token_usage, model_name)
         parameters = extract_parameters(kwargs)
         input_data = extract_input_data(args, kwargs, result)
 
```
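For context, the corrected call computes a spend figure from the extracted token counts. A hedged sketch of that arithmetic (`calculate_llm_cost` is presumably defined in `utils/llm_utils.py`, which this release also changes but whose hunks are not shown; field names and rates below are invented):

```python
def calculate_llm_cost_sketch(token_usage: dict, rates: dict) -> float:
    """Illustrative only: multiply prompt/completion token counts by
    per-token rates. Not the package's actual implementation."""
    return (token_usage.get("prompt_tokens", 0) * rates["input_cost_per_token"]
            + token_usage.get("completion_tokens", 0) * rates["output_cost_per_token"])

usage = {"prompt_tokens": 1200, "completion_tokens": 300}
rates = {"input_cost_per_token": 2.5e-06, "output_cost_per_token": 1.0e-05}
print(f"${calculate_llm_cost_sketch(usage, rates):.4f}")  # $0.0060
```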
```diff
@@ -887,7 +879,7 @@ class LLMTracerMixin:
             token_usage = extract_token_usage(result)
         else:
             token_usage = extract_token_usage(result)
-        cost = calculate_llm_cost(token_usage, model_name
+        cost = calculate_llm_cost(token_usage, model_name)
         parameters = extract_parameters(kwargs)
         input_data = extract_input_data(args, kwargs, result)
 
```
ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py

```diff
@@ -2,6 +2,7 @@ import requests
 import json
 import os
 from datetime import datetime
+from urllib.parse import urlparse, urlunparse
 
 
 class UploadAgenticTraces:
```
```diff
@@ -20,12 +21,71 @@ class UploadAgenticTraces:
         self.base_url = base_url
         self.timeout = 30
 
-
+    @staticmethod
+    def _normalize_url_core(url):
+        """Normalize the core domain of a URL by removing common prefixes and handling ports.
+
+        Args:
+            url (str): The URL to normalize
+
+        Returns:
+            str: The normalized core domain
+        """
+        parsed = urlparse(url.rstrip('/'))
+        netloc = parsed.netloc.lower()
+
+        # Split host and port
+        host = netloc.split(':')[0]
+
+        # Remove common prefixes
+        if host.startswith('www.'):
+            host = host[4:]
+
+        return host
+
+    def _reconcile_urls(self, presigned_url, base_url):
+        """Reconcile two URLs by using the base URL's core if they differ.
+
+        Args:
+            presigned_url (str): The presigned URL from the server
+            base_url (str): The base URL to compare against
+
+        Returns:
+            str: The reconciled URL
+        """
+        # Get normalized core domains
+        presigned_core = self._normalize_url_core(presigned_url)
+        base_core = self._normalize_url_core(base_url)
+
+        # If cores are same, return original presigned URL
+        if presigned_core == base_core:
+            return presigned_url
+
+        # Parse URLs
+        parsed_base = urlparse(base_url.rstrip('/'))
+        parsed_presigned = urlparse(presigned_url)
+
+        # Remove API version paths from base_url if present
+        base_path = parsed_base.path
+        for suffix in ['/api', '/v1']:
+            if base_path.endswith(suffix):
+                base_path = base_path[:-len(suffix)]
+
+        # Construct new URL using components
+        return urlunparse((
+            parsed_base.scheme,
+            parsed_base.netloc,
+            parsed_presigned.path,  # Use presigned path
+            parsed_presigned.params,
+            parsed_presigned.query,
+            parsed_presigned.fragment
+        ))
+
     def _get_presigned_url(self):
         payload = json.dumps({
-            "datasetName": self.dataset_name,
-            "numFiles": 1,
-        })
+                "datasetName": self.dataset_name,
+                "numFiles": 1,
+            })
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
```
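The effect of `_reconcile_urls` can be seen with a pair of made-up hosts: when the presigned URL's normalized host differs from the configured base URL's host, the presigned path and query are re-rooted onto the base URL's scheme and netloc. A standalone re-statement of the logic:

```python
from urllib.parse import urlparse, urlunparse

def normalize_core(url: str) -> str:
    # Same normalization as _normalize_url_core: lowercase host, no port, no www.
    host = urlparse(url.rstrip('/')).netloc.lower().split(':')[0]
    return host[4:] if host.startswith('www.') else host

presigned = "https://storage.internal.example.com/upload/abc?sig=123"  # made-up
base = "https://catalyst.example.com/api"                              # made-up

assert normalize_core(presigned) != normalize_core(base)
p, b = urlparse(presigned), urlparse(base.rstrip('/'))
reconciled = urlunparse((b.scheme, b.netloc, p.path, p.params, p.query, p.fragment))
print(reconciled)  # https://catalyst.example.com/upload/abc?sig=123
```

Note that the method computes `base_path` with `/api` and `/v1` suffixes stripped but never uses it in the returned URL; only the base scheme and netloc are substituted.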
```diff
@@ -33,14 +93,16 @@ class UploadAgenticTraces:
         }
 
         try:
-            response = requests.request(
-                "GET", f"{self.base_url}/v1/llm/presigned-url",
-                headers=headers, data=payload,
-                timeout=self.timeout)
+            response = requests.request(
+                "GET",
+                f"{self.base_url}/v1/llm/presigned-url",
+                headers=headers,
+                data=payload,
+                timeout=self.timeout
+            )
             if response.status_code == 200:
-                presigned_url = response.json()["data"]["presignedUrls"][0]
-                return presigned_url
+                presigned_url = response.json()["data"]["presignedUrls"][0]
+                return self._reconcile_urls(presigned_url, self.base_url)
         except requests.exceptions.RequestException as e:
             print(f"Error while getting presigned url: {e}")
             return None
```
ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py

```diff
@@ -4,6 +4,7 @@ import json
 import os
 import logging
 from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
+from .upload_agentic_traces import UploadAgenticTraces
 logger = logging.getLogger(__name__)
 
 def upload_code(hash_id, zip_path, project_name, dataset_name):
```
```diff
@@ -61,7 +62,8 @@ def _fetch_presigned_url(project_name, dataset_name):
                                 timeout=99999)
 
         if response.status_code == 200:
-            return response.json()["data"]["presignedUrls"][0]
+            presigned_url = response.json()["data"]["presignedUrls"][0]
+            return UploadAgenticTraces._reconcile_urls(presigned_url, RagaAICatalyst.BASE_URL)
         else:
             raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
     except requests.exceptions.RequestException as e:
```
ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py

```diff
@@ -6,6 +6,7 @@ from ragaai_catalyst.tracers.agentic_tracing.tracers.base import RagaAICatalyst
 
 def create_dataset_schema_with_trace(project_name, dataset_name):
     def make_request():
+        import pdb; pdb.set_trace()
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
```
|