PyPI - ragaai-catalyst - Versions diffs - 2.1.5b20__tar.gz → 2.1.5b22__tar.gz - Mend

ragaai-catalyst 2.1.5b20__tar.gz → 2.1.5b22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117)

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.2
 Name: ragaai_catalyst
-Version: 2.1.5b20
+Version: 2.1.5b22
 Summary: RAGA AI CATALYST
-Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>, Tanaya Pakhale <tanaya.pakhale@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>
+Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: <3.13,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/examples/travel_agent/agents.py RENAMED Viewed

@@ -20,6 +20,13 @@ class ItineraryAgent:
             cost=0.01,
             latency=0.5,
         )
+        current_span().add_metrics(
+            name="itinerary_planning",
+            score=0.8,
+            reasoning="Planning comprehensive travel itinerary",
+            cost=0.01,
+            latency=0.5,
+        )
         # Get weather information
         weather = weather_tool(user_preferences["destination"])

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/pyproject.toml RENAMED Viewed

@@ -9,15 +9,14 @@ readme = "README.md"
 requires-python = ">=3.9,<3.13"
 # license = {file = "LICENSE"}
-version = "2.1.5.beta.20"
+version = "2.1.5.beta.22"
 authors = [
     {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
-    {name = "Siddhartha Kosti", email = "siddhartha.kosti@raga.ai"},
-    {name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"},
-    {name = "Tanaya Pakhale", email = "tanaya.pakhale@raga.ai"},
-    {name = "Ritika Goel", email = "ritika.goel@raga.ai"},
     {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
     {name = "Dushyant Mahajan", email = "dushyant.mahajan@raga.ai"},
+    {name = "Siddhartha Kosti", email = "siddhartha.kosti@raga.ai"},
+    {name = "Ritika Goel", email = "ritika.goel@raga.ai"},
+    {name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"}
 ]
 dependencies = [

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/ragaai_catalyst/dataset.py RENAMED Viewed

@@ -9,6 +9,10 @@ import pandas as pd
 logger = logging.getLogger(__name__)
 get_token = RagaAICatalyst.get_token
+# Job status constants
+JOB_STATUS_FAILED = "failed"
+JOB_STATUS_IN_PROGRESS = "in_progress"
+JOB_STATUS_COMPLETED = "success"
 class Dataset:
     BASE_URL = None
@@ -18,6 +22,7 @@ class Dataset:
         self.project_name = project_name
         self.num_projects = 99999
         Dataset.BASE_URL = RagaAICatalyst.BASE_URL
+        self.jobId = None
         headers = {
             "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
         }
@@ -219,7 +224,6 @@ class Dataset:
         try:
             put_csv_response = put_csv_to_presignedUrl(url)
-            print(put_csv_response)
             if put_csv_response.status_code not in (200, 201):
                 raise ValueError('Unable to put csv to the presignedUrl')
         except Exception as e:
@@ -269,6 +273,7 @@ class Dataset:
                 raise ValueError('Unable to upload csv')
             else:
                 print(upload_csv_response['message'])
+                self.jobId = upload_csv_response['data']['jobId']
         except Exception as e:
             logger.error(f"Error in create_from_csv: {e}")
             raise
@@ -436,6 +441,7 @@ class Dataset:
             response_data = response.json()
             if response_data.get('success', False):
                 print(f"{response_data['message']}")
+                self.jobId = response_data['data']['jobId']
             else:
                 raise ValueError(response_data.get('message', 'Failed to add rows'))
@@ -594,6 +600,7 @@ class Dataset:
             if response_data.get('success', False):
                 print(f"Column '{column_name}' added successfully to dataset '{dataset_name}'")
+                self.jobId = response_data['data']['jobId']
             else:
                 raise ValueError(response_data.get('message', 'Failed to add column'))
@@ -601,3 +608,49 @@ class Dataset:
             print(f"Error adding column: {e}")
             raise
+    def get_status(self):
+        headers = {
+            'Content-Type': 'application/json',
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            'X-Project-Id': str(self.project_id),
+        }
+        try:
+            response = requests.get(
+                f'{Dataset.BASE_URL}/job/status',
+                headers=headers,
+                timeout=30)
+            response.raise_for_status()
+            if response.json()["success"]:
+                status_json = [item["status"] for item in response.json()["data"]["content"] if item["id"]==self.jobId]
+                status_json = status_json[0]
+                if status_json == "Failed":
+                    print("Job failed. No results to fetch.")
+                    return JOB_STATUS_FAILED
+                elif status_json == "In Progress":
+                    print(f"Job in progress. Please wait while the job completes.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to track")
+                    return JOB_STATUS_IN_PROGRESS
+                elif status_json == "Completed":
+                    print(f"Job completed. Fetching results.\nVisit Job Status: {Dataset.BASE_URL.removesuffix('/api')}/projects/job-status?projectId={self.project_id} to check")
+                    return JOB_STATUS_COMPLETED
+                else:
+                    logger.error(f"Unknown status received: {status_json}")
+                    return JOB_STATUS_FAILED
+            else:
+                logger.error("Request was not successful")
+                return JOB_STATUS_FAILED
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.ConnectionError as conn_err:
+            logger.error(f"Connection error occurred: {conn_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.Timeout as timeout_err:
+            logger.error(f"Timeout error occurred: {timeout_err}")
+            return JOB_STATUS_FAILED
+        except requests.exceptions.RequestException as req_err:
+            logger.error(f"An error occurred: {req_err}")
+            return JOB_STATUS_FAILED
+        except Exception as e:
+            logger.error(f"An unexpected error occurred: {e}")
+            return JOB_STATUS_FAILED

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/ragaai_catalyst/synthetic_data_generation.py RENAMED Viewed

@@ -8,7 +8,9 @@ import markdown
 import pandas as pd
 import json
 from litellm import completion
+import litellm
 from tqdm import tqdm
+import tiktoken
 # import internal_api_completion
 # import proxy_call
 from .internal_api_completion import api_completion as internal_api_completion
@@ -48,13 +50,18 @@ class SyntheticDataGeneration:
         Raises:
             ValueError: If an invalid provider is specified or API key is missing.
         """
+        text_validity = self.validate_input(text)
+        if text_validity:
+            raise ValueError(text_validity)
         BATCH_SIZE = 5  # Optimal batch size for maintaining response quality
         provider = model_config.get("provider")
         model = model_config.get("model")
         api_base = model_config.get("api_base")
+        api_version = model_config.get("api_version")
         # Initialize the appropriate client based on provider
-        self._initialize_client(provider, api_key, api_base, internal_llm_proxy=kwargs.get("internal_llm_proxy", None))
+        self._initialize_client(provider, api_key, api_base, api_version, internal_llm_proxy=kwargs.get("internal_llm_proxy", None))
         # Initialize progress bar
         pbar = tqdm(total=n, desc="Generating QA pairs")
@@ -88,7 +95,7 @@ class SyntheticDataGeneration:
                     pbar.update(len(batch_df))
             except Exception as e:
-                print(f"Batch generation failed.")
+                print(f"Batch generation failed:{str(e)}")
                 if any(error in str(e) for error in FAILURE_CASES):
                     raise Exception(f"{e}")
@@ -139,7 +146,7 @@ class SyntheticDataGeneration:
         return final_df
-    def _initialize_client(self, provider, api_key, api_base=None, internal_llm_proxy=None):
+    def _initialize_client(self, provider, api_key, api_base=None, api_version=None, internal_llm_proxy=None):
         """Initialize the appropriate client based on provider."""
         if not provider:
             raise ValueError("Model configuration must be provided with a valid provider and model.")
@@ -158,7 +165,17 @@ class SyntheticDataGeneration:
             if api_key is None and os.getenv("OPENAI_API_KEY") is None and internal_llm_proxy is None:
                 raise ValueError("API key must be provided for OpenAI.")
             openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        elif provider == "azure":
+            if api_key is None and os.getenv("AZURE_API_KEY") is None and internal_llm_proxy is None:
+                raise ValueError("API key must be provided for Azure.")
+            litellm.api_key = api_key or os.getenv("AZURE_API_KEY")
+            if api_base is None and os.getenv("AZURE_API_BASE") is None and internal_llm_proxy is None:
+                raise ValueError("API Base must be provided for Azure.")
+            litellm.api_base = api_base or os.getenv("AZURE_API_BASE")
+            if api_version is None and os.getenv("AZURE_API_VERSION") is None and internal_llm_proxy is None:
+                raise ValueError("API version must be provided for Azure.")
+            litellm.api_version = api_version or os.getenv("AZURE_API_VERSION")
         else:
             raise ValueError(f"Provider is not recognized.")
@@ -189,7 +206,15 @@ class SyntheticDataGeneration:
             kwargs=kwargs
         )
+    def validate_input(self,text):
+        if not text.strip():
+            return 'Empty Text provided for qna generation. Please provide valid text'
+        encoding = tiktoken.encoding_for_model("gpt-4")
+        tokens = encoding.encode(text)
+        if len(tokens)<5:
+            return 'Very Small Text provided for qna generation. Please provide longer text'
+        return False
     def _get_system_message(self, question_type, n):
@@ -274,10 +299,14 @@ class SyntheticDataGeneration:
         # Add optional parameters if they exist in model_config
         if "api_base" in model_config:
             completion_params["api_base"] = model_config["api_base"]
+        if "api_version" in model_config:
+            completion_params["api_version"] = model_config["api_version"]
         if "max_tokens" in model_config:
             completion_params["max_tokens"] = model_config["max_tokens"]
         if "temperature" in model_config:
             completion_params["temperature"] = model_config["temperature"]
+        if 'provider' in model_config:
+            completion_params['model'] = f'{model_config["provider"]}/{model_config["model"]}'
         # Make the API call using LiteLLM
         try:
@@ -318,9 +347,13 @@ class SyntheticDataGeneration:
             list_start_index = data.find('[')  # Find the index of the first '['
             substring_data = data[list_start_index:] if list_start_index != -1 else data  # Slice from the list start
             data = substring_data
+        elif provider == "azure":
+            data = response.choices[0].message.content.replace('\n', '')
+            list_start_index = data.find('[')  # Find the index of the first '['
+            substring_data = data[list_start_index:] if list_start_index != -1 else data  # Slice from the list start
+            data = substring_data
         else:
-            raise ValueError("Invalid provider. Choose 'groq', 'gemini', or 'openai'.")
+            raise ValueError("Invalid provider. Choose 'groq', 'gemini', 'azure' or 'openai'.")
         try:
             json_data = json.loads(data)
             return pd.DataFrame(json_data)

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py RENAMED Viewed

@@ -101,7 +101,10 @@ class AgentTracerMixin:
                 original_init = target.__init__
                 def wrapped_init(self, *args, **kwargs):
-                    self.gt = kwargs.get("gt", None) if kwargs else None
+                    gt = kwargs.get("gt") if kwargs else None
+                    if gt is not None:
+                        span = self.span(name)
+                        span.add_gt(gt)
                     # Set agent context before initializing
                     component_id = str(uuid.uuid4())
                     hash_id = top_level_hash_id
@@ -159,7 +162,10 @@ class AgentTracerMixin:
                                 @self.file_tracker.trace_decorator
                                 @functools.wraps(method)
                                 def wrapped_method(self, *args, **kwargs):
-                                    self.gt = kwargs.get("gt", None) if kwargs else None
+                                    gt = kwargs.get("gt") if kwargs else None
+                                    if gt is not None:
+                                        span = tracer.span(name)
+                                        span.add_gt(gt)
                                     # Set this agent as current during method execution
                                     token = tracer.current_agent_id.set(
                                         self._agent_component_id
@@ -247,6 +253,7 @@ class AgentTracerMixin:
                             agent_type,
                             version,
                             capabilities,
+                            top_level_hash_id,
                             *args,
                             **kwargs,
                         )
@@ -256,10 +263,9 @@ class AgentTracerMixin:
         return decorator
     def _trace_sync_agent_execution(
-        self, func, name, agent_type, version, capabilities, *args, **kwargs
+        self, func, name, agent_type, version, capabilities, top_level_hash_id, *args, **kwargs
     ):
-        # Generate a unique hash_id for this execution context
-        hash_id = str(uuid.uuid4())
+        hash_id = top_level_hash_id
         """Synchronous version of agent tracing"""
         if not self.is_active:
@@ -275,7 +281,10 @@ class AgentTracerMixin:
         component_id = str(uuid.uuid4())
         # Extract ground truth if present
-        ground_truth = kwargs.pop("gt", None) if kwargs else None
+        ground_truth = kwargs.pop("gt") if kwargs else None
+        if ground_truth is not None:
+            span = self.span(name)
+            span.add_gt(ground_truth)
         # Get parent agent ID if exists
         parent_agent_id = self.current_agent_id.get()
@@ -293,7 +302,7 @@ class AgentTracerMixin:
         try:
             # Execute the agent
-            result = func(*args, **kwargs)
+            result = self.file_tracker.trace_wrapper(func)(*args, **kwargs)
             # Calculate resource usage
             end_memory = psutil.Process().memory_info().rss
@@ -320,9 +329,6 @@ class AgentTracerMixin:
                 children=children,
                 parent_id=parent_agent_id,
             )
-            # Add ground truth to component data if present
-            if ground_truth is not None:
-                agent_component["data"]["gt"] = ground_truth
             # Add this component as a child to parent's children list
             parent_children.append(agent_component)
@@ -398,7 +404,10 @@ class AgentTracerMixin:
         component_id = str(uuid.uuid4())
         # Extract ground truth if present
-        ground_truth = kwargs.pop("gt", None) if kwargs else None
+        ground_truth = kwargs.pop("gt") if kwargs else None
+        if ground_truth is not None:
+            span = self.span(name)
+            span.add_gt(ground_truth)
         # Get parent agent ID if exists
         parent_agent_id = self.current_agent_id.get()
@@ -414,7 +423,7 @@ class AgentTracerMixin:
         try:
             # Execute the agent
-            result = await func(*args, **kwargs)
+            result = await self.file_tracker.trace_wrapper(func)(*args, **kwargs)
             # Calculate resource usage
             end_memory = psutil.Process().memory_info().rss
@@ -441,10 +450,6 @@ class AgentTracerMixin:
                 parent_id=parent_agent_id,
             )
-            # Add ground truth to component data if present
-            if ground_truth is not None:
-                agent_component["data"]["gt"] = ground_truth
             # Add this component as a child to parent's children list
             parent_children.append(agent_component)
             self.agent_children.set(parent_children)
@@ -576,8 +581,13 @@ class AgentTracerMixin:
             "interactions": interactions,
         }
-        if self.gt:
-            component["data"]["gt"] = self.gt
+        if name in self.span_attributes_dict:
+            span_gt = self.span_attributes_dict[name].gt
+            if span_gt is not None:
+                component["data"]["gt"] = span_gt
+            span_context = self.span_attributes_dict[name].context
+            if span_context:
+                component["data"]["context"] = span_context
         # Reset the SpanAttributes context variable
         self.span_attributes_dict[kwargs["name"]] = SpanAttributes(kwargs["name"])

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/ragaai_catalyst/tracers/agentic_tracing/tracers/base.py RENAMED Viewed

@@ -83,6 +83,7 @@ class BaseTracer:
         self.tracking_thread = None
         self.tracking = False
         self.system_monitor = None
+        self.gt = None
     def _get_system_info(self) -> SystemInfo:
         return self.system_monitor.get_system_info()
@@ -249,7 +250,8 @@ class BaseTracer:
             # Format interactions and add to trace
             interactions = self.format_interactions()
-            trace_data["workflow"] = interactions["workflow"]
+            # trace_data["workflow"] = interactions["workflow"]
+            cleaned_trace_data["workflow"] = interactions["workflow"]
             with open(filepath, "w") as f:
                 json.dump(cleaned_trace_data, f, cls=TracerJSONEncoder, indent=2)

{ragaai_catalyst-2.1.5b20 → ragaai_catalyst-2.1.5b22}/ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py RENAMED Viewed

@@ -45,7 +45,10 @@ class CustomTracerMixin:
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
                 async_wrapper.metadata = metadata
-                self.gt = kwargs.get('gt', None) if kwargs else None
+                gt = kwargs.get('gt') if kwargs else None
+                if gt is not None:
+                    span = self.span(name)
+                    span.add_gt(gt)
                 return await self._trace_custom_execution(
                     func, name or func.__name__, custom_type, version, trace_variables, *args, **kwargs
                 )
@@ -54,7 +57,10 @@ class CustomTracerMixin:
             @functools.wraps(func)
             def sync_wrapper(*args, **kwargs):
                 sync_wrapper.metadata = metadata
-                self.gt = kwargs.get('gt', None) if kwargs else None
+                gt = kwargs.get('gt') if kwargs else None
+                if gt is not None:
+                    span = self.span(name)
+                    span.add_gt(gt)
                 return self._trace_sync_custom_execution(
                     func, name or func.__name__, custom_type, version, trace_variables, *args, **kwargs
                 )
@@ -98,7 +104,7 @@ class CustomTracerMixin:
         try:
             # Execute the function
-            result = func(*args, **kwargs)
+            result = self.file_tracker.trace_wrapper(func)(*args, **kwargs)
             # Calculate resource usage
             end_time = datetime.now().astimezone().isoformat()
@@ -186,7 +192,7 @@ class CustomTracerMixin:
         try:
             # Execute the function
-            result = await func(*args, **kwargs)
+            result = await self.file_tracker.trace_wrapper(func)(*args, **kwargs)
             # Calculate resource usage
             end_time = datetime.now().astimezone().isoformat()
@@ -284,9 +290,13 @@ class CustomTracerMixin:
             "interactions": interactions
         }
-        if self.gt:
-            component["data"]["gt"] = self.gt
+        if kwargs["name"] in self.span_attributes_dict:
+            span_gt = self.span_attributes_dict[kwargs["name"]].gt
+            if span_gt is not None:
+                component["data"]["gt"] = span_gt
+            span_context = self.span_attributes_dict[kwargs["name"]].context
+            if span_context:
+                component["data"]["context"] = span_context
         return component
     def start_component(self, component_id):

ragaai-catalyst 2.1.5b20__tar.gz → 2.1.5b22__tar.gz

ragaai-catalyst 2.1.5b20__tar.gz → 2.1.5b22__tar.gz